LCOV - code coverage report
Current view: top level - test/cctest - test-regexp.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 1071 1072 99.9 %
Date: 2019-01-20 Functions: 55 56 98.2 %

          Line data    Source code
       1             : // Copyright 2012 the V8 project authors. All rights reserved.
       2             : // Redistribution and use in source and binary forms, with or without
       3             : // modification, are permitted provided that the following conditions are
       4             : // met:
       5             : //
       6             : //     * Redistributions of source code must retain the above copyright
       7             : //       notice, this list of conditions and the following disclaimer.
       8             : //     * Redistributions in binary form must reproduce the above
       9             : //       copyright notice, this list of conditions and the following
      10             : //       disclaimer in the documentation and/or other materials provided
      11             : //       with the distribution.
      12             : //     * Neither the name of Google Inc. nor the names of its
      13             : //       contributors may be used to endorse or promote products derived
      14             : //       from this software without specific prior written permission.
      15             : //
      16             : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      17             : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      18             : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      19             : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
      20             : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
      21             : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
      22             : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      23             : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      24             : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      25             : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
      26             : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      27             : 
      28             : #include <cstdlib>
      29             : #include <memory>
      30             : #include <sstream>
      31             : 
      32             : #include "include/v8.h"
      33             : #include "src/api-inl.h"
      34             : #include "src/assembler-arch.h"
      35             : #include "src/ast/ast.h"
      36             : #include "src/char-predicates-inl.h"
      37             : #include "src/objects-inl.h"
      38             : #include "src/ostreams.h"
      39             : #include "src/regexp/jsregexp.h"
      40             : #include "src/regexp/regexp-macro-assembler-irregexp.h"
      41             : #include "src/regexp/regexp-macro-assembler.h"
      42             : #include "src/regexp/regexp-parser.h"
      43             : #include "src/splay-tree-inl.h"
      44             : #include "src/string-stream.h"
      45             : #include "src/unicode-inl.h"
      46             : #include "src/v8.h"
      47             : 
      48             : #ifdef V8_INTERPRETED_REGEXP
      49             : #include "src/regexp/interpreter-irregexp.h"
      50             : #else  // V8_INTERPRETED_REGEXP
      51             : #include "src/macro-assembler.h"
      52             : #if V8_TARGET_ARCH_ARM
      53             : #include "src/regexp/arm/regexp-macro-assembler-arm.h"
      54             : #endif
      55             : #if V8_TARGET_ARCH_ARM64
      56             : #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
      57             : #endif
      58             : #if V8_TARGET_ARCH_S390
      59             : #include "src/regexp/s390/regexp-macro-assembler-s390.h"
      60             : #endif
      61             : #if V8_TARGET_ARCH_PPC
      62             : #include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
      63             : #endif
      64             : #if V8_TARGET_ARCH_MIPS
      65             : #include "src/regexp/mips/regexp-macro-assembler-mips.h"
      66             : #endif
      67             : #if V8_TARGET_ARCH_MIPS64
      68             : #include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
      69             : #endif
      70             : #if V8_TARGET_ARCH_X64
      71             : #include "src/regexp/x64/regexp-macro-assembler-x64.h"
      72             : #endif
      73             : #if V8_TARGET_ARCH_IA32
      74             : #include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
      75             : #endif
      76             : #endif  // V8_INTERPRETED_REGEXP
      77             : #include "test/cctest/cctest.h"
      78             : 
      79             : namespace v8 {
      80             : namespace internal {
      81             : namespace test_regexp {
      82             : 
      83           5 : static bool CheckParse(const char* input) {
                         // Parses |input| with JSRegExp::kNone and returns the boolean that
                         // RegExpParser::ParseRegExp reports. The compile data stays local, so
                         // the tree and any error are not visible to the caller.
      84           5 :   v8::HandleScope scope(CcTest::isolate());
      85          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
      86           5 :   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
      87             :   RegExpCompileData result;
      88             :   return v8::internal::RegExpParser::ParseRegExp(
      89          10 :       CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result);
      90             : }
      91             : 
      92             : 
      93         765 : static void CheckParseEq(const char* input, const char* expected,
      94             :                          bool unicode = false) {
                         // Parses |input| (adding JSRegExp::kUnicode when |unicode| is true),
                         // requires a successful parse with a non-null tree and no error, then
                         // prints the tree and requires the printed text to equal |expected|.
      95         765 :   v8::HandleScope scope(CcTest::isolate());
      96        1530 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
      97         765 :   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
      98             :   RegExpCompileData result;
      99             :   JSRegExp::Flags flags = JSRegExp::kNone;
     100         765 :   if (unicode) flags |= JSRegExp::kUnicode;
     101         765 :   CHECK(v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
     102             :                                                 &reader, flags, &result));
     103         765 :   CHECK_NOT_NULL(result.tree);
     104         765 :   CHECK(result.error.is_null());
     105        1530 :   std::ostringstream os;
     106         765 :   result.tree->Print(os, &zone);
                         // Mismatch-only diagnostic: show "expected | actual" before the
                         // CHECK_EQ below fails.
     107        1530 :   if (strcmp(expected, os.str().c_str()) != 0) {
     108           0 :     printf("%s | %s\n", expected, os.str().c_str());
     109             :   }
     110        2295 :   CHECK_EQ(0, strcmp(expected, os.str().c_str()));
     111         765 : }
     112             : 
     113             : 
     114         225 : static bool CheckSimple(const char* input) {
                         // Parses |input| with JSRegExp::kNone, requires the parse to succeed
                         // (tree present, no error) and returns the `simple` field of the
                         // resulting RegExpCompileData.
     115         225 :   v8::HandleScope scope(CcTest::isolate());
     116         450 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     117         225 :   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
     118             :   RegExpCompileData result;
     119         225 :   CHECK(v8::internal::RegExpParser::ParseRegExp(
     120             :       CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
     121         225 :   CHECK_NOT_NULL(result.tree);
     122         225 :   CHECK(result.error.is_null());
     123         450 :   return result.simple;
     124             : }
     125             : 
     126             : struct MinMaxPair {
                         // Value pair returned by CheckMinMaxMatch: the parsed tree's
                         // min_match() and max_match() results.
     127             :   int min_match;
     128             :   int max_match;
     129             : };
     130             : 
     131             : 
     132         240 : static MinMaxPair CheckMinMaxMatch(const char* input) {
                         // Parses |input| with JSRegExp::kNone, requires the parse to succeed
                         // (tree present, no error) and returns the tree's min_match() and
                         // max_match() values packed into a MinMaxPair.
     133         240 :   v8::HandleScope scope(CcTest::isolate());
     134         480 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     135         240 :   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
     136             :   RegExpCompileData result;
     137         240 :   CHECK(v8::internal::RegExpParser::ParseRegExp(
     138             :       CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
     139         240 :   CHECK_NOT_NULL(result.tree);
     140         240 :   CHECK(result.error.is_null());
     141         240 :   int min_match = result.tree->min_match();
     142         240 :   int max_match = result.tree->max_match();
     143             :   MinMaxPair pair = { min_match, max_match };
     144         480 :   return pair;
     145             : }
     146             : 
     147             : 
     148             : #define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
                       // CHECK_PARSE_ERROR (above): passes when CheckParse(input) returns false.
                       // CHECK_SIMPLE: compares CheckSimple(input) against |simple|.
                       // NOTE(review): the CHECK_SIMPLE expansion already ends in ';', so
                       // "CHECK_SIMPLE(...);" yields an extra empty statement.
                       // CHECK_MIN_MAX: compares both fields of CheckMinMaxMatch(input) against
                       // |min| and |max| inside its own brace scope.
     149             : #define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input));
     150             : #define CHECK_MIN_MAX(input, min, max)                                         \
     151             :   { MinMaxPair min_max = CheckMinMaxMatch(input);                              \
     152             :     CHECK_EQ(min, min_max.min_match);                                          \
     153             :     CHECK_EQ(max, min_max.max_match);                                          \
     154             :   }
     155             : 
     156       28342 : TEST(RegExpParser) {
                         // Exercises the regexp parser through the helpers defined earlier in
                         // this file: expected tree printouts (CheckParseEq), the `simple` flag
                         // (CHECK_SIMPLE) and min/max match lengths (CHECK_MIN_MAX).
     157           5 :   CHECK_PARSE_ERROR("?");
     158             : 
                         // Atoms, alternation, assertions and quantifiers.
     159           5 :   CheckParseEq("abc", "'abc'");
     160           5 :   CheckParseEq("", "%");
     161           5 :   CheckParseEq("abc|def", "(| 'abc' 'def')");
     162           5 :   CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
     163           5 :   CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
     164           5 :   CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
     165           5 :   CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
     166           5 :   CheckParseEq("a*", "(# 0 - g 'a')");
     167           5 :   CheckParseEq("a*?", "(# 0 - n 'a')");
     168           5 :   CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
     169           5 :   CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
     170           5 :   CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
     171           5 :   CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
     172           5 :   CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
     173           5 :   CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
     174           5 :   CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
     175           5 :   CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
     176           5 :   CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
     177           5 :   CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
     178           5 :   CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
     179           5 :   CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
     180           5 :   CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
     181           5 :   CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
                         // Groups, captures and lookaround.
     182           5 :   CheckParseEq("(?:foo)", "(?: 'foo')");
     183           5 :   CheckParseEq("(?: foo )", "(?: ' foo ')");
     184           5 :   CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
     185           5 :   CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
     186           5 :   CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
     187           5 :   CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
     188           5 :   CheckParseEq("foo(?<=bar)baz", "(: 'foo' (<- + 'bar') 'baz')");
     189           5 :   CheckParseEq("foo(?<!bar)baz", "(: 'foo' (<- - 'bar') 'baz')");
     190           5 :   CheckParseEq("()", "(^ %)");
     191           5 :   CheckParseEq("(?=)", "(-> + %)");
                         // Character classes.
     192           5 :   CheckParseEq("[]", "^[\\x00-\\u{10ffff}]");  // Doesn't compile on windows
     193           5 :   CheckParseEq("[^]", "[\\x00-\\u{10ffff}]");  // \uffff isn't in codepage 1252
     194           5 :   CheckParseEq("[x]", "[x]");
     195           5 :   CheckParseEq("[xyz]", "[x y z]");
     196           5 :   CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
     197           5 :   CheckParseEq("[-123]", "[- 1 2 3]");
     198           5 :   CheckParseEq("[^123]", "^[1 2 3]");
     199           5 :   CheckParseEq("]", "']'");
     200           5 :   CheckParseEq("}", "'}'");
     201           5 :   CheckParseEq("[a-b-c]", "[a-b - c]");
     202           5 :   CheckParseEq("[\\d]", "[0-9]");
     203           5 :   CheckParseEq("[x\\dz]", "[x 0-9 z]");
     204           5 :   CheckParseEq("[\\d-z]", "[0-9 - z]");
     205           5 :   CheckParseEq("[\\d-\\d]", "[0-9 0-9 -]");
     206           5 :   CheckParseEq("[z-\\d]", "[0-9 z -]");
     207             :   // Control character outside character class.
     208           5 :   CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
     209           5 :   CheckParseEq("\\c!", "'\\c!'");
     210           5 :   CheckParseEq("\\c_", "'\\c_'");
     211           5 :   CheckParseEq("\\c~", "'\\c~'");
     212           5 :   CheckParseEq("\\c1", "'\\c1'");
     213             :   // Control character inside character class.
     214           5 :   CheckParseEq("[\\c!]", "[\\ c !]");
     215           5 :   CheckParseEq("[\\c_]", "[\\x1f]");
     216           5 :   CheckParseEq("[\\c~]", "[\\ c ~]");
     217           5 :   CheckParseEq("[\\ca]", "[\\x01]");
     218           5 :   CheckParseEq("[\\cz]", "[\\x1a]");
     219           5 :   CheckParseEq("[\\cA]", "[\\x01]");
     220           5 :   CheckParseEq("[\\cZ]", "[\\x1a]");
     221           5 :   CheckParseEq("[\\c1]", "[\\x11]");
     222             : 
     223           5 :   CheckParseEq("[a\\]c]", "[a ] c]");
     224           5 :   CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
     225           5 :   CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ #  ]");
                         // Numeric escapes: expected as octal values or literal digits unless
                         // the pattern has a capture with that number, in which case a back
                         // reference is expected.
     226           5 :   CheckParseEq("\\0", "'\\x00'");
     227           5 :   CheckParseEq("\\8", "'8'");
     228           5 :   CheckParseEq("\\9", "'9'");
     229           5 :   CheckParseEq("\\11", "'\\x09'");
     230           5 :   CheckParseEq("\\11a", "'\\x09a'");
     231           5 :   CheckParseEq("\\011", "'\\x09'");
     232           5 :   CheckParseEq("\\00011", "'\\x0011'");
     233           5 :   CheckParseEq("\\118", "'\\x098'");
     234           5 :   CheckParseEq("\\111", "'I'");
     235           5 :   CheckParseEq("\\1111", "'I1'");
     236           5 :   CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
     237           5 :   CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
     238           5 :   CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
     239           5 :   CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
     240             :   CheckParseEq("(x)(x)(x)\\1*",
     241             :                "(: (^ 'x') (^ 'x') (^ 'x')"
     242           5 :                " (# 0 - g (<- 1)))");
     243             :   CheckParseEq("(x)(x)(x)\\2*",
     244             :                "(: (^ 'x') (^ 'x') (^ 'x')"
     245           5 :                " (# 0 - g (<- 2)))");
     246             :   CheckParseEq("(x)(x)(x)\\3*",
     247             :                "(: (^ 'x') (^ 'x') (^ 'x')"
     248           5 :                " (# 0 - g (<- 3)))");
     249             :   CheckParseEq("(x)(x)(x)\\4*",
     250             :                "(: (^ 'x') (^ 'x') (^ 'x')"
     251           5 :                " (# 0 - g '\\x04'))");
     252             :   CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
     253             :                "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
     254           5 :                " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
     255             :   CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
     256             :                "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
     257           5 :                " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
     258           5 :   CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
     259           5 :   CheckParseEq("(a\\1)", "(^ 'a')");
     260           5 :   CheckParseEq("(\\1a)", "(^ 'a')");
     261           5 :   CheckParseEq("(\\2)(\\1)", "(: (^ (<- 2)) (^ (<- 1)))");
     262           5 :   CheckParseEq("(?=a)?a", "'a'");
     263           5 :   CheckParseEq("(?=a){0,10}a", "'a'");
     264           5 :   CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
     265           5 :   CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
     266           5 :   CheckParseEq("(?!a)?a", "'a'");
     267           5 :   CheckParseEq("\\1(a)", "(: (<- 1) (^ 'a'))");
     268           5 :   CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
     269             :   CheckParseEq("(?!\\1(a\\1)\\1)\\1",
     270           5 :                "(: (-> - (: (<- 1) (^ 'a') (<- 1))) (<- 1))");
     271             :   CheckParseEq("\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1",
     272           5 :                "(: (<- 1) (<- 2) (^ (: 'a' (?: (^ 'b')) (<- 2))) (<- 1))");
     273             :   CheckParseEq("\\1\\2(a(?<=\\1(b\\1\\2))\\2)\\1",
     274           5 :                "(: (<- 1) (<- 2) (^ (: 'a' (<- + (^ 'b')) (<- 2))) (<- 1))");
     275           5 :   CheckParseEq("[\\0]", "[\\x00]");
     276           5 :   CheckParseEq("[\\11]", "[\\x09]");
     277           5 :   CheckParseEq("[\\11a]", "[\\x09 a]");
     278           5 :   CheckParseEq("[\\011]", "[\\x09]");
     279           5 :   CheckParseEq("[\\00011]", "[\\x00 1 1]");
     280           5 :   CheckParseEq("[\\118]", "[\\x09 8]");
     281           5 :   CheckParseEq("[\\111]", "[I]");
     282           5 :   CheckParseEq("[\\1111]", "[I 1]");
                         // Hex and \u escapes; the malformed ones below are expected to come
                         // out as literal text.
     283           5 :   CheckParseEq("\\x34", "'\x34'");
     284           5 :   CheckParseEq("\\x60", "'\x60'");
     285           5 :   CheckParseEq("\\x3z", "'x3z'");
     286           5 :   CheckParseEq("\\c", "'\\c'");
     287           5 :   CheckParseEq("\\u0034", "'\x34'");
     288           5 :   CheckParseEq("\\u003z", "'u003z'");
     289           5 :   CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");
     290           5 :   CheckParseEq("^^^$$$\\b\\b\\b\\b", "(: @^i @$i @b)");
     291           5 :   CheckParseEq("\\b\\b\\b\\b\\B\\B\\B\\B\\b\\b\\b\\b", "(: @b @B @b)");
     292           5 :   CheckParseEq("\\b\\B\\b", "(: @b @B @b)");
     293             : 
     294             :   // Unicode regexps
     295           5 :   CheckParseEq("\\u{12345}", "'\\ud808\\udf45'", true);
     296             :   CheckParseEq("\\u{12345}\\u{23456}", "(! '\\ud808\\udf45' '\\ud84d\\udc56')",
     297           5 :                true);
     298             :   CheckParseEq("\\u{12345}|\\u{23456}", "(| '\\ud808\\udf45' '\\ud84d\\udc56')",
     299           5 :                true);
     300           5 :   CheckParseEq("\\u{12345}{3}", "(# 3 3 g '\\ud808\\udf45')", true);
     301           5 :   CheckParseEq("\\u{12345}*", "(# 0 - g '\\ud808\\udf45')", true);
     302             : 
     303           5 :   CheckParseEq("\\ud808\\udf45*", "(# 0 - g '\\ud808\\udf45')", true);
     304             :   CheckParseEq("[\\ud808\\udf45-\\ud809\\udccc]", "[\\u{012345}-\\u{0124cc}]",
     305           5 :                true);
     306             : 
                         // In this list only the plain one-character literal "a" is expected
                         // to be reported as simple.
     307          10 :   CHECK_SIMPLE("", false);
     308           5 :   CHECK_SIMPLE("a", true);
     309          10 :   CHECK_SIMPLE("a|b", false);
     310          10 :   CHECK_SIMPLE("a\\n", false);
     311          10 :   CHECK_SIMPLE("^a", false);
     312          10 :   CHECK_SIMPLE("a$", false);
     313          10 :   CHECK_SIMPLE("a\\b!", false);
     314          10 :   CHECK_SIMPLE("a\\Bb", false);
     315          10 :   CHECK_SIMPLE("a*", false);
     316          10 :   CHECK_SIMPLE("a*?", false);
     317          10 :   CHECK_SIMPLE("a?", false);
     318          10 :   CHECK_SIMPLE("a??", false);
     319          10 :   CHECK_SIMPLE("a{0,1}?", false);
     320          10 :   CHECK_SIMPLE("a{1,1}?", false);
     321          10 :   CHECK_SIMPLE("a{1,2}?", false);
     322          10 :   CHECK_SIMPLE("a+?", false);
     323          10 :   CHECK_SIMPLE("(a)", false);
     324          10 :   CHECK_SIMPLE("(a)\\1", false);
     325          10 :   CHECK_SIMPLE("(\\1a)", false);
     326          10 :   CHECK_SIMPLE("\\1(a)", false);
     327          10 :   CHECK_SIMPLE("a\\s", false);
     328          10 :   CHECK_SIMPLE("a\\S", false);
     329          10 :   CHECK_SIMPLE("a\\d", false);
     330          10 :   CHECK_SIMPLE("a\\D", false);
     331          10 :   CHECK_SIMPLE("a\\w", false);
     332          10 :   CHECK_SIMPLE("a\\W", false);
     333          10 :   CHECK_SIMPLE("a.", false);
     334          10 :   CHECK_SIMPLE("a\\q", false);
     335          10 :   CHECK_SIMPLE("a[a]", false);
     336          10 :   CHECK_SIMPLE("a[^a]", false);
     337          10 :   CHECK_SIMPLE("a[a-z]", false);
     338          10 :   CHECK_SIMPLE("a[\\q]", false);
     339          10 :   CHECK_SIMPLE("a(?:b)", false);
     340          10 :   CHECK_SIMPLE("a(?=b)", false);
     341          10 :   CHECK_SIMPLE("a(?!b)", false);
     342          10 :   CHECK_SIMPLE("\\x60", false);
     343          10 :   CHECK_SIMPLE("\\u0060", false);
     344          10 :   CHECK_SIMPLE("\\cA", false);
     345          10 :   CHECK_SIMPLE("\\q", false);
     346          10 :   CHECK_SIMPLE("\\1112", false);
     347          10 :   CHECK_SIMPLE("\\0", false);
     348          10 :   CHECK_SIMPLE("(a)\\1", false);
     349          10 :   CHECK_SIMPLE("(?=a)?a", false);
     350          10 :   CHECK_SIMPLE("(?!a)?a\\1", false);
     351          10 :   CHECK_SIMPLE("(?:(?=a))a\\1", false);
     352             : 
                         // Incomplete or malformed {...} quantifiers are expected to parse as
                         // literal text.
     353           5 :   CheckParseEq("a{}", "'a{}'");
     354           5 :   CheckParseEq("a{,}", "'a{,}'");
     355           5 :   CheckParseEq("a{", "'a{'");
     356           5 :   CheckParseEq("a{z}", "'a{z}'");
     357           5 :   CheckParseEq("a{1z}", "'a{1z}'");
     358           5 :   CheckParseEq("a{12z}", "'a{12z}'");
     359           5 :   CheckParseEq("a{12,", "'a{12,'");
     360           5 :   CheckParseEq("a{12,3b", "'a{12,3b'");
     361           5 :   CheckParseEq("{}", "'{}'");
     362           5 :   CheckParseEq("{,}", "'{,}'");
     363           5 :   CheckParseEq("{", "'{'");
     364           5 :   CheckParseEq("{z}", "'{z}'");
     365           5 :   CheckParseEq("{1z}", "'{1z}'");
     366           5 :   CheckParseEq("{12z}", "'{12z}'");
     367           5 :   CheckParseEq("{12,", "'{12,'");
     368           5 :   CheckParseEq("{12,3b", "'{12,3b'");
     369             : 
                         // Expected (min, max) match lengths. RegExpTree::kInfinity is the
                         // expected max for the unbounded repetitions and the back references
                         // listed here.
     370           5 :   CHECK_MIN_MAX("a", 1, 1);
     371           5 :   CHECK_MIN_MAX("abc", 3, 3);
     372           5 :   CHECK_MIN_MAX("a[bc]d", 3, 3);
     373           5 :   CHECK_MIN_MAX("a|bc", 1, 2);
     374           5 :   CHECK_MIN_MAX("ab|c", 1, 2);
     375           5 :   CHECK_MIN_MAX("a||bc", 0, 2);
     376           5 :   CHECK_MIN_MAX("|", 0, 0);
     377           5 :   CHECK_MIN_MAX("(?:ab)", 2, 2);
     378           5 :   CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
     379           5 :   CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
     380           5 :   CHECK_MIN_MAX("(ab)", 2, 2);
     381           5 :   CHECK_MIN_MAX("(ab|cde)", 2, 3);
     382           5 :   CHECK_MIN_MAX("(ab)\\1", 2, RegExpTree::kInfinity);
     383           5 :   CHECK_MIN_MAX("(ab|cde)\\1", 2, RegExpTree::kInfinity);
     384           5 :   CHECK_MIN_MAX("(?:ab)?", 0, 2);
     385           5 :   CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
     386           5 :   CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
     387           5 :   CHECK_MIN_MAX("a?", 0, 1);
     388           5 :   CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
     389           5 :   CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
     390           5 :   CHECK_MIN_MAX("a??", 0, 1);
     391           5 :   CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
     392           5 :   CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
     393           5 :   CHECK_MIN_MAX("(?:a?)?", 0, 1);
     394           5 :   CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
     395           5 :   CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
     396           5 :   CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
     397           5 :   CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
     398           5 :   CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
     399           5 :   CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
     400           5 :   CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
     401           5 :   CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
     402           5 :   CHECK_MIN_MAX("a{0}", 0, 0);
     403           5 :   CHECK_MIN_MAX("(?:a+){0}", 0, 0);
     404           5 :   CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
     405           5 :   CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
     406           5 :   CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
     407           5 :   CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
     408           5 :   CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
     409           5 :   CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
     410           5 :   CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
     411           5 :   CHECK_MIN_MAX("a\\bc", 2, 2);
     412           5 :   CHECK_MIN_MAX("a\\Bc", 2, 2);
     413           5 :   CHECK_MIN_MAX("a\\sc", 3, 3);
     414           5 :   CHECK_MIN_MAX("a\\Sc", 3, 3);
     415           5 :   CHECK_MIN_MAX("a(?=b)c", 2, 2);
     416           5 :   CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
     417           5 :   CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
     418             : 
                         // Named groups and \k<name> references, parsed with the unicode flag.
     419             :   CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<a>",
     420           5 :                "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))", true);
     421             :   CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<b>",
     422           5 :                "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))", true);
     423             :   CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<c>",
     424           5 :                "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))", true);
     425           5 :   CheckParseEq("(?<a>a)\\k<a>", "(: (^ 'a') (<- 1))", true);
     426           5 :   CheckParseEq("(?<a>a\\k<a>)", "(^ 'a')", true);
     427           5 :   CheckParseEq("(?<a>\\k<a>a)", "(^ 'a')", true);
     428           5 :   CheckParseEq("(?<a>\\k<b>)(?<b>\\k<a>)", "(: (^ (<- 2)) (^ (<- 1)))", true);
     429           5 :   CheckParseEq("\\k<a>(?<a>a)", "(: (<- 1) (^ 'a'))", true);
     430             : 
                         // Group names spelled with \u escapes.
     431           5 :   CheckParseEq("(?<\\u{03C0}>a)", "(^ 'a')", true);
     432           5 :   CheckParseEq("(?<\\u03C0>a)", "(^ 'a')", true);
     433           5 : }
     434             : 
     435       28342 : TEST(ParserRegression) {
                         // Pins the expected tree printouts for four specific patterns via
                         // CheckParseEq.
     436           5 :   CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])");
     437           5 :   CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
     438           5 :   CheckParseEq("{", "'{'");
     439           5 :   CheckParseEq("a|", "(| 'a' %)");
     440           5 : }
     441             : 
     442         105 : static void ExpectError(const char* input, const char* expected,
     443             :                         bool unicode = false) {
                         // Parses |input| (adding JSRegExp::kUnicode when |unicode| is true)
                         // and requires the parse to fail: ParseRegExp must return false, no
                         // tree may be produced, and the error text must equal |expected|.
     444         105 :   Isolate* isolate = CcTest::i_isolate();
     445             : 
     446         105 :   v8::HandleScope scope(CcTest::isolate());
     447         210 :   Zone zone(isolate->allocator(), ZONE_NAME);
     448         105 :   FlatStringReader reader(isolate, CStrVector(input));
     449             :   RegExpCompileData result;
     450             :   JSRegExp::Flags flags = JSRegExp::kNone;
     451         105 :   if (unicode) flags |= JSRegExp::kUnicode;
     452         105 :   CHECK(!v8::internal::RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
     453             :                                                  &result));
     454         105 :   CHECK_NULL(result.tree);
     455         105 :   CHECK(!result.error.is_null());
                         // Convert the error to a C string so it can be compared with strcmp.
     456         105 :   std::unique_ptr<char[]> str = result.error->ToCString(ALLOW_NULLS);
     457         210 :   CHECK_EQ(0, strcmp(expected, str.get()));
     458         105 : }
     459             : 
     460             : 
     461       28342 : TEST(Errors) {
                         // Feeds malformed patterns to ExpectError and pins the exact error
                         // message expected for each one.
     462             :   const char* kEndBackslash = "\\ at end of pattern";
     463           5 :   ExpectError("\\", kEndBackslash);
     464             :   const char* kUnterminatedGroup = "Unterminated group";
     465           5 :   ExpectError("(foo", kUnterminatedGroup);
     466             :   const char* kInvalidGroup = "Invalid group";
     467           5 :   ExpectError("(?", kInvalidGroup);
     468             :   const char* kUnterminatedCharacterClass = "Unterminated character class";
     469           5 :   ExpectError("[", kUnterminatedCharacterClass);
     470           5 :   ExpectError("[a-", kUnterminatedCharacterClass);
     471             :   const char* kNothingToRepeat = "Nothing to repeat";
     472           5 :   ExpectError("*", kNothingToRepeat);
     473           5 :   ExpectError("?", kNothingToRepeat);
     474           5 :   ExpectError("+", kNothingToRepeat);
     475           5 :   ExpectError("{1}", kNothingToRepeat);
     476           5 :   ExpectError("{1,2}", kNothingToRepeat);
     477           5 :   ExpectError("{1,}", kNothingToRepeat);
     478             : 
     479             :   // Check that we don't allow more than kMaxCaptures captures.
                         // The loop bound is inclusive, so the pattern holds kMaxCaptures + 1
                         // empty groups.
     480             :   const int kMaxCaptures = 1 << 16;  // Must match RegExpParser::kMaxCaptures.
     481             :   const char* kTooManyCaptures = "Too many captures";
     482           5 :   std::ostringstream os;
     483      327690 :   for (int i = 0; i <= kMaxCaptures; i++) {
     484      327685 :     os << "()";
     485             :   }
     486          10 :   ExpectError(os.str().c_str(), kTooManyCaptures);
     487             : 
                         // Named-capture errors; all of these are parsed with the unicode flag.
     488             :   const char* kInvalidCaptureName = "Invalid capture group name";
     489           5 :   ExpectError("(?<>.)", kInvalidCaptureName, true);
     490           5 :   ExpectError("(?<1>.)", kInvalidCaptureName, true);
     491           5 :   ExpectError("(?<_%>.)", kInvalidCaptureName, true);
     492           5 :   ExpectError("\\k<a", kInvalidCaptureName, true);
     493             :   const char* kDuplicateCaptureName = "Duplicate capture group name";
     494           5 :   ExpectError("(?<a>.)(?<a>.)", kDuplicateCaptureName, true);
     495             :   const char* kInvalidUnicodeEscape = "Invalid Unicode escape sequence";
     496           5 :   ExpectError("(?<\\u{FISK}", kInvalidUnicodeEscape, true);
     497             :   const char* kInvalidCaptureReferenced = "Invalid named capture referenced";
     498           5 :   ExpectError("\\k<a>", kInvalidCaptureReferenced, true);
     499           5 :   ExpectError("(?<b>)\\k<a>", kInvalidCaptureReferenced, true);
     500             :   const char* kInvalidNamedReference = "Invalid named reference";
     501           5 :   ExpectError("\\ka", kInvalidNamedReference, true);
     502           5 : }
     503             : 
     504             : 
     505      327680 : static bool IsDigit(uc16 c) {
     506      655360 :   return ('0' <= c && c <= '9');
     507             : }
     508             : 
     509             : 
     510      327680 : static bool NotDigit(uc16 c) {
     511      327680 :   return !IsDigit(c);
     512             : }
     513             : 
     514             : 
     515      327680 : static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
     516             :   // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
     517             :   // WhiteSpace (7.2) and LineTerminator (7.3) values.
     518      983040 :   return v8::internal::IsWhiteSpaceOrLineTerminator(c);
     519             : }
     520             : 
     521             : 
     522      327680 : static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
     523      327680 :   return !IsWhiteSpaceOrLineTerminator(c);
     524             : }
     525             : 
     526             : 
     527      327680 : static bool NotWord(uc16 c) {
     528      327680 :   return !IsRegExpWord(c);
     529             : }
     530             : 
     531             : 
     532          35 : static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
     533          35 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     534    12481805 :   ZoneList<CharacterRange>* ranges =
     535          35 :       new(&zone) ZoneList<CharacterRange>(2, &zone);
     536          35 :   CharacterRange::AddClassEscape(c, ranges, &zone);
     537     2293795 :   for (uc32 i = 0; i < (1 << 16); i++) {
     538             :     bool in_class = false;
     539    23980550 :     for (int j = 0; !in_class && j < ranges->length(); j++) {
     540    22412810 :       CharacterRange& range = ranges->at(j);
     541    22412810 :       in_class = (range.from() <= i && i <= range.to());
     542             :     }
     543     2293760 :     CHECK_EQ(pred(i), in_class);
     544          35 :   }
     545          35 : }
     546             : 
     547             : 
     548       28342 : TEST(CharacterClassEscapes) {
     549           5 :   TestCharacterClassEscapes('.', IsRegExpNewline);
     550           5 :   TestCharacterClassEscapes('d', IsDigit);
     551           5 :   TestCharacterClassEscapes('D', NotDigit);
     552           5 :   TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
     553           5 :   TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);
     554           5 :   TestCharacterClassEscapes('w', IsRegExpWord);
     555           5 :   TestCharacterClassEscapes('W', NotWord);
     556           5 : }
     557             : 
     558             : 
     559           5 : static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
     560             :                            bool is_one_byte, Zone* zone) {
     561             :   Isolate* isolate = CcTest::i_isolate();
     562           5 :   FlatStringReader reader(isolate, CStrVector(input));
     563             :   RegExpCompileData compile_data;
     564             :   JSRegExp::Flags flags = JSRegExp::kNone;
     565           5 :   if (multiline) flags = JSRegExp::kMultiline;
     566           5 :   if (unicode) flags = JSRegExp::kUnicode;
     567           5 :   if (!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), zone,
     568           5 :                                                &reader, flags, &compile_data))
     569             :     return nullptr;
     570             :   Handle<String> pattern = isolate->factory()
     571             :                                ->NewStringFromUtf8(CStrVector(input))
     572          10 :                                .ToHandleChecked();
     573             :   Handle<String> sample_subject =
     574          10 :       isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
     575             :   RegExpEngine::Compile(isolate, zone, &compile_data, flags, pattern,
     576           5 :                         sample_subject, is_one_byte);
     577           5 :   return compile_data.node;
     578             : }
     579             : 
     580             : 
     581           5 : static void Execute(const char* input, bool multiline, bool unicode,
     582             :                     bool is_one_byte, bool dot_output = false) {
     583           5 :   v8::HandleScope scope(CcTest::isolate());
     584          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     585           5 :   RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone);
     586           5 :   USE(node);
     587             : #ifdef DEBUG
     588             :   if (dot_output) {
     589             :     RegExpEngine::DotPrint(input, node, false);
     590             :   }
     591             : #endif  // DEBUG
     592           5 : }
     593             : 
     594             : 
     595             : class TestConfig {
     596             :  public:
     597             :   typedef int Key;
     598             :   typedef int Value;
     599             :   static const int kNoKey;
     600             :   static int NoValue() { return 0; }
     601             :   static inline int Compare(int a, int b) {
     602     4448805 :     if (a < b)
     603             :       return -1;
     604     2208405 :     else if (a > b)
     605             :       return 1;
     606             :     else
     607             :       return 0;
     608             :   }
     609             : };
     610             : 
     611             : 
     612             : const int TestConfig::kNoKey = 0;
     613             : 
     614             : 
     615             : static unsigned PseudoRandom(int i, int j) {
     616        5940 :   return ~(~((i * 781) ^ (j * 329)));
     617             : }
     618             : 
     619             : 
     620       28342 : TEST(SplayTreeSimple) {
     621             :   static const unsigned kLimit = 1000;
     622           5 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     623             :   ZoneSplayTree<TestConfig> tree(&zone);
     624             :   bool seen[kLimit];
     625           5 :   for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
     626             : #define CHECK_MAPS_EQUAL() do {                                      \
     627             :     for (unsigned k = 0; k < kLimit; k++)                            \
     628             :       CHECK_EQ(seen[k], tree.Find(k, &loc));                         \
     629             :   } while (false)
     630         250 :   for (int i = 0; i < 50; i++) {
     631         815 :     for (int j = 0; j < 50; j++) {
     632        1065 :       int next = PseudoRandom(i, j) % kLimit;
     633        1065 :       if (seen[next]) {
     634             :         // We've already seen this one.  Check the value and remove
     635             :         // it.
     636             :         ZoneSplayTree<TestConfig>::Locator loc;
     637          85 :         CHECK(tree.Find(next, &loc));
     638          85 :         CHECK_EQ(next, loc.key());
     639          85 :         CHECK_EQ(3 * next, loc.value());
     640          85 :         tree.Remove(next);
     641          85 :         seen[next] = false;
     642       85085 :         CHECK_MAPS_EQUAL();
     643             :       } else {
     644             :         // Check that it wasn't there already and then add it.
     645             :         ZoneSplayTree<TestConfig>::Locator loc;
     646         980 :         CHECK(!tree.Find(next, &loc));
     647         980 :         CHECK(tree.Insert(next, &loc));
     648         980 :         CHECK_EQ(next, loc.key());
     649         980 :         loc.set_value(3 * next);
     650         980 :         seen[next] = true;
     651      980980 :         CHECK_MAPS_EQUAL();
     652             :       }
     653        1065 :       int val = PseudoRandom(j, i) % kLimit;
     654        1065 :       if (seen[val]) {
     655             :         ZoneSplayTree<TestConfig>::Locator loc;
     656         135 :         CHECK(tree.FindGreatestLessThan(val, &loc));
     657         270 :         CHECK_EQ(loc.key(), val);
     658             :         break;
     659             :       }
     660        1860 :       val = PseudoRandom(i + j, i - j) % kLimit;
     661         930 :       if (seen[val]) {
     662             :         ZoneSplayTree<TestConfig>::Locator loc;
     663         115 :         CHECK(tree.FindLeastGreaterThan(val, &loc));
     664         230 :         CHECK_EQ(loc.key(), val);
     665             :         break;
     666             :       }
     667             :     }
     668           5 :   }
     669           5 : }
     670             : 
     671             : 
     672       28342 : TEST(DispatchTableConstruction) {
     673             :   // Initialize test data.
     674             :   static const int kLimit = 1000;
     675             :   static const int kRangeCount = 8;
     676             :   static const int kRangeSize = 16;
     677             :   uc16 ranges[kRangeCount][2 * kRangeSize];
     678          45 :   for (int i = 0; i < kRangeCount; i++) {
     679          40 :     Vector<uc16> range(ranges[i], 2 * kRangeSize);
     680        1320 :     for (int j = 0; j < 2 * kRangeSize; j++) {
     681        3840 :       range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
     682             :     }
     683             :     range.Sort();
     684        1280 :     for (int j = 1; j < 2 * kRangeSize; j++) {
     685        3720 :       CHECK(range[j-1] <= range[j]);
     686             :     }
     687             :   }
     688             :   // Enter test data into dispatch table.
     689           5 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     690             :   DispatchTable table(&zone);
     691          45 :   for (int i = 0; i < kRangeCount; i++) {
     692          40 :     uc16* range = ranges[i];
     693         680 :     for (int j = 0; j < 2 * kRangeSize; j += 2)
     694         640 :       table.AddRange(CharacterRange::Range(range[j], range[j + 1]), i, &zone);
     695             :   }
     696             :   // Check that the table looks as we would expect
     697        5000 :   for (int p = 0; p < kLimit; p++) {
     698        5000 :     OutSet* outs = table.Get(p);
     699       45000 :     for (int j = 0; j < kRangeCount; j++) {
     700       40000 :       uc16* range = ranges[j];
     701             :       bool is_on = false;
     702      518600 :       for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
     703      478600 :         is_on = (range[k] <= p && p <= range[k + 1]);
     704       40000 :       CHECK_EQ(is_on, outs->Get(j));
     705             :     }
     706           5 :   }
     707           5 : }
     708             : 
     709             : 
     710             : // Test of debug-only syntax.
     711             : #ifdef DEBUG
     712             : 
     713             : TEST(ParsePossessiveRepetition) {
     714             :   bool old_flag_value = FLAG_regexp_possessive_quantifier;
     715             : 
     716             :   // Enable possessive quantifier syntax.
     717             :   FLAG_regexp_possessive_quantifier = true;
     718             : 
     719             :   CheckParseEq("a*+", "(# 0 - p 'a')");
     720             :   CheckParseEq("a++", "(# 1 - p 'a')");
     721             :   CheckParseEq("a?+", "(# 0 1 p 'a')");
     722             :   CheckParseEq("a{10,20}+", "(# 10 20 p 'a')");
     723             :   CheckParseEq("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");
     724             : 
     725             :   // Disable possessive quantifier syntax.
     726             :   FLAG_regexp_possessive_quantifier = false;
     727             : 
     728             :   CHECK_PARSE_ERROR("a*+");
     729             :   CHECK_PARSE_ERROR("a++");
     730             :   CHECK_PARSE_ERROR("a?+");
     731             :   CHECK_PARSE_ERROR("a{10,20}+");
     732             :   CHECK_PARSE_ERROR("a{10,20}+b");
     733             : 
     734             :   FLAG_regexp_possessive_quantifier = old_flag_value;
     735             : }
     736             : 
     737             : #endif
     738             : 
     739             : // Tests of interpreter.
     740             : 
     741             : 
     742             : #ifndef V8_INTERPRETED_REGEXP
     743             : 
     744             : #if V8_TARGET_ARCH_IA32
     745             : typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
     746             : #elif V8_TARGET_ARCH_X64
     747             : typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
     748             : #elif V8_TARGET_ARCH_ARM
     749             : typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
     750             : #elif V8_TARGET_ARCH_ARM64
     751             : typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler;
     752             : #elif V8_TARGET_ARCH_S390
     753             : typedef RegExpMacroAssemblerS390 ArchRegExpMacroAssembler;
     754             : #elif V8_TARGET_ARCH_PPC
     755             : typedef RegExpMacroAssemblerPPC ArchRegExpMacroAssembler;
     756             : #elif V8_TARGET_ARCH_MIPS
     757             : typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
     758             : #elif V8_TARGET_ARCH_MIPS64
     759             : typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
     760             : #elif V8_TARGET_ARCH_X87
     761             : typedef RegExpMacroAssemblerX87 ArchRegExpMacroAssembler;
     762             : #endif
     763             : 
     764             : class ContextInitializer {
     765             :  public:
     766          55 :   ContextInitializer()
     767             :       : scope_(CcTest::isolate()),
     768         110 :         env_(v8::Context::New(CcTest::isolate())) {
     769          55 :     env_->Enter();
     770          55 :   }
     771          55 :   ~ContextInitializer() {
     772          55 :     env_->Exit();
     773             :   }
     774             :  private:
     775             :   v8::HandleScope scope_;
     776             :   v8::Local<v8::Context> env_;
     777             : };
     778             : 
     779          70 : static ArchRegExpMacroAssembler::Result Execute(Code code, String input,
     780             :                                                 int start_offset,
     781             :                                                 Address input_start,
     782             :                                                 Address input_end,
     783             :                                                 int* captures) {
     784             :   return NativeRegExpMacroAssembler::Execute(
     785             :       code, input, start_offset, reinterpret_cast<byte*>(input_start),
     786          70 :       reinterpret_cast<byte*>(input_end), captures, 0, CcTest::i_isolate());
     787             : }
     788             : 
     789       28342 : TEST(MacroAssemblerNativeSuccess) {
     790           5 :   v8::V8::Initialize();
     791           5 :   ContextInitializer initializer;
     792             :   Isolate* isolate = CcTest::i_isolate();
     793             :   Factory* factory = isolate->factory();
     794          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     795             : 
     796             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
     797          10 :                              4);
     798             : 
     799           5 :   m.Succeed();
     800             : 
     801           5 :   Handle<String> source = factory->NewStringFromStaticChars("");
     802           5 :   Handle<Object> code_object = m.GetCode(source);
     803           5 :   Handle<Code> code = Handle<Code>::cast(code_object);
     804             : 
     805           5 :   int captures[4] = {42, 37, 87, 117};
     806           5 :   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
     807           5 :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
     808             :   Address start_adr = seq_input->GetCharsAddress();
     809             : 
     810             :   NativeRegExpMacroAssembler::Result result =
     811             :       Execute(*code,
     812             :               *input,
     813             :               0,
     814             :               start_adr,
     815             :               start_adr + seq_input->length(),
     816          10 :               captures);
     817             : 
     818           5 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
     819           5 :   CHECK_EQ(-1, captures[0]);
     820           5 :   CHECK_EQ(-1, captures[1]);
     821           5 :   CHECK_EQ(-1, captures[2]);
     822           5 :   CHECK_EQ(-1, captures[3]);
     823           5 : }
     824             : 
     825             : 
     826       28342 : TEST(MacroAssemblerNativeSimple) {
     827           5 :   v8::V8::Initialize();
     828           5 :   ContextInitializer initializer;
     829             :   Isolate* isolate = CcTest::i_isolate();
     830             :   Factory* factory = isolate->factory();
     831          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     832             : 
     833             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
     834          10 :                              4);
     835             : 
     836           5 :   Label fail, backtrack;
     837           5 :   m.PushBacktrack(&fail);
     838           5 :   m.CheckNotAtStart(0, nullptr);
     839           5 :   m.LoadCurrentCharacter(2, nullptr);
     840           5 :   m.CheckNotCharacter('o', nullptr);
     841           5 :   m.LoadCurrentCharacter(1, nullptr, false);
     842           5 :   m.CheckNotCharacter('o', nullptr);
     843           5 :   m.LoadCurrentCharacter(0, nullptr, false);
     844           5 :   m.CheckNotCharacter('f', nullptr);
     845           5 :   m.WriteCurrentPositionToRegister(0, 0);
     846           5 :   m.WriteCurrentPositionToRegister(1, 3);
     847           5 :   m.AdvanceCurrentPosition(3);
     848           5 :   m.PushBacktrack(&backtrack);
     849           5 :   m.Succeed();
     850           5 :   m.Bind(&backtrack);
     851           5 :   m.Backtrack();
     852           5 :   m.Bind(&fail);
     853           5 :   m.Fail();
     854             : 
     855           5 :   Handle<String> source = factory->NewStringFromStaticChars("^foo");
     856           5 :   Handle<Object> code_object = m.GetCode(source);
     857           5 :   Handle<Code> code = Handle<Code>::cast(code_object);
     858             : 
     859           5 :   int captures[4] = {42, 37, 87, 117};
     860           5 :   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
     861           5 :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
     862             :   Address start_adr = seq_input->GetCharsAddress();
     863             : 
     864             :   NativeRegExpMacroAssembler::Result result =
     865             :       Execute(*code,
     866             :               *input,
     867             :               0,
     868             :               start_adr,
     869             :               start_adr + input->length(),
     870          10 :               captures);
     871             : 
     872           5 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
     873           5 :   CHECK_EQ(0, captures[0]);
     874           5 :   CHECK_EQ(3, captures[1]);
     875           5 :   CHECK_EQ(-1, captures[2]);
     876           5 :   CHECK_EQ(-1, captures[3]);
     877             : 
     878           5 :   input = factory->NewStringFromStaticChars("barbarbar");
     879           5 :   seq_input = Handle<SeqOneByteString>::cast(input);
     880             :   start_adr = seq_input->GetCharsAddress();
     881             : 
     882             :   result = Execute(*code,
     883             :                    *input,
     884             :                    0,
     885             :                    start_adr,
     886             :                    start_adr + input->length(),
     887          10 :                    captures);
     888             : 
     889           5 :   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
     890           5 : }
     891             : 
     892             : 
     893       28342 : TEST(MacroAssemblerNativeSimpleUC16) {
     894           5 :   v8::V8::Initialize();
     895           5 :   ContextInitializer initializer;
     896             :   Isolate* isolate = CcTest::i_isolate();
     897             :   Factory* factory = isolate->factory();
     898          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     899             : 
     900             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
     901          10 :                              4);
     902             : 
     903           5 :   Label fail, backtrack;
     904           5 :   m.PushBacktrack(&fail);
     905           5 :   m.CheckNotAtStart(0, nullptr);
     906           5 :   m.LoadCurrentCharacter(2, nullptr);
     907           5 :   m.CheckNotCharacter('o', nullptr);
     908           5 :   m.LoadCurrentCharacter(1, nullptr, false);
     909           5 :   m.CheckNotCharacter('o', nullptr);
     910           5 :   m.LoadCurrentCharacter(0, nullptr, false);
     911           5 :   m.CheckNotCharacter('f', nullptr);
     912           5 :   m.WriteCurrentPositionToRegister(0, 0);
     913           5 :   m.WriteCurrentPositionToRegister(1, 3);
     914           5 :   m.AdvanceCurrentPosition(3);
     915           5 :   m.PushBacktrack(&backtrack);
     916           5 :   m.Succeed();
     917           5 :   m.Bind(&backtrack);
     918           5 :   m.Backtrack();
     919           5 :   m.Bind(&fail);
     920           5 :   m.Fail();
     921             : 
     922           5 :   Handle<String> source = factory->NewStringFromStaticChars("^foo");
     923           5 :   Handle<Object> code_object = m.GetCode(source);
     924           5 :   Handle<Code> code = Handle<Code>::cast(code_object);
     925             : 
     926           5 :   int captures[4] = {42, 37, 87, 117};
     927             :   const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
     928           5 :                               static_cast<uc16>(0x2603)};
     929             :   Handle<String> input = factory->NewStringFromTwoByte(
     930          10 :       Vector<const uc16>(input_data, 6)).ToHandleChecked();
     931           5 :   Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
     932             :   Address start_adr = seq_input->GetCharsAddress();
     933             : 
     934             :   NativeRegExpMacroAssembler::Result result =
     935             :       Execute(*code,
     936             :               *input,
     937             :               0,
     938             :               start_adr,
     939             :               start_adr + input->length(),
     940          10 :               captures);
     941             : 
     942           5 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
     943           5 :   CHECK_EQ(0, captures[0]);
     944           5 :   CHECK_EQ(3, captures[1]);
     945           5 :   CHECK_EQ(-1, captures[2]);
     946           5 :   CHECK_EQ(-1, captures[3]);
     947             : 
     948             :   const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
     949           5 :                                static_cast<uc16>(0x2603)};
     950             :   input = factory->NewStringFromTwoByte(
     951          10 :       Vector<const uc16>(input_data2, 9)).ToHandleChecked();
     952           5 :   seq_input = Handle<SeqTwoByteString>::cast(input);
     953             :   start_adr = seq_input->GetCharsAddress();
     954             : 
     955             :   result = Execute(*code,
     956             :                    *input,
     957             :                    0,
     958             :                    start_adr,
     959           5 :                    start_adr + input->length() * 2,
     960          10 :                    captures);
     961             : 
     962           5 :   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
     963           5 : }
     964             : 
     965             : 
     966       28342 : TEST(MacroAssemblerNativeBacktrack) {
     967           5 :   v8::V8::Initialize();
     968           5 :   ContextInitializer initializer;
     969             :   Isolate* isolate = CcTest::i_isolate();
     970             :   Factory* factory = isolate->factory();
     971          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     972             : 
     973             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
     974          10 :                              0);
     975             : 
     976           5 :   Label fail;
     977           5 :   Label backtrack;
     978           5 :   m.LoadCurrentCharacter(10, &fail);
     979           5 :   m.Succeed();
     980           5 :   m.Bind(&fail);
     981           5 :   m.PushBacktrack(&backtrack);
     982           5 :   m.LoadCurrentCharacter(10, nullptr);
     983           5 :   m.Succeed();
     984           5 :   m.Bind(&backtrack);
     985           5 :   m.Fail();
     986             : 
     987           5 :   Handle<String> source = factory->NewStringFromStaticChars("..........");
     988           5 :   Handle<Object> code_object = m.GetCode(source);
     989           5 :   Handle<Code> code = Handle<Code>::cast(code_object);
     990             : 
     991           5 :   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
     992           5 :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
     993             :   Address start_adr = seq_input->GetCharsAddress();
     994             : 
     995             :   NativeRegExpMacroAssembler::Result result = Execute(
     996          10 :       *code, *input, 0, start_adr, start_adr + input->length(), nullptr);
     997             : 
     998           5 :   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
     999           5 : }
    1000             : 
    1001             : 
    1002       28342 : TEST(MacroAssemblerNativeBackReferenceLATIN1) {
    1003           5 :   v8::V8::Initialize();
    1004           5 :   ContextInitializer initializer;
    1005             :   Isolate* isolate = CcTest::i_isolate();
    1006             :   Factory* factory = isolate->factory();
    1007          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1008             : 
    1009             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1010          10 :                              4);
    1011             : 
    1012           5 :   m.WriteCurrentPositionToRegister(0, 0);
    1013           5 :   m.AdvanceCurrentPosition(2);
    1014           5 :   m.WriteCurrentPositionToRegister(1, 0);
    1015           5 :   Label nomatch;
    1016           5 :   m.CheckNotBackReference(0, false, &nomatch);
    1017           5 :   m.Fail();
    1018           5 :   m.Bind(&nomatch);
    1019           5 :   m.AdvanceCurrentPosition(2);
    1020           5 :   Label missing_match;
    1021           5 :   m.CheckNotBackReference(0, false, &missing_match);
    1022           5 :   m.WriteCurrentPositionToRegister(2, 0);
    1023           5 :   m.Succeed();
    1024           5 :   m.Bind(&missing_match);
    1025           5 :   m.Fail();
    1026             : 
    1027           5 :   Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
    1028           5 :   Handle<Object> code_object = m.GetCode(source);
    1029           5 :   Handle<Code> code = Handle<Code>::cast(code_object);
    1030             : 
    1031           5 :   Handle<String> input = factory->NewStringFromStaticChars("fooofo");
    1032           5 :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1033             :   Address start_adr = seq_input->GetCharsAddress();
    1034             : 
    1035             :   int output[4];
    1036             :   NativeRegExpMacroAssembler::Result result =
    1037             :       Execute(*code,
    1038             :               *input,
    1039             :               0,
    1040             :               start_adr,
    1041             :               start_adr + input->length(),
    1042          10 :               output);
    1043             : 
    1044           5 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1045           5 :   CHECK_EQ(0, output[0]);
    1046           5 :   CHECK_EQ(2, output[1]);
    1047           5 :   CHECK_EQ(6, output[2]);
    1048           5 :   CHECK_EQ(-1, output[3]);
    1049           5 : }
    1050             : 
    1051             : 
    1052       28342 : TEST(MacroAssemblerNativeBackReferenceUC16) {
    1053           5 :   v8::V8::Initialize();
    1054           5 :   ContextInitializer initializer;
    1055             :   Isolate* isolate = CcTest::i_isolate();
    1056             :   Factory* factory = isolate->factory();
    1057          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1058             : 
    1059             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
    1060          10 :                              4);
    1061             : 
    1062           5 :   m.WriteCurrentPositionToRegister(0, 0);
    1063           5 :   m.AdvanceCurrentPosition(2);
    1064           5 :   m.WriteCurrentPositionToRegister(1, 0);
    1065           5 :   Label nomatch;
    1066           5 :   m.CheckNotBackReference(0, false, &nomatch);
    1067           5 :   m.Fail();
    1068           5 :   m.Bind(&nomatch);
    1069           5 :   m.AdvanceCurrentPosition(2);
    1070           5 :   Label missing_match;
    1071           5 :   m.CheckNotBackReference(0, false, &missing_match);
    1072           5 :   m.WriteCurrentPositionToRegister(2, 0);
    1073           5 :   m.Succeed();
    1074           5 :   m.Bind(&missing_match);
    1075           5 :   m.Fail();
    1076             : 
    1077           5 :   Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
    1078           5 :   Handle<Object> code_object = m.GetCode(source);
    1079           5 :   Handle<Code> code = Handle<Code>::cast(code_object);
    1080             : 
    1081           5 :   const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
    1082             :   Handle<String> input = factory->NewStringFromTwoByte(
    1083          10 :       Vector<const uc16>(input_data, 6)).ToHandleChecked();
    1084           5 :   Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
    1085             :   Address start_adr = seq_input->GetCharsAddress();
    1086             : 
    1087             :   int output[4];
    1088             :   NativeRegExpMacroAssembler::Result result =
    1089             :       Execute(*code,
    1090             :               *input,
    1091             :               0,
    1092             :               start_adr,
    1093           5 :               start_adr + input->length() * 2,
    1094          10 :               output);
    1095             : 
    1096           5 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1097           5 :   CHECK_EQ(0, output[0]);
    1098           5 :   CHECK_EQ(2, output[1]);
    1099           5 :   CHECK_EQ(6, output[2]);
    1100           5 :   CHECK_EQ(-1, output[3]);
    1101           5 : }
    1102             : 
    1103             : 
    1104             : 
    1105       28342 : TEST(MacroAssemblernativeAtStart) {  // Assembles one LATIN1 program and executes it on "foobar" twice (from start_adr and from start_adr + 3); both runs must report SUCCESS.
    1106           5 :   v8::V8::Initialize();
    1107           5 :   ContextInitializer initializer;
    1108             :   Isolate* isolate = CcTest::i_isolate();
    1109             :   Factory* factory = isolate->factory();
    1110          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1111             : 
    1112             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1113          10 :                              0);
    1114             : 
    1115           5 :   Label not_at_start, newline, fail;
    1116           5 :   m.CheckNotAtStart(0, &not_at_start);
    1117             :   // Start-of-input branch: check that prevchar = '\n' and current = 'f'.
    1118           5 :   m.CheckCharacter('\n', &newline);
    1119           5 :   m.Bind(&fail);  // Shared failure exit; also the target of the character loads/checks below.
    1120           5 :   m.Fail();
    1121           5 :   m.Bind(&newline);
    1122           5 :   m.LoadCurrentCharacter(0, &fail);
    1123           5 :   m.CheckNotCharacter('f', &fail);
    1124           5 :   m.Succeed();
    1125             : 
    1126           5 :   m.Bind(&not_at_start);
    1127             :   // Not-at-start branch: check that prevchar = 'o' and current = 'b'.
    1128           5 :   Label prevo;
    1129           5 :   m.CheckCharacter('o', &prevo);
    1130           5 :   m.Fail();
    1131           5 :   m.Bind(&prevo);
    1132           5 :   m.LoadCurrentCharacter(0, &fail);
    1133           5 :   m.CheckNotCharacter('b', &fail);
    1134           5 :   m.Succeed();
    1135             : 
    1136           5 :   Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)");  // Pattern text handed to GetCode(); the program itself is the hand-written sequence above.
    1137           5 :   Handle<Object> code_object = m.GetCode(source);
    1138           5 :   Handle<Code> code = Handle<Code>::cast(code_object);
    1139             : 
    1140           5 :   Handle<String> input = factory->NewStringFromStaticChars("foobar");
    1141           5 :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1142             :   Address start_adr = seq_input->GetCharsAddress();
    1143             : 
    1144             :   NativeRegExpMacroAssembler::Result result = Execute(  // First run: index 0, beginning at the first character.
    1145          10 :       *code, *input, 0, start_adr, start_adr + input->length(), nullptr);
    1146             : 
    1147           5 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1148             : 
    1149             :   result = Execute(*code, *input, 3, start_adr + 3, start_adr + input->length(),  // Second run: index 3, i.e. the 'b' of "foobar", preceded by 'o'.
    1150          10 :                    nullptr);
    1151             : 
    1152           5 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1153           5 : }
    1154             : 
    1155             : 
    1156       28342 : TEST(MacroAssemblerNativeBackRefNoCase) {  // Exercises CheckNotBackReferenceIgnoreCase on "aBcAbCABCxYzab" and verifies the four register values written to output.
    1157           5 :   v8::V8::Initialize();
    1158           5 :   ContextInitializer initializer;
    1159             :   Isolate* isolate = CcTest::i_isolate();
    1160             :   Factory* factory = isolate->factory();
    1161          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1162             : 
    1163             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1164          10 :                              4);
    1165             : 
    1166           5 :   Label fail, succ;
    1167             : 
    1168           5 :   m.WriteCurrentPositionToRegister(0, 0);
    1169           5 :   m.WriteCurrentPositionToRegister(2, 0);
    1170           5 :   m.AdvanceCurrentPosition(3);
    1171           5 :   m.WriteCurrentPositionToRegister(3, 0);  // Registers 2 and 3 now delimit the first three characters ("aBc").
    1172           5 :   m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail);  // Match "AbC".
    1173           5 :   m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail);  // Match "ABC".
    1174           5 :   Label expected_fail;
    1175           5 :   m.CheckNotBackReferenceIgnoreCase(2, false, false, &expected_fail);  // "xYz" follows, so this check is expected to branch.
    1176           5 :   m.Bind(&fail);
    1177           5 :   m.Fail();
    1178             : 
    1179           5 :   m.Bind(&expected_fail);
    1180           5 :   m.AdvanceCurrentPosition(3);  // Skip "xYz"
    1181           5 :   m.CheckNotBackReferenceIgnoreCase(2, false, false, &succ);  // Only "ab" remains, so branching to &succ is the success path.
    1182           5 :   m.Fail();
    1183             : 
    1184           5 :   m.Bind(&succ);
    1185           5 :   m.WriteCurrentPositionToRegister(1, 0);
    1186           5 :   m.Succeed();
    1187             : 
    1188             :   Handle<String> source =
    1189           5 :       factory->NewStringFromStaticChars("^(abc)\1\1(?!\1)...(?!\1)");  // NOTE(review): in a C++ literal "\1" is an octal escape (character value 1), not backslash-one; presumably "\\1" was intended - confirm.
    1190           5 :   Handle<Object> code_object = m.GetCode(source);
    1191           5 :   Handle<Code> code = Handle<Code>::cast(code_object);
    1192             : 
    1193           5 :   Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab");
    1194           5 :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1195             :   Address start_adr = seq_input->GetCharsAddress();
    1196             : 
    1197             :   int output[4];
    1198             :   NativeRegExpMacroAssembler::Result result =
    1199             :       Execute(*code,
    1200             :               *input,
    1201             :               0,
    1202             :               start_adr,
    1203             :               start_adr + input->length(),
    1204          10 :               output);
    1205             : 
    1206           5 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1207           5 :   CHECK_EQ(0, output[0]);
    1208           5 :   CHECK_EQ(12, output[1]);  // Register 1 is written only after &succ: 3 + 3 + 3 consumed, then 3 skipped.
    1209           5 :   CHECK_EQ(0, output[2]);
    1210           5 :   CHECK_EQ(3, output[3]);
    1211           5 : }
    1212             : 
    1213             : 
    1214             : 
    1215       28342 : TEST(MacroAssemblerNativeRegisters) {  // Exercises register, backtrack-stack and loop instructions and checks the six positions reported in output.
    1216           5 :   v8::V8::Initialize();
    1217           5 :   ContextInitializer initializer;
    1218             :   Isolate* isolate = CcTest::i_isolate();
    1219             :   Factory* factory = isolate->factory();
    1220          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1221             : 
    1222             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1223          10 :                              6);
    1224             : 
    1225           5 :   uc16 foo_chars[3] = {'f', 'o', 'o'};  // NOTE(review): foo_chars and foo are not referenced again in this test.
    1226             :   Vector<const uc16> foo(foo_chars, 3);
    1227             : 
    1228             :   enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };  // Symbolic register indices: out1..out6 are 0..5, sp is 6, loop_cnt is 7.
    1229           5 :   Label fail;
    1230           5 :   Label backtrack;
    1231           5 :   m.WriteCurrentPositionToRegister(out1, 0);  // Output: [0]
    1232           5 :   m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
    1233           5 :   m.PushBacktrack(&backtrack);
    1234           5 :   m.WriteStackPointerToRegister(sp);  // Remember the stack height so the frames pushed below can be dropped.
    1235             :   // Fill stack and registers
    1236           5 :   m.AdvanceCurrentPosition(2);
    1237           5 :   m.WriteCurrentPositionToRegister(out1, 0);
    1238           5 :   m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
    1239           5 :   m.PushBacktrack(&fail);
    1240             :   // Drop backtrack stack frames.
    1241           5 :   m.ReadStackPointerFromRegister(sp);
    1242             :   // And take the first backtrack (to &backtrack)
    1243           5 :   m.Backtrack();
    1244             : 
    1245           5 :   m.PushCurrentPosition();  // NOTE(review): no label is bound between Backtrack() and here, so these three instructions look emitted-but-never-executed - confirm.
    1246           5 :   m.AdvanceCurrentPosition(2);
    1247           5 :   m.PopCurrentPosition();
    1248             : 
    1249           5 :   m.Bind(&backtrack);
    1250           5 :   m.PopRegister(out1);  // Restores the value pushed before the backtrack entry.
    1251           5 :   m.ReadCurrentPositionFromRegister(out1);
    1252           5 :   m.AdvanceCurrentPosition(3);
    1253           5 :   m.WriteCurrentPositionToRegister(out2, 0);  // [0,3]
    1254             : 
    1255           5 :   Label loop;
    1256           5 :   m.SetRegister(loop_cnt, 0);  // loop counter
    1257           5 :   m.Bind(&loop);
    1258           5 :   m.AdvanceRegister(loop_cnt, 1);
    1259           5 :   m.AdvanceCurrentPosition(1);
    1260           5 :   m.IfRegisterLT(loop_cnt, 3, &loop);  // Counts up 1, 2, 3: three one-character advances.
    1261           5 :   m.WriteCurrentPositionToRegister(out3, 0);  // [0,3,6]
    1262             : 
    1263           5 :   Label loop2;
    1264           5 :   m.SetRegister(loop_cnt, 2);  // loop counter
    1265           5 :   m.Bind(&loop2);
    1266           5 :   m.AdvanceRegister(loop_cnt, -1);
    1267           5 :   m.AdvanceCurrentPosition(1);
    1268           5 :   m.IfRegisterGE(loop_cnt, 0, &loop2);  // Counts down 1, 0, -1: again three one-character advances.
    1269           5 :   m.WriteCurrentPositionToRegister(out4, 0);  // [0,3,6,9]
    1270             : 
    1271           5 :   Label loop3;
    1272           5 :   Label exit_loop3;
    1273           5 :   m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
    1274           5 :   m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
    1275           5 :   m.ReadCurrentPositionFromRegister(out3);
    1276           5 :   m.Bind(&loop3);
    1277           5 :   m.AdvanceCurrentPosition(1);
    1278           5 :   m.CheckGreedyLoop(&exit_loop3);
    1279           5 :   m.GoTo(&loop3);
    1280           5 :   m.Bind(&exit_loop3);
    1281           5 :   m.PopCurrentPosition();
    1282           5 :   m.WriteCurrentPositionToRegister(out5, 0);  // [0,3,6,9,9,-1]
    1283             : 
    1284           5 :   m.Succeed();
    1285             : 
    1286           5 :   m.Bind(&fail);
    1287           5 :   m.Fail();
    1288             : 
    1289           5 :   Handle<String> source = factory->NewStringFromStaticChars("<loop test>");
    1290           5 :   Handle<Object> code_object = m.GetCode(source);
    1291           5 :   Handle<Code> code = Handle<Code>::cast(code_object);
    1292             : 
    1293             :   // String long enough for test (content doesn't matter).
    1294           5 :   Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo");
    1295           5 :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1296             :   Address start_adr = seq_input->GetCharsAddress();
    1297             : 
    1298             :   int output[6];
    1299             :   NativeRegExpMacroAssembler::Result result =
    1300             :       Execute(*code,
    1301             :               *input,
    1302             :               0,
    1303             :               start_adr,
    1304             :               start_adr + input->length(),
    1305          10 :               output);
    1306             : 
    1307           5 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1308           5 :   CHECK_EQ(0, output[0]);
    1309           5 :   CHECK_EQ(3, output[1]);
    1310           5 :   CHECK_EQ(6, output[2]);
    1311           5 :   CHECK_EQ(9, output[3]);
    1312           5 :   CHECK_EQ(9, output[4]);
    1313           5 :   CHECK_EQ(-1, output[5]);  // out6 is never written by the program above.
    1314           5 : }
    1315             : 
    1316             : 
    1317       28342 : TEST(MacroAssemblerStackOverflow) {  // Runs a program that pushes backtrack entries in an endless loop and expects EXCEPTION plus a pending isolate exception.
    1318           5 :   v8::V8::Initialize();
    1319           5 :   ContextInitializer initializer;
    1320             :   Isolate* isolate = CcTest::i_isolate();
    1321             :   Factory* factory = isolate->factory();
    1322          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1323             : 
    1324             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1325          10 :                              0);
    1326             : 
    1327           5 :   Label loop;
    1328           5 :   m.Bind(&loop);
    1329           5 :   m.PushBacktrack(&loop);  // Every iteration pushes one more entry and nothing ever pops.
    1330           5 :   m.GoTo(&loop);
    1331             : 
    1332             :   Handle<String> source =
    1333           5 :       factory->NewStringFromStaticChars("<stack overflow test>");
    1334           5 :   Handle<Object> code_object = m.GetCode(source);
    1335           5 :   Handle<Code> code = Handle<Code>::cast(code_object);
    1336             : 
    1337             :   // The program never inspects the input, so its content doesn't matter.
    1338           5 :   Handle<String> input = factory->NewStringFromStaticChars("dummy");
    1339           5 :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1340             :   Address start_adr = seq_input->GetCharsAddress();
    1341             : 
    1342             :   NativeRegExpMacroAssembler::Result result = Execute(
    1343          10 :       *code, *input, 0, start_adr, start_adr + input->length(), nullptr);
    1344             : 
    1345           5 :   CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
    1346           5 :   CHECK(isolate->has_pending_exception());
    1347           5 :   isolate->clear_pending_exception();  // Clear the pending exception this test deliberately provoked.
    1348           5 : }
    1349             : 
    1350             : 
    1351       28342 : TEST(MacroAssemblerNativeLotsOfRegisters) {  // Uses a very high register index (8000) and checks that its value survives a push/pop round trip into register 1.
    1352           5 :   v8::V8::Initialize();
    1353           5 :   ContextInitializer initializer;
    1354             :   Isolate* isolate = CcTest::i_isolate();
    1355             :   Factory* factory = isolate->factory();
    1356          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1357             : 
    1358             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1359          10 :                              2);
    1360             : 
    1361             :   // At least 2048, to ensure the allocated space for registers
    1362             :   // spans one full page.
    1363             :   const int large_number = 8000;
    1364           5 :   m.WriteCurrentPositionToRegister(large_number, 42);
    1365           5 :   m.WriteCurrentPositionToRegister(0, 0);
    1366           5 :   m.WriteCurrentPositionToRegister(1, 1);
    1367           5 :   Label done;
    1368           5 :   m.CheckNotBackReference(0, false, &done);  // Performs a system-stack push.
    1369           5 :   m.Bind(&done);
    1370           5 :   m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
    1371           5 :   m.PopRegister(1);  // Register 1 now holds what was stored in register large_number (checked as 42 below).
    1372           5 :   m.Succeed();
    1373             : 
    1374             :   Handle<String> source =
    1375           5 :       factory->NewStringFromStaticChars("<huge register space test>");
    1376           5 :   Handle<Object> code_object = m.GetCode(source);
    1377           5 :   Handle<Code> code = Handle<Code>::cast(code_object);
    1378             : 
    1379             :   // String long enough for test (content doesn't matter).
    1380           5 :   Handle<String> input = factory->NewStringFromStaticChars("sample text");
    1381           5 :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1382             :   Address start_adr = seq_input->GetCharsAddress();
    1383             : 
    1384             :   int captures[2];
    1385             :   NativeRegExpMacroAssembler::Result result =
    1386             :       Execute(*code,
    1387             :               *input,
    1388             :               0,
    1389             :               start_adr,
    1390             :               start_adr + input->length(),
    1391          10 :               captures);
    1392             : 
    1393           5 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1394           5 :   CHECK_EQ(0, captures[0]);
    1395           5 :   CHECK_EQ(42, captures[1]);
    1396             : 
    1397           5 :   isolate->clear_pending_exception();  // NOTE(review): SUCCESS was asserted above, so it is unclear what exception could be pending here - confirm whether this call is needed.
    1398           5 : }
    1399             : 
    1400             : #else  // V8_INTERPRETED_REGEXP
    1401             : 
    1402             : TEST(MacroAssembler) {  // Interpreter-build variant: assembles bytecode for ^f(o)o and runs it through IrregexpInterpreter::Match on a matching and a non-matching two-byte string.
    1403             :   byte codes[1024];
    1404             :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1405             :   RegExpMacroAssemblerIrregexp m(CcTest::i_isolate(), Vector<byte>(codes, 1024),
    1406             :                                  &zone);
    1407             :   // ^f(o)o.
    1408             :   Label start, fail, backtrack;
    1409             : 
    1410             :   m.SetRegister(4, 42);
    1411             :   m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);  // The pushed 42 is what PopRegister(0) retrieves on the failure path.
    1412             :   m.AdvanceRegister(4, 42);  // Register 4 becomes 84, checked via captures[4] after the successful match.
    1413             :   m.GoTo(&start);
    1414             :   m.Fail();
    1415             :   m.Bind(&start);
    1416             :   m.PushBacktrack(&fail);
    1417             :   m.CheckNotAtStart(0, nullptr);
    1418             :   m.LoadCurrentCharacter(0, nullptr);
    1419             :   m.CheckNotCharacter('f', nullptr);
    1420             :   m.LoadCurrentCharacter(1, nullptr);
    1421             :   m.CheckNotCharacter('o', nullptr);
    1422             :   m.LoadCurrentCharacter(2, nullptr);
    1423             :   m.CheckNotCharacter('o', nullptr);
    1424             :   m.WriteCurrentPositionToRegister(0, 0);  // Registers 0..3 receive 0, 3, 1, 2: the values asserted in captures[0..3].
    1425             :   m.WriteCurrentPositionToRegister(1, 3);
    1426             :   m.WriteCurrentPositionToRegister(2, 1);
    1427             :   m.WriteCurrentPositionToRegister(3, 2);
    1428             :   m.AdvanceCurrentPosition(3);
    1429             :   m.PushBacktrack(&backtrack);
    1430             :   m.Succeed();
    1431             :   m.Bind(&backtrack);
    1432             :   m.ClearRegisters(2, 3);
    1433             :   m.Backtrack();
    1434             :   m.Bind(&fail);
    1435             :   m.PopRegister(0);
    1436             :   m.Fail();
    1437             : 
    1438             :   Isolate* isolate = CcTest::i_isolate();
    1439             :   Factory* factory = isolate->factory();
    1440             :   HandleScope scope(isolate);
    1441             : 
    1442             :   Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
    1443             :   Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
    1444             :   int captures[5];
    1445             : 
    1446             :   const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};  // "foobar": expected to match.
    1447             :   Handle<String> f1_16 = factory->NewStringFromTwoByte(
    1448             :       Vector<const uc16>(str1, 6)).ToHandleChecked();
    1449             : 
    1450             :   CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
    1451             :   CHECK_EQ(0, captures[0]);
    1452             :   CHECK_EQ(3, captures[1]);
    1453             :   CHECK_EQ(1, captures[2]);
    1454             :   CHECK_EQ(2, captures[3]);
    1455             :   CHECK_EQ(84, captures[4]);
    1456             : 
    1457             :   const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};  // "barfoo": expected not to match.
    1458             :   Handle<String> f2_16 = factory->NewStringFromTwoByte(
    1459             :       Vector<const uc16>(str2, 6)).ToHandleChecked();
    1460             : 
    1461             :   CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
    1462             :   CHECK_EQ(42, captures[0]);
    1463             : }
    1464             : 
    1465             : #endif  // V8_INTERPRETED_REGEXP
    1466             : 
    1467             : 
    1468       28342 : TEST(AddInverseToTable) {  // Checks DispatchTableConstructor::AddInverse against pseudo-random range lists and against one fixed range near 0xFFFF.
    1469             :   static const int kLimit = 1000;
    1470             :   static const int kRangeCount = 16;
    1471          55 :   for (int t = 0; t < 10; t++) {  // Ten rounds, each with its own zone and its own list of kRangeCount ranges.
    1472          50 :     Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1473             :     ZoneList<CharacterRange>* ranges =
    1474          50 :         new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
    1475         850 :     for (int i = 0; i < kRangeCount; i++) {
    1476        1600 :       int from = PseudoRandom(t + 87, i + 25) % kLimit;
    1477        1600 :       int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
    1478         800 :       if (to > kLimit) to = kLimit;  // Clamp the upper bound to kLimit.
    1479         800 :       ranges->Add(CharacterRange::Range(from, to), &zone);
    1480             :     }
    1481             :     DispatchTable table(&zone);
    1482             :     DispatchTableConstructor cons(&table, false, &zone);
    1483             :     cons.set_choice_index(0);
    1484          50 :     cons.AddInverse(ranges);
    1485       50050 :     for (int i = 0; i < kLimit; i++) {
    1486             :       bool is_on = false;  // Becomes true when any generated range contains i.
    1487      675615 :       for (int j = 0; !is_on && j < kRangeCount; j++)
    1488             :         is_on = ranges->at(j).Contains(i);
    1489       50000 :       OutSet* set = table.Get(i);
    1490       50000 :       CHECK_EQ(is_on, set->Get(0) == false);  // Entry 0 must be unset exactly for the characters covered by a range.
    1491             :     }
    1492          50 :   }
    1493           5 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1494             :   ZoneList<CharacterRange>* ranges =
    1495           5 :       new(&zone) ZoneList<CharacterRange>(1, &zone);
    1496           5 :   ranges->Add(CharacterRange::Range(0xFFF0, 0xFFFE), &zone);  // Fixed case: a single range that ends one below 0xFFFF.
    1497             :   DispatchTable table(&zone);
    1498             :   DispatchTableConstructor cons(&table, false, &zone);
    1499             :   cons.set_choice_index(0);
    1500           5 :   cons.AddInverse(ranges);
    1501           5 :   CHECK(!table.Get(0xFFFE)->Get(0));
    1502          10 :   CHECK(table.Get(0xFFFF)->Get(0));
    1503           5 : }
    1504             : 
    1505             : 
    1506    10485380 : static uc32 canonicalize(uc32 c) {  // Returns the single result of unibrow::Ecma262Canonicalize::Convert for c, or c itself when Convert reports no result.
    1507             :   unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
    1508    10485380 :   int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, nullptr);
    1509    10485380 :   if (count == 0) {
    1510             :     return c;
    1511             :   } else {
    1512        4995 :     CHECK_EQ(1, count);  // Any non-zero result count other than one is treated as a test failure.
    1513        4995 :     return canon[0];
    1514             :   }
    1515             : }
    1516             : 
    1517             : 
    1518       28342 : TEST(LatinCanonicalize) {  // Checks canonicalize() for 'a'..'z' against their upper-case forms and checks that no code point >= 128 maps below 128.
    1519             :   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
    1520         130 :   for (unibrow::uchar lower = 'a'; lower <= 'z'; lower++) {
    1521         130 :     unibrow::uchar upper = lower + ('A' - 'a');
    1522         130 :     CHECK_EQ(canonicalize(lower), canonicalize(upper));
    1523             :     unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1524         130 :     int length = un_canonicalize.get(lower, '\0', uncanon);
    1525         130 :     CHECK_EQ(2, length);  // The un-canonicalized set must be exactly {upper, lower}, in that order.
    1526         130 :     CHECK_EQ(upper, uncanon[0]);
    1527         130 :     CHECK_EQ(lower, uncanon[1]);
    1528             :   }
    1529    10485120 :   for (uc32 c = 128; c < (1 << 21); c++)  // Every value from 128 up to, but excluding, 1 << 21.
    1530    10485120 :     CHECK_GE(canonicalize(c), 128);
    1531             : #ifndef V8_INTL_SUPPORT
    1532             :   unibrow::Mapping<unibrow::ToUppercase> to_upper;
    1533             :   // Canonicalization is only defined for the Basic Multilingual Plane.
    1534             :   for (uc32 c = 0; c < (1 << 16); c++) {
    1535             :     unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
    1536             :     int length = to_upper.get(c, '\0', upper);
    1537             :     if (length == 0) {  // No mapping reported: treat c as its own upper-case form.
    1538             :       length = 1;
    1539             :       upper[0] = c;
    1540             :     }
    1541             :     uc32 u = upper[0];
    1542             :     if (length > 1 || (c >= 128 && u < 128))  // Multi-character results and mappings from >= 128 down to < 128 are expected to leave c unchanged.
    1543             :       u = c;
    1544             :     CHECK_EQ(u, canonicalize(c));
    1545             :   }
    1546             : #endif
    1547           5 : }
    1548             : 
    1549             : 
    1550      324330 : static uc32 CanonRangeEnd(uc32 c) {  // Returns the single result of unibrow::CanonicalizationRange::Convert for c, or c itself when Convert reports no result.
    1551             :   unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
    1552      324330 :   int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, nullptr);
    1553      324330 :   if (count == 0) {
    1554             :     return c;
    1555             :   } else {
    1556         220 :     CHECK_EQ(1, count);  // Any non-zero result count other than one is treated as a test failure.
    1557         220 :     return canon[0];
    1558             :   }
    1559             : }
    1560             : 
    1561             : 
    1562       28342 : TEST(RangeCanonicalization) {
    1563             :   // Check that we arrive at the same result when using the basic
    1564             :   // range canonicalization primitives as when using immediate
    1565             :   // canonicalization.
    1566             :   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
    1567             :   int block_start = 0;
    1568      324335 :   while (block_start <= 0xFFFF) {  // Walk 0..0xFFFF one block at a time.
    1569      324330 :     uc32 block_end = CanonRangeEnd(block_start);
    1570      324330 :     unsigned block_length = block_end - block_start + 1;
    1571      324330 :     if (block_length > 1) {  // Single-character blocks have nothing to compare.
    1572             :       unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1573         220 :       int first_length = un_canonicalize.get(block_start, '\0', first);
    1574        3570 :       for (unsigned i = 1; i < block_length; i++) {
    1575             :         unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1576        3350 :         int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
    1577        3350 :         CHECK_EQ(first_length, succ_length);
    1578        6700 :         for (int j = 0; j < succ_length; j++) {
    1579        6700 :           int calc = first[j] + i;  // Expected value: the block start's j-th result shifted by the offset i.
    1580        6700 :           int found = succ[j];
    1581        6700 :           CHECK_EQ(calc, found);
    1582             :         }
    1583             :       }
    1584             :     }
    1585      324330 :     block_start = block_start + block_length;  // Continue with the first character after this block.
    1586             :   }
    1587           5 : }
    1588             : 
    1589             : 
    1590       28342 : TEST(UncanonicalizeEquivalence) {  // For every value below 1 << 16, each character returned by un_canonicalize must itself map to the same result list.
    1591             :   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
    1592             :   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1593      327680 :   for (int i = 0; i < (1 << 16); i++) {
    1594      327680 :     int length = un_canonicalize.get(i, '\0', chars);
    1595      347750 :     for (int j = 0; j < length; j++) {
    1596             :       unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1597       20070 :       int length2 = un_canonicalize.get(chars[j], '\0', chars2);
    1598       20070 :       CHECK_EQ(length, length2);
    1599       40930 :       for (int k = 0; k < length; k++)  // Element-wise comparison, so the order of results must agree as well.
    1600       40930 :         CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
    1601             :     }
    1602             :   }
    1603           5 : }
    1604             : 
    1605             : 
    1606          55 : static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,  // Runs CharacterRange::AddCaseEquivalents on a list holding only `input` and checks that the added ranges equal `expected`, element by element.
    1607             :                                       Vector<CharacterRange> expected) {
    1608          55 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);  // NOTE(review): uses CcTest::i_isolate() although an `isolate` parameter is available - presumably the same isolate; confirm.
    1609          55 :   int count = expected.length();
    1610          55 :   ZoneList<CharacterRange>* list =
    1611          55 :       new(&zone) ZoneList<CharacterRange>(count, &zone);
    1612          55 :   list->Add(input, &zone);
    1613          55 :   CharacterRange::AddCaseEquivalents(isolate, &zone, list, false);
    1614          55 :   list->Remove(0);  // Remove the input before checking results.
    1615          55 :   CHECK_EQ(count, list->length());
    1616          55 :   for (int i = 0; i < list->length(); i++) {
    1617         110 :     CHECK_EQ(expected[i].from(), list->at(i).from());
    1618          55 :     CHECK_EQ(expected[i].to(), list->at(i).to());
    1619          55 :   }
    1620          55 : }
    1621             : 
    1622             : 
    1623          55 : static void TestSimpleRangeCaseIndependence(Isolate* isolate,  // Convenience wrapper: expects exactly one range, `expected`, to be added for `input`.
    1624             :                                             CharacterRange input,
    1625             :                                             CharacterRange expected) {
    1626             :   EmbeddedVector<CharacterRange, 1> vector;
    1627          55 :   vector[0] = expected;
    1628          55 :   TestRangeCaseIndependence(isolate, input, vector);
    1629          55 : }
    1630             : 
    1631             : 
    1632       28342 : TEST(CharacterRangeCaseIndependence) {  // Table of (input range, expected added range) pairs over ASCII letters; each pair is checked by TestSimpleRangeCaseIndependence.
    1633             :   Isolate* isolate = CcTest::i_isolate();
    1634             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('a'),
    1635           5 :                                   CharacterRange::Singleton('A'));
    1636             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('z'),
    1637           5 :                                   CharacterRange::Singleton('Z'));
    1638             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'z'),
    1639           5 :                                   CharacterRange::Range('A', 'Z'));
    1640             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('c', 'f'),
    1641           5 :                                   CharacterRange::Range('C', 'F'));
    1642             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'b'),
    1643           5 :                                   CharacterRange::Range('A', 'B'));
    1644             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('y', 'z'),
    1645           5 :                                   CharacterRange::Range('Y', 'Z'));
    1646             :   TestSimpleRangeCaseIndependence(isolate,
    1647             :                                   CharacterRange::Range('a' - 1, 'z' + 1),  // Input extends one character past each end of a-z; only A-Z is expected.
    1648           5 :                                   CharacterRange::Range('A', 'Z'));
    1649             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'Z'),
    1650           5 :                                   CharacterRange::Range('a', 'z'));
    1651             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('C', 'F'),
    1652           5 :                                   CharacterRange::Range('c', 'f'));
    1653             :   TestSimpleRangeCaseIndependence(isolate,
    1654             :                                   CharacterRange::Range('A' - 1, 'Z' + 1),  // Input extends one character past each end of A-Z; only a-z is expected.
    1655           5 :                                   CharacterRange::Range('a', 'z'));
    1656             :   // Here we need to add [l-z] to complete the case independence of
    1657             :   // [A-Za-z] but we expect [a-z] to be added since we always add a
    1658             :   // whole block at a time.
    1659             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'k'),
    1660           5 :                                   CharacterRange::Range('a', 'z'));
    1661           5 : }
    1662             : 
    1663             : 
    1664    66805525 : static bool InClass(uc32 c, ZoneList<CharacterRange>* ranges) {
    1665    22282160 :   if (ranges == nullptr) return false;
    1666    66764570 :   for (int i = 0; i < ranges->length(); i++) {
    1667    27811745 :     CharacterRange range = ranges->at(i);
    1668    27811745 :     if (range.from() <= c && c <= range.to())
    1669             :       return true;
    1670             :   }
    1671             :   return false;
    1672             : }
    1673             : 
    1674             : 
    1675       28342 : TEST(UnicodeRangeSplitter) {
    1676           5 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1677             :   ZoneList<CharacterRange>* base =
    1678           5 :       new(&zone) ZoneList<CharacterRange>(1, &zone);
    1679           5 :   base->Add(CharacterRange::Everything(), &zone);
    1680           5 :   UnicodeRangeSplitter splitter(&zone, base);
    1681             :   // BMP
    1682      276485 :   for (uc32 c = 0; c < 0xD800; c++) {
    1683      276480 :     CHECK(InClass(c, splitter.bmp()));
    1684      276480 :     CHECK(!InClass(c, splitter.lead_surrogates()));
    1685      276480 :     CHECK(!InClass(c, splitter.trail_surrogates()));
    1686      276480 :     CHECK(!InClass(c, splitter.non_bmp()));
    1687             :   }
    1688             :   // Lead surrogates
    1689        5115 :   for (uc32 c = 0xD800; c < 0xDBFF; c++) {
    1690        5115 :     CHECK(!InClass(c, splitter.bmp()));
    1691        5115 :     CHECK(InClass(c, splitter.lead_surrogates()));
    1692        5115 :     CHECK(!InClass(c, splitter.trail_surrogates()));
    1693        5115 :     CHECK(!InClass(c, splitter.non_bmp()));
    1694             :   }
    1695             :   // Trail surrogates
    1696        5115 :   for (uc32 c = 0xDC00; c < 0xDFFF; c++) {
    1697        5115 :     CHECK(!InClass(c, splitter.bmp()));
    1698        5115 :     CHECK(!InClass(c, splitter.lead_surrogates()));
    1699        5115 :     CHECK(InClass(c, splitter.trail_surrogates()));
    1700        5115 :     CHECK(!InClass(c, splitter.non_bmp()));
    1701             :   }
    1702             :   // BMP
    1703       40955 :   for (uc32 c = 0xE000; c < 0xFFFF; c++) {
    1704       40955 :     CHECK(InClass(c, splitter.bmp()));
    1705       40955 :     CHECK(!InClass(c, splitter.lead_surrogates()));
    1706       40955 :     CHECK(!InClass(c, splitter.trail_surrogates()));
    1707       40955 :     CHECK(!InClass(c, splitter.non_bmp()));
    1708             :   }
    1709             :   // Non-BMP
    1710     5242875 :   for (uc32 c = 0x10000; c < 0x10FFFF; c++) {
    1711     5242875 :     CHECK(!InClass(c, splitter.bmp()));
    1712     5242875 :     CHECK(!InClass(c, splitter.lead_surrogates()));
    1713     5242875 :     CHECK(!InClass(c, splitter.trail_surrogates()));
    1714     5242875 :     CHECK(InClass(c, splitter.non_bmp()));
    1715           5 :   }
    1716           5 : }
    1717             : 
    1718             : 
    1719       28342 : TEST(CanonicalizeCharacterSets) {
    1720           5 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1721          20 :   ZoneList<CharacterRange>* list =
    1722           5 :       new(&zone) ZoneList<CharacterRange>(4, &zone);
    1723             :   CharacterSet set(list);
    1724             : 
    1725           5 :   list->Add(CharacterRange::Range(10, 20), &zone);
    1726           5 :   list->Add(CharacterRange::Range(30, 40), &zone);
    1727           5 :   list->Add(CharacterRange::Range(50, 60), &zone);
    1728           5 :   set.Canonicalize();
    1729           5 :   CHECK_EQ(3, list->length());
    1730           5 :   CHECK_EQ(10, list->at(0).from());
    1731           5 :   CHECK_EQ(20, list->at(0).to());
    1732           5 :   CHECK_EQ(30, list->at(1).from());
    1733           5 :   CHECK_EQ(40, list->at(1).to());
    1734           5 :   CHECK_EQ(50, list->at(2).from());
    1735           5 :   CHECK_EQ(60, list->at(2).to());
    1736             : 
    1737             :   list->Rewind(0);
    1738           5 :   list->Add(CharacterRange::Range(10, 20), &zone);
    1739           5 :   list->Add(CharacterRange::Range(50, 60), &zone);
    1740           5 :   list->Add(CharacterRange::Range(30, 40), &zone);
    1741           5 :   set.Canonicalize();
    1742           5 :   CHECK_EQ(3, list->length());
    1743           5 :   CHECK_EQ(10, list->at(0).from());
    1744           5 :   CHECK_EQ(20, list->at(0).to());
    1745           5 :   CHECK_EQ(30, list->at(1).from());
    1746           5 :   CHECK_EQ(40, list->at(1).to());
    1747           5 :   CHECK_EQ(50, list->at(2).from());
    1748           5 :   CHECK_EQ(60, list->at(2).to());
    1749             : 
    1750             :   list->Rewind(0);
    1751           5 :   list->Add(CharacterRange::Range(30, 40), &zone);
    1752           5 :   list->Add(CharacterRange::Range(10, 20), &zone);
    1753           5 :   list->Add(CharacterRange::Range(25, 25), &zone);
    1754           5 :   list->Add(CharacterRange::Range(100, 100), &zone);
    1755           5 :   list->Add(CharacterRange::Range(1, 1), &zone);
    1756           5 :   set.Canonicalize();
    1757           5 :   CHECK_EQ(5, list->length());
    1758           5 :   CHECK_EQ(1, list->at(0).from());
    1759           5 :   CHECK_EQ(1, list->at(0).to());
    1760           5 :   CHECK_EQ(10, list->at(1).from());
    1761           5 :   CHECK_EQ(20, list->at(1).to());
    1762           5 :   CHECK_EQ(25, list->at(2).from());
    1763           5 :   CHECK_EQ(25, list->at(2).to());
    1764           5 :   CHECK_EQ(30, list->at(3).from());
    1765           5 :   CHECK_EQ(40, list->at(3).to());
    1766           5 :   CHECK_EQ(100, list->at(4).from());
    1767           5 :   CHECK_EQ(100, list->at(4).to());
    1768             : 
    1769             :   list->Rewind(0);
    1770           5 :   list->Add(CharacterRange::Range(10, 19), &zone);
    1771           5 :   list->Add(CharacterRange::Range(21, 30), &zone);
    1772           5 :   list->Add(CharacterRange::Range(20, 20), &zone);
    1773           5 :   set.Canonicalize();
    1774           5 :   CHECK_EQ(1, list->length());
    1775           5 :   CHECK_EQ(10, list->at(0).from());
    1776           5 :   CHECK_EQ(30, list->at(0).to());
    1777           5 : }
    1778             : 
    1779             : 
    1780       28342 : TEST(CharacterRangeMerge) {
    1781           5 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1782           5 :   ZoneList<CharacterRange> l1(4, &zone);
    1783           5 :   ZoneList<CharacterRange> l2(4, &zone);
    1784             :   // Create all combinations of intersections of ranges, both singletons and
    1785             :   // longer.
    1786             : 
    1787             :   int offset = 0;
    1788             : 
    1789             :   // The five kinds of singleton intersections:
    1790             :   //     X
    1791             :   //   Y      - outside before
    1792             :   //    Y     - outside touching start
    1793             :   //     Y    - overlap
    1794             :   //      Y   - outside touching end
    1795             :   //       Y  - outside after
    1796             : 
    1797          30 :   for (int i = 0; i < 5; i++) {
    1798          25 :     l1.Add(CharacterRange::Singleton(offset + 2), &zone);
    1799          25 :     l2.Add(CharacterRange::Singleton(offset + i), &zone);
    1800          25 :     offset += 6;
    1801             :   }
    1802             : 
    1803             :   // The seven kinds of singleton/non-singleton intersections:
    1804             :   //    XXX
    1805             :   //  Y        - outside before
    1806             :   //   Y       - outside touching start
    1807             :   //    Y      - inside touching start
    1808             :   //     Y     - entirely inside
    1809             :   //      Y    - inside touching end
    1810             :   //       Y   - outside touching end
    1811             :   //        Y  - disjoint after
    1812             : 
    1813          35 :   for (int i = 0; i < 7; i++) {
    1814          35 :     l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
    1815          35 :     l2.Add(CharacterRange::Singleton(offset + i), &zone);
    1816          35 :     offset += 8;
    1817             :   }
    1818             : 
    1819             :   // The eleven kinds of non-singleton intersections:
    1820             :   //
    1821             :   //       XXXXXXXX
    1822             :   // YYYY                  - outside before.
    1823             :   //   YYYY                - outside touching start.
    1824             :   //     YYYY              - overlapping start
    1825             :   //       YYYY            - inside touching start
    1826             :   //         YYYY          - entirely inside
    1827             :   //           YYYY        - inside touching end
    1828             :   //             YYYY      - overlapping end
    1829             :   //               YYYY    - outside touching end
    1830             :   //                 YYYY  - outside after
    1831             :   //       YYYYYYYY        - identical
    1832             :   //     YYYYYYYYYYYY      - containing entirely.
    1833             : 
    1834          45 :   for (int i = 0; i < 9; i++) {
    1835          45 :     l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);  // Length 8.
    1836          45 :     l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
    1837          45 :     offset += 22;
    1838             :   }
    1839           5 :   l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
    1840           5 :   l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
    1841             :   offset += 22;
    1842           5 :   l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
    1843           5 :   l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone);
    1844           5 :   offset += 22;
    1845             : 
    1846             :   // Different kinds of multi-range overlap:
    1847             :   // XXXXXXXXXXXXXXXXXXXXXX         XXXXXXXXXXXXXXXXXXXXXX
    1848             :   //   YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y
    1849             : 
    1850           5 :   l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
    1851           5 :   l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
    1852          35 :   for (int i = 0; i < 6; i++) {
    1853          30 :     l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
    1854          30 :     l2.Add(CharacterRange::Singleton(offset + 8), &zone);
    1855          30 :     offset += 9;
    1856             :   }
    1857             : 
    1858           5 :   CHECK(CharacterRange::IsCanonical(&l1));
    1859           5 :   CHECK(CharacterRange::IsCanonical(&l2));
    1860             : 
    1861           5 :   ZoneList<CharacterRange> first_only(4, &zone);
    1862           5 :   ZoneList<CharacterRange> second_only(4, &zone);
    1863           5 :   ZoneList<CharacterRange> both(4, &zone);
    1864           5 : }
    1865             : 
    1866             : 
    1867       28342 : TEST(Graph) {
    1868           5 :   Execute("\\b\\w+\\b", false, true, true);
    1869           5 : }
    1870             : 
    1871             : 
    1872             : namespace {
    1873             : 
    1874             : int* global_use_counts = nullptr;
    1875             : 
    1876          46 : void MockUseCounterCallback(v8::Isolate* isolate,
    1877             :                             v8::Isolate::UseCounterFeature feature) {
    1878          46 :   ++global_use_counts[feature];
    1879          46 : }
    1880             : }
    1881             : 
    1882             : 
    1883             : // Test that ES2015 RegExp compatibility fixes are in place, that they
    1884             : // are not overly broad, and the appropriate UseCounters are incremented.
    1885       28342 : TEST(UseCountRegExp) {
    1886           5 :   v8::Isolate* isolate = CcTest::isolate();
    1887           5 :   v8::HandleScope scope(isolate);
    1888          10 :   LocalContext env;
    1889           5 :   int use_counts[v8::Isolate::kUseCounterFeatureCount] = {};
    1890           5 :   global_use_counts = use_counts;
    1891           5 :   CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback);
    1892             : 
    1893             :   // Compat fix: RegExp.prototype.sticky == undefined; UseCounter tracks it
    1894             :   v8::Local<v8::Value> resultSticky = CompileRun("RegExp.prototype.sticky");
    1895           5 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1896           5 :   CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1897           5 :   CHECK(resultSticky->IsUndefined());
    1898             : 
    1899             :   // re.sticky has appropriate value and doesn't touch UseCounter
    1900             :   v8::Local<v8::Value> resultReSticky = CompileRun("/a/.sticky");
    1901           5 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1902           5 :   CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1903           5 :   CHECK(resultReSticky->IsFalse());
    1904             : 
    1905             :   // When the getter is called on another object, throw an exception
    1906             :   // and don't increment the UseCounter
    1907             :   v8::Local<v8::Value> resultStickyError = CompileRun(
    1908             :       "var exception;"
    1909             :       "try { "
    1910             :       "  Object.getOwnPropertyDescriptor(RegExp.prototype, 'sticky')"
    1911             :       "      .get.call(null);"
    1912             :       "} catch (e) {"
    1913             :       "  exception = e;"
    1914             :       "}"
    1915             :       "exception");
    1916           5 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1917           5 :   CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1918           5 :   CHECK(resultStickyError->IsObject());
    1919             : 
    1920             :   // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix;
    1921             :   // a UseCounter is incremented to track it.
    1922             :   v8::Local<v8::Value> resultToString =
    1923             :       CompileRun("RegExp.prototype.toString().length");
    1924           5 :   CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1925           5 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1926           5 :   CHECK(resultToString->IsInt32());
    1927          10 :   CHECK_EQ(6,
    1928             :            resultToString->Int32Value(isolate->GetCurrentContext()).FromJust());
    1929             : 
    1930             :   // .toString() works on normal RegExps
    1931             :   v8::Local<v8::Value> resultReToString = CompileRun("/a/.toString().length");
    1932           5 :   CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1933           5 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1934           5 :   CHECK(resultReToString->IsInt32());
    1935          10 :   CHECK_EQ(
    1936             :       3, resultReToString->Int32Value(isolate->GetCurrentContext()).FromJust());
    1937             : 
    1938             :   // .toString() throws on non-RegExps that aren't RegExp.prototype
    1939             :   v8::Local<v8::Value> resultToStringError = CompileRun(
    1940             :       "var exception;"
    1941             :       "try { RegExp.prototype.toString.call(null) }"
    1942             :       "catch (e) { exception = e; }"
    1943             :       "exception");
    1944           5 :   CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1945           5 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1946          10 :   CHECK(resultToStringError->IsObject());
    1947           5 : }
    1948             : 
    1949          15 : class UncachedExternalString
    1950             :     : public v8::String::ExternalOneByteStringResource {
    1951             :  public:
    1952          26 :   const char* data() const override { return "abcdefghijklmnopqrstuvwxyz"; }
    1953          20 :   size_t length() const override { return 26; }
    1954           5 :   bool IsCacheable() const override { return false; }
    1955             : };
    1956             : 
    1957       28342 : TEST(UncachedExternalString) {
    1958           5 :   v8::Isolate* isolate = CcTest::isolate();
    1959           5 :   v8::HandleScope scope(isolate);
    1960          10 :   LocalContext env;
    1961             :   v8::Local<v8::String> external =
    1962           5 :       v8::String::NewExternalOneByte(isolate, new UncachedExternalString())
    1963           5 :           .ToLocalChecked();
    1964           5 :   CHECK(v8::Utils::OpenHandle(*external)->map() ==
    1965             :         ReadOnlyRoots(CcTest::i_isolate())
    1966             :             .uncached_external_one_byte_string_map());
    1967           5 :   v8::Local<v8::Object> global = env->Global();
    1968          20 :   global->Set(env.local(), v8_str("external"), external).FromJust();
    1969             :   CompileRun("var re = /y(.)/; re.test('ab');");
    1970          10 :   ExpectString("external.substring(1).match(re)[1]", "z");
    1971           5 : }
    1972             : 
    1973             : }  // namespace test_regexp
    1974             : }  // namespace internal
    1975       85011 : }  // namespace v8

Generated by: LCOV version 1.10