LCOV - code coverage report
Current view: top level - test/cctest - test-regexp.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 1021 1022 99.9 %
Date: 2019-04-17 Functions: 49 50 98.0 %

          Line data    Source code
       1             : // Copyright 2012 the V8 project authors. All rights reserved.
       2             : // Redistribution and use in source and binary forms, with or without
       3             : // modification, are permitted provided that the following conditions are
       4             : // met:
       5             : //
       6             : //     * Redistributions of source code must retain the above copyright
       7             : //       notice, this list of conditions and the following disclaimer.
       8             : //     * Redistributions in binary form must reproduce the above
       9             : //       copyright notice, this list of conditions and the following
      10             : //       disclaimer in the documentation and/or other materials provided
      11             : //       with the distribution.
      12             : //     * Neither the name of Google Inc. nor the names of its
      13             : //       contributors may be used to endorse or promote products derived
      14             : //       from this software without specific prior written permission.
      15             : //
      16             : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      17             : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      18             : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      19             : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
      20             : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
      21             : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
      22             : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      23             : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      24             : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      25             : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
      26             : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      27             : 
      28             : #include <cstdlib>
      29             : #include <memory>
      30             : #include <sstream>
      31             : 
      32             : #include "include/v8.h"
      33             : #include "src/api-inl.h"
      34             : #include "src/assembler-arch.h"
      35             : #include "src/ast/ast.h"
      36             : #include "src/char-predicates-inl.h"
      37             : #include "src/macro-assembler.h"
      38             : #include "src/objects-inl.h"
      39             : #include "src/ostreams.h"
      40             : #include "src/regexp/interpreter-irregexp.h"
      41             : #include "src/regexp/jsregexp.h"
      42             : #include "src/regexp/regexp-macro-assembler-irregexp.h"
      43             : #include "src/regexp/regexp-macro-assembler.h"
      44             : #include "src/regexp/regexp-parser.h"
      45             : #include "src/splay-tree-inl.h"
      46             : #include "src/string-stream.h"
      47             : #include "src/unicode-inl.h"
      48             : #include "src/v8.h"
      49             : #include "src/zone/zone-list-inl.h"
      50             : 
      51             : #if V8_TARGET_ARCH_ARM
      52             : #include "src/regexp/arm/regexp-macro-assembler-arm.h"
      53             : #elif V8_TARGET_ARCH_ARM64
      54             : #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
      55             : #elif V8_TARGET_ARCH_S390
      56             : #include "src/regexp/s390/regexp-macro-assembler-s390.h"
      57             : #elif V8_TARGET_ARCH_PPC
      58             : #include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
      59             : #elif V8_TARGET_ARCH_MIPS
      60             : #include "src/regexp/mips/regexp-macro-assembler-mips.h"
      61             : #elif V8_TARGET_ARCH_MIPS64
      62             : #include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
      63             : #elif V8_TARGET_ARCH_X64
      64             : #include "src/regexp/x64/regexp-macro-assembler-x64.h"
      65             : #elif V8_TARGET_ARCH_IA32
      66             : #include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
      67             : #else
      68             : #error Unknown architecture.
      69             : #endif
      70             : 
      71             : #include "test/cctest/cctest.h"
      72             : 
      73             : namespace v8 {
      74             : namespace internal {
      75             : namespace test_regexp {
      76             : 
      77           5 : static bool CheckParse(const char* input) {
      78          10 :   v8::HandleScope scope(CcTest::isolate());
      79          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
      80           5 :   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
      81             :   RegExpCompileData result;
      82           5 :   return v8::internal::RegExpParser::ParseRegExp(
      83          10 :       CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result);
      84             : }
      85             : 
      86             : 
      87         765 : static void CheckParseEq(const char* input, const char* expected,
      88             :                          bool unicode = false) {
      89        1530 :   v8::HandleScope scope(CcTest::isolate());
      90        1530 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
      91         765 :   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
      92             :   RegExpCompileData result;
      93             :   JSRegExp::Flags flags = JSRegExp::kNone;
      94         765 :   if (unicode) flags |= JSRegExp::kUnicode;
      95         765 :   CHECK(v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
      96             :                                                 &reader, flags, &result));
      97         765 :   CHECK_NOT_NULL(result.tree);
      98         765 :   CHECK(result.error.is_null());
      99        1530 :   std::ostringstream os;
     100         765 :   result.tree->Print(os, &zone);
     101        1530 :   if (strcmp(expected, os.str().c_str()) != 0) {
     102           0 :     printf("%s | %s\n", expected, os.str().c_str());
     103             :   }
     104        1530 :   CHECK_EQ(0, strcmp(expected, os.str().c_str()));
     105         765 : }
     106             : 
     107             : 
     108         225 : static bool CheckSimple(const char* input) {
     109         450 :   v8::HandleScope scope(CcTest::isolate());
     110         450 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     111         225 :   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
     112             :   RegExpCompileData result;
     113         225 :   CHECK(v8::internal::RegExpParser::ParseRegExp(
     114             :       CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
     115         225 :   CHECK_NOT_NULL(result.tree);
     116         225 :   CHECK(result.error.is_null());
     117         450 :   return result.simple;
     118             : }
     119             : 
     120             : struct MinMaxPair {
     121             :   int min_match;
     122             :   int max_match;
     123             : };
     124             : 
     125             : 
     126         240 : static MinMaxPair CheckMinMaxMatch(const char* input) {
     127         480 :   v8::HandleScope scope(CcTest::isolate());
     128         480 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     129         240 :   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
     130             :   RegExpCompileData result;
     131         240 :   CHECK(v8::internal::RegExpParser::ParseRegExp(
     132             :       CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
     133         240 :   CHECK_NOT_NULL(result.tree);
     134         240 :   CHECK(result.error.is_null());
     135         240 :   int min_match = result.tree->min_match();
     136         240 :   int max_match = result.tree->max_match();
     137             :   MinMaxPair pair = { min_match, max_match };
     138         480 :   return pair;
     139             : }
     140             : 
     141             : 
     142             : #define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
     143             : #define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input));
     144             : #define CHECK_MIN_MAX(input, min, max)                                         \
     145             :   { MinMaxPair min_max = CheckMinMaxMatch(input);                              \
     146             :     CHECK_EQ(min, min_max.min_match);                                          \
     147             :     CHECK_EQ(max, min_max.max_match);                                          \
     148             :   }
     149             : 
     150       26644 : TEST(RegExpParser) {
     151           5 :   CHECK_PARSE_ERROR("?");
     152             : 
     153           5 :   CheckParseEq("abc", "'abc'");
     154           5 :   CheckParseEq("", "%");
     155           5 :   CheckParseEq("abc|def", "(| 'abc' 'def')");
     156           5 :   CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
     157           5 :   CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
     158           5 :   CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
     159           5 :   CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
     160           5 :   CheckParseEq("a*", "(# 0 - g 'a')");
     161           5 :   CheckParseEq("a*?", "(# 0 - n 'a')");
     162           5 :   CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
     163           5 :   CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
     164           5 :   CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
     165           5 :   CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
     166           5 :   CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
     167           5 :   CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
     168           5 :   CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
     169           5 :   CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
     170           5 :   CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
     171           5 :   CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
     172           5 :   CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
     173           5 :   CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
     174           5 :   CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
     175           5 :   CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
     176           5 :   CheckParseEq("(?:foo)", "(?: 'foo')");
     177           5 :   CheckParseEq("(?: foo )", "(?: ' foo ')");
     178           5 :   CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
     179           5 :   CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
     180           5 :   CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
     181           5 :   CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
     182           5 :   CheckParseEq("foo(?<=bar)baz", "(: 'foo' (<- + 'bar') 'baz')");
     183           5 :   CheckParseEq("foo(?<!bar)baz", "(: 'foo' (<- - 'bar') 'baz')");
     184           5 :   CheckParseEq("()", "(^ %)");
     185           5 :   CheckParseEq("(?=)", "(-> + %)");
     186           5 :   CheckParseEq("[]", "^[\\x00-\\u{10ffff}]");  // Doesn't compile on windows
     187           5 :   CheckParseEq("[^]", "[\\x00-\\u{10ffff}]");  // \uffff isn't in codepage 1252
     188           5 :   CheckParseEq("[x]", "[x]");
     189           5 :   CheckParseEq("[xyz]", "[x y z]");
     190           5 :   CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
     191           5 :   CheckParseEq("[-123]", "[- 1 2 3]");
     192           5 :   CheckParseEq("[^123]", "^[1 2 3]");
     193           5 :   CheckParseEq("]", "']'");
     194           5 :   CheckParseEq("}", "'}'");
     195           5 :   CheckParseEq("[a-b-c]", "[a-b - c]");
     196           5 :   CheckParseEq("[\\d]", "[0-9]");
     197           5 :   CheckParseEq("[x\\dz]", "[x 0-9 z]");
     198           5 :   CheckParseEq("[\\d-z]", "[0-9 - z]");
     199           5 :   CheckParseEq("[\\d-\\d]", "[0-9 0-9 -]");
     200           5 :   CheckParseEq("[z-\\d]", "[0-9 z -]");
     201             :   // Control character outside character class.
     202           5 :   CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
     203           5 :   CheckParseEq("\\c!", "'\\c!'");
     204           5 :   CheckParseEq("\\c_", "'\\c_'");
     205           5 :   CheckParseEq("\\c~", "'\\c~'");
     206           5 :   CheckParseEq("\\c1", "'\\c1'");
     207             :   // Control character inside character class.
     208           5 :   CheckParseEq("[\\c!]", "[\\ c !]");
     209           5 :   CheckParseEq("[\\c_]", "[\\x1f]");
     210           5 :   CheckParseEq("[\\c~]", "[\\ c ~]");
     211           5 :   CheckParseEq("[\\ca]", "[\\x01]");
     212           5 :   CheckParseEq("[\\cz]", "[\\x1a]");
     213           5 :   CheckParseEq("[\\cA]", "[\\x01]");
     214           5 :   CheckParseEq("[\\cZ]", "[\\x1a]");
     215           5 :   CheckParseEq("[\\c1]", "[\\x11]");
     216             : 
     217           5 :   CheckParseEq("[a\\]c]", "[a ] c]");
     218           5 :   CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
     219           5 :   CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ #  ]");
     220           5 :   CheckParseEq("\\0", "'\\x00'");
     221           5 :   CheckParseEq("\\8", "'8'");
     222           5 :   CheckParseEq("\\9", "'9'");
     223           5 :   CheckParseEq("\\11", "'\\x09'");
     224           5 :   CheckParseEq("\\11a", "'\\x09a'");
     225           5 :   CheckParseEq("\\011", "'\\x09'");
     226           5 :   CheckParseEq("\\00011", "'\\x0011'");
     227           5 :   CheckParseEq("\\118", "'\\x098'");
     228           5 :   CheckParseEq("\\111", "'I'");
     229           5 :   CheckParseEq("\\1111", "'I1'");
     230           5 :   CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
     231           5 :   CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
     232           5 :   CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
     233           5 :   CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
     234             :   CheckParseEq("(x)(x)(x)\\1*",
     235             :                "(: (^ 'x') (^ 'x') (^ 'x')"
     236           5 :                " (# 0 - g (<- 1)))");
     237             :   CheckParseEq("(x)(x)(x)\\2*",
     238             :                "(: (^ 'x') (^ 'x') (^ 'x')"
     239           5 :                " (# 0 - g (<- 2)))");
     240             :   CheckParseEq("(x)(x)(x)\\3*",
     241             :                "(: (^ 'x') (^ 'x') (^ 'x')"
     242           5 :                " (# 0 - g (<- 3)))");
     243             :   CheckParseEq("(x)(x)(x)\\4*",
     244             :                "(: (^ 'x') (^ 'x') (^ 'x')"
     245           5 :                " (# 0 - g '\\x04'))");
     246             :   CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
     247             :                "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
     248           5 :                " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
     249             :   CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
     250             :                "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
     251           5 :                " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
     252           5 :   CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
     253           5 :   CheckParseEq("(a\\1)", "(^ 'a')");
     254           5 :   CheckParseEq("(\\1a)", "(^ 'a')");
     255           5 :   CheckParseEq("(\\2)(\\1)", "(: (^ (<- 2)) (^ (<- 1)))");
     256           5 :   CheckParseEq("(?=a)?a", "'a'");
     257           5 :   CheckParseEq("(?=a){0,10}a", "'a'");
     258           5 :   CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
     259           5 :   CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
     260           5 :   CheckParseEq("(?!a)?a", "'a'");
     261           5 :   CheckParseEq("\\1(a)", "(: (<- 1) (^ 'a'))");
     262           5 :   CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
     263             :   CheckParseEq("(?!\\1(a\\1)\\1)\\1",
     264           5 :                "(: (-> - (: (<- 1) (^ 'a') (<- 1))) (<- 1))");
     265             :   CheckParseEq("\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1",
     266           5 :                "(: (<- 1) (<- 2) (^ (: 'a' (?: (^ 'b')) (<- 2))) (<- 1))");
     267             :   CheckParseEq("\\1\\2(a(?<=\\1(b\\1\\2))\\2)\\1",
     268           5 :                "(: (<- 1) (<- 2) (^ (: 'a' (<- + (^ 'b')) (<- 2))) (<- 1))");
     269           5 :   CheckParseEq("[\\0]", "[\\x00]");
     270           5 :   CheckParseEq("[\\11]", "[\\x09]");
     271           5 :   CheckParseEq("[\\11a]", "[\\x09 a]");
     272           5 :   CheckParseEq("[\\011]", "[\\x09]");
     273           5 :   CheckParseEq("[\\00011]", "[\\x00 1 1]");
     274           5 :   CheckParseEq("[\\118]", "[\\x09 8]");
     275           5 :   CheckParseEq("[\\111]", "[I]");
     276           5 :   CheckParseEq("[\\1111]", "[I 1]");
     277           5 :   CheckParseEq("\\x34", "'\x34'");
     278           5 :   CheckParseEq("\\x60", "'\x60'");
     279           5 :   CheckParseEq("\\x3z", "'x3z'");
     280           5 :   CheckParseEq("\\c", "'\\c'");
     281           5 :   CheckParseEq("\\u0034", "'\x34'");
     282           5 :   CheckParseEq("\\u003z", "'u003z'");
     283           5 :   CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");
     284           5 :   CheckParseEq("^^^$$$\\b\\b\\b\\b", "(: @^i @$i @b)");
     285           5 :   CheckParseEq("\\b\\b\\b\\b\\B\\B\\B\\B\\b\\b\\b\\b", "(: @b @B @b)");
     286           5 :   CheckParseEq("\\b\\B\\b", "(: @b @B @b)");
     287             : 
     288             :   // Unicode regexps
     289           5 :   CheckParseEq("\\u{12345}", "'\\ud808\\udf45'", true);
     290             :   CheckParseEq("\\u{12345}\\u{23456}", "(! '\\ud808\\udf45' '\\ud84d\\udc56')",
     291           5 :                true);
     292             :   CheckParseEq("\\u{12345}|\\u{23456}", "(| '\\ud808\\udf45' '\\ud84d\\udc56')",
     293           5 :                true);
     294           5 :   CheckParseEq("\\u{12345}{3}", "(# 3 3 g '\\ud808\\udf45')", true);
     295           5 :   CheckParseEq("\\u{12345}*", "(# 0 - g '\\ud808\\udf45')", true);
     296             : 
     297           5 :   CheckParseEq("\\ud808\\udf45*", "(# 0 - g '\\ud808\\udf45')", true);
     298             :   CheckParseEq("[\\ud808\\udf45-\\ud809\\udccc]", "[\\u{012345}-\\u{0124cc}]",
     299           5 :                true);
     300             : 
     301           5 :   CHECK_SIMPLE("", false);
     302           5 :   CHECK_SIMPLE("a", true);
     303           5 :   CHECK_SIMPLE("a|b", false);
     304           5 :   CHECK_SIMPLE("a\\n", false);
     305           5 :   CHECK_SIMPLE("^a", false);
     306           5 :   CHECK_SIMPLE("a$", false);
     307           5 :   CHECK_SIMPLE("a\\b!", false);
     308           5 :   CHECK_SIMPLE("a\\Bb", false);
     309           5 :   CHECK_SIMPLE("a*", false);
     310           5 :   CHECK_SIMPLE("a*?", false);
     311           5 :   CHECK_SIMPLE("a?", false);
     312           5 :   CHECK_SIMPLE("a??", false);
     313           5 :   CHECK_SIMPLE("a{0,1}?", false);
     314           5 :   CHECK_SIMPLE("a{1,1}?", false);
     315           5 :   CHECK_SIMPLE("a{1,2}?", false);
     316           5 :   CHECK_SIMPLE("a+?", false);
     317           5 :   CHECK_SIMPLE("(a)", false);
     318           5 :   CHECK_SIMPLE("(a)\\1", false);
     319           5 :   CHECK_SIMPLE("(\\1a)", false);
     320           5 :   CHECK_SIMPLE("\\1(a)", false);
     321           5 :   CHECK_SIMPLE("a\\s", false);
     322           5 :   CHECK_SIMPLE("a\\S", false);
     323           5 :   CHECK_SIMPLE("a\\d", false);
     324           5 :   CHECK_SIMPLE("a\\D", false);
     325           5 :   CHECK_SIMPLE("a\\w", false);
     326           5 :   CHECK_SIMPLE("a\\W", false);
     327           5 :   CHECK_SIMPLE("a.", false);
     328           5 :   CHECK_SIMPLE("a\\q", false);
     329           5 :   CHECK_SIMPLE("a[a]", false);
     330           5 :   CHECK_SIMPLE("a[^a]", false);
     331           5 :   CHECK_SIMPLE("a[a-z]", false);
     332           5 :   CHECK_SIMPLE("a[\\q]", false);
     333           5 :   CHECK_SIMPLE("a(?:b)", false);
     334           5 :   CHECK_SIMPLE("a(?=b)", false);
     335           5 :   CHECK_SIMPLE("a(?!b)", false);
     336           5 :   CHECK_SIMPLE("\\x60", false);
     337           5 :   CHECK_SIMPLE("\\u0060", false);
     338           5 :   CHECK_SIMPLE("\\cA", false);
     339           5 :   CHECK_SIMPLE("\\q", false);
     340           5 :   CHECK_SIMPLE("\\1112", false);
     341           5 :   CHECK_SIMPLE("\\0", false);
     342           5 :   CHECK_SIMPLE("(a)\\1", false);
     343           5 :   CHECK_SIMPLE("(?=a)?a", false);
     344           5 :   CHECK_SIMPLE("(?!a)?a\\1", false);
     345           5 :   CHECK_SIMPLE("(?:(?=a))a\\1", false);
     346             : 
     347           5 :   CheckParseEq("a{}", "'a{}'");
     348           5 :   CheckParseEq("a{,}", "'a{,}'");
     349           5 :   CheckParseEq("a{", "'a{'");
     350           5 :   CheckParseEq("a{z}", "'a{z}'");
     351           5 :   CheckParseEq("a{1z}", "'a{1z}'");
     352           5 :   CheckParseEq("a{12z}", "'a{12z}'");
     353           5 :   CheckParseEq("a{12,", "'a{12,'");
     354           5 :   CheckParseEq("a{12,3b", "'a{12,3b'");
     355           5 :   CheckParseEq("{}", "'{}'");
     356           5 :   CheckParseEq("{,}", "'{,}'");
     357           5 :   CheckParseEq("{", "'{'");
     358           5 :   CheckParseEq("{z}", "'{z}'");
     359           5 :   CheckParseEq("{1z}", "'{1z}'");
     360           5 :   CheckParseEq("{12z}", "'{12z}'");
     361           5 :   CheckParseEq("{12,", "'{12,'");
     362           5 :   CheckParseEq("{12,3b", "'{12,3b'");
     363             : 
     364           5 :   CHECK_MIN_MAX("a", 1, 1);
     365           5 :   CHECK_MIN_MAX("abc", 3, 3);
     366           5 :   CHECK_MIN_MAX("a[bc]d", 3, 3);
     367           5 :   CHECK_MIN_MAX("a|bc", 1, 2);
     368           5 :   CHECK_MIN_MAX("ab|c", 1, 2);
     369           5 :   CHECK_MIN_MAX("a||bc", 0, 2);
     370           5 :   CHECK_MIN_MAX("|", 0, 0);
     371           5 :   CHECK_MIN_MAX("(?:ab)", 2, 2);
     372           5 :   CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
     373           5 :   CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
     374           5 :   CHECK_MIN_MAX("(ab)", 2, 2);
     375           5 :   CHECK_MIN_MAX("(ab|cde)", 2, 3);
     376           5 :   CHECK_MIN_MAX("(ab)\\1", 2, RegExpTree::kInfinity);
     377           5 :   CHECK_MIN_MAX("(ab|cde)\\1", 2, RegExpTree::kInfinity);
     378           5 :   CHECK_MIN_MAX("(?:ab)?", 0, 2);
     379           5 :   CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
     380           5 :   CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
     381           5 :   CHECK_MIN_MAX("a?", 0, 1);
     382           5 :   CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
     383           5 :   CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
     384           5 :   CHECK_MIN_MAX("a??", 0, 1);
     385           5 :   CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
     386           5 :   CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
     387           5 :   CHECK_MIN_MAX("(?:a?)?", 0, 1);
     388           5 :   CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
     389           5 :   CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
     390           5 :   CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
     391           5 :   CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
     392           5 :   CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
     393           5 :   CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
     394           5 :   CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
     395           5 :   CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
     396           5 :   CHECK_MIN_MAX("a{0}", 0, 0);
     397           5 :   CHECK_MIN_MAX("(?:a+){0}", 0, 0);
     398           5 :   CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
     399           5 :   CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
     400           5 :   CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
     401           5 :   CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
     402           5 :   CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
     403           5 :   CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
     404           5 :   CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
     405           5 :   CHECK_MIN_MAX("a\\bc", 2, 2);
     406           5 :   CHECK_MIN_MAX("a\\Bc", 2, 2);
     407           5 :   CHECK_MIN_MAX("a\\sc", 3, 3);
     408           5 :   CHECK_MIN_MAX("a\\Sc", 3, 3);
     409           5 :   CHECK_MIN_MAX("a(?=b)c", 2, 2);
     410           5 :   CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
     411           5 :   CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
     412             : 
     413             :   CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<a>",
     414           5 :                "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))", true);
     415             :   CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<b>",
     416           5 :                "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))", true);
     417             :   CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<c>",
     418           5 :                "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))", true);
     419           5 :   CheckParseEq("(?<a>a)\\k<a>", "(: (^ 'a') (<- 1))", true);
     420           5 :   CheckParseEq("(?<a>a\\k<a>)", "(^ 'a')", true);
     421           5 :   CheckParseEq("(?<a>\\k<a>a)", "(^ 'a')", true);
     422           5 :   CheckParseEq("(?<a>\\k<b>)(?<b>\\k<a>)", "(: (^ (<- 2)) (^ (<- 1)))", true);
     423           5 :   CheckParseEq("\\k<a>(?<a>a)", "(: (<- 1) (^ 'a'))", true);
     424             : 
     425           5 :   CheckParseEq("(?<\\u{03C0}>a)", "(^ 'a')", true);
     426           5 :   CheckParseEq("(?<\\u03C0>a)", "(^ 'a')", true);
     427           5 : }
     428             : 
     429       26644 : TEST(ParserRegression) {
     430           5 :   CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])");
     431           5 :   CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
     432           5 :   CheckParseEq("{", "'{'");
     433           5 :   CheckParseEq("a|", "(| 'a' %)");
     434           5 : }
     435             : 
     436         105 : static void ExpectError(const char* input, const char* expected,
     437             :                         bool unicode = false) {
     438             :   Isolate* isolate = CcTest::i_isolate();
     439             : 
     440         210 :   v8::HandleScope scope(CcTest::isolate());
     441         210 :   Zone zone(isolate->allocator(), ZONE_NAME);
     442         105 :   FlatStringReader reader(isolate, CStrVector(input));
     443             :   RegExpCompileData result;
     444             :   JSRegExp::Flags flags = JSRegExp::kNone;
     445         105 :   if (unicode) flags |= JSRegExp::kUnicode;
     446         105 :   CHECK(!v8::internal::RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
     447             :                                                  &result));
     448         105 :   CHECK_NULL(result.tree);
     449         105 :   CHECK(!result.error.is_null());
     450         105 :   std::unique_ptr<char[]> str = result.error->ToCString(ALLOW_NULLS);
     451         105 :   CHECK_EQ(0, strcmp(expected, str.get()));
     452         105 : }
     453             : 
     454             : 
     455       26644 : TEST(Errors) {
     456             :   const char* kEndBackslash = "\\ at end of pattern";
     457           5 :   ExpectError("\\", kEndBackslash);
     458             :   const char* kUnterminatedGroup = "Unterminated group";
     459           5 :   ExpectError("(foo", kUnterminatedGroup);
     460             :   const char* kInvalidGroup = "Invalid group";
     461           5 :   ExpectError("(?", kInvalidGroup);
     462             :   const char* kUnterminatedCharacterClass = "Unterminated character class";
     463           5 :   ExpectError("[", kUnterminatedCharacterClass);
     464           5 :   ExpectError("[a-", kUnterminatedCharacterClass);
     465             :   const char* kNothingToRepeat = "Nothing to repeat";
     466           5 :   ExpectError("*", kNothingToRepeat);
     467           5 :   ExpectError("?", kNothingToRepeat);
     468           5 :   ExpectError("+", kNothingToRepeat);
     469           5 :   ExpectError("{1}", kNothingToRepeat);
     470           5 :   ExpectError("{1,2}", kNothingToRepeat);
     471           5 :   ExpectError("{1,}", kNothingToRepeat);
     472             : 
     473             :   // Check that we don't allow more than kMaxCapture captures
     474             :   const int kMaxCaptures = 1 << 16;  // Must match RegExpParser::kMaxCaptures.
     475             :   const char* kTooManyCaptures = "Too many captures";
     476          10 :   std::ostringstream os;
     477      655375 :   for (int i = 0; i <= kMaxCaptures; i++) {
     478      327685 :     os << "()";
     479             :   }
     480          10 :   ExpectError(os.str().c_str(), kTooManyCaptures);
     481             : 
     482             :   const char* kInvalidCaptureName = "Invalid capture group name";
     483           5 :   ExpectError("(?<>.)", kInvalidCaptureName, true);
     484           5 :   ExpectError("(?<1>.)", kInvalidCaptureName, true);
     485           5 :   ExpectError("(?<_%>.)", kInvalidCaptureName, true);
     486           5 :   ExpectError("\\k<a", kInvalidCaptureName, true);
     487             :   const char* kDuplicateCaptureName = "Duplicate capture group name";
     488           5 :   ExpectError("(?<a>.)(?<a>.)", kDuplicateCaptureName, true);
     489             :   const char* kInvalidUnicodeEscape = "Invalid Unicode escape sequence";
     490           5 :   ExpectError("(?<\\u{FISK}", kInvalidUnicodeEscape, true);
     491             :   const char* kInvalidCaptureReferenced = "Invalid named capture referenced";
     492           5 :   ExpectError("\\k<a>", kInvalidCaptureReferenced, true);
     493           5 :   ExpectError("(?<b>)\\k<a>", kInvalidCaptureReferenced, true);
     494             :   const char* kInvalidNamedReference = "Invalid named reference";
     495           5 :   ExpectError("\\ka", kInvalidNamedReference, true);
     496           5 : }
     497             : 
     498             : 
     499      327680 : static bool IsDigit(uc16 c) {  // Reference predicate: true iff c is one of '0'..'9'.
     500      655360 :   return ('0' <= c && c <= '9');
     501             : }
     502             : 
     503             : 
     504      327680 : static bool NotDigit(uc16 c) {  // Reference predicate: the complement of IsDigit.
     505      327680 :   return !IsDigit(c);
     506             : }
     507             : 
     508             : 
     509      327680 : static bool IsWhiteSpaceOrLineTerminator(uc16 c) {  // Reference predicate that forwards to the v8::internal function of the same name.
     510             :   // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
     511             :   // WhiteSpace (7.2) and LineTerminator (7.3) values.
     512      983040 :   return v8::internal::IsWhiteSpaceOrLineTerminator(c);
     513             : }
     514             : 
     515             : 
     516      327680 : static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {  // Complement of IsWhiteSpaceOrLineTerminator. NOTE(review): "Termiantor" is a misspelling of "Terminator"; a rename must also update the caller in TEST(CharacterClassEscapes).
     517      327680 :   return !IsWhiteSpaceOrLineTerminator(c);
     518             : }
     519             : 
     520             : 
     521      327680 : static bool NotWord(uc16 c) {  // Reference predicate: the complement of IsRegExpWord.
     522      327680 :   return !IsRegExpWord(c);
     523             : }
     524             : 
     525             : 
     526          35 : static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {  // Checks that the ranges AddClassEscape builds for escape letter |c| contain exactly the values accepted by |pred|.
     527          70 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     528             :   ZoneList<CharacterRange>* ranges =
     529             :       new(&zone) ZoneList<CharacterRange>(2, &zone);
     530          35 :   CharacterRange::AddClassEscape(c, ranges, &zone);
     531     4587555 :   for (uc32 i = 0; i < (1 << 16); i++) {  // Every 16-bit value, 0 through 0xFFFF.
     532             :     bool in_class = false;
     533    37773055 :     for (int j = 0; !in_class && j < ranges->length(); j++) {  // Linear scan that stops at the first range containing i.
     534             :       CharacterRange& range = ranges->at(j);
     535    11498745 :       in_class = (range.from() <= i && i <= range.to());  // Both bounds are treated as inclusive.
     536             :     }
     537     2293760 :     CHECK_EQ(pred(i), in_class);  // The predicate and the generated ranges must agree on every value.
     538             :   }
     539          35 : }
     540             : 
     541             : 
     542       26644 : TEST(CharacterClassEscapes) {  // Runs TestCharacterClassEscapes once per escape letter, pairing each with its reference predicate.
     543           5 :   TestCharacterClassEscapes('.', IsRegExpNewline);
     544           5 :   TestCharacterClassEscapes('d', IsDigit);
     545           5 :   TestCharacterClassEscapes('D', NotDigit);
     546           5 :   TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
     547           5 :   TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);  // Name spelled as it is declared earlier in this file.
     548           5 :   TestCharacterClassEscapes('w', IsRegExpWord);
     549           5 :   TestCharacterClassEscapes('W', NotWord);
     550           5 : }
     551             : 
     552             : 
     553           5 : static RegExpNode* Compile(const char* input, bool multiline, bool unicode,  // Parses |input| with the requested flags and runs the regexp compiler on it.
     554             :                            bool is_one_byte, Zone* zone) {  // Returns compile_data.node after compilation, or nullptr when parsing fails.
     555             :   Isolate* isolate = CcTest::i_isolate();
     556           5 :   FlatStringReader reader(isolate, CStrVector(input));
     557             :   RegExpCompileData compile_data;
     558             :   JSRegExp::Flags flags = JSRegExp::kNone;
     559           5 :   if (multiline) flags |= JSRegExp::kMultiline;  // Flags are OR-ed together so that requesting both keeps both.
     560           5 :   if (unicode) flags |= JSRegExp::kUnicode;
     561           5 :   if (!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), zone,
     562             :                                                &reader, flags, &compile_data))
     563             :     return nullptr;  // Parse failure: nothing is compiled.
     564             :   Handle<String> pattern = isolate->factory()
     565          10 :                                ->NewStringFromUtf8(CStrVector(input))
     566           5 :                                .ToHandleChecked();
     567             :   Handle<String> sample_subject =
     568          10 :       isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();  // An empty string is passed as the sample subject.
     569             :   RegExpEngine::Compile(isolate, zone, &compile_data, flags, pattern,  // The call's return value is not inspected; the node is read from compile_data.
     570           5 :                         sample_subject, is_one_byte);
     571           5 :   return compile_data.node;
     572             : }
     573             : 
     574             : 
     575           5 : static void Execute(const char* input, bool multiline, bool unicode,  // Compiles |input| in a fresh Zone; no matching is performed here.
     576             :                     bool is_one_byte, bool dot_output = false) {  // |dot_output| is only consulted in DEBUG builds.
     577          10 :   v8::HandleScope scope(CcTest::isolate());
     578          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     579           5 :   RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone);
     580             :   USE(node);  // node is otherwise read only inside the DEBUG-only block below.
     581             : #ifdef DEBUG
     582             :   if (dot_output) {
     583             :     RegExpEngine::DotPrint(input, node, false);
     584             :   }
     585             : #endif  // DEBUG
     586           5 : }
     587             : 
     588             : 
     589             : class TestConfig {  // Configuration type used to instantiate ZoneSplayTree<TestConfig> in TEST(SplayTreeSimple): int keys, int values.
     590             :  public:
     591             :   typedef int Key;
     592             :   typedef int Value;
     593             :   static const int kNoKey;  // Defined after the class as 0.
     594             :   static int NoValue() { return 0; }
     595             :   static inline int Compare(int a, int b) {  // Three-way comparison: -1 if a < b, 1 if a > b, 0 if equal.
     596     4448805 :     if (a < b)
     597             :       return -1;
     598     2208405 :     else if (a > b)
     599             :       return 1;
     600             :     else
     601             :       return 0;
     602             :   }
     603             : };
     604             : 
     605             : 
     606             : const int TestConfig::kNoKey = 0;  // Out-of-class definition of the static member declared in TestConfig.
     607             : 
     608             : 
     609             : static unsigned PseudoRandom(int i, int j) {  // Deterministic mix of i and j; the same arguments always give the same result.
     610        5940 :   return ~(~((i * 781) ^ (j * 329)));  // The two bitwise negations cancel, so this is (i * 781) ^ (j * 329) converted to unsigned.
     611             : }
     612             : 
     613             : 
     614       26644 : TEST(SplayTreeSimple) {  // Drives ZoneSplayTree with pseudo-random inserts and removes, comparing it with a bool-array model after every change.
     615             :   static const unsigned kLimit = 1000;
     616          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     617             :   ZoneSplayTree<TestConfig> tree(&zone);
     618             :   bool seen[kLimit];  // seen[k] records whether key k is currently expected to be in the tree.
     619        5005 :   for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
     620             : #define CHECK_MAPS_EQUAL() do {                                      \
     621             :     for (unsigned k = 0; k < kLimit; k++)                            \
     622             :       CHECK_EQ(seen[k], tree.Find(k, &loc));                         \
     623             :   } while (false)  // Relies on a Locator named loc being in scope at each expansion site; the macro is not #undef'd in this test.
     624         505 :   for (int i = 0; i < 50; i++) {
     625        1880 :     for (int j = 0; j < 50; j++) {
     626        1065 :       int next = PseudoRandom(i, j) % kLimit;  // Key in [0, kLimit).
     627        1065 :       if (seen[next]) {
     628             :         // We've already seen this one.  Check the value and remove
     629             :         // it.
     630             :         ZoneSplayTree<TestConfig>::Locator loc;
     631          85 :         CHECK(tree.Find(next, &loc));
     632          85 :         CHECK_EQ(next, loc.key());
     633          85 :         CHECK_EQ(3 * next, loc.value());  // Values are stored as 3 * key by the insert branch below.
     634          85 :         tree.Remove(next);
     635          85 :         seen[next] = false;
     636      170085 :         CHECK_MAPS_EQUAL();
     637             :       } else {
     638             :         // Check that it wasn't there already and then add it.
     639             :         ZoneSplayTree<TestConfig>::Locator loc;
     640         980 :         CHECK(!tree.Find(next, &loc));
     641         980 :         CHECK(tree.Insert(next, &loc));
     642         980 :         CHECK_EQ(next, loc.key());
     643         980 :         loc.set_value(3 * next);
     644         980 :         seen[next] = true;
     645     1960980 :         CHECK_MAPS_EQUAL();
     646             :       }
     647        1065 :       int val = PseudoRandom(j, i) % kLimit;
     648        1065 :       if (seen[val]) {
     649             :         ZoneSplayTree<TestConfig>::Locator loc;
     650         135 :         CHECK(tree.FindGreatestLessThan(val, &loc));
     651         270 :         CHECK_EQ(loc.key(), val);  // The test expects the lookup to land on val itself when val is present.
     652             :         break;  // Leaves the inner j loop; the outer loop continues with the next i.
     653             :       }
     654        1860 :       val = PseudoRandom(i + j, i - j) % kLimit;
     655         930 :       if (seen[val]) {
     656             :         ZoneSplayTree<TestConfig>::Locator loc;
     657         115 :         CHECK(tree.FindLeastGreaterThan(val, &loc));
     658         230 :         CHECK_EQ(loc.key(), val);  // Same expectation as above for the opposite search direction.
     659             :         break;
     660             :       }
     661             :     }
     662             :   }
     663           5 : }
     664             : 
     665             : 
     666       26644 : TEST(DispatchTableConstruction) {  // Fills a DispatchTable with pseudo-random ranges and compares Get(p) with a brute-force membership check.
     667             :   // Initialize test data.
     668             :   static const int kLimit = 1000;
     669             :   static const int kRangeCount = 8;
     670             :   static const int kRangeSize = 16;
     671             :   uc16 ranges[kRangeCount][2 * kRangeSize];  // Each row holds kRangeSize (from, to) pairs as 2 * kRangeSize endpoints.
     672          85 :   for (int i = 0; i < kRangeCount; i++) {
     673          40 :     Vector<uc16> range(ranges[i], 2 * kRangeSize);
     674        2600 :     for (int j = 0; j < 2 * kRangeSize; j++) {
     675        3840 :       range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
     676             :     }
     677             :     range.Sort();  // After sorting, every consecutive pair satisfies from <= to, as checked next.
     678        2520 :     for (int j = 1; j < 2 * kRangeSize; j++) {
     679        3720 :       CHECK(range[j-1] <= range[j]);
     680             :     }
     681             :   }
     682             :   // Enter test data into dispatch table.
     683          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     684             :   DispatchTable table(&zone);
     685          85 :   for (int i = 0; i < kRangeCount; i++) {
     686          40 :     uc16* range = ranges[i];
     687        1320 :     for (int j = 0; j < 2 * kRangeSize; j += 2)
     688         640 :       table.AddRange(CharacterRange::Range(range[j], range[j + 1]), i, &zone);  // The row index i is the value queried later through outs->Get(j).
     689             :   }
     690             :   // Check that the table looks as we would expect
     691       10005 :   for (int p = 0; p < kLimit; p++) {
     692        5000 :     OutSet* outs = table.Get(p);
     693       85000 :     for (int j = 0; j < kRangeCount; j++) {
     694       40000 :       uc16* range = ranges[j];
     695             :       bool is_on = false;
     696      997200 :       for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)  // Stops at the first pair of row j that contains p.
     697      478600 :         is_on = (range[k] <= p && p <= range[k + 1]);  // Both endpoints are treated as inclusive.
     698       40000 :       CHECK_EQ(is_on, outs->Get(j));
     699             :     }
     700             :   }
     701           5 : }
     702             : 
     703             : 
     704             : // Test of debug-only syntax.
     705             : #ifdef DEBUG
     706             : 
     707             : TEST(ParsePossessiveRepetition) {  // Possessive quantifiers must parse only while FLAG_regexp_possessive_quantifier is set.
     708             :   bool old_flag_value = FLAG_regexp_possessive_quantifier;  // Saved so the flag can be restored at the end of the test.
     709             : 
     710             :   // Enable possessive quantifier syntax.
     711             :   FLAG_regexp_possessive_quantifier = true;
     712             : 
     713             :   CheckParseEq("a*+", "(# 0 - p 'a')");
     714             :   CheckParseEq("a++", "(# 1 - p 'a')");
     715             :   CheckParseEq("a?+", "(# 0 1 p 'a')");
     716             :   CheckParseEq("a{10,20}+", "(# 10 20 p 'a')");
     717             :   CheckParseEq("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");
     718             : 
     719             :   // Disable possessive quantifier syntax.
     720             :   FLAG_regexp_possessive_quantifier = false;
     721             : 
     722             :   CHECK_PARSE_ERROR("a*+");
     723             :   CHECK_PARSE_ERROR("a++");
     724             :   CHECK_PARSE_ERROR("a?+");
     725             :   CHECK_PARSE_ERROR("a{10,20}+");
     726             :   CHECK_PARSE_ERROR("a{10,20}+b");
     727             : 
     728             :   FLAG_regexp_possessive_quantifier = old_flag_value;  // Restore the value saved at the top of the test.
     729             : }
     730             : 
     731             : #endif  // DEBUG
     732             : 
     733             : // Tests of interpreter.
     734             : 
     735             : #if V8_TARGET_ARCH_IA32  // Alias ArchRegExpMacroAssembler to the macro assembler class for the target architecture.
     736             : typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
     737             : #elif V8_TARGET_ARCH_X64
     738             : typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
     739             : #elif V8_TARGET_ARCH_ARM
     740             : typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
     741             : #elif V8_TARGET_ARCH_ARM64
     742             : typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler;
     743             : #elif V8_TARGET_ARCH_S390
     744             : typedef RegExpMacroAssemblerS390 ArchRegExpMacroAssembler;
     745             : #elif V8_TARGET_ARCH_PPC
     746             : typedef RegExpMacroAssemblerPPC ArchRegExpMacroAssembler;
     747             : #elif V8_TARGET_ARCH_MIPS
     748             : typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
     749             : #elif V8_TARGET_ARCH_MIPS64
     750             : typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;  // Same class name as the MIPS branch above.
     751             : #elif V8_TARGET_ARCH_X87
     752             : typedef RegExpMacroAssemblerX87 ArchRegExpMacroAssembler;
     753             : #endif  // NOTE(review): there is no #else branch, so the alias is left undeclared on any other target.
     754             : 
     755             : class ContextInitializer {  // Scoped helper: opens a HandleScope, creates a new v8::Context and enters it; the destructor exits the context.
     756             :  public:
     757          44 :   ContextInitializer()
     758             :       : scope_(CcTest::isolate()),
     759          88 :         env_(v8::Context::New(CcTest::isolate())) {
     760          44 :     env_->Enter();
     761          44 :   }
     762          44 :   ~ContextInitializer() {
     763          44 :     env_->Exit();
     764             :   }
     765             :  private:
     766             :   v8::HandleScope scope_;  // Declared before env_, so it is constructed first and the context handle is created inside this scope.
     767             :   v8::Local<v8::Context> env_;
     768             : };
     769             : 
     770             : static ArchRegExpMacroAssembler::Result Execute(Code code, String input,  // Forwards to NativeRegExpMacroAssembler::Execute and casts the returned value to Result.
     771             :                                                 int start_offset,
     772             :                                                 Address input_start,
     773             :                                                 Address input_end,
     774             :                                                 int* captures) {
     775             :   return static_cast<NativeRegExpMacroAssembler::Result>(
     776          56 :       NativeRegExpMacroAssembler::Execute(code, input, start_offset,
     777             :                                           reinterpret_cast<byte*>(input_start),
     778             :                                           reinterpret_cast<byte*>(input_end),
     779          56 :                                           captures, 0, CcTest::i_isolate()));  // NOTE(review): the literal 0 is presumably an output-size argument — confirm against the callee's signature.
     780             : }
     781             : 
     782       26643 : TEST(MacroAssemblerNativeSuccess) {  // A program consisting only of Succeed() must report SUCCESS and leave all four captures at -1.
     783           4 :   v8::V8::Initialize();
     784           4 :   ContextInitializer initializer;
     785             :   Isolate* isolate = CcTest::i_isolate();
     786             :   Factory* factory = isolate->factory();
     787           8 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     788             : 
     789             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
     790           8 :                              4);
     791             : 
     792           4 :   m.Succeed();
     793             : 
     794           4 :   Handle<String> source = factory->NewStringFromStaticChars("");
     795           4 :   Handle<Object> code_object = m.GetCode(source);
     796             :   Handle<Code> code = Handle<Code>::cast(code_object);
     797             : 
     798           4 :   int captures[4] = {42, 37, 87, 117};  // Sentinel values, so the checks below show that every entry was overwritten.
     799           4 :   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
     800             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
     801             :   Address start_adr = seq_input->GetCharsAddress();
     802             : 
     803             :   NativeRegExpMacroAssembler::Result result =
     804           4 :       Execute(*code,
     805             :               *input,
     806             :               0,
     807             :               start_adr,
     808             :               start_adr + seq_input->length(),
     809             :               captures);
     810             : 
     811           4 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
     812           4 :   CHECK_EQ(-1, captures[0]);
     813           4 :   CHECK_EQ(-1, captures[1]);
     814           4 :   CHECK_EQ(-1, captures[2]);
     815           4 :   CHECK_EQ(-1, captures[3]);
     816           4 : }
     817             : 
     818             : 
     819       26643 : TEST(MacroAssemblerNativeSimple) {  // Hand-assembles a matcher labelled "^foo" and runs it on one matching and one non-matching one-byte input.
     820           4 :   v8::V8::Initialize();
     821           4 :   ContextInitializer initializer;
     822             :   Isolate* isolate = CcTest::i_isolate();
     823             :   Factory* factory = isolate->factory();
     824           8 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     825             : 
     826             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
     827           8 :                              4);
     828             : 
     829           4 :   Label fail, backtrack;
     830           4 :   m.PushBacktrack(&fail);  // NOTE(review): the nullptr labels below presumably mean "backtrack", which would land on |fail| — confirm.
     831           4 :   m.CheckNotAtStart(0, nullptr);
     832           4 :   m.LoadCurrentCharacter(2, nullptr);
     833           4 :   m.CheckNotCharacter('o', nullptr);
     834           4 :   m.LoadCurrentCharacter(1, nullptr, false);  // NOTE(review): the trailing false is presumably a bounds-check switch — confirm.
     835           4 :   m.CheckNotCharacter('o', nullptr);
     836           4 :   m.LoadCurrentCharacter(0, nullptr, false);
     837           4 :   m.CheckNotCharacter('f', nullptr);
     838           4 :   m.WriteCurrentPositionToRegister(0, 0);  // Registers 0 and 1 are read back below as captures[0] == 0 and captures[1] == 3.
     839           4 :   m.WriteCurrentPositionToRegister(1, 3);
     840           4 :   m.AdvanceCurrentPosition(3);
     841           4 :   m.PushBacktrack(&backtrack);
     842           4 :   m.Succeed();
     843           4 :   m.Bind(&backtrack);
     844           4 :   m.Backtrack();
     845           4 :   m.Bind(&fail);
     846           4 :   m.Fail();
     847             : 
     848           4 :   Handle<String> source = factory->NewStringFromStaticChars("^foo");
     849           4 :   Handle<Object> code_object = m.GetCode(source);
     850             :   Handle<Code> code = Handle<Code>::cast(code_object);
     851             : 
     852           4 :   int captures[4] = {42, 37, 87, 117};  // Sentinel values, so the checks below show that every entry was overwritten.
     853           4 :   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
     854             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
     855             :   Address start_adr = seq_input->GetCharsAddress();
     856             : 
     857             :   NativeRegExpMacroAssembler::Result result =
     858           4 :       Execute(*code,
     859             :               *input,
     860             :               0,
     861             :               start_adr,
     862             :               start_adr + input->length(),
     863             :               captures);
     864             : 
     865           4 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
     866           4 :   CHECK_EQ(0, captures[0]);
     867           4 :   CHECK_EQ(3, captures[1]);
     868           4 :   CHECK_EQ(-1, captures[2]);
     869           4 :   CHECK_EQ(-1, captures[3]);
     870             : 
     871           4 :   input = factory->NewStringFromStaticChars("barbarbar");  // Second run: same code, input that does not start with "foo".
     872             :   seq_input = Handle<SeqOneByteString>::cast(input);
     873             :   start_adr = seq_input->GetCharsAddress();
     874             : 
     875           4 :   result = Execute(*code,
     876             :                    *input,
     877             :                    0,
     878             :                    start_adr,
     879             :                    start_adr + input->length(),
     880             :                    captures);
     881             : 
     882           4 :   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
     883           4 : }
     884             : 
     885             : 
     886       26643 : TEST(MacroAssemblerNativeSimpleUC16) {  // Two-byte variant: hand-assembles a matcher labelled "^foo" and runs it on one matching and one non-matching input.
     887           4 :   v8::V8::Initialize();
     888           4 :   ContextInitializer initializer;
     889             :   Isolate* isolate = CcTest::i_isolate();
     890             :   Factory* factory = isolate->factory();
     891           8 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     892             : 
     893             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
     894           8 :                              4);
     895             : 
     896           4 :   Label fail, backtrack;
     897           4 :   m.PushBacktrack(&fail);  // NOTE(review): the nullptr labels below presumably mean "backtrack", which would land on |fail| — confirm.
     898           4 :   m.CheckNotAtStart(0, nullptr);
     899           4 :   m.LoadCurrentCharacter(2, nullptr);
     900           4 :   m.CheckNotCharacter('o', nullptr);
     901           4 :   m.LoadCurrentCharacter(1, nullptr, false);  // NOTE(review): the trailing false is presumably a bounds-check switch — confirm.
     902           4 :   m.CheckNotCharacter('o', nullptr);
     903           4 :   m.LoadCurrentCharacter(0, nullptr, false);
     904           4 :   m.CheckNotCharacter('f', nullptr);
     905           4 :   m.WriteCurrentPositionToRegister(0, 0);  // Registers 0 and 1 are read back below as captures[0] == 0 and captures[1] == 3.
     906           4 :   m.WriteCurrentPositionToRegister(1, 3);
     907           4 :   m.AdvanceCurrentPosition(3);
     908           4 :   m.PushBacktrack(&backtrack);
     909           4 :   m.Succeed();
     910           4 :   m.Bind(&backtrack);
     911           4 :   m.Backtrack();
     912           4 :   m.Bind(&fail);
     913           4 :   m.Fail();
     914             : 
     915           4 :   Handle<String> source = factory->NewStringFromStaticChars("^foo");
     916           4 :   Handle<Object> code_object = m.GetCode(source);
     917             :   Handle<Code> code = Handle<Code>::cast(code_object);
     918             : 
     919           4 :   int captures[4] = {42, 37, 87, 117};  // Sentinel values, so the checks below show that every entry was overwritten.
     920             :   const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
     921           4 :                               static_cast<uc16>(0x2603)};  // The final unit 0x2603 does not fit in one byte; presumably there to keep the string two-byte.
     922           8 :   Handle<String> input = factory->NewStringFromTwoByte(
     923           4 :       Vector<const uc16>(input_data, 6)).ToHandleChecked();
     924             :   Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
     925             :   Address start_adr = seq_input->GetCharsAddress();
     926             : 
     927             :   NativeRegExpMacroAssembler::Result result =
     928           4 :       Execute(*code,
     929             :               *input,
     930             :               0,
     931             :               start_adr,
     932             :               start_adr + input->length() * 2,  // Two bytes per character, matching the second Execute call in this test.
     933             :               captures);
     934             : 
     935           4 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
     936           4 :   CHECK_EQ(0, captures[0]);
     937           4 :   CHECK_EQ(3, captures[1]);
     938           4 :   CHECK_EQ(-1, captures[2]);
     939           4 :   CHECK_EQ(-1, captures[3]);
     940             : 
     941             :   const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
     942           4 :                                static_cast<uc16>(0x2603)};
     943           8 :   input = factory->NewStringFromTwoByte(  // Second run: same code, input that does not start with "foo".
     944           4 :       Vector<const uc16>(input_data2, 9)).ToHandleChecked();
     945             :   seq_input = Handle<SeqTwoByteString>::cast(input);
     946             :   start_adr = seq_input->GetCharsAddress();
     947             : 
     948           4 :   result = Execute(*code,
     949             :                    *input,
     950             :                    0,
     951             :                    start_adr,
     952           4 :                    start_adr + input->length() * 2,
     953             :                    captures);
     954             : 
     955           4 :   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
     956           4 : }
     957             : 
     958             : 
     959       26643 : TEST(MacroAssemblerNativeBacktrack) {  // Requests a character at offset 10 of a 6-character input on two paths and expects the program to end in FAILURE.
     960           4 :   v8::V8::Initialize();
     961           4 :   ContextInitializer initializer;
     962             :   Isolate* isolate = CcTest::i_isolate();
     963             :   Factory* factory = isolate->factory();
     964           8 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     965             : 
     966             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
     967           8 :                              0);
     968             : 
     969           4 :   Label fail;
     970           4 :   Label backtrack;
     971           4 :   m.LoadCurrentCharacter(10, &fail);  // First attempt: an explicit label is supplied for the failing load.
     972           4 :   m.Succeed();
     973           4 :   m.Bind(&fail);
     974           4 :   m.PushBacktrack(&backtrack);
     975           4 :   m.LoadCurrentCharacter(10, nullptr);  // Second attempt: NOTE(review) a nullptr label presumably means "backtrack", which would land on |backtrack| — confirm.
     976           4 :   m.Succeed();
     977           4 :   m.Bind(&backtrack);
     978           4 :   m.Fail();
     979             : 
     980           4 :   Handle<String> source = factory->NewStringFromStaticChars("..........");
     981           4 :   Handle<Object> code_object = m.GetCode(source);
     982             :   Handle<Code> code = Handle<Code>::cast(code_object);
     983             : 
     984           4 :   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
     985             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
     986             :   Address start_adr = seq_input->GetCharsAddress();
     987             : 
     988           4 :   NativeRegExpMacroAssembler::Result result = Execute(
     989             :       *code, *input, 0, start_adr, start_adr + input->length(), nullptr);  // nullptr is passed for the captures array.
     990             : 
     991           4 :   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
     992           4 : }
     993             : 
     994             : 
     995       26643 : TEST(MacroAssemblerNativeBackReferenceLATIN1) {  // Hand-assembles a back-reference check over the one-byte input "fooofo" and verifies the recorded positions.
     996           4 :   v8::V8::Initialize();
     997           4 :   ContextInitializer initializer;
     998             :   Isolate* isolate = CcTest::i_isolate();
     999             :   Factory* factory = isolate->factory();
    1000           8 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1001             : 
    1002             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1003           8 :                              4);
    1004             : 
    1005           4 :   m.WriteCurrentPositionToRegister(0, 0);  // Registers 0 and 1 are read back below as output[0] == 0 and output[1] == 2.
    1006           4 :   m.AdvanceCurrentPosition(2);
    1007           4 :   m.WriteCurrentPositionToRegister(1, 0);
    1008           4 :   Label nomatch;
    1009           4 :   m.CheckNotBackReference(0, false, &nomatch);  // At offset 2 the input reads "oo" rather than the captured "fo", so control is expected to reach |nomatch|.
    1010           4 :   m.Fail();  // Reached only if the back reference unexpectedly matched at offset 2.
    1011           4 :   m.Bind(&nomatch);
    1012           4 :   m.AdvanceCurrentPosition(2);
    1013           4 :   Label missing_match;
    1014           4 :   m.CheckNotBackReference(0, false, &missing_match);  // At offset 4 the input reads "fo" again, so the check is expected to fall through.
    1015           4 :   m.WriteCurrentPositionToRegister(2, 0);  // Read back below as output[2] == 6.
    1016           4 :   m.Succeed();
    1017           4 :   m.Bind(&missing_match);
    1018           4 :   m.Fail();
    1019             : 
    1020           4 :   Handle<String> source = factory->NewStringFromStaticChars("^(..)..\\1");  // "\\1" keeps a literal backslash; a bare "\1" is an octal escape for the character with value 1.
    1021           4 :   Handle<Object> code_object = m.GetCode(source);
    1022             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1023             : 
    1024           4 :   Handle<String> input = factory->NewStringFromStaticChars("fooofo");
    1025             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1026             :   Address start_adr = seq_input->GetCharsAddress();
    1027             : 
    1028             :   int output[4];  // Not pre-initialised; all four entries are checked below, so the generated code is expected to write each one.
    1029             :   NativeRegExpMacroAssembler::Result result =
    1030           4 :       Execute(*code,
    1031             :               *input,
    1032             :               0,
    1033             :               start_adr,
    1034             :               start_adr + input->length(),
    1035             :               output);
    1036             : 
    1037           4 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1038           4 :   CHECK_EQ(0, output[0]);
    1039           4 :   CHECK_EQ(2, output[1]);
    1040           4 :   CHECK_EQ(6, output[2]);
    1041           4 :   CHECK_EQ(-1, output[3]);
    1042           4 : }
    1043             : 
    1044             : 
    1045       26643 : TEST(MacroAssemblerNativeBackReferenceUC16) {  // Two-byte variant of the back-reference test; the captured pair is 'f' followed by 0x2028.
    1046           4 :   v8::V8::Initialize();
    1047           4 :   ContextInitializer initializer;
    1048             :   Isolate* isolate = CcTest::i_isolate();
    1049             :   Factory* factory = isolate->factory();
    1050           8 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1051             : 
    1052             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
    1053           8 :                              4);
    1054             : 
    1055           4 :   m.WriteCurrentPositionToRegister(0, 0);  // Registers 0 and 1 are read back below as output[0] == 0 and output[1] == 2.
    1056           4 :   m.AdvanceCurrentPosition(2);
    1057           4 :   m.WriteCurrentPositionToRegister(1, 0);
    1058           4 :   Label nomatch;
    1059           4 :   m.CheckNotBackReference(0, false, &nomatch);  // At offset 2 the input reads 'o','o' rather than the captured pair, so control is expected to reach |nomatch|.
    1060           4 :   m.Fail();  // Reached only if the back reference unexpectedly matched at offset 2.
    1061           4 :   m.Bind(&nomatch);
    1062           4 :   m.AdvanceCurrentPosition(2);
    1063           4 :   Label missing_match;
    1064           4 :   m.CheckNotBackReference(0, false, &missing_match);  // At offset 4 the input repeats the captured pair, so the check is expected to fall through.
    1065           4 :   m.WriteCurrentPositionToRegister(2, 0);  // Read back below as output[2] == 6.
    1066           4 :   m.Succeed();
    1067           4 :   m.Bind(&missing_match);
    1068           4 :   m.Fail();
    1069             : 
    1070           4 :   Handle<String> source = factory->NewStringFromStaticChars("^(..)..\\1");  // "\\1" keeps a literal backslash; a bare "\1" is an octal escape for the character with value 1.
    1071           4 :   Handle<Object> code_object = m.GetCode(source);
    1072             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1073             : 
    1074           4 :   const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
    1075           8 :   Handle<String> input = factory->NewStringFromTwoByte(
    1076           4 :       Vector<const uc16>(input_data, 6)).ToHandleChecked();
    1077             :   Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
    1078             :   Address start_adr = seq_input->GetCharsAddress();
    1079             : 
    1080             :   int output[4];  // Not pre-initialised; all four entries are checked below, so the generated code is expected to write each one.
    1081             :   NativeRegExpMacroAssembler::Result result =
    1082           4 :       Execute(*code,
    1083             :               *input,
    1084             :               0,
    1085             :               start_adr,
    1086           4 :               start_adr + input->length() * 2,  // Two bytes per character in a two-byte string.
    1087             :               output);
    1088             : 
    1089           4 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1090           4 :   CHECK_EQ(0, output[0]);
    1091           4 :   CHECK_EQ(2, output[1]);
    1092           4 :   CHECK_EQ(6, output[2]);
    1093           4 :   CHECK_EQ(-1, output[3]);
    1094           4 : }
    1095             : 
    1096             : 
    1097             : 
    1098       26643 : TEST(MacroAssemblernativeAtStart) {
    1099           4 :   v8::V8::Initialize();
    1100           4 :   ContextInitializer initializer;
    1101             :   Isolate* isolate = CcTest::i_isolate();
    1102             :   Factory* factory = isolate->factory();
    1103           8 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1104             : 
    1105             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1106           8 :                              0);
    1107             : 
    1108           4 :   Label not_at_start, newline, fail;
    1109           4 :   m.CheckNotAtStart(0, &not_at_start);
    1110             :   // Check that prevchar = '\n' and current = 'f'.
    1111           4 :   m.CheckCharacter('\n', &newline);
    1112           4 :   m.Bind(&fail);
    1113           4 :   m.Fail();
    1114           4 :   m.Bind(&newline);
    1115           4 :   m.LoadCurrentCharacter(0, &fail);
    1116           4 :   m.CheckNotCharacter('f', &fail);
    1117           4 :   m.Succeed();
    1118             : 
    1119           4 :   m.Bind(&not_at_start);
    1120             :   // Check that prevchar = 'o' and current = 'b'.
    1121           4 :   Label prevo;
    1122           4 :   m.CheckCharacter('o', &prevo);
    1123           4 :   m.Fail();
    1124           4 :   m.Bind(&prevo);
    1125           4 :   m.LoadCurrentCharacter(0, &fail);
    1126           4 :   m.CheckNotCharacter('b', &fail);
    1127           4 :   m.Succeed();
    1128             : 
    1129           4 :   Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)");
    1130           4 :   Handle<Object> code_object = m.GetCode(source);
    1131             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1132             : 
    1133           4 :   Handle<String> input = factory->NewStringFromStaticChars("foobar");
    1134             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1135             :   Address start_adr = seq_input->GetCharsAddress();
    1136             : 
    1137           4 :   NativeRegExpMacroAssembler::Result result = Execute(
    1138             :       *code, *input, 0, start_adr, start_adr + input->length(), nullptr);
    1139             : 
    1140           4 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1141             : 
    1142           4 :   result = Execute(*code, *input, 3, start_adr + 3, start_adr + input->length(),
    1143             :                    nullptr);
    1144             : 
    1145           4 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1146           4 : }
    1147             : 
    1148             : 
    1149       26643 : TEST(MacroAssemblerNativeBackRefNoCase) {
    1150           4 :   v8::V8::Initialize();
    1151           4 :   ContextInitializer initializer;
    1152             :   Isolate* isolate = CcTest::i_isolate();
    1153             :   Factory* factory = isolate->factory();
    1154           8 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1155             : 
    1156             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1157           8 :                              4);
    1158             : 
    1159           4 :   Label fail, succ;
    1160             : 
    1161           4 :   m.WriteCurrentPositionToRegister(0, 0);
    1162           4 :   m.WriteCurrentPositionToRegister(2, 0);
    1163           4 :   m.AdvanceCurrentPosition(3);
    1164           4 :   m.WriteCurrentPositionToRegister(3, 0);
    1165           4 :   m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail);  // Match "AbC".
    1166           4 :   m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail);  // Match "ABC".
    1167           4 :   Label expected_fail;
    1168           4 :   m.CheckNotBackReferenceIgnoreCase(2, false, false, &expected_fail);
    1169           4 :   m.Bind(&fail);
    1170           4 :   m.Fail();
    1171             : 
    1172           4 :   m.Bind(&expected_fail);
    1173           4 :   m.AdvanceCurrentPosition(3);  // Skip "xYz"
    1174           4 :   m.CheckNotBackReferenceIgnoreCase(2, false, false, &succ);
    1175           4 :   m.Fail();
    1176             : 
    1177           4 :   m.Bind(&succ);
    1178           4 :   m.WriteCurrentPositionToRegister(1, 0);
    1179           4 :   m.Succeed();
    1180             : 
    1181             :   Handle<String> source =
    1182           4 :       factory->NewStringFromStaticChars("^(abc)\1\1(?!\1)...(?!\1)");
    1183           4 :   Handle<Object> code_object = m.GetCode(source);
    1184             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1185             : 
    1186           4 :   Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab");
    1187             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1188             :   Address start_adr = seq_input->GetCharsAddress();
    1189             : 
    1190             :   int output[4];
    1191             :   NativeRegExpMacroAssembler::Result result =
    1192           4 :       Execute(*code,
    1193             :               *input,
    1194             :               0,
    1195             :               start_adr,
    1196             :               start_adr + input->length(),
    1197             :               output);
    1198             : 
    1199           4 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1200           4 :   CHECK_EQ(0, output[0]);
    1201           4 :   CHECK_EQ(12, output[1]);
    1202           4 :   CHECK_EQ(0, output[2]);
    1203           4 :   CHECK_EQ(3, output[3]);
    1204           4 : }
    1205             : 
    1206             : 
    1207             : 
    1208       26643 : TEST(MacroAssemblerNativeRegisters) {
    1209           4 :   v8::V8::Initialize();
    1210           4 :   ContextInitializer initializer;
    1211             :   Isolate* isolate = CcTest::i_isolate();
    1212             :   Factory* factory = isolate->factory();
    1213           8 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1214             : 
    1215             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1216           8 :                              6);
    1217             : 
    1218           4 :   uc16 foo_chars[3] = {'f', 'o', 'o'};
    1219             :   Vector<const uc16> foo(foo_chars, 3);
    1220             : 
    1221             :   enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
    1222           4 :   Label fail;
    1223           4 :   Label backtrack;
    1224           4 :   m.WriteCurrentPositionToRegister(out1, 0);  // Output: [0]
    1225           4 :   m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
    1226           4 :   m.PushBacktrack(&backtrack);
    1227           4 :   m.WriteStackPointerToRegister(sp);
    1228             :   // Fill stack and registers
    1229           4 :   m.AdvanceCurrentPosition(2);
    1230           4 :   m.WriteCurrentPositionToRegister(out1, 0);
    1231           4 :   m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
    1232           4 :   m.PushBacktrack(&fail);
    1233             :   // Drop backtrack stack frames.
    1234           4 :   m.ReadStackPointerFromRegister(sp);
    1235             :   // And take the first backtrack (to &backtrack)
    1236           4 :   m.Backtrack();
    1237             : 
    1238           4 :   m.PushCurrentPosition();
    1239           4 :   m.AdvanceCurrentPosition(2);
    1240           4 :   m.PopCurrentPosition();
    1241             : 
    1242           4 :   m.Bind(&backtrack);
    1243           4 :   m.PopRegister(out1);
    1244           4 :   m.ReadCurrentPositionFromRegister(out1);
    1245           4 :   m.AdvanceCurrentPosition(3);
    1246           4 :   m.WriteCurrentPositionToRegister(out2, 0);  // [0,3]
    1247             : 
    1248           4 :   Label loop;
    1249           4 :   m.SetRegister(loop_cnt, 0);  // loop counter
    1250           4 :   m.Bind(&loop);
    1251           4 :   m.AdvanceRegister(loop_cnt, 1);
    1252           4 :   m.AdvanceCurrentPosition(1);
    1253           4 :   m.IfRegisterLT(loop_cnt, 3, &loop);
    1254           4 :   m.WriteCurrentPositionToRegister(out3, 0);  // [0,3,6]
    1255             : 
    1256           4 :   Label loop2;
    1257           4 :   m.SetRegister(loop_cnt, 2);  // loop counter
    1258           4 :   m.Bind(&loop2);
    1259           4 :   m.AdvanceRegister(loop_cnt, -1);
    1260           4 :   m.AdvanceCurrentPosition(1);
    1261           4 :   m.IfRegisterGE(loop_cnt, 0, &loop2);
    1262           4 :   m.WriteCurrentPositionToRegister(out4, 0);  // [0,3,6,9]
    1263             : 
    1264           4 :   Label loop3;
    1265           4 :   Label exit_loop3;
    1266           4 :   m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
    1267           4 :   m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
    1268           4 :   m.ReadCurrentPositionFromRegister(out3);
    1269           4 :   m.Bind(&loop3);
    1270           4 :   m.AdvanceCurrentPosition(1);
    1271           4 :   m.CheckGreedyLoop(&exit_loop3);
    1272           4 :   m.GoTo(&loop3);
    1273           4 :   m.Bind(&exit_loop3);
    1274           4 :   m.PopCurrentPosition();
    1275           4 :   m.WriteCurrentPositionToRegister(out5, 0);  // [0,3,6,9,9,-1]
    1276             : 
    1277           4 :   m.Succeed();
    1278             : 
    1279           4 :   m.Bind(&fail);
    1280           4 :   m.Fail();
    1281             : 
    1282           4 :   Handle<String> source = factory->NewStringFromStaticChars("<loop test>");
    1283           4 :   Handle<Object> code_object = m.GetCode(source);
    1284             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1285             : 
    1286             :   // String long enough for test (content doesn't matter).
    1287           4 :   Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo");
    1288             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1289             :   Address start_adr = seq_input->GetCharsAddress();
    1290             : 
    1291             :   int output[6];
    1292             :   NativeRegExpMacroAssembler::Result result =
    1293           4 :       Execute(*code,
    1294             :               *input,
    1295             :               0,
    1296             :               start_adr,
    1297             :               start_adr + input->length(),
    1298             :               output);
    1299             : 
    1300           4 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1301           4 :   CHECK_EQ(0, output[0]);
    1302           4 :   CHECK_EQ(3, output[1]);
    1303           4 :   CHECK_EQ(6, output[2]);
    1304           4 :   CHECK_EQ(9, output[3]);
    1305           4 :   CHECK_EQ(9, output[4]);
    1306           4 :   CHECK_EQ(-1, output[5]);
    1307           4 : }
    1308             : 
    1309             : 
    1310       26643 : TEST(MacroAssemblerStackOverflow) {
    1311           4 :   v8::V8::Initialize();
    1312           4 :   ContextInitializer initializer;
    1313             :   Isolate* isolate = CcTest::i_isolate();
    1314             :   Factory* factory = isolate->factory();
    1315           8 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1316             : 
    1317             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1318           8 :                              0);
    1319             : 
    1320           4 :   Label loop;
    1321           4 :   m.Bind(&loop);
    1322           4 :   m.PushBacktrack(&loop);
    1323           4 :   m.GoTo(&loop);
    1324             : 
    1325             :   Handle<String> source =
    1326           4 :       factory->NewStringFromStaticChars("<stack overflow test>");
    1327           4 :   Handle<Object> code_object = m.GetCode(source);
    1328             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1329             : 
    1330             :   // String long enough for test (content doesn't matter).
    1331           4 :   Handle<String> input = factory->NewStringFromStaticChars("dummy");
    1332             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1333             :   Address start_adr = seq_input->GetCharsAddress();
    1334             : 
    1335           4 :   NativeRegExpMacroAssembler::Result result = Execute(
    1336             :       *code, *input, 0, start_adr, start_adr + input->length(), nullptr);
    1337             : 
    1338           4 :   CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
    1339           4 :   CHECK(isolate->has_pending_exception());
    1340             :   isolate->clear_pending_exception();
    1341           4 : }
    1342             : 
    1343             : 
    1344       26643 : TEST(MacroAssemblerNativeLotsOfRegisters) {
    1345           4 :   v8::V8::Initialize();
    1346           4 :   ContextInitializer initializer;
    1347             :   Isolate* isolate = CcTest::i_isolate();
    1348             :   Factory* factory = isolate->factory();
    1349           8 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1350             : 
    1351             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1352           8 :                              2);
    1353             : 
    1354             :   // At least 2048, to ensure the allocated space for registers
    1355             :   // spans one full page.
    1356             :   const int large_number = 8000;
    1357           4 :   m.WriteCurrentPositionToRegister(large_number, 42);
    1358           4 :   m.WriteCurrentPositionToRegister(0, 0);
    1359           4 :   m.WriteCurrentPositionToRegister(1, 1);
    1360           4 :   Label done;
    1361           4 :   m.CheckNotBackReference(0, false, &done);  // Performs a system-stack push.
    1362           4 :   m.Bind(&done);
    1363           4 :   m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
    1364           4 :   m.PopRegister(1);
    1365           4 :   m.Succeed();
    1366             : 
    1367             :   Handle<String> source =
    1368           4 :       factory->NewStringFromStaticChars("<huge register space test>");
    1369           4 :   Handle<Object> code_object = m.GetCode(source);
    1370             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1371             : 
    1372             :   // String long enough for test (content doesn't matter).
    1373           4 :   Handle<String> input = factory->NewStringFromStaticChars("sample text");
    1374             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1375             :   Address start_adr = seq_input->GetCharsAddress();
    1376             : 
    1377             :   int captures[2];
    1378             :   NativeRegExpMacroAssembler::Result result =
    1379           4 :       Execute(*code,
    1380             :               *input,
    1381             :               0,
    1382             :               start_adr,
    1383             :               start_adr + input->length(),
    1384             :               captures);
    1385             : 
    1386           4 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1387           4 :   CHECK_EQ(0, captures[0]);
    1388           4 :   CHECK_EQ(42, captures[1]);
    1389             : 
    1390             :   isolate->clear_pending_exception();
    1391           4 : }
    1392             : 
    1393       26644 : TEST(MacroAssembler) {
    1394          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1395          10 :   RegExpMacroAssemblerIrregexp m(CcTest::i_isolate(), &zone);
    1396             :   // ^f(o)o.
    1397           5 :   Label start, fail, backtrack;
    1398             : 
    1399           5 :   m.SetRegister(4, 42);
    1400           5 :   m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
    1401           5 :   m.AdvanceRegister(4, 42);
    1402           5 :   m.GoTo(&start);
    1403           5 :   m.Fail();
    1404           5 :   m.Bind(&start);
    1405           5 :   m.PushBacktrack(&fail);
    1406           5 :   m.CheckNotAtStart(0, nullptr);
    1407           5 :   m.LoadCurrentCharacter(0, nullptr);
    1408           5 :   m.CheckNotCharacter('f', nullptr);
    1409           5 :   m.LoadCurrentCharacter(1, nullptr);
    1410           5 :   m.CheckNotCharacter('o', nullptr);
    1411           5 :   m.LoadCurrentCharacter(2, nullptr);
    1412           5 :   m.CheckNotCharacter('o', nullptr);
    1413           5 :   m.WriteCurrentPositionToRegister(0, 0);
    1414           5 :   m.WriteCurrentPositionToRegister(1, 3);
    1415           5 :   m.WriteCurrentPositionToRegister(2, 1);
    1416           5 :   m.WriteCurrentPositionToRegister(3, 2);
    1417           5 :   m.AdvanceCurrentPosition(3);
    1418           5 :   m.PushBacktrack(&backtrack);
    1419           5 :   m.Succeed();
    1420           5 :   m.Bind(&backtrack);
    1421           5 :   m.ClearRegisters(2, 3);
    1422           5 :   m.Backtrack();
    1423           5 :   m.Bind(&fail);
    1424           5 :   m.PopRegister(0);
    1425           5 :   m.Fail();
    1426             : 
    1427             :   Isolate* isolate = CcTest::i_isolate();
    1428             :   Factory* factory = isolate->factory();
    1429             :   HandleScope scope(isolate);
    1430             : 
    1431           5 :   Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
    1432           5 :   Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
    1433             :   int captures[5];
    1434             : 
    1435           5 :   const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
    1436          10 :   Handle<String> f1_16 = factory->NewStringFromTwoByte(
    1437          10 :       Vector<const uc16>(str1, 6)).ToHandleChecked();
    1438             : 
    1439           5 :   CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
    1440           5 :   CHECK_EQ(0, captures[0]);
    1441           5 :   CHECK_EQ(3, captures[1]);
    1442           5 :   CHECK_EQ(1, captures[2]);
    1443           5 :   CHECK_EQ(2, captures[3]);
    1444           5 :   CHECK_EQ(84, captures[4]);
    1445             : 
    1446           5 :   const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
    1447          10 :   Handle<String> f2_16 = factory->NewStringFromTwoByte(
    1448          10 :       Vector<const uc16>(str2, 6)).ToHandleChecked();
    1449             : 
    1450           5 :   CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
    1451           5 :   CHECK_EQ(42, captures[0]);
    1452           5 : }
    1453             : 
    1454       26644 : TEST(AddInverseToTable) {
    1455             :   static const int kLimit = 1000;
    1456             :   static const int kRangeCount = 16;
    1457         105 :   for (int t = 0; t < 10; t++) {
    1458         100 :     Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1459             :     ZoneList<CharacterRange>* ranges =
    1460             :         new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
    1461        1650 :     for (int i = 0; i < kRangeCount; i++) {
    1462        1600 :       int from = PseudoRandom(t + 87, i + 25) % kLimit;
    1463        1600 :       int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
    1464         800 :       if (to > kLimit) to = kLimit;
    1465         800 :       ranges->Add(CharacterRange::Range(from, to), &zone);
    1466             :     }
    1467             :     DispatchTable table(&zone);
    1468             :     DispatchTableConstructor cons(&table, false, &zone);
    1469             :     cons.set_choice_index(0);
    1470          50 :     cons.AddInverse(ranges);
    1471      100050 :     for (int i = 0; i < kLimit; i++) {
    1472             :       bool is_on = false;
    1473     1401230 :       for (int j = 0; !is_on && j < kRangeCount; j++)
    1474             :         is_on = ranges->at(j).Contains(i);
    1475       50000 :       OutSet* set = table.Get(i);
    1476       50000 :       CHECK_EQ(is_on, set->Get(0) == false);
    1477             :     }
    1478             :   }
    1479          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1480             :   ZoneList<CharacterRange>* ranges =
    1481             :       new(&zone) ZoneList<CharacterRange>(1, &zone);
    1482           5 :   ranges->Add(CharacterRange::Range(0xFFF0, 0xFFFE), &zone);
    1483             :   DispatchTable table(&zone);
    1484             :   DispatchTableConstructor cons(&table, false, &zone);
    1485             :   cons.set_choice_index(0);
    1486           5 :   cons.AddInverse(ranges);
    1487           5 :   CHECK(!table.Get(0xFFFE)->Get(0));
    1488           5 :   CHECK(table.Get(0xFFFF)->Get(0));
    1489           5 : }
    1490             : 
    1491             : #ifndef V8_INTL_SUPPORT
    1492             : static uc32 canonicalize(uc32 c) {
    1493             :   unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
    1494             :   int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, nullptr);
    1495             :   if (count == 0) {
    1496             :     return c;
    1497             :   } else {
    1498             :     CHECK_EQ(1, count);
    1499             :     return canon[0];
    1500             :   }
    1501             : }
    1502             : 
    1503             : TEST(LatinCanonicalize) {
    1504             :   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
    1505             :   for (unibrow::uchar lower = 'a'; lower <= 'z'; lower++) {
    1506             :     unibrow::uchar upper = lower + ('A' - 'a');
    1507             :     CHECK_EQ(canonicalize(lower), canonicalize(upper));
    1508             :     unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1509             :     int length = un_canonicalize.get(lower, '\0', uncanon);
    1510             :     CHECK_EQ(2, length);
    1511             :     CHECK_EQ(upper, uncanon[0]);
    1512             :     CHECK_EQ(lower, uncanon[1]);
    1513             :   }
    1514             :   for (uc32 c = 128; c < (1 << 21); c++)
    1515             :     CHECK_GE(canonicalize(c), 128);
    1516             :   unibrow::Mapping<unibrow::ToUppercase> to_upper;
    1517             :   // Canonicalization is only defined for the Basic Multilingual Plane.
    1518             :   for (uc32 c = 0; c < (1 << 16); c++) {
    1519             :     unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
    1520             :     int length = to_upper.get(c, '\0', upper);
    1521             :     if (length == 0) {
    1522             :       length = 1;
    1523             :       upper[0] = c;
    1524             :     }
    1525             :     uc32 u = upper[0];
    1526             :     if (length > 1 || (c >= 128 && u < 128))
    1527             :       u = c;
    1528             :     CHECK_EQ(u, canonicalize(c));
    1529             :   }
    1530             : }
    1531             : 
    1532             : static uc32 CanonRangeEnd(uc32 c) {
    1533             :   unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
    1534             :   int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, nullptr);
    1535             :   if (count == 0) {
    1536             :     return c;
    1537             :   } else {
    1538             :     CHECK_EQ(1, count);
    1539             :     return canon[0];
    1540             :   }
    1541             : }
    1542             : 
    1543             : 
    1544             : TEST(RangeCanonicalization) {
    1545             :   // Check that we arrive at the same result when using the basic
    1546             :   // range canonicalization primitives as when using immediate
    1547             :   // canonicalization.
    1548             :   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
    1549             :   int block_start = 0;
    1550             :   while (block_start <= 0xFFFF) {
    1551             :     uc32 block_end = CanonRangeEnd(block_start);
    1552             :     unsigned block_length = block_end - block_start + 1;
    1553             :     if (block_length > 1) {
    1554             :       unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1555             :       int first_length = un_canonicalize.get(block_start, '\0', first);
    1556             :       for (unsigned i = 1; i < block_length; i++) {
    1557             :         unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1558             :         int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
    1559             :         CHECK_EQ(first_length, succ_length);
    1560             :         for (int j = 0; j < succ_length; j++) {
    1561             :           int calc = first[j] + i;
    1562             :           int found = succ[j];
    1563             :           CHECK_EQ(calc, found);
    1564             :         }
    1565             :       }
    1566             :     }
    1567             :     block_start = block_start + block_length;
    1568             :   }
    1569             : }
    1570             : 
    1571             : 
    1572             : TEST(UncanonicalizeEquivalence) {
    1573             :   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
    1574             :   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1575             :   for (int i = 0; i < (1 << 16); i++) {
    1576             :     int length = un_canonicalize.get(i, '\0', chars);
    1577             :     for (int j = 0; j < length; j++) {
    1578             :       unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1579             :       int length2 = un_canonicalize.get(chars[j], '\0', chars2);
    1580             :       CHECK_EQ(length, length2);
    1581             :       for (int k = 0; k < length; k++)
    1582             :         CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
    1583             :     }
    1584             :   }
    1585             : }
    1586             : 
    1587             : #endif
    1588             : 
    1589          30 : static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
    1590             :                                       Vector<CharacterRange> expected) {
    1591          60 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1592             :   int count = expected.length();
    1593             :   ZoneList<CharacterRange>* list =
    1594             :       new(&zone) ZoneList<CharacterRange>(count, &zone);
    1595          30 :   list->Add(input, &zone);
    1596          30 :   CharacterRange::AddCaseEquivalents(isolate, &zone, list, false);
    1597          30 :   list->Remove(0);  // Remove the input before checking results.
    1598          30 :   CHECK_EQ(count, list->length());
    1599          90 :   for (int i = 0; i < list->length(); i++) {
    1600          30 :     CHECK_EQ(expected[i].from(), list->at(i).from());
    1601          30 :     CHECK_EQ(expected[i].to(), list->at(i).to());
    1602             :   }
    1603          30 : }
    1604             : 
    1605             : 
    1606          30 : static void TestSimpleRangeCaseIndependence(Isolate* isolate,
    1607             :                                             CharacterRange input,
    1608             :                                             CharacterRange expected) {
    1609             :   EmbeddedVector<CharacterRange, 1> vector;
    1610          30 :   vector[0] = expected;
    1611          30 :   TestRangeCaseIndependence(isolate, input, vector);
    1612          30 : }
    1613             : 
    1614             : 
    1615       26644 : TEST(CharacterRangeCaseIndependence) {
    1616             :   Isolate* isolate = CcTest::i_isolate();
    1617             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('a'),
    1618           5 :                                   CharacterRange::Singleton('A'));
    1619             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('z'),
    1620           5 :                                   CharacterRange::Singleton('Z'));
    1621             : #ifndef V8_INTL_SUPPORT
    1622             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'z'),
    1623             :                                   CharacterRange::Range('A', 'Z'));
    1624             : #endif  // !V8_INTL_SUPPORT
    1625             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('c', 'f'),
    1626           5 :                                   CharacterRange::Range('C', 'F'));
    1627             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'b'),
    1628           5 :                                   CharacterRange::Range('A', 'B'));
    1629             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('y', 'z'),
    1630           5 :                                   CharacterRange::Range('Y', 'Z'));
    1631             : #ifndef V8_INTL_SUPPORT
    1632             :   TestSimpleRangeCaseIndependence(isolate,
    1633             :                                   CharacterRange::Range('a' - 1, 'z' + 1),
    1634             :                                   CharacterRange::Range('A', 'Z'));
    1635             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'Z'),
    1636             :                                   CharacterRange::Range('a', 'z'));
    1637             : #endif  // !V8_INTL_SUPPORT
    1638             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('C', 'F'),
    1639           5 :                                   CharacterRange::Range('c', 'f'));
    1640             : #ifndef V8_INTL_SUPPORT
    1641             :   TestSimpleRangeCaseIndependence(isolate,
    1642             :                                   CharacterRange::Range('A' - 1, 'Z' + 1),
    1643             :                                   CharacterRange::Range('a', 'z'));
    1644             :   // Here the input [A-k] only lacks [l-z] to be case independent
    1645             :   // (giving [A-Za-z]), but we expect [a-z] to be added since we always
    1646             :   // add a whole block at a time.
    1647             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'k'),
    1648             :                                   CharacterRange::Range('a', 'z'));
    1649             : #endif  // !V8_INTL_SUPPORT
    1650           5 : }
    1651             : 
    1652             : 
    1653             : static bool InClass(uc32 c, ZoneList<CharacterRange>* ranges) {
    1654    22282160 :   if (ranges == nullptr) return false;
    1655    66764570 :   for (int i = 0; i < ranges->length(); i++) {
    1656    27811745 :     CharacterRange range = ranges->at(i);
    1657    27811745 :     if (range.from() <= c && c <= range.to())
    1658             :       return true;
    1659             :   }
    1660             :   return false;
    1661             : }
    1662             : 
    1663             : 
    1664       26644 : TEST(UnicodeRangeSplitter) {
    1665          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1666             :   ZoneList<CharacterRange>* base =
    1667             :       new(&zone) ZoneList<CharacterRange>(1, &zone);
    1668           5 :   base->Add(CharacterRange::Everything(), &zone);
    1669           5 :   UnicodeRangeSplitter splitter(&zone, base);
    1670             :   // BMP
    1671      552965 :   for (uc32 c = 0; c < 0xD800; c++) {
    1672      276480 :     CHECK(InClass(c, splitter.bmp()));
    1673      276480 :     CHECK(!InClass(c, splitter.lead_surrogates()));
    1674      276480 :     CHECK(!InClass(c, splitter.trail_surrogates()));
    1675      276480 :     CHECK(!InClass(c, splitter.non_bmp()));
    1676             :   }
    1677             :   // Lead surrogates
    1678       10235 :   for (uc32 c = 0xD800; c < 0xDBFF; c++) {
    1679        5115 :     CHECK(!InClass(c, splitter.bmp()));
    1680        5115 :     CHECK(InClass(c, splitter.lead_surrogates()));
    1681        5115 :     CHECK(!InClass(c, splitter.trail_surrogates()));
    1682        5115 :     CHECK(!InClass(c, splitter.non_bmp()));
    1683             :   }
    1684             :   // Trail surrogates
    1685       10235 :   for (uc32 c = 0xDC00; c < 0xDFFF; c++) {
    1686        5115 :     CHECK(!InClass(c, splitter.bmp()));
    1687        5115 :     CHECK(!InClass(c, splitter.lead_surrogates()));
    1688        5115 :     CHECK(InClass(c, splitter.trail_surrogates()));
    1689        5115 :     CHECK(!InClass(c, splitter.non_bmp()));
    1690             :   }
    1691             :   // BMP
    1692       81915 :   for (uc32 c = 0xE000; c < 0xFFFF; c++) {
    1693       40955 :     CHECK(InClass(c, splitter.bmp()));
    1694       40955 :     CHECK(!InClass(c, splitter.lead_surrogates()));
    1695       40955 :     CHECK(!InClass(c, splitter.trail_surrogates()));
    1696       40955 :     CHECK(!InClass(c, splitter.non_bmp()));
    1697             :   }
    1698             :   // Non-BMP
    1699    10485755 :   for (uc32 c = 0x10000; c < 0x10FFFF; c++) {
    1700     5242875 :     CHECK(!InClass(c, splitter.bmp()));
    1701     5242875 :     CHECK(!InClass(c, splitter.lead_surrogates()));
    1702     5242875 :     CHECK(!InClass(c, splitter.trail_surrogates()));
    1703     5242875 :     CHECK(InClass(c, splitter.non_bmp()));
    1704             :   }
    1705           5 : }
    1706             : 
    1707             : 
    1708       26644 : TEST(CanonicalizeCharacterSets) {
    1709          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1710             :   ZoneList<CharacterRange>* list =
    1711             :       new(&zone) ZoneList<CharacterRange>(4, &zone);
    1712             :   CharacterSet set(list);
    1713             : 
    1714           5 :   list->Add(CharacterRange::Range(10, 20), &zone);
    1715           5 :   list->Add(CharacterRange::Range(30, 40), &zone);
    1716           5 :   list->Add(CharacterRange::Range(50, 60), &zone);
    1717           5 :   set.Canonicalize();
    1718           5 :   CHECK_EQ(3, list->length());
    1719           5 :   CHECK_EQ(10, list->at(0).from());
    1720           5 :   CHECK_EQ(20, list->at(0).to());
    1721           5 :   CHECK_EQ(30, list->at(1).from());
    1722           5 :   CHECK_EQ(40, list->at(1).to());
    1723           5 :   CHECK_EQ(50, list->at(2).from());
    1724           5 :   CHECK_EQ(60, list->at(2).to());
    1725             : 
    1726             :   list->Rewind(0);
    1727           5 :   list->Add(CharacterRange::Range(10, 20), &zone);
    1728           5 :   list->Add(CharacterRange::Range(50, 60), &zone);
    1729           5 :   list->Add(CharacterRange::Range(30, 40), &zone);
    1730           5 :   set.Canonicalize();
    1731           5 :   CHECK_EQ(3, list->length());
    1732           5 :   CHECK_EQ(10, list->at(0).from());
    1733           5 :   CHECK_EQ(20, list->at(0).to());
    1734           5 :   CHECK_EQ(30, list->at(1).from());
    1735           5 :   CHECK_EQ(40, list->at(1).to());
    1736           5 :   CHECK_EQ(50, list->at(2).from());
    1737           5 :   CHECK_EQ(60, list->at(2).to());
    1738             : 
    1739             :   list->Rewind(0);
    1740           5 :   list->Add(CharacterRange::Range(30, 40), &zone);
    1741           5 :   list->Add(CharacterRange::Range(10, 20), &zone);
    1742           5 :   list->Add(CharacterRange::Range(25, 25), &zone);
    1743           5 :   list->Add(CharacterRange::Range(100, 100), &zone);
    1744           5 :   list->Add(CharacterRange::Range(1, 1), &zone);
    1745           5 :   set.Canonicalize();
    1746           5 :   CHECK_EQ(5, list->length());
    1747           5 :   CHECK_EQ(1, list->at(0).from());
    1748           5 :   CHECK_EQ(1, list->at(0).to());
    1749           5 :   CHECK_EQ(10, list->at(1).from());
    1750           5 :   CHECK_EQ(20, list->at(1).to());
    1751           5 :   CHECK_EQ(25, list->at(2).from());
    1752           5 :   CHECK_EQ(25, list->at(2).to());
    1753           5 :   CHECK_EQ(30, list->at(3).from());
    1754           5 :   CHECK_EQ(40, list->at(3).to());
    1755           5 :   CHECK_EQ(100, list->at(4).from());
    1756           5 :   CHECK_EQ(100, list->at(4).to());
    1757             : 
    1758             :   list->Rewind(0);
    1759           5 :   list->Add(CharacterRange::Range(10, 19), &zone);
    1760           5 :   list->Add(CharacterRange::Range(21, 30), &zone);
    1761           5 :   list->Add(CharacterRange::Range(20, 20), &zone);
    1762           5 :   set.Canonicalize();
    1763           5 :   CHECK_EQ(1, list->length());
    1764           5 :   CHECK_EQ(10, list->at(0).from());
    1765           5 :   CHECK_EQ(30, list->at(0).to());
    1766           5 : }
    1767             : 
    1768             : 
    1769       26644 : TEST(CharacterRangeMerge) {
    1770          10 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1771             :   ZoneList<CharacterRange> l1(4, &zone);
    1772             :   ZoneList<CharacterRange> l2(4, &zone);
    1773             :   // Create all combinations of intersections of ranges, both singletons and
    1774             :   // longer.
    1775             : 
    1776             :   int offset = 0;
    1777             : 
    1778             :   // The five kinds of singleton intersections:
    1779             :   //     X
    1780             :   //   Y      - outside before
    1781             :   //    Y     - outside touching start
    1782             :   //     Y    - overlap
    1783             :   //      Y   - outside touching end
    1784             :   //       Y  - outside after
    1785             : 
    1786          55 :   for (int i = 0; i < 5; i++) {
    1787          25 :     l1.Add(CharacterRange::Singleton(offset + 2), &zone);
    1788          25 :     l2.Add(CharacterRange::Singleton(offset + i), &zone);
    1789          25 :     offset += 6;
    1790             :   }
    1791             : 
    1792             :   // The seven kinds of singleton/non-singleton intersections:
    1793             :   //    XXX
    1794             :   //  Y        - outside before
    1795             :   //   Y       - outside touching start
    1796             :   //    Y      - inside touching start
    1797             :   //     Y     - entirely inside
    1798             :   //      Y    - inside touching end
    1799             :   //       Y   - outside touching end
    1800             :   //        Y  - disjoint after
    1801             : 
    1802          75 :   for (int i = 0; i < 7; i++) {
    1803          35 :     l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
    1804          35 :     l2.Add(CharacterRange::Singleton(offset + i), &zone);
    1805          35 :     offset += 8;
    1806             :   }
    1807             : 
    1808             :   // The eleven kinds of non-singleton intersections:
    1809             :   //
    1810             :   //       XXXXXXXX
    1811             :   // YYYY                  - outside before.
    1812             :   //   YYYY                - outside touching start.
    1813             :   //     YYYY              - overlapping start
    1814             :   //       YYYY            - inside touching start
    1815             :   //         YYYY          - entirely inside
    1816             :   //           YYYY        - inside touching end
    1817             :   //             YYYY      - overlapping end
    1818             :   //               YYYY    - outside touching end
    1819             :   //                 YYYY  - outside after
    1820             :   //       YYYYYYYY        - identical
    1821             :   //     YYYYYYYYYYYY      - containing entirely.
    1822             : 
    1823          95 :   for (int i = 0; i < 9; i++) {
    1824          45 :     l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);  // Length 8.
    1825          45 :     l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
    1826          45 :     offset += 22;
    1827             :   }
    1828           5 :   l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
    1829           5 :   l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
    1830             :   offset += 22;
    1831           5 :   l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
    1832           5 :   l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone);
    1833           5 :   offset += 22;
    1834             : 
    1835             :   // Different kinds of multi-range overlap:
    1836             :   // XXXXXXXXXXXXXXXXXXXXXX         XXXXXXXXXXXXXXXXXXXXXX
    1837             :   //   YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y
    1838             : 
    1839           5 :   l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
    1840           5 :   l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
    1841          65 :   for (int i = 0; i < 6; i++) {
    1842          30 :     l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
    1843          30 :     l2.Add(CharacterRange::Singleton(offset + 8), &zone);
    1844          30 :     offset += 9;
    1845             :   }
    1846             : 
    1847           5 :   CHECK(CharacterRange::IsCanonical(&l1));
    1848           5 :   CHECK(CharacterRange::IsCanonical(&l2));
    1849             : 
    1850             :   ZoneList<CharacterRange> first_only(4, &zone);
    1851             :   ZoneList<CharacterRange> second_only(4, &zone);
    1852             :   ZoneList<CharacterRange> both(4, &zone);
    1853           5 : }
    1854             : 
    1855             : 
    1856       26644 : TEST(Graph) {
    1857           5 :   Execute("\\b\\w+\\b", false, true, true);
    1858           5 : }
    1859             : 
    1860             : 
    1861             : namespace {
    1862             : 
    1863             : int* global_use_counts = nullptr;
    1864             : 
    1865          46 : void MockUseCounterCallback(v8::Isolate* isolate,
    1866             :                             v8::Isolate::UseCounterFeature feature) {
    1867          46 :   ++global_use_counts[feature];
    1868          46 : }
    1869             : }
    1870             : 
    1871             : 
    1872             : // Test that ES2015 RegExp compatibility fixes are in place, that they
    1873             : // are not overly broad, and the appropriate UseCounters are incremented
    1874       26644 : TEST(UseCountRegExp) {
    1875           5 :   v8::Isolate* isolate = CcTest::isolate();
    1876          10 :   v8::HandleScope scope(isolate);
    1877           5 :   LocalContext env;
    1878           5 :   int use_counts[v8::Isolate::kUseCounterFeatureCount] = {};
    1879           5 :   global_use_counts = use_counts;
    1880           5 :   CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback);
    1881             : 
    1882             :   // Compat fix: RegExp.prototype.sticky == undefined; UseCounter tracks it
    1883             :   v8::Local<v8::Value> resultSticky = CompileRun("RegExp.prototype.sticky");
    1884           5 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1885           5 :   CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1886           5 :   CHECK(resultSticky->IsUndefined());
    1887             : 
    1888             :   // re.sticky has appropriate value and doesn't touch UseCounter
    1889             :   v8::Local<v8::Value> resultReSticky = CompileRun("/a/.sticky");
    1890           5 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1891           5 :   CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1892           5 :   CHECK(resultReSticky->IsFalse());
    1893             : 
    1894             :   // When the getter is called on another object, throw an exception
    1895             :   // and don't increment the UseCounter
    1896             :   v8::Local<v8::Value> resultStickyError = CompileRun(
    1897             :       "var exception;"
    1898             :       "try { "
    1899             :       "  Object.getOwnPropertyDescriptor(RegExp.prototype, 'sticky')"
    1900             :       "      .get.call(null);"
    1901             :       "} catch (e) {"
    1902             :       "  exception = e;"
    1903             :       "}"
    1904             :       "exception");
    1905           5 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1906           5 :   CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1907           5 :   CHECK(resultStickyError->IsObject());
    1908             : 
    1909             :   // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix;
    1910             :   // a UseCounter is incremented to track it.
    1911             :   v8::Local<v8::Value> resultToString =
    1912             :       CompileRun("RegExp.prototype.toString().length");
    1913           5 :   CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1914           5 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1915           5 :   CHECK(resultToString->IsInt32());
    1916          10 :   CHECK_EQ(6,
    1917             :            resultToString->Int32Value(isolate->GetCurrentContext()).FromJust());
    1918             : 
    1919             :   // .toString() works on normal RegExps
    1920             :   v8::Local<v8::Value> resultReToString = CompileRun("/a/.toString().length");
    1921           5 :   CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1922           5 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1923           5 :   CHECK(resultReToString->IsInt32());
    1924          10 :   CHECK_EQ(
    1925             :       3, resultReToString->Int32Value(isolate->GetCurrentContext()).FromJust());
    1926             : 
    1927             :   // .toString() throws on non-RegExps that aren't RegExp.prototype
    1928             :   v8::Local<v8::Value> resultToStringError = CompileRun(
    1929             :       "var exception;"
    1930             :       "try { RegExp.prototype.toString.call(null) }"
    1931             :       "catch (e) { exception = e; }"
    1932             :       "exception");
    1933           5 :   CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1934           5 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1935           5 :   CHECK(resultToStringError->IsObject());
    1936           5 : }
    1937             : 
    1938          15 : class UncachedExternalString
    1939             :     : public v8::String::ExternalOneByteStringResource {
    1940             :  public:
    1941          25 :   const char* data() const override { return "abcdefghijklmnopqrstuvwxyz"; }
    1942          20 :   size_t length() const override { return 26; }
    1943           5 :   bool IsCacheable() const override { return false; }
    1944             : };
    1945             : 
    1946       26644 : TEST(UncachedExternalString) {
    1947           5 :   v8::Isolate* isolate = CcTest::isolate();
    1948          10 :   v8::HandleScope scope(isolate);
    1949           5 :   LocalContext env;
    1950             :   v8::Local<v8::String> external =
    1951          10 :       v8::String::NewExternalOneByte(isolate, new UncachedExternalString())
    1952             :           .ToLocalChecked();
    1953           5 :   CHECK(v8::Utils::OpenHandle(*external)->map() ==
    1954             :         ReadOnlyRoots(CcTest::i_isolate())
    1955             :             .uncached_external_one_byte_string_map());
    1956           5 :   v8::Local<v8::Object> global = env->Global();
    1957          15 :   global->Set(env.local(), v8_str("external"), external).FromJust();
    1958             :   CompileRun("var re = /y(.)/; re.test('ab');");
    1959           5 :   ExpectString("external.substring(1).match(re)[1]", "z");
    1960           5 : }
    1961             : 
    1962             : }  // namespace test_regexp
    1963             : }  // namespace internal
    1964       79917 : }  // namespace v8

Generated by: LCOV version 1.10