LCOV - code coverage report
Current view: top level - test/cctest - test-regexp.cc (source / functions) Hit Total Coverage
Test: app.info Lines: 1050 1053 99.7 %
Date: 2017-10-20 Functions: 57 58 98.3 %

          Line data    Source code
       1             : // Copyright 2012 the V8 project authors. All rights reserved.
       2             : // Redistribution and use in source and binary forms, with or without
       3             : // modification, are permitted provided that the following conditions are
       4             : // met:
       5             : //
       6             : //     * Redistributions of source code must retain the above copyright
       7             : //       notice, this list of conditions and the following disclaimer.
       8             : //     * Redistributions in binary form must reproduce the above
       9             : //       copyright notice, this list of conditions and the following
      10             : //       disclaimer in the documentation and/or other materials provided
      11             : //       with the distribution.
      12             : //     * Neither the name of Google Inc. nor the names of its
      13             : //       contributors may be used to endorse or promote products derived
      14             : //       from this software without specific prior written permission.
      15             : //
      16             : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      17             : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      18             : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      19             : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
      20             : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
      21             : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
      22             : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      23             : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      24             : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      25             : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
      26             : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      27             : 
      28             : #include <cstdlib>
      29             : #include <memory>
      30             : #include <sstream>
      31             : 
      32             : #include "include/v8.h"
      33             : #include "src/v8.h"
      34             : 
      35             : #include "src/api.h"
      36             : #include "src/ast/ast.h"
      37             : #include "src/char-predicates-inl.h"
      38             : #include "src/objects-inl.h"
      39             : #include "src/ostreams.h"
      40             : #include "src/regexp/jsregexp.h"
      41             : #include "src/regexp/regexp-macro-assembler-irregexp.h"
      42             : #include "src/regexp/regexp-macro-assembler.h"
      43             : #include "src/regexp/regexp-parser.h"
      44             : #include "src/splay-tree-inl.h"
      45             : #include "src/string-stream.h"
      46             : #include "src/unicode-inl.h"
      47             : 
      48             : #ifdef V8_INTERPRETED_REGEXP
      49             : #include "src/regexp/interpreter-irregexp.h"
      50             : #else  // V8_INTERPRETED_REGEXP
      51             : #include "src/macro-assembler.h"
      52             : #if V8_TARGET_ARCH_ARM
      53             : #include "src/arm/assembler-arm.h"  // NOLINT
      54             : #include "src/arm/macro-assembler-arm.h"
      55             : #include "src/regexp/arm/regexp-macro-assembler-arm.h"
      56             : #endif
      57             : #if V8_TARGET_ARCH_ARM64
      58             : #include "src/arm64/assembler-arm64.h"
      59             : #include "src/arm64/macro-assembler-arm64.h"
      60             : #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
      61             : #endif
      62             : #if V8_TARGET_ARCH_S390
      63             : #include "src/regexp/s390/regexp-macro-assembler-s390.h"
      64             : #include "src/s390/assembler-s390.h"
      65             : #include "src/s390/macro-assembler-s390.h"
      66             : #endif
      67             : #if V8_TARGET_ARCH_PPC
      68             : #include "src/ppc/assembler-ppc.h"
      69             : #include "src/ppc/macro-assembler-ppc.h"
      70             : #include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
      71             : #endif
      72             : #if V8_TARGET_ARCH_MIPS
      73             : #include "src/mips/assembler-mips.h"
      74             : #include "src/mips/macro-assembler-mips.h"
      75             : #include "src/regexp/mips/regexp-macro-assembler-mips.h"
      76             : #endif
      77             : #if V8_TARGET_ARCH_MIPS64
      78             : #include "src/mips64/assembler-mips64.h"
      79             : #include "src/mips64/macro-assembler-mips64.h"
      80             : #include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
      81             : #endif
      82             : #if V8_TARGET_ARCH_X64
      83             : #include "src/regexp/x64/regexp-macro-assembler-x64.h"
      84             : #include "src/x64/assembler-x64.h"
      85             : #include "src/x64/macro-assembler-x64.h"
      86             : #endif
      87             : #if V8_TARGET_ARCH_IA32
      88             : #include "src/ia32/assembler-ia32.h"
      89             : #include "src/ia32/macro-assembler-ia32.h"
      90             : #include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
      91             : #endif
      92             : #endif  // V8_INTERPRETED_REGEXP
      93             : #include "test/cctest/cctest.h"
      94             : 
      95             : namespace v8 {
      96             : namespace internal {
      97             : namespace test_regexp {
      98             : 
      99          12 : static bool CheckParse(const char* input) {
                         // Parses `input` as a regexp pattern with no flags (JSRegExp::kNone)
                         // and returns the bool produced by RegExpParser::ParseRegExp. The
                         // RegExpCompileData filled in by the parser is discarded.
     100          12 :   v8::HandleScope scope(CcTest::isolate());
     101          24 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     102          12 :   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
     103             :   RegExpCompileData result;
     104             :   return v8::internal::RegExpParser::ParseRegExp(
     105          24 :       CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result);
     106             : }
     107             : 
     108             : 
     109        1812 : static void CheckParseEq(const char* input, const char* expected,
     110             :                          bool unicode = false) {
                         // Parses `input` (adding JSRegExp::kUnicode to the flags when `unicode`
                         // is true), requires the parse to succeed with a non-null tree and no
                         // error, prints the tree into a string and requires that string to be
                         // equal to `expected`.
     111        1812 :   v8::HandleScope scope(CcTest::isolate());
     112        3624 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     113        1812 :   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
     114             :   RegExpCompileData result;
     115             :   JSRegExp::Flags flags = JSRegExp::kNone;
     116        1812 :   if (unicode) flags |= JSRegExp::kUnicode;
     117        1812 :   CHECK(v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
     118             :                                                 &reader, flags, &result));
     119        1812 :   CHECK_NOT_NULL(result.tree);
     120        1812 :   CHECK(result.error.is_null());
     121        3624 :   std::ostringstream os;
     122        1812 :   result.tree->Print(os, &zone);
                         // On a mismatch, print "expected | actual" before the CHECK_EQ below
                         // fails, so that both strings are visible in the output.
     123        3624 :   if (strcmp(expected, os.str().c_str()) != 0) {
     124           0 :     printf("%s | %s\n", expected, os.str().c_str());
     125             :   }
     126        5436 :   CHECK_EQ(0, strcmp(expected, os.str().c_str()));
     127        1812 : }
     128             : 
     129             : 
     130         540 : static bool CheckSimple(const char* input) {
                         // Parses `input` with no flags, requires the parse to succeed with a
                         // non-null tree and no error, and returns the `simple` field that the
                         // parser stored in the RegExpCompileData.
     131         540 :   v8::HandleScope scope(CcTest::isolate());
     132        1080 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     133         540 :   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
     134             :   RegExpCompileData result;
     135         540 :   CHECK(v8::internal::RegExpParser::ParseRegExp(
     136             :       CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
     137         540 :   CHECK_NOT_NULL(result.tree);
     138         540 :   CHECK(result.error.is_null());
     139        1080 :   return result.simple;
     140             : }
     141             : 
     142             : struct MinMaxPair {
                         // Return type of CheckMinMaxMatch(): the min_match() and max_match()
                         // values read from a parsed regexp tree.
     143             :   int min_match;
     144             :   int max_match;
     145             : };
     146             : 
     147             : 
     148         576 : static MinMaxPair CheckMinMaxMatch(const char* input) {
                         // Parses `input` with no flags, requires the parse to succeed with a
                         // non-null tree and no error, and returns the tree's min_match() and
                         // max_match() values packed into a MinMaxPair.
     149         576 :   v8::HandleScope scope(CcTest::isolate());
     150        1152 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     151         576 :   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
     152             :   RegExpCompileData result;
     153         576 :   CHECK(v8::internal::RegExpParser::ParseRegExp(
     154             :       CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
     155         576 :   CHECK_NOT_NULL(result.tree);
     156         576 :   CHECK(result.error.is_null());
     157         576 :   int min_match = result.tree->min_match();
     158         576 :   int max_match = result.tree->max_match();
     159             :   MinMaxPair pair = { min_match, max_match };
     160        1152 :   return pair;
     161             : }
     162             : 
     163             : 
     164             : #define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
                       // CHECK_PARSE_ERROR (above): asserts that CheckParse() rejects `input`.
                       // CHECK_SIMPLE (below): asserts that CheckSimple(input) equals `simple`.
                       // NOTE(review): the expansion of CHECK_SIMPLE already ends in ';', so a
                       // use written as a statement expands to the check plus an empty
                       // statement.
     165             : #define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input));
                       // CHECK_MIN_MAX: asserts that CheckMinMaxMatch(input) returns exactly
                       // {min, max}. The expansion is a bare brace block.
     166             : #define CHECK_MIN_MAX(input, min, max)                                         \
     167             :   { MinMaxPair min_max = CheckMinMaxMatch(input);                              \
     168             :     CHECK_EQ(min, min_max.min_match);                                          \
     169             :     CHECK_EQ(max, min_max.max_match);                                          \
     170             :   }
     171             : 
     172             : 
     173          12 : void TestRegExpParser(bool lookbehind) {
                         // Parser expectations shared by ParserWithLookbehind and
                         // ParserWithoutLookbehind. `lookbehind` is written to
                         // FLAG_harmony_regexp_lookbehind and selects whether "(?<=...)" and
                         // "(?<!...)" are expected to parse or to be rejected.
                         // NOTE(review): FLAG_harmony_regexp_lookbehind is not reset before
                         // returning, whereas FLAG_harmony_regexp_named_captures is set back to
                         // false at the end of this function.
     174          12 :   FLAG_harmony_regexp_lookbehind = lookbehind;
     175             : 
     176          12 :   CHECK_PARSE_ERROR("?");
     177             : 
     178          12 :   CheckParseEq("abc", "'abc'");
     179          12 :   CheckParseEq("", "%");
     180          12 :   CheckParseEq("abc|def", "(| 'abc' 'def')");
     181          12 :   CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
     182          12 :   CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
     183          12 :   CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
     184          12 :   CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
     185          12 :   CheckParseEq("a*", "(# 0 - g 'a')");
     186          12 :   CheckParseEq("a*?", "(# 0 - n 'a')");
     187          12 :   CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
     188          12 :   CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
     189          12 :   CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
     190          12 :   CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
     191          12 :   CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
     192          12 :   CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
     193          12 :   CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
     194          12 :   CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
     195          12 :   CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
     196          12 :   CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
     197          12 :   CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
     198          12 :   CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
     199          12 :   CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
     200          12 :   CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
     201          12 :   CheckParseEq("(?:foo)", "(?: 'foo')");
     202          12 :   CheckParseEq("(?: foo )", "(?: ' foo ')");
     203          12 :   CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
     204          12 :   CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
     205          12 :   CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
     206          12 :   CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
                         // Lookbehind syntax must parse when the flag is on and must be rejected
                         // as a parse error when it is off.
     207          12 :   if (lookbehind) {
     208          12 :     CheckParseEq("foo(?<=bar)baz", "(: 'foo' (<- + 'bar') 'baz')");
     209          12 :     CheckParseEq("foo(?<!bar)baz", "(: 'foo' (<- - 'bar') 'baz')");
     210             :   } else {
     211           0 :     CHECK_PARSE_ERROR("foo(?<=bar)baz");
     212           0 :     CHECK_PARSE_ERROR("foo(?<!bar)baz");
     213             :   }
     214          12 :   CheckParseEq("()", "(^ %)");
     215          12 :   CheckParseEq("(?=)", "(-> + %)");
     216          12 :   CheckParseEq("[]", "^[\\x00-\\u{10ffff}]");  // Doesn't compile on windows
     217          12 :   CheckParseEq("[^]", "[\\x00-\\u{10ffff}]");  // \uffff isn't in codepage 1252
     218          12 :   CheckParseEq("[x]", "[x]");
     219          12 :   CheckParseEq("[xyz]", "[x y z]");
     220          12 :   CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
     221          12 :   CheckParseEq("[-123]", "[- 1 2 3]");
     222          12 :   CheckParseEq("[^123]", "^[1 2 3]");
     223          12 :   CheckParseEq("]", "']'");
     224          12 :   CheckParseEq("}", "'}'");
     225          12 :   CheckParseEq("[a-b-c]", "[a-b - c]");
     226          12 :   CheckParseEq("[\\d]", "[0-9]");
     227          12 :   CheckParseEq("[x\\dz]", "[x 0-9 z]");
     228          12 :   CheckParseEq("[\\d-z]", "[0-9 - z]");
     229          12 :   CheckParseEq("[\\d-\\d]", "[0-9 0-9 -]");
     230          12 :   CheckParseEq("[z-\\d]", "[0-9 z -]");
     231             :   // Control character outside character class.
     232          12 :   CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
     233          12 :   CheckParseEq("\\c!", "'\\c!'");
     234          12 :   CheckParseEq("\\c_", "'\\c_'");
     235          12 :   CheckParseEq("\\c~", "'\\c~'");
     236          12 :   CheckParseEq("\\c1", "'\\c1'");
     237             :   // Control character inside character class.
     238          12 :   CheckParseEq("[\\c!]", "[\\ c !]");
     239          12 :   CheckParseEq("[\\c_]", "[\\x1f]");
     240          12 :   CheckParseEq("[\\c~]", "[\\ c ~]");
     241          12 :   CheckParseEq("[\\ca]", "[\\x01]");
     242          12 :   CheckParseEq("[\\cz]", "[\\x1a]");
     243          12 :   CheckParseEq("[\\cA]", "[\\x01]");
     244          12 :   CheckParseEq("[\\cZ]", "[\\x1a]");
     245          12 :   CheckParseEq("[\\c1]", "[\\x11]");
     246             : 
     247          12 :   CheckParseEq("[a\\]c]", "[a ] c]");
     248          12 :   CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
     249          12 :   CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ #  ]");
     250          12 :   CheckParseEq("\\0", "'\\x00'");
     251          12 :   CheckParseEq("\\8", "'8'");
     252          12 :   CheckParseEq("\\9", "'9'");
     253          12 :   CheckParseEq("\\11", "'\\x09'");
     254          12 :   CheckParseEq("\\11a", "'\\x09a'");
     255          12 :   CheckParseEq("\\011", "'\\x09'");
     256          12 :   CheckParseEq("\\00011", "'\\x0011'");
     257          12 :   CheckParseEq("\\118", "'\\x098'");
     258          12 :   CheckParseEq("\\111", "'I'");
     259          12 :   CheckParseEq("\\1111", "'I1'");
     260          12 :   CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
     261          12 :   CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
     262          12 :   CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
     263          12 :   CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
     264             :   CheckParseEq("(x)(x)(x)\\1*",
     265             :                "(: (^ 'x') (^ 'x') (^ 'x')"
     266          12 :                " (# 0 - g (<- 1)))");
     267             :   CheckParseEq("(x)(x)(x)\\2*",
     268             :                "(: (^ 'x') (^ 'x') (^ 'x')"
     269          12 :                " (# 0 - g (<- 2)))");
     270             :   CheckParseEq("(x)(x)(x)\\3*",
     271             :                "(: (^ 'x') (^ 'x') (^ 'x')"
     272          12 :                " (# 0 - g (<- 3)))");
     273             :   CheckParseEq("(x)(x)(x)\\4*",
     274             :                "(: (^ 'x') (^ 'x') (^ 'x')"
     275          12 :                " (# 0 - g '\\x04'))");
     276             :   CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
     277             :                "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
     278          12 :                " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
     279             :   CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
     280             :                "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
     281          12 :                " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
     282          12 :   CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
     283          12 :   CheckParseEq("(a\\1)", "(^ 'a')");
     284          12 :   CheckParseEq("(\\1a)", "(^ 'a')");
     285          12 :   CheckParseEq("(\\2)(\\1)", "(: (^ (<- 2)) (^ (<- 1)))");
     286          12 :   CheckParseEq("(?=a)?a", "'a'");
     287          12 :   CheckParseEq("(?=a){0,10}a", "'a'");
     288          12 :   CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
     289          12 :   CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
     290          12 :   CheckParseEq("(?!a)?a", "'a'");
     291          12 :   CheckParseEq("\\1(a)", "(: (<- 1) (^ 'a'))");
     292          12 :   CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
     293             :   CheckParseEq("(?!\\1(a\\1)\\1)\\1",
     294          12 :                "(: (-> - (: (<- 1) (^ 'a') (<- 1))) (<- 1))");
     295             :   CheckParseEq("\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1",
     296          12 :                "(: (<- 1) (<- 2) (^ (: 'a' (?: (^ 'b')) (<- 2))) (<- 1))");
     297          12 :   if (lookbehind) {
     298             :     CheckParseEq("\\1\\2(a(?<=\\1(b\\1\\2))\\2)\\1",
     299          12 :                  "(: (<- 1) (<- 2) (^ (: 'a' (<- + (^ 'b')) (<- 2))) (<- 1))");
     300             :   }
     301          12 :   CheckParseEq("[\\0]", "[\\x00]");
     302          12 :   CheckParseEq("[\\11]", "[\\x09]");
     303          12 :   CheckParseEq("[\\11a]", "[\\x09 a]");
     304          12 :   CheckParseEq("[\\011]", "[\\x09]");
     305          12 :   CheckParseEq("[\\00011]", "[\\x00 1 1]");
     306          12 :   CheckParseEq("[\\118]", "[\\x09 8]");
     307          12 :   CheckParseEq("[\\111]", "[I]");
     308          12 :   CheckParseEq("[\\1111]", "[I 1]");
     309          12 :   CheckParseEq("\\x34", "'\x34'");
     310          12 :   CheckParseEq("\\x60", "'\x60'");
     311          12 :   CheckParseEq("\\x3z", "'x3z'");
     312          12 :   CheckParseEq("\\c", "'\\c'");
     313          12 :   CheckParseEq("\\u0034", "'\x34'");
     314          12 :   CheckParseEq("\\u003z", "'u003z'");
     315          12 :   CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");
     316          12 :   CheckParseEq("^^^$$$\\b\\b\\b\\b", "(: @^i @$i @b)");
     317          12 :   CheckParseEq("\\b\\b\\b\\b\\B\\B\\B\\B\\b\\b\\b\\b", "(: @b @B @b)");
     318          12 :   CheckParseEq("\\b\\B\\b", "(: @b @B @b)");
     319             : 
     320             :   // Unicode regexps
     321          12 :   CheckParseEq("\\u{12345}", "'\\ud808\\udf45'", true);
     322             :   CheckParseEq("\\u{12345}\\u{23456}", "(! '\\ud808\\udf45' '\\ud84d\\udc56')",
     323          12 :                true);
     324             :   CheckParseEq("\\u{12345}|\\u{23456}", "(| '\\ud808\\udf45' '\\ud84d\\udc56')",
     325          12 :                true);
     326          12 :   CheckParseEq("\\u{12345}{3}", "(# 3 3 g '\\ud808\\udf45')", true);
     327          12 :   CheckParseEq("\\u{12345}*", "(# 0 - g '\\ud808\\udf45')", true);
     328             : 
     329          12 :   CheckParseEq("\\ud808\\udf45*", "(# 0 - g '\\ud808\\udf45')", true);
     330             :   CheckParseEq("[\\ud808\\udf45-\\ud809\\udccc]", "[\\u{012345}-\\u{0124cc}]",
     331          12 :                true);
     332             : 
                         // Expected value of the parser's `simple` result; of the patterns
                         // listed here only the plain literal "a" is expected to be simple.
     333          24 :   CHECK_SIMPLE("", false);
     334          12 :   CHECK_SIMPLE("a", true);
     335          24 :   CHECK_SIMPLE("a|b", false);
     336          24 :   CHECK_SIMPLE("a\\n", false);
     337          24 :   CHECK_SIMPLE("^a", false);
     338          24 :   CHECK_SIMPLE("a$", false);
     339          24 :   CHECK_SIMPLE("a\\b!", false);
     340          24 :   CHECK_SIMPLE("a\\Bb", false);
     341          24 :   CHECK_SIMPLE("a*", false);
     342          24 :   CHECK_SIMPLE("a*?", false);
     343          24 :   CHECK_SIMPLE("a?", false);
     344          24 :   CHECK_SIMPLE("a??", false);
     345          24 :   CHECK_SIMPLE("a{0,1}?", false);
     346          24 :   CHECK_SIMPLE("a{1,1}?", false);
     347          24 :   CHECK_SIMPLE("a{1,2}?", false);
     348          24 :   CHECK_SIMPLE("a+?", false);
     349          24 :   CHECK_SIMPLE("(a)", false);
     350          24 :   CHECK_SIMPLE("(a)\\1", false);
     351          24 :   CHECK_SIMPLE("(\\1a)", false);
     352          24 :   CHECK_SIMPLE("\\1(a)", false);
     353          24 :   CHECK_SIMPLE("a\\s", false);
     354          24 :   CHECK_SIMPLE("a\\S", false);
     355          24 :   CHECK_SIMPLE("a\\d", false);
     356          24 :   CHECK_SIMPLE("a\\D", false);
     357          24 :   CHECK_SIMPLE("a\\w", false);
     358          24 :   CHECK_SIMPLE("a\\W", false);
     359          24 :   CHECK_SIMPLE("a.", false);
     360          24 :   CHECK_SIMPLE("a\\q", false);
     361          24 :   CHECK_SIMPLE("a[a]", false);
     362          24 :   CHECK_SIMPLE("a[^a]", false);
     363          24 :   CHECK_SIMPLE("a[a-z]", false);
     364          24 :   CHECK_SIMPLE("a[\\q]", false);
     365          24 :   CHECK_SIMPLE("a(?:b)", false);
     366          24 :   CHECK_SIMPLE("a(?=b)", false);
     367          24 :   CHECK_SIMPLE("a(?!b)", false);
     368          24 :   CHECK_SIMPLE("\\x60", false);
     369          24 :   CHECK_SIMPLE("\\u0060", false);
     370          24 :   CHECK_SIMPLE("\\cA", false);
     371          24 :   CHECK_SIMPLE("\\q", false);
     372          24 :   CHECK_SIMPLE("\\1112", false);
     373          24 :   CHECK_SIMPLE("\\0", false);
     374          24 :   CHECK_SIMPLE("(a)\\1", false);
     375          24 :   CHECK_SIMPLE("(?=a)?a", false);
     376          24 :   CHECK_SIMPLE("(?!a)?a\\1", false);
     377          24 :   CHECK_SIMPLE("(?:(?=a))a\\1", false);
     378             : 
                         // Brace sequences that do not form a complete {n}, {n,} or {n,m}
                         // quantifier are expected to come back as literal text.
     379          12 :   CheckParseEq("a{}", "'a{}'");
     380          12 :   CheckParseEq("a{,}", "'a{,}'");
     381          12 :   CheckParseEq("a{", "'a{'");
     382          12 :   CheckParseEq("a{z}", "'a{z}'");
     383          12 :   CheckParseEq("a{1z}", "'a{1z}'");
     384          12 :   CheckParseEq("a{12z}", "'a{12z}'");
     385          12 :   CheckParseEq("a{12,", "'a{12,'");
     386          12 :   CheckParseEq("a{12,3b", "'a{12,3b'");
     387          12 :   CheckParseEq("{}", "'{}'");
     388          12 :   CheckParseEq("{,}", "'{,}'");
     389          12 :   CheckParseEq("{", "'{'");
     390          12 :   CheckParseEq("{z}", "'{z}'");
     391          12 :   CheckParseEq("{1z}", "'{1z}'");
     392          12 :   CheckParseEq("{12z}", "'{12z}'");
     393          12 :   CheckParseEq("{12,", "'{12,'");
     394          12 :   CheckParseEq("{12,3b", "'{12,3b'");
     395             : 
                         // Expected min_match() / max_match() of the parsed tree per pattern.
     396          12 :   CHECK_MIN_MAX("a", 1, 1);
     397          12 :   CHECK_MIN_MAX("abc", 3, 3);
     398          12 :   CHECK_MIN_MAX("a[bc]d", 3, 3);
     399          12 :   CHECK_MIN_MAX("a|bc", 1, 2);
     400          12 :   CHECK_MIN_MAX("ab|c", 1, 2);
     401          12 :   CHECK_MIN_MAX("a||bc", 0, 2);
     402          12 :   CHECK_MIN_MAX("|", 0, 0);
     403          12 :   CHECK_MIN_MAX("(?:ab)", 2, 2);
     404          12 :   CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
     405          12 :   CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
     406          12 :   CHECK_MIN_MAX("(ab)", 2, 2);
     407          12 :   CHECK_MIN_MAX("(ab|cde)", 2, 3);
     408          12 :   CHECK_MIN_MAX("(ab)\\1", 2, RegExpTree::kInfinity);
     409          12 :   CHECK_MIN_MAX("(ab|cde)\\1", 2, RegExpTree::kInfinity);
     410          12 :   CHECK_MIN_MAX("(?:ab)?", 0, 2);
     411          12 :   CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
     412          12 :   CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
     413          12 :   CHECK_MIN_MAX("a?", 0, 1);
     414          12 :   CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
     415          12 :   CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
     416          12 :   CHECK_MIN_MAX("a??", 0, 1);
     417          12 :   CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
     418          12 :   CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
     419          12 :   CHECK_MIN_MAX("(?:a?)?", 0, 1);
     420          12 :   CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
     421          12 :   CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
     422          12 :   CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
     423          12 :   CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
     424          12 :   CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
     425          12 :   CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
     426          12 :   CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
     427          12 :   CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
     428          12 :   CHECK_MIN_MAX("a{0}", 0, 0);
     429          12 :   CHECK_MIN_MAX("(?:a+){0}", 0, 0);
     430          12 :   CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
     431          12 :   CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
     432          12 :   CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
     433          12 :   CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
     434          12 :   CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
     435          12 :   CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
     436          12 :   CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
     437          12 :   CHECK_MIN_MAX("a\\bc", 2, 2);
     438          12 :   CHECK_MIN_MAX("a\\Bc", 2, 2);
     439          12 :   CHECK_MIN_MAX("a\\sc", 3, 3);
     440          12 :   CHECK_MIN_MAX("a\\Sc", 3, 3);
     441          12 :   CHECK_MIN_MAX("a(?=b)c", 2, 2);
     442          12 :   CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
     443          12 :   CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
     444             : 
                         // Named groups and \k<name> references. These checks pass
                         // unicode = true and run with FLAG_harmony_regexp_named_captures set to
                         // true; the flag is set to false again afterwards.
     445          12 :   FLAG_harmony_regexp_named_captures = true;
     446             :   CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<a>",
     447          12 :                "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))", true);
     448             :   CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<b>",
     449          12 :                "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))", true);
     450             :   CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<c>",
     451          12 :                "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))", true);
     452          12 :   CheckParseEq("(?<a>a)\\k<a>", "(: (^ 'a') (<- 1))", true);
     453          12 :   CheckParseEq("(?<a>a\\k<a>)", "(^ 'a')", true);
     454          12 :   CheckParseEq("(?<a>\\k<a>a)", "(^ 'a')", true);
     455          12 :   CheckParseEq("(?<a>\\k<b>)(?<b>\\k<a>)", "(: (^ (<- 2)) (^ (<- 1)))", true);
     456          12 :   CheckParseEq("\\k<a>(?<a>a)", "(: (<- 1) (^ 'a'))", true);
     457             : 
     458          12 :   CheckParseEq("(?<\\u{03C0}>a)", "(^ 'a')", true);
     459          12 :   CheckParseEq("(?<\\u03C0>a)", "(^ 'a')", true);
     460          12 :   FLAG_harmony_regexp_named_captures = false;
     461          12 : }
     462             : 
     463             : 
     464       23724 : TEST(ParserWithLookbehind) {
                         // Runs the shared TestRegExpParser() expectations with the lookbehind
                         // argument set to true.
     465           6 :   TestRegExpParser(true);  // Lookbehind enabled.
     466           6 : }
     467             : 
     468             : 
     469       23724 : TEST(ParserWithoutLookbehind) {
                         // Runs the shared TestRegExpParser() expectations with the lookbehind
                         // argument set to false, so that the branch which requires "(?<=...)"
                         // and "(?<!...)" to be rejected as parse errors is exercised. Passing
                         // true here would only repeat ParserWithLookbehind.
     470           6 :   TestRegExpParser(false);  // Lookbehind disabled.
     471           6 : }
     472             : 
     473       23724 : TEST(ParserRegression) {
                         // Pins the printed parse tree for four specific patterns via
                         // CheckParseEq().
     474           6 :   CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])");
     475           6 :   CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
     476           6 :   CheckParseEq("{", "'{'");
     477           6 :   CheckParseEq("a|", "(| 'a' %)");
     478           6 : }
     479             : 
     480         126 : static void ExpectError(const char* input, const char* expected,
     481             :                         bool unicode = false) {
                         // Parses `input` (adding JSRegExp::kUnicode to the flags when `unicode`
                         // is true) and requires the parse to fail: ParseRegExp must return
                         // false, no tree may be produced, and the reported error string must
                         // compare equal to `expected`.
     482         126 :   v8::HandleScope scope(CcTest::isolate());
     483         252 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     484         126 :   FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
     485             :   RegExpCompileData result;
     486             :   JSRegExp::Flags flags = JSRegExp::kNone;
     487         126 :   if (unicode) flags |= JSRegExp::kUnicode;
     488         126 :   CHECK(!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
     489             :                                                  &reader, flags, &result));
     490         126 :   CHECK_NULL(result.tree);
     491         126 :   CHECK(!result.error.is_null());
     492         126 :   std::unique_ptr<char[]> str = result.error->ToCString(ALLOW_NULLS);
     493         252 :   CHECK_EQ(0, strcmp(expected, str.get()));
     494         126 : }
     495             : // Checks the exact error message reported for a series of malformed
     496             : // patterns, including the capture-count limit and named-capture errors.
     497       23724 : TEST(Errors) {
     498             :   const char* kEndBackslash = "\\ at end of pattern";
     499           6 :   ExpectError("\\", kEndBackslash);
     500             :   const char* kUnterminatedGroup = "Unterminated group";
     501           6 :   ExpectError("(foo", kUnterminatedGroup);
     502             :   const char* kInvalidGroup = "Invalid group";
     503           6 :   ExpectError("(?", kInvalidGroup);
     504             :   const char* kUnterminatedCharacterClass = "Unterminated character class";
     505           6 :   ExpectError("[", kUnterminatedCharacterClass);
     506           6 :   ExpectError("[a-", kUnterminatedCharacterClass);
     507             :   const char* kNothingToRepeat = "Nothing to repeat";
     508           6 :   ExpectError("*", kNothingToRepeat);
     509           6 :   ExpectError("?", kNothingToRepeat);
     510           6 :   ExpectError("+", kNothingToRepeat);
     511           6 :   ExpectError("{1}", kNothingToRepeat);
     512           6 :   ExpectError("{1,2}", kNothingToRepeat);
     513           6 :   ExpectError("{1,}", kNothingToRepeat);
     514             : 
     515             :   // Check that we don't allow more than kMaxCaptures captures.
     516             :   const int kMaxCaptures = 1 << 16;  // Must match RegExpParser::kMaxCaptures.
     517             :   const char* kTooManyCaptures = "Too many captures";
     518           6 :   std::ostringstream os;
     519      393228 :   for (int i = 0; i <= kMaxCaptures; i++) {  // Inclusive bound: emits kMaxCaptures + 1 groups.
     520      393222 :     os << "()";
     521             :   }
     522          12 :   ExpectError(os.str().c_str(), kTooManyCaptures);
     523             : // The remaining cases run with the named-captures flag on and in unicode mode.
     524           6 :   FLAG_harmony_regexp_named_captures = true;
     525             :   const char* kInvalidCaptureName = "Invalid capture group name";
     526           6 :   ExpectError("(?<>.)", kInvalidCaptureName, true);
     527           6 :   ExpectError("(?<1>.)", kInvalidCaptureName, true);
     528           6 :   ExpectError("(?<_%>.)", kInvalidCaptureName, true);
     529           6 :   ExpectError("\\k<a", kInvalidCaptureName, true);
     530             :   const char* kDuplicateCaptureName = "Duplicate capture group name";
     531           6 :   ExpectError("(?<a>.)(?<a>.)", kDuplicateCaptureName, true);
     532             :   const char* kInvalidUnicodeEscape = "Invalid Unicode escape sequence";
     533           6 :   ExpectError("(?<\\u{FISK}", kInvalidUnicodeEscape, true);
     534             :   const char* kInvalidCaptureReferenced = "Invalid named capture referenced";
     535           6 :   ExpectError("\\k<a>", kInvalidCaptureReferenced, true);
     536           6 :   ExpectError("(?<b>)\\k<a>", kInvalidCaptureReferenced, true);
     537             :   const char* kInvalidNamedReference = "Invalid named reference";
     538           6 :   ExpectError("\\ka", kInvalidNamedReference, true);
     539           6 :   FLAG_harmony_regexp_named_captures = false;  // NOTE(review): forces false instead of restoring the value the flag had on entry.
     540           6 : }
     541             : 
     542             : // Reference predicate for \d: true only for the ASCII digits '0'..'9'.
     543      393216 : static bool IsDigit(uc16 c) {
     544      786432 :   return ('0' <= c && c <= '9');
     545             : }
     546             : 
     547             : // Reference predicate for \D: the logical complement of IsDigit.
     548      393216 : static bool NotDigit(uc16 c) {
     549      393216 :   return !IsDigit(c);
     550             : }
     551             : 
     552             : // Reference predicate for \s; defers to WhiteSpaceOrLineTerminator::Is.
     553      393216 : static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
     554             :   // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
     555             :   // WhiteSpace (7.2) and LineTerminator (7.3) values.
     556      786432 :   return v8::internal::WhiteSpaceOrLineTerminator::Is(c);
     557             : }
     558             : 
     559             : // Reference predicate for \S: complement of IsWhiteSpaceOrLineTerminator. NOTE(review): "Termiantor" is a misspelling that the call site shares.
     560      393216 : static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
     561      393216 :   return !IsWhiteSpaceOrLineTerminator(c);
     562             : }
     563             : 
     564             : // Reference predicate for \W: the logical complement of IsRegExpWord.
     565      393216 : static bool NotWord(uc16 c) {
     566      393216 :   return !IsRegExpWord(c);
     567             : }
     568             : 
     569             : // Checks that the ranges AddClassEscape produces for escape |c| agree with predicate |pred| on every code unit 0..0xFFFF.
     570          42 : static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
     571          42 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     572    14978166 :   ZoneList<CharacterRange>* ranges =
     573          42 :       new(&zone) ZoneList<CharacterRange>(2, &zone);
     574          42 :   CharacterRange::AddClassEscape(c, ranges, &zone);
     575     2752554 :   for (uc32 i = 0; i < (1 << 16); i++) {
     576             :     bool in_class = false;
     577    28776660 :     for (int j = 0; !in_class && j < ranges->length(); j++) {  // Linear scan; stops at the first range containing i.
     578    26895372 :       CharacterRange& range = ranges->at(j);
     579    26895372 :       in_class = (range.from() <= i && i <= range.to());  // Both bounds are treated as inclusive.
     580             :     }
     581     2752512 :     CHECK_EQ(pred(i), in_class);
     582          42 :   }
     583          42 : }
     584             : 
     585             : // Runs TestCharacterClassEscapes for '.', d, D, s, S, w and W, each paired with its reference predicate.
     586       23724 : TEST(CharacterClassEscapes) {
     587           6 :   TestCharacterClassEscapes('.', IsRegExpNewline);
     588           6 :   TestCharacterClassEscapes('d', IsDigit);
     589           6 :   TestCharacterClassEscapes('D', NotDigit);
     590           6 :   TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
     591           6 :   TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);
     592           6 :   TestCharacterClassEscapes('w', IsRegExpWord);
     593           6 :   TestCharacterClassEscapes('W', NotWord);
     594           6 : }
     595             : 
     596             : // Parses |input| and runs RegExpEngine::Compile on it; returns the compiled node, or nullptr when parsing fails.
     597           6 : static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
     598             :                            bool is_one_byte, Zone* zone) {
     599             :   Isolate* isolate = CcTest::i_isolate();
     600           6 :   FlatStringReader reader(isolate, CStrVector(input));
     601             :   RegExpCompileData compile_data;
     602             :   JSRegExp::Flags flags = JSRegExp::kNone;
     603           6 :   if (multiline) flags |= JSRegExp::kMultiline;  // OR the flags in so that |multiline| and |unicode| can be combined.
     604           6 :   if (unicode) flags |= JSRegExp::kUnicode;  // Previously a plain assignment, which discarded kMultiline.
     605           6 :   if (!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), zone,
     606           6 :                                                &reader, flags, &compile_data))
     607             :     return nullptr;
     608             :   Handle<String> pattern = isolate->factory()
     609             :                                ->NewStringFromUtf8(CStrVector(input))
     610          12 :                                .ToHandleChecked();
     611             :   Handle<String> sample_subject =  // An empty string is passed as the sample subject.
     612          12 :       isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
     613             :   RegExpEngine::Compile(isolate, zone, &compile_data, flags, pattern,
     614           6 :                         sample_subject, is_one_byte);
     615           6 :   return compile_data.node;  // The return value of RegExpEngine::Compile is ignored; only the node is handed back.
     616             : }
     617             : 
     618             : // Compiles |input| in a fresh handle scope and zone; in DEBUG builds it can also dump the node graph via DotPrint.
     619           6 : static void Execute(const char* input, bool multiline, bool unicode,
     620             :                     bool is_one_byte, bool dot_output = false) {
     621           6 :   v8::HandleScope scope(CcTest::isolate());
     622          12 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     623           6 :   RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone);
     624           6 :   USE(node);  // |node| is otherwise read only inside the DEBUG block below.
     625             : #ifdef DEBUG
     626             :   if (dot_output) {
     627             :     RegExpEngine::DotPrint(input, node, false);
     628             :   }
     629             : #endif  // DEBUG
     630           6 : }
     631             : 
     632             : // Configuration type passed to ZoneSplayTree in the splay-tree test: int keys, int values, three-way Compare.
     633             : class TestConfig {
     634             :  public:
     635             :   typedef int Key;
     636             :   typedef int Value;
     637             :   static const int kNoKey;
     638             :   static int NoValue() { return 0; }
     639             :   static inline int Compare(int a, int b) {  // Returns -1, 1 or 0 for a < b, a > b, a == b.
     640     5338566 :     if (a < b)
     641             :       return -1;
     642     2650086 :     else if (a > b)
     643             :       return 1;
     644             :     else
     645             :       return 0;
     646             :   }
     647             : };
     648             : 
     649             : // Out-of-class definition of the static member declared in TestConfig.
     650             : const int TestConfig::kNoKey = 0;
     651             : 
     652             : // Deterministic mix of (i, j) used as a repeatable number source by the tests below.
     653             : static unsigned PseudoRandom(int i, int j) {
     654        7128 :   return ~(~((i * 781) ^ (j * 329)));  // The two bitwise negations cancel; the value is (i * 781) ^ (j * 329) converted to unsigned.
     655             : }
     656             : 
     657             : // Inserts and removes pseudo-random keys in a ZoneSplayTree while mirroring membership in a bool array, and compares the two after every change.
     658       23724 : TEST(SplayTreeSimple) {
     659             :   static const unsigned kLimit = 1000;
     660           6 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     661             :   ZoneSplayTree<TestConfig> tree(&zone);
     662             :   bool seen[kLimit];
     663           6 :   for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
     664             : #define CHECK_MAPS_EQUAL() do {                                      \
     665             :     for (unsigned k = 0; k < kLimit; k++)                            \
     666             :       CHECK_EQ(seen[k], tree.Find(k, &loc));                         \
     667             :   } while (false)
     668         300 :   for (int i = 0; i < 50; i++) {  // CHECK_MAPS_EQUAL uses a |loc| from the expansion site and is not #undef'd inside this test.
     669         978 :     for (int j = 0; j < 50; j++) {
     670        1278 :       int next = PseudoRandom(i, j) % kLimit;
     671        1278 :       if (seen[next]) {
     672             :         // We've already seen this one.  Check the value and remove
     673             :         // it.
     674             :         ZoneSplayTree<TestConfig>::Locator loc;
     675         102 :         CHECK(tree.Find(next, &loc));
     676         102 :         CHECK_EQ(next, loc.key());
     677         102 :         CHECK_EQ(3 * next, loc.value());  // Matches the 3 * next stored on insertion below.
     678         102 :         tree.Remove(next);
     679         102 :         seen[next] = false;
     680      102102 :         CHECK_MAPS_EQUAL();
     681             :       } else {
     682             :         // Check that it wasn't there already and then add it.
     683             :         ZoneSplayTree<TestConfig>::Locator loc;
     684        1176 :         CHECK(!tree.Find(next, &loc));
     685        1176 :         CHECK(tree.Insert(next, &loc));
     686        1176 :         CHECK_EQ(next, loc.key());
     687        1176 :         loc.set_value(3 * next);
     688        1176 :         seen[next] = true;
     689     1177176 :         CHECK_MAPS_EQUAL();
     690             :       }
     691        1278 :       int val = PseudoRandom(j, i) % kLimit;
     692        1278 :       if (seen[val]) {
     693             :         ZoneSplayTree<TestConfig>::Locator loc;
     694         162 :         CHECK(tree.FindGreatestLessThan(val, &loc));
     695         324 :         CHECK_EQ(loc.key(), val);  // The test expects a key that is present to be returned as-is.
     696             :         break;  // Leaves the inner j loop early.
     697             :       }
     698        2232 :       val = PseudoRandom(i + j, i - j) % kLimit;
     699        1116 :       if (seen[val]) {
     700             :         ZoneSplayTree<TestConfig>::Locator loc;
     701         138 :         CHECK(tree.FindLeastGreaterThan(val, &loc));
     702         276 :         CHECK_EQ(loc.key(), val);  // Same expectation for the least-greater lookup.
     703             :         break;  // Leaves the inner j loop early.
     704             :       }
     705             :     }
     706           6 :   }
     707           6 : }
     708             : 
     709             : // Fills a DispatchTable with pseudo-random ranges tagged by row index and checks every position against a brute-force membership test.
     710       23724 : TEST(DispatchTableConstruction) {
     711             :   // Initialize test data.
     712             :   static const int kLimit = 1000;
     713             :   static const int kRangeCount = 8;
     714             :   static const int kRangeSize = 16;
     715             :   uc16 ranges[kRangeCount][2 * kRangeSize];  // Each row holds kRangeSize (from, to) pairs.
     716          54 :   for (int i = 0; i < kRangeCount; i++) {
     717          48 :     Vector<uc16> range(ranges[i], 2 * kRangeSize);
     718        1584 :     for (int j = 0; j < 2 * kRangeSize; j++) {
     719        4608 :       range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
     720             :     }
     721             :     range.Sort();  // After sorting, consecutive pairs satisfy from <= to (verified by the loop below).
     722        1536 :     for (int j = 1; j < 2 * kRangeSize; j++) {
     723        4464 :       CHECK(range[j-1] <= range[j]);
     724             :     }
     725             :   }
     726             :   // Enter test data into dispatch table.
     727           6 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     728             :   DispatchTable table(&zone);
     729          54 :   for (int i = 0; i < kRangeCount; i++) {
     730          48 :     uc16* range = ranges[i];
     731         816 :     for (int j = 0; j < 2 * kRangeSize; j += 2)
     732         768 :       table.AddRange(CharacterRange::Range(range[j], range[j + 1]), i, &zone);  // The row index i is the value attached to the range.
     733             :   }
     734             :   // Check that the table looks as we would expect
     735        6000 :   for (int p = 0; p < kLimit; p++) {
     736        6000 :     OutSet* outs = table.Get(p);
     737       54000 :     for (int j = 0; j < kRangeCount; j++) {
     738       48000 :       uc16* range = ranges[j];
     739             :       bool is_on = false;
     740      622320 :       for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
     741      574320 :         is_on = (range[k] <= p && p <= range[k + 1]);  // Brute-force check with inclusive bounds.
     742       48000 :       CHECK_EQ(is_on, outs->Get(j));
     743             :     }
     744           6 :   }
     745           6 : }
     746             : 
     747             : 
     748             : // Test of debug-only syntax: possessive quantifiers parse only while FLAG_regexp_possessive_quantifier is set.
     749             : #ifdef DEBUG
     750             : 
     751             : TEST(ParsePossessiveRepetition) {
     752             :   bool old_flag_value = FLAG_regexp_possessive_quantifier;  // Saved so the flag can be restored on exit.
     753             : 
     754             :   // Enable possessive quantifier syntax.
     755             :   FLAG_regexp_possessive_quantifier = true;
     756             : 
     757             :   CheckParseEq("a*+", "(# 0 - p 'a')");
     758             :   CheckParseEq("a++", "(# 1 - p 'a')");
     759             :   CheckParseEq("a?+", "(# 0 1 p 'a')");
     760             :   CheckParseEq("a{10,20}+", "(# 10 20 p 'a')");
     761             :   CheckParseEq("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");
     762             : 
     763             :   // Disable possessive quantifier syntax.
     764             :   FLAG_regexp_possessive_quantifier = false;
     765             : 
     766             :   CHECK_PARSE_ERROR("a*+");
     767             :   CHECK_PARSE_ERROR("a++");
     768             :   CHECK_PARSE_ERROR("a?+");
     769             :   CHECK_PARSE_ERROR("a{10,20}+");
     770             :   CHECK_PARSE_ERROR("a{10,20}+b");
     771             : 
     772             :   FLAG_regexp_possessive_quantifier = old_flag_value;
     773             : }
     774             : 
     775             : #endif
     776             : 
     777             : // Tests of interpreter.
     778             : 
     779             : 
     780             : #ifndef V8_INTERPRETED_REGEXP
     781             : // Alias the macro assembler class for the target architecture as ArchRegExpMacroAssembler. The chain has no #else branch.
     782             : #if V8_TARGET_ARCH_IA32
     783             : typedef RegExpMacroAssemblerIA32 ArchRegExpMacroAssembler;
     784             : #elif V8_TARGET_ARCH_X64
     785             : typedef RegExpMacroAssemblerX64 ArchRegExpMacroAssembler;
     786             : #elif V8_TARGET_ARCH_ARM
     787             : typedef RegExpMacroAssemblerARM ArchRegExpMacroAssembler;
     788             : #elif V8_TARGET_ARCH_ARM64
     789             : typedef RegExpMacroAssemblerARM64 ArchRegExpMacroAssembler;
     790             : #elif V8_TARGET_ARCH_S390
     791             : typedef RegExpMacroAssemblerS390 ArchRegExpMacroAssembler;
     792             : #elif V8_TARGET_ARCH_PPC
     793             : typedef RegExpMacroAssemblerPPC ArchRegExpMacroAssembler;
     794             : #elif V8_TARGET_ARCH_MIPS
     795             : typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;
     796             : #elif V8_TARGET_ARCH_MIPS64
     797             : typedef RegExpMacroAssemblerMIPS ArchRegExpMacroAssembler;  // MIPS64 names the same class as MIPS.
     798             : #elif V8_TARGET_ARCH_X87
     799             : typedef RegExpMacroAssemblerX87 ArchRegExpMacroAssembler;
     800             : #endif
     801             : // Scoped helper: opens a handle scope, creates a new v8::Context and enters it; the destructor exits the context.
     802             : class ContextInitializer {
     803             :  public:
     804          66 :   ContextInitializer()
     805             :       : scope_(CcTest::isolate()),
     806         132 :         env_(v8::Context::New(CcTest::isolate())) {
     807          66 :     env_->Enter();
     808          66 :   }
     809          66 :   ~ContextInitializer() {
     810          66 :     env_->Exit();
     811             :   }
     812             :  private:
     813             :   v8::HandleScope scope_;  // Declared before env_, so it is constructed first and destroyed last.
     814             :   v8::Local<v8::Context> env_;
     815             : };
     816             : 
     817             : // Thin wrapper that forwards to NativeRegExpMacroAssembler::Execute, supplying a fixed 0 and the test isolate as the last two arguments.
     818          84 : static ArchRegExpMacroAssembler::Result Execute(Code* code,
     819             :                                                 String* input,
     820             :                                                 int start_offset,
     821             :                                                 const byte* input_start,
     822             :                                                 const byte* input_end,
     823             :                                                 int* captures) {
     824             :   return NativeRegExpMacroAssembler::Execute(
     825             :       code,
     826             :       input,
     827             :       start_offset,
     828             :       input_start,
     829             :       input_end,
     830             :       captures,
     831             :       0,  // NOTE(review): meaning of this hard-coded argument is not visible here; confirm against NativeRegExpMacroAssembler::Execute.
     832          84 :       CcTest::i_isolate());
     833             : }
     834             : 
     835             : // Assembles a program consisting only of Succeed() and expects SUCCESS with all four capture slots set to -1.
     836       23724 : TEST(MacroAssemblerNativeSuccess) {
     837           6 :   v8::V8::Initialize();
     838           6 :   ContextInitializer initializer;
     839             :   Isolate* isolate = CcTest::i_isolate();
     840             :   Factory* factory = isolate->factory();
     841          12 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     842             : 
     843             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
     844          12 :                              4);
     845             : 
     846           6 :   m.Succeed();
     847             : 
     848           6 :   Handle<String> source = factory->NewStringFromStaticChars("");
     849           6 :   Handle<Object> code_object = m.GetCode(source);
     850             :   Handle<Code> code = Handle<Code>::cast(code_object);
     851             : 
     852           6 :   int captures[4] = {42, 37, 87, 117};  // Arbitrary initial values; the checks below expect each to be overwritten.
     853           6 :   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
     854             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
     855             :   const byte* start_adr =
     856           6 :       reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
     857             : 
     858             :   NativeRegExpMacroAssembler::Result result =
     859             :       Execute(*code,
     860             :               *input,
     861             :               0,
     862             :               start_adr,
     863             :               start_adr + seq_input->length(),
     864          12 :               captures);
     865             : 
     866           6 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
     867           6 :   CHECK_EQ(-1, captures[0]);
     868           6 :   CHECK_EQ(-1, captures[1]);
     869           6 :   CHECK_EQ(-1, captures[2]);
     870           6 :   CHECK_EQ(-1, captures[3]);
     871           6 : }
     872             : 
     873             : // Hand-assembles a LATIN1 program labelled "^foo"; expects SUCCESS with captures (0, 3) on "foofoo" and FAILURE on "barbarbar".
     874       23724 : TEST(MacroAssemblerNativeSimple) {
     875           6 :   v8::V8::Initialize();
     876           6 :   ContextInitializer initializer;
     877             :   Isolate* isolate = CcTest::i_isolate();
     878             :   Factory* factory = isolate->factory();
     879          12 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     880             : 
     881             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
     882          12 :                              4);
     883             : // NOTE(review): a nullptr label below presumably means "backtrack on mismatch" - confirm against the macro assembler API.
     884             :   Label fail, backtrack;
     885           6 :   m.PushBacktrack(&fail);
     886           6 :   m.CheckNotAtStart(0, nullptr);
     887           6 :   m.LoadCurrentCharacter(2, nullptr);
     888           6 :   m.CheckNotCharacter('o', nullptr);
     889           6 :   m.LoadCurrentCharacter(1, nullptr, false);
     890           6 :   m.CheckNotCharacter('o', nullptr);
     891           6 :   m.LoadCurrentCharacter(0, nullptr, false);
     892           6 :   m.CheckNotCharacter('f', nullptr);
     893           6 :   m.WriteCurrentPositionToRegister(0, 0);  // The test later expects captures[0] == 0.
     894           6 :   m.WriteCurrentPositionToRegister(1, 3);  // The test later expects captures[1] == 3.
     895           6 :   m.AdvanceCurrentPosition(3);
     896           6 :   m.PushBacktrack(&backtrack);
     897           6 :   m.Succeed();
     898           6 :   m.Bind(&backtrack);
     899           6 :   m.Backtrack();
     900           6 :   m.Bind(&fail);
     901           6 :   m.Fail();
     902             : 
     903           6 :   Handle<String> source = factory->NewStringFromStaticChars("^foo");
     904           6 :   Handle<Object> code_object = m.GetCode(source);
     905             :   Handle<Code> code = Handle<Code>::cast(code_object);
     906             : 
     907           6 :   int captures[4] = {42, 37, 87, 117};  // Arbitrary initial values, overwritten by the successful run.
     908           6 :   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
     909             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
     910           6 :   Address start_adr = seq_input->GetCharsAddress();
     911             : 
     912             :   NativeRegExpMacroAssembler::Result result =
     913             :       Execute(*code,
     914             :               *input,
     915             :               0,
     916             :               start_adr,
     917             :               start_adr + input->length(),
     918          12 :               captures);
     919             : 
     920           6 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
     921           6 :   CHECK_EQ(0, captures[0]);
     922           6 :   CHECK_EQ(3, captures[1]);
     923           6 :   CHECK_EQ(-1, captures[2]);
     924           6 :   CHECK_EQ(-1, captures[3]);
     925             : // Second run: same code object, subject that does not start with "foo".
     926           6 :   input = factory->NewStringFromStaticChars("barbarbar");
     927             :   seq_input = Handle<SeqOneByteString>::cast(input);
     928           6 :   start_adr = seq_input->GetCharsAddress();
     929             : 
     930             :   result = Execute(*code,
     931             :                    *input,
     932             :                    0,
     933             :                    start_adr,
     934             :                    start_adr + input->length(),
     935          12 :                    captures);
     936             : 
     937           6 :   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
     938           6 : }
     939             : 
     940             : // UC16 variant of the "^foo" program: expects SUCCESS with captures (0, 3) on a two-byte "foofo\u2603" and FAILURE on "barbarba\u2603".
     941       23724 : TEST(MacroAssemblerNativeSimpleUC16) {
     942           6 :   v8::V8::Initialize();
     943           6 :   ContextInitializer initializer;
     944             :   Isolate* isolate = CcTest::i_isolate();
     945             :   Factory* factory = isolate->factory();
     946          12 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
     947             : 
     948             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
     949          12 :                              4);
     950             : // NOTE(review): a nullptr label below presumably means "backtrack on mismatch" - confirm against the macro assembler API.
     951             :   Label fail, backtrack;
     952           6 :   m.PushBacktrack(&fail);
     953           6 :   m.CheckNotAtStart(0, nullptr);
     954           6 :   m.LoadCurrentCharacter(2, nullptr);
     955           6 :   m.CheckNotCharacter('o', nullptr);
     956           6 :   m.LoadCurrentCharacter(1, nullptr, false);
     957           6 :   m.CheckNotCharacter('o', nullptr);
     958           6 :   m.LoadCurrentCharacter(0, nullptr, false);
     959           6 :   m.CheckNotCharacter('f', nullptr);
     960           6 :   m.WriteCurrentPositionToRegister(0, 0);  // The test later expects captures[0] == 0.
     961           6 :   m.WriteCurrentPositionToRegister(1, 3);  // The test later expects captures[1] == 3.
     962           6 :   m.AdvanceCurrentPosition(3);
     963           6 :   m.PushBacktrack(&backtrack);
     964           6 :   m.Succeed();
     965           6 :   m.Bind(&backtrack);
     966           6 :   m.Backtrack();
     967           6 :   m.Bind(&fail);
     968           6 :   m.Fail();
     969             : 
     970           6 :   Handle<String> source = factory->NewStringFromStaticChars("^foo");
     971           6 :   Handle<Object> code_object = m.GetCode(source);
     972             :   Handle<Code> code = Handle<Code>::cast(code_object);
     973             : 
     974           6 :   int captures[4] = {42, 37, 87, 117};  // Arbitrary initial values, overwritten by the successful run.
     975             :   const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
     976           6 :                               static_cast<uc16>(0x2603)};  // The non-Latin1 code unit makes this a two-byte string.
     977             :   Handle<String> input = factory->NewStringFromTwoByte(
     978          12 :       Vector<const uc16>(input_data, 6)).ToHandleChecked();
     979             :   Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
     980           6 :   Address start_adr = seq_input->GetCharsAddress();
     981             : 
     982             :   NativeRegExpMacroAssembler::Result result =
     983             :       Execute(*code,
     984             :               *input,
     985             :               0,
     986             :               start_adr,
     987             :               start_adr + input->length() * 2,  // End address in bytes: two bytes per uc16, matching the second call below.
     988          12 :               captures);
     989             : 
     990           6 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
     991           6 :   CHECK_EQ(0, captures[0]);
     992           6 :   CHECK_EQ(3, captures[1]);
     993           6 :   CHECK_EQ(-1, captures[2]);
     994           6 :   CHECK_EQ(-1, captures[3]);
     995             : // Second run: same code object, subject that does not start with "foo".
     996             :   const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
     997           6 :                                static_cast<uc16>(0x2603)};
     998             :   input = factory->NewStringFromTwoByte(
     999          12 :       Vector<const uc16>(input_data2, 9)).ToHandleChecked();
    1000             :   seq_input = Handle<SeqTwoByteString>::cast(input);
    1001           6 :   start_adr = seq_input->GetCharsAddress();
    1002             : 
    1003             :   result = Execute(*code,
    1004             :                    *input,
    1005             :                    0,
    1006             :                    start_adr,
    1007           6 :                    start_adr + input->length() * 2,
    1008          12 :                    captures);
    1009             : 
    1010           6 :   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
    1011           6 : }
    1012             : 
    1013             : // Assembles a program that loads the character at offset 10 of the 6-character subject "foofoo" and expects an overall FAILURE.
    1014       23724 : TEST(MacroAssemblerNativeBacktrack) {
    1015           6 :   v8::V8::Initialize();
    1016           6 :   ContextInitializer initializer;
    1017             :   Isolate* isolate = CcTest::i_isolate();
    1018             :   Factory* factory = isolate->factory();
    1019          12 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1020             : 
    1021             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1022          12 :                              0);
    1023             : 
    1024             :   Label fail;
    1025             :   Label backtrack;
    1026           6 :   m.LoadCurrentCharacter(10, &fail);  // Offset 10 lies beyond the 6-character subject used below.
    1027           6 :   m.Succeed();
    1028           6 :   m.Bind(&fail);
    1029           6 :   m.PushBacktrack(&backtrack);
    1030           6 :   m.LoadCurrentCharacter(10, nullptr);  // NOTE(review): nullptr presumably means "backtrack on failure" - confirm.
    1031           6 :   m.Succeed();
    1032           6 :   m.Bind(&backtrack);
    1033           6 :   m.Fail();
    1034             : 
    1035           6 :   Handle<String> source = factory->NewStringFromStaticChars("..........");
    1036           6 :   Handle<Object> code_object = m.GetCode(source);
    1037             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1038             : 
    1039           6 :   Handle<String> input = factory->NewStringFromStaticChars("foofoo");
    1040             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1041           6 :   Address start_adr = seq_input->GetCharsAddress();
    1042             : 
    1043             :   NativeRegExpMacroAssembler::Result result = Execute(
    1044          12 :       *code, *input, 0, start_adr, start_adr + input->length(), nullptr);  // No capture array is passed.
    1045             : 
    1046           6 :   CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
    1047           6 : }
    1048             : 
    1049             : // Exercises CheckNotBackReference on the LATIN1 subject "fooofo"; expects SUCCESS with output registers (0, 2, 6, -1).
    1050       23724 : TEST(MacroAssemblerNativeBackReferenceLATIN1) {
    1051           6 :   v8::V8::Initialize();
    1052           6 :   ContextInitializer initializer;
    1053             :   Isolate* isolate = CcTest::i_isolate();
    1054             :   Factory* factory = isolate->factory();
    1055          12 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1056             : 
    1057             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1058          12 :                              4);
    1059             : 
    1060           6 :   m.WriteCurrentPositionToRegister(0, 0);
    1061           6 :   m.AdvanceCurrentPosition(2);
    1062           6 :   m.WriteCurrentPositionToRegister(1, 0);  // The test later expects output[0] == 0 and output[1] == 2.
    1063             :   Label nomatch;
    1064           6 :   m.CheckNotBackReference(0, false, &nomatch);
    1065           6 :   m.Fail();  // Reached only if the first back-reference check does not branch to |nomatch|.
    1066           6 :   m.Bind(&nomatch);
    1067           6 :   m.AdvanceCurrentPosition(2);
    1068             :   Label missing_match;
    1069           6 :   m.CheckNotBackReference(0, false, &missing_match);
    1070           6 :   m.WriteCurrentPositionToRegister(2, 0);  // The test later expects output[2] == 6.
    1071           6 :   m.Succeed();
    1072           6 :   m.Bind(&missing_match);
    1073           6 :   m.Fail();
    1074             : 
    1075           6 :   Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");  // NOTE(review): "\1" is a C++ octal escape (U+0001), not backslash followed by '1'.
    1076           6 :   Handle<Object> code_object = m.GetCode(source);
    1077             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1078             : 
    1079           6 :   Handle<String> input = factory->NewStringFromStaticChars("fooofo");
    1080             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1081           6 :   Address start_adr = seq_input->GetCharsAddress();
    1082             : 
    1083             :   int output[4];  // Left uninitialized; all four slots are checked after the run.
    1084             :   NativeRegExpMacroAssembler::Result result =
    1085             :       Execute(*code,
    1086             :               *input,
    1087             :               0,
    1088             :               start_adr,
    1089             :               start_adr + input->length(),
    1090          12 :               output);
    1091             : 
    1092           6 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1093           6 :   CHECK_EQ(0, output[0]);
    1094           6 :   CHECK_EQ(2, output[1]);
    1095           6 :   CHECK_EQ(6, output[2]);
    1096           6 :   CHECK_EQ(-1, output[3]);
    1097           6 : }
    1098             : 
    1099             : 
    1100       23724 : TEST(MacroAssemblerNativeBackReferenceUC16) {
    1101           6 :   v8::V8::Initialize();
    1102           6 :   ContextInitializer initializer;
    1103             :   Isolate* isolate = CcTest::i_isolate();
    1104             :   Factory* factory = isolate->factory();
    1105          12 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1106             : 
    1107             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
    1108          12 :                              4);
    1109             : 
    1110           6 :   m.WriteCurrentPositionToRegister(0, 0);
    1111           6 :   m.AdvanceCurrentPosition(2);
    1112           6 :   m.WriteCurrentPositionToRegister(1, 0);
    1113             :   Label nomatch;
    1114           6 :   m.CheckNotBackReference(0, false, &nomatch);
    1115           6 :   m.Fail();
    1116           6 :   m.Bind(&nomatch);
    1117           6 :   m.AdvanceCurrentPosition(2);
    1118             :   Label missing_match;
    1119           6 :   m.CheckNotBackReference(0, false, &missing_match);
    1120           6 :   m.WriteCurrentPositionToRegister(2, 0);
    1121           6 :   m.Succeed();
    1122           6 :   m.Bind(&missing_match);
    1123           6 :   m.Fail();
    1124             : 
    1125           6 :   Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
    1126           6 :   Handle<Object> code_object = m.GetCode(source);
    1127             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1128             : 
    1129           6 :   const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
    1130             :   Handle<String> input = factory->NewStringFromTwoByte(
    1131          12 :       Vector<const uc16>(input_data, 6)).ToHandleChecked();
    1132             :   Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
    1133           6 :   Address start_adr = seq_input->GetCharsAddress();
    1134             : 
    1135             :   int output[4];
    1136             :   NativeRegExpMacroAssembler::Result result =
    1137             :       Execute(*code,
    1138             :               *input,
    1139             :               0,
    1140             :               start_adr,
    1141           6 :               start_adr + input->length() * 2,
    1142          12 :               output);
    1143             : 
    1144           6 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1145           6 :   CHECK_EQ(0, output[0]);
    1146           6 :   CHECK_EQ(2, output[1]);
    1147           6 :   CHECK_EQ(6, output[2]);
    1148           6 :   CHECK_EQ(-1, output[3]);
    1149           6 : }
    1150             : 
    1151             : 
    1152             : 
    1153       23724 : TEST(MacroAssemblernativeAtStart) {
    1154           6 :   v8::V8::Initialize();
    1155           6 :   ContextInitializer initializer;
    1156             :   Isolate* isolate = CcTest::i_isolate();
    1157             :   Factory* factory = isolate->factory();
    1158          12 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1159             : 
    1160             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1161          12 :                              0);
    1162             : 
    1163             :   Label not_at_start, newline, fail;
    1164           6 :   m.CheckNotAtStart(0, &not_at_start);
    1165             :   // Check that prevchar = '\n' and current = 'f'.
    1166           6 :   m.CheckCharacter('\n', &newline);
    1167           6 :   m.Bind(&fail);
    1168           6 :   m.Fail();
    1169           6 :   m.Bind(&newline);
    1170           6 :   m.LoadCurrentCharacter(0, &fail);
    1171           6 :   m.CheckNotCharacter('f', &fail);
    1172           6 :   m.Succeed();
    1173             : 
    1174           6 :   m.Bind(&not_at_start);
    1175             :   // Check that prevchar = 'o' and current = 'b'.
    1176             :   Label prevo;
    1177           6 :   m.CheckCharacter('o', &prevo);
    1178           6 :   m.Fail();
    1179           6 :   m.Bind(&prevo);
    1180           6 :   m.LoadCurrentCharacter(0, &fail);
    1181           6 :   m.CheckNotCharacter('b', &fail);
    1182           6 :   m.Succeed();
    1183             : 
    1184           6 :   Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)");
    1185           6 :   Handle<Object> code_object = m.GetCode(source);
    1186             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1187             : 
    1188           6 :   Handle<String> input = factory->NewStringFromStaticChars("foobar");
    1189             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1190           6 :   Address start_adr = seq_input->GetCharsAddress();
    1191             : 
    1192             :   NativeRegExpMacroAssembler::Result result = Execute(
    1193          12 :       *code, *input, 0, start_adr, start_adr + input->length(), nullptr);
    1194             : 
    1195           6 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1196             : 
    1197             :   result = Execute(*code, *input, 3, start_adr + 3, start_adr + input->length(),
    1198          12 :                    nullptr);
    1199             : 
    1200           6 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1201           6 : }
    1202             : 
    1203             : 
    1204       23724 : TEST(MacroAssemblerNativeBackRefNoCase) {
    1205           6 :   v8::V8::Initialize();
    1206           6 :   ContextInitializer initializer;
    1207             :   Isolate* isolate = CcTest::i_isolate();
    1208             :   Factory* factory = isolate->factory();
    1209          12 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1210             : 
    1211             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1212          12 :                              4);
    1213             : 
    1214             :   Label fail, succ;
    1215             : 
    1216           6 :   m.WriteCurrentPositionToRegister(0, 0);
    1217           6 :   m.WriteCurrentPositionToRegister(2, 0);
    1218           6 :   m.AdvanceCurrentPosition(3);
    1219           6 :   m.WriteCurrentPositionToRegister(3, 0);
    1220           6 :   m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail);  // Match "AbC".
    1221           6 :   m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail);  // Match "ABC".
    1222             :   Label expected_fail;
    1223           6 :   m.CheckNotBackReferenceIgnoreCase(2, false, false, &expected_fail);
    1224           6 :   m.Bind(&fail);
    1225           6 :   m.Fail();
    1226             : 
    1227           6 :   m.Bind(&expected_fail);
    1228           6 :   m.AdvanceCurrentPosition(3);  // Skip "xYz"
    1229           6 :   m.CheckNotBackReferenceIgnoreCase(2, false, false, &succ);
    1230           6 :   m.Fail();
    1231             : 
    1232           6 :   m.Bind(&succ);
    1233           6 :   m.WriteCurrentPositionToRegister(1, 0);
    1234           6 :   m.Succeed();
    1235             : 
    1236             :   Handle<String> source =
    1237           6 :       factory->NewStringFromStaticChars("^(abc)\1\1(?!\1)...(?!\1)");
    1238           6 :   Handle<Object> code_object = m.GetCode(source);
    1239             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1240             : 
    1241           6 :   Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab");
    1242             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1243           6 :   Address start_adr = seq_input->GetCharsAddress();
    1244             : 
    1245             :   int output[4];
    1246             :   NativeRegExpMacroAssembler::Result result =
    1247             :       Execute(*code,
    1248             :               *input,
    1249             :               0,
    1250             :               start_adr,
    1251             :               start_adr + input->length(),
    1252          12 :               output);
    1253             : 
    1254           6 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1255           6 :   CHECK_EQ(0, output[0]);
    1256           6 :   CHECK_EQ(12, output[1]);
    1257           6 :   CHECK_EQ(0, output[2]);
    1258           6 :   CHECK_EQ(3, output[3]);
    1259           6 : }
    1260             : 
    1261             : 
    1262             : 
    1263       23724 : TEST(MacroAssemblerNativeRegisters) {
    1264           6 :   v8::V8::Initialize();
    1265           6 :   ContextInitializer initializer;
    1266             :   Isolate* isolate = CcTest::i_isolate();
    1267             :   Factory* factory = isolate->factory();
    1268          12 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1269             : 
    1270             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1271          12 :                              6);
    1272             : 
    1273           6 :   uc16 foo_chars[3] = {'f', 'o', 'o'};
    1274             :   Vector<const uc16> foo(foo_chars, 3);
    1275             : 
    1276             :   enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
    1277             :   Label fail;
    1278             :   Label backtrack;
    1279           6 :   m.WriteCurrentPositionToRegister(out1, 0);  // Output: [0]
    1280           6 :   m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
    1281           6 :   m.PushBacktrack(&backtrack);
    1282           6 :   m.WriteStackPointerToRegister(sp);
    1283             :   // Fill stack and registers
    1284           6 :   m.AdvanceCurrentPosition(2);
    1285           6 :   m.WriteCurrentPositionToRegister(out1, 0);
    1286           6 :   m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
    1287           6 :   m.PushBacktrack(&fail);
    1288             :   // Drop backtrack stack frames.
    1289           6 :   m.ReadStackPointerFromRegister(sp);
    1290             :   // And take the first backtrack (to &backtrack)
    1291           6 :   m.Backtrack();
    1292             : 
    1293           6 :   m.PushCurrentPosition();
    1294           6 :   m.AdvanceCurrentPosition(2);
    1295           6 :   m.PopCurrentPosition();
    1296             : 
    1297           6 :   m.Bind(&backtrack);
    1298           6 :   m.PopRegister(out1);
    1299           6 :   m.ReadCurrentPositionFromRegister(out1);
    1300           6 :   m.AdvanceCurrentPosition(3);
    1301           6 :   m.WriteCurrentPositionToRegister(out2, 0);  // [0,3]
    1302             : 
    1303             :   Label loop;
    1304           6 :   m.SetRegister(loop_cnt, 0);  // loop counter
    1305           6 :   m.Bind(&loop);
    1306           6 :   m.AdvanceRegister(loop_cnt, 1);
    1307           6 :   m.AdvanceCurrentPosition(1);
    1308           6 :   m.IfRegisterLT(loop_cnt, 3, &loop);
    1309           6 :   m.WriteCurrentPositionToRegister(out3, 0);  // [0,3,6]
    1310             : 
    1311             :   Label loop2;
    1312           6 :   m.SetRegister(loop_cnt, 2);  // loop counter
    1313           6 :   m.Bind(&loop2);
    1314           6 :   m.AdvanceRegister(loop_cnt, -1);
    1315           6 :   m.AdvanceCurrentPosition(1);
    1316           6 :   m.IfRegisterGE(loop_cnt, 0, &loop2);
    1317           6 :   m.WriteCurrentPositionToRegister(out4, 0);  // [0,3,6,9]
    1318             : 
    1319             :   Label loop3;
    1320             :   Label exit_loop3;
    1321           6 :   m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
    1322           6 :   m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
    1323           6 :   m.ReadCurrentPositionFromRegister(out3);
    1324           6 :   m.Bind(&loop3);
    1325           6 :   m.AdvanceCurrentPosition(1);
    1326           6 :   m.CheckGreedyLoop(&exit_loop3);
    1327           6 :   m.GoTo(&loop3);
    1328           6 :   m.Bind(&exit_loop3);
    1329           6 :   m.PopCurrentPosition();
    1330           6 :   m.WriteCurrentPositionToRegister(out5, 0);  // [0,3,6,9,9,-1]
    1331             : 
    1332           6 :   m.Succeed();
    1333             : 
    1334           6 :   m.Bind(&fail);
    1335           6 :   m.Fail();
    1336             : 
    1337           6 :   Handle<String> source = factory->NewStringFromStaticChars("<loop test>");
    1338           6 :   Handle<Object> code_object = m.GetCode(source);
    1339             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1340             : 
    1341             :   // String long enough for test (content doesn't matter).
    1342           6 :   Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo");
    1343             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1344           6 :   Address start_adr = seq_input->GetCharsAddress();
    1345             : 
    1346             :   int output[6];
    1347             :   NativeRegExpMacroAssembler::Result result =
    1348             :       Execute(*code,
    1349             :               *input,
    1350             :               0,
    1351             :               start_adr,
    1352             :               start_adr + input->length(),
    1353          12 :               output);
    1354             : 
    1355           6 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1356           6 :   CHECK_EQ(0, output[0]);
    1357           6 :   CHECK_EQ(3, output[1]);
    1358           6 :   CHECK_EQ(6, output[2]);
    1359           6 :   CHECK_EQ(9, output[3]);
    1360           6 :   CHECK_EQ(9, output[4]);
    1361           6 :   CHECK_EQ(-1, output[5]);
    1362           6 : }
    1363             : 
    1364             : 
    1365       23724 : TEST(MacroAssemblerStackOverflow) {
    1366           6 :   v8::V8::Initialize();
    1367           6 :   ContextInitializer initializer;
    1368             :   Isolate* isolate = CcTest::i_isolate();
    1369             :   Factory* factory = isolate->factory();
    1370          12 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1371             : 
    1372             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1373          12 :                              0);
    1374             : 
    1375             :   Label loop;
    1376           6 :   m.Bind(&loop);
    1377           6 :   m.PushBacktrack(&loop);
    1378           6 :   m.GoTo(&loop);
    1379             : 
    1380             :   Handle<String> source =
    1381           6 :       factory->NewStringFromStaticChars("<stack overflow test>");
    1382           6 :   Handle<Object> code_object = m.GetCode(source);
    1383             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1384             : 
    1385             :   // String long enough for test (content doesn't matter).
    1386           6 :   Handle<String> input = factory->NewStringFromStaticChars("dummy");
    1387             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1388           6 :   Address start_adr = seq_input->GetCharsAddress();
    1389             : 
    1390             :   NativeRegExpMacroAssembler::Result result = Execute(
    1391          12 :       *code, *input, 0, start_adr, start_adr + input->length(), nullptr);
    1392             : 
    1393           6 :   CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
    1394           6 :   CHECK(isolate->has_pending_exception());
    1395             :   isolate->clear_pending_exception();
    1396           6 : }
    1397             : 
    1398             : 
    1399       23724 : TEST(MacroAssemblerNativeLotsOfRegisters) {
    1400           6 :   v8::V8::Initialize();
    1401           6 :   ContextInitializer initializer;
    1402             :   Isolate* isolate = CcTest::i_isolate();
    1403             :   Factory* factory = isolate->factory();
    1404          12 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1405             : 
    1406             :   ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
    1407          12 :                              2);
    1408             : 
    1409             :   // At least 2048, to ensure the allocated space for registers
    1410             :   // span one full page.
    1411             :   const int large_number = 8000;
    1412           6 :   m.WriteCurrentPositionToRegister(large_number, 42);
    1413           6 :   m.WriteCurrentPositionToRegister(0, 0);
    1414           6 :   m.WriteCurrentPositionToRegister(1, 1);
    1415             :   Label done;
    1416           6 :   m.CheckNotBackReference(0, false, &done);  // Performs a system-stack push.
    1417           6 :   m.Bind(&done);
    1418           6 :   m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
    1419           6 :   m.PopRegister(1);
    1420           6 :   m.Succeed();
    1421             : 
    1422             :   Handle<String> source =
    1423           6 :       factory->NewStringFromStaticChars("<huge register space test>");
    1424           6 :   Handle<Object> code_object = m.GetCode(source);
    1425             :   Handle<Code> code = Handle<Code>::cast(code_object);
    1426             : 
    1427             :   // String long enough for test (content doesn't matter).
    1428           6 :   Handle<String> input = factory->NewStringFromStaticChars("sample text");
    1429             :   Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
    1430           6 :   Address start_adr = seq_input->GetCharsAddress();
    1431             : 
    1432             :   int captures[2];
    1433             :   NativeRegExpMacroAssembler::Result result =
    1434             :       Execute(*code,
    1435             :               *input,
    1436             :               0,
    1437             :               start_adr,
    1438             :               start_adr + input->length(),
    1439          12 :               captures);
    1440             : 
    1441           6 :   CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
    1442           6 :   CHECK_EQ(0, captures[0]);
    1443           6 :   CHECK_EQ(42, captures[1]);
    1444             : 
    1445             :   isolate->clear_pending_exception();
    1446           6 : }
    1447             : 
    1448             : #else  // V8_INTERPRETED_REGEXP
    1449             : 
    1450             : TEST(MacroAssembler) {
    1451             :   byte codes[1024];
    1452             :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1453             :   RegExpMacroAssemblerIrregexp m(CcTest::i_isolate(), Vector<byte>(codes, 1024),
    1454             :                                  &zone);
    1455             :   // ^f(o)o.
    1456             :   Label start, fail, backtrack;
    1457             : 
    1458             :   m.SetRegister(4, 42);
    1459             :   m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
    1460             :   m.AdvanceRegister(4, 42);
    1461             :   m.GoTo(&start);
    1462             :   m.Fail();
    1463             :   m.Bind(&start);
    1464             :   m.PushBacktrack(&fail);
    1465             :   m.CheckNotAtStart(0, nullptr);
    1466             :   m.LoadCurrentCharacter(0, nullptr);
    1467             :   m.CheckNotCharacter('f', nullptr);
    1468             :   m.LoadCurrentCharacter(1, nullptr);
    1469             :   m.CheckNotCharacter('o', nullptr);
    1470             :   m.LoadCurrentCharacter(2, nullptr);
    1471             :   m.CheckNotCharacter('o', nullptr);
    1472             :   m.WriteCurrentPositionToRegister(0, 0);
    1473             :   m.WriteCurrentPositionToRegister(1, 3);
    1474             :   m.WriteCurrentPositionToRegister(2, 1);
    1475             :   m.WriteCurrentPositionToRegister(3, 2);
    1476             :   m.AdvanceCurrentPosition(3);
    1477             :   m.PushBacktrack(&backtrack);
    1478             :   m.Succeed();
    1479             :   m.Bind(&backtrack);
    1480             :   m.ClearRegisters(2, 3);
    1481             :   m.Backtrack();
    1482             :   m.Bind(&fail);
    1483             :   m.PopRegister(0);
    1484             :   m.Fail();
    1485             : 
    1486             :   Isolate* isolate = CcTest::i_isolate();
    1487             :   Factory* factory = isolate->factory();
    1488             :   HandleScope scope(isolate);
    1489             : 
    1490             :   Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
    1491             :   Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
    1492             :   int captures[5];
    1493             : 
    1494             :   const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
    1495             :   Handle<String> f1_16 = factory->NewStringFromTwoByte(
    1496             :       Vector<const uc16>(str1, 6)).ToHandleChecked();
    1497             : 
    1498             :   CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
    1499             :   CHECK_EQ(0, captures[0]);
    1500             :   CHECK_EQ(3, captures[1]);
    1501             :   CHECK_EQ(1, captures[2]);
    1502             :   CHECK_EQ(2, captures[3]);
    1503             :   CHECK_EQ(84, captures[4]);
    1504             : 
    1505             :   const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
    1506             :   Handle<String> f2_16 = factory->NewStringFromTwoByte(
    1507             :       Vector<const uc16>(str2, 6)).ToHandleChecked();
    1508             : 
    1509             :   CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
    1510             :   CHECK_EQ(42, captures[0]);
    1511             : }
    1512             : 
    1513             : #endif  // V8_INTERPRETED_REGEXP
    1514             : 
    1515             : 
    1516       23724 : TEST(AddInverseToTable) {
    1517             :   static const int kLimit = 1000;
    1518             :   static const int kRangeCount = 16;
    1519          66 :   for (int t = 0; t < 10; t++) {
    1520          60 :     Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1521             :     ZoneList<CharacterRange>* ranges =
    1522          60 :         new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
    1523        1020 :     for (int i = 0; i < kRangeCount; i++) {
    1524        1920 :       int from = PseudoRandom(t + 87, i + 25) % kLimit;
    1525        1920 :       int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
    1526         960 :       if (to > kLimit) to = kLimit;
    1527         960 :       ranges->Add(CharacterRange::Range(from, to), &zone);
    1528             :     }
    1529             :     DispatchTable table(&zone);
    1530             :     DispatchTableConstructor cons(&table, false, &zone);
    1531             :     cons.set_choice_index(0);
    1532          60 :     cons.AddInverse(ranges);
    1533       60060 :     for (int i = 0; i < kLimit; i++) {
    1534             :       bool is_on = false;
    1535      810738 :       for (int j = 0; !is_on && j < kRangeCount; j++)
    1536             :         is_on = ranges->at(j).Contains(i);
    1537       60000 :       OutSet* set = table.Get(i);
    1538       60000 :       CHECK_EQ(is_on, set->Get(0) == false);
    1539             :     }
    1540          60 :   }
    1541           6 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1542             :   ZoneList<CharacterRange>* ranges =
    1543           6 :       new(&zone) ZoneList<CharacterRange>(1, &zone);
    1544           6 :   ranges->Add(CharacterRange::Range(0xFFF0, 0xFFFE), &zone);
    1545             :   DispatchTable table(&zone);
    1546             :   DispatchTableConstructor cons(&table, false, &zone);
    1547             :   cons.set_choice_index(0);
    1548           6 :   cons.AddInverse(ranges);
    1549           6 :   CHECK(!table.Get(0xFFFE)->Get(0));
    1550          12 :   CHECK(table.Get(0xFFFF)->Get(0));
    1551           6 : }
    1552             : 
    1553             : 
    1554    12582456 : static uc32 canonicalize(uc32 c) {
    1555             :   unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
    1556    12582456 :   int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, nullptr);
    1557    12582456 :   if (count == 0) {
    1558             :     return c;
    1559             :   } else {
    1560        5994 :     CHECK_EQ(1, count);
    1561        5994 :     return canon[0];
    1562             :   }
    1563             : }
    1564             : 
    1565             : 
    1566       23724 : TEST(LatinCanonicalize) {
    1567             :   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
    1568         156 :   for (unibrow::uchar lower = 'a'; lower <= 'z'; lower++) {
    1569         156 :     unibrow::uchar upper = lower + ('A' - 'a');
    1570         156 :     CHECK_EQ(canonicalize(lower), canonicalize(upper));
    1571             :     unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1572         156 :     int length = un_canonicalize.get(lower, '\0', uncanon);
    1573         156 :     CHECK_EQ(2, length);
    1574         156 :     CHECK_EQ(upper, uncanon[0]);
    1575         156 :     CHECK_EQ(lower, uncanon[1]);
    1576             :   }
    1577    12582144 :   for (uc32 c = 128; c < (1 << 21); c++)
    1578    12582144 :     CHECK_GE(canonicalize(c), 128);
    1579             : #ifndef V8_INTL_SUPPORT
    1580             :   unibrow::Mapping<unibrow::ToUppercase> to_upper;
    1581             :   // Canonicalization is only defined for the Basic Multilingual Plane.
    1582             :   for (uc32 c = 0; c < (1 << 16); c++) {
    1583             :     unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
    1584             :     int length = to_upper.get(c, '\0', upper);
    1585             :     if (length == 0) {
    1586             :       length = 1;
    1587             :       upper[0] = c;
    1588             :     }
    1589             :     uc32 u = upper[0];
    1590             :     if (length > 1 || (c >= 128 && u < 128))
    1591             :       u = c;
    1592             :     CHECK_EQ(u, canonicalize(c));
    1593             :   }
    1594             : #endif
    1595           6 : }
    1596             : 
    1597             : 
    1598      389196 : static uc32 CanonRangeEnd(uc32 c) {
    1599             :   unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
    1600      389196 :   int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, nullptr);
    1601      389196 :   if (count == 0) {
    1602             :     return c;
    1603             :   } else {
    1604         264 :     CHECK_EQ(1, count);
    1605         264 :     return canon[0];
    1606             :   }
    1607             : }
    1608             : 
    1609             : 
    1610       23724 : TEST(RangeCanonicalization) {
    1611             :   // Check that we arrive at the same result when using the basic
    1612             :   // range canonicalization primitives as when using immediate
    1613             :   // canonicalization.
    1614             :   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
    1615             :   int block_start = 0;
    1616      389202 :   while (block_start <= 0xFFFF) {
    1617      389196 :     uc32 block_end = CanonRangeEnd(block_start);
    1618      389196 :     unsigned block_length = block_end - block_start + 1;
    1619      389196 :     if (block_length > 1) {
    1620             :       unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1621         264 :       int first_length = un_canonicalize.get(block_start, '\0', first);
    1622        4284 :       for (unsigned i = 1; i < block_length; i++) {
    1623             :         unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1624        4020 :         int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
    1625        4020 :         CHECK_EQ(first_length, succ_length);
    1626        8040 :         for (int j = 0; j < succ_length; j++) {
    1627        8040 :           int calc = first[j] + i;
    1628        8040 :           int found = succ[j];
    1629        8040 :           CHECK_EQ(calc, found);
    1630             :         }
    1631             :       }
    1632             :     }
    1633      389196 :     block_start = block_start + block_length;
    1634             :   }
    1635           6 : }
    1636             : 
    1637             : 
    1638       23724 : TEST(UncanonicalizeEquivalence) {
    1639             :   unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
    1640             :   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1641      393216 :   for (int i = 0; i < (1 << 16); i++) {
    1642      393216 :     int length = un_canonicalize.get(i, '\0', chars);
    1643      417300 :     for (int j = 0; j < length; j++) {
    1644             :       unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
    1645       24084 :       int length2 = un_canonicalize.get(chars[j], '\0', chars2);
    1646       24084 :       CHECK_EQ(length, length2);
    1647       49116 :       for (int k = 0; k < length; k++)
    1648       49116 :         CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
    1649             :     }
    1650             :   }
    1651           6 : }
    1652             : 
    1653             : 
    1654          66 : static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
    1655             :                                       Vector<CharacterRange> expected) {
    1656          66 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1657          66 :   int count = expected.length();
    1658          66 :   ZoneList<CharacterRange>* list =
    1659          66 :       new(&zone) ZoneList<CharacterRange>(count, &zone);
    1660          66 :   list->Add(input, &zone);
    1661          66 :   CharacterRange::AddCaseEquivalents(isolate, &zone, list, false);
    1662          66 :   list->Remove(0);  // Remove the input before checking results.
    1663          66 :   CHECK_EQ(count, list->length());
    1664          66 :   for (int i = 0; i < list->length(); i++) {
    1665         132 :     CHECK_EQ(expected[i].from(), list->at(i).from());
    1666          66 :     CHECK_EQ(expected[i].to(), list->at(i).to());
    1667          66 :   }
    1668          66 : }
    1669             : 
    1670             : 
    1671          66 : static void TestSimpleRangeCaseIndependence(Isolate* isolate,
    1672             :                                             CharacterRange input,
    1673             :                                             CharacterRange expected) {
    1674             :   EmbeddedVector<CharacterRange, 1> vector;
    1675          66 :   vector[0] = expected;
    1676          66 :   TestRangeCaseIndependence(isolate, input, vector);
    1677          66 : }
    1678             : 
    1679             : 
    1680       23724 : TEST(CharacterRangeCaseIndependence) {
    1681             :   Isolate* isolate = CcTest::i_isolate();
    1682             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('a'),
    1683           6 :                                   CharacterRange::Singleton('A'));
    1684             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('z'),
    1685           6 :                                   CharacterRange::Singleton('Z'));
    1686             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'z'),
    1687           6 :                                   CharacterRange::Range('A', 'Z'));
    1688             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('c', 'f'),
    1689           6 :                                   CharacterRange::Range('C', 'F'));
    1690             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'b'),
    1691           6 :                                   CharacterRange::Range('A', 'B'));
    1692             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('y', 'z'),
    1693           6 :                                   CharacterRange::Range('Y', 'Z'));
    1694             :   TestSimpleRangeCaseIndependence(isolate,
    1695             :                                   CharacterRange::Range('a' - 1, 'z' + 1),
    1696           6 :                                   CharacterRange::Range('A', 'Z'));
    1697             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'Z'),
    1698           6 :                                   CharacterRange::Range('a', 'z'));
    1699             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('C', 'F'),
    1700           6 :                                   CharacterRange::Range('c', 'f'));
    1701             :   TestSimpleRangeCaseIndependence(isolate,
    1702             :                                   CharacterRange::Range('A' - 1, 'Z' + 1),
    1703           6 :                                   CharacterRange::Range('a', 'z'));
    1704             :   // Here we need to add [l-z] to complete the case independence of
    1705             :   // [A-Za-z] but we expect [a-z] to be added since we always add a
    1706             :   // whole block at a time.
    1707             :   TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'k'),
    1708           6 :                                   CharacterRange::Range('a', 'z'));
    1709           6 : }
    1710             : 
    1711             : 
    1712    80166630 : static bool InClass(uc32 c, ZoneList<CharacterRange>* ranges) {
    1713    26738592 :   if (ranges == nullptr) return false;
    1714    80117484 :   for (int i = 0; i < ranges->length(); i++) {
    1715    33374094 :     CharacterRange range = ranges->at(i);
    1716    33374094 :     if (range.from() <= c && c <= range.to())
    1717             :       return true;
    1718             :   }
    1719             :   return false;
    1720             : }
    1721             : 
    1722             : 
    1723       23724 : TEST(UnicodeRangeSplitter) {
    1724           6 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1725             :   ZoneList<CharacterRange>* base =
    1726           6 :       new(&zone) ZoneList<CharacterRange>(1, &zone);
    1727           6 :   base->Add(CharacterRange::Everything(), &zone);
    1728           6 :   UnicodeRangeSplitter splitter(&zone, base);
    1729             :   // BMP
    1730      331782 :   for (uc32 c = 0; c < 0xd800; c++) {
    1731      331776 :     CHECK(InClass(c, splitter.bmp()));
    1732      331776 :     CHECK(!InClass(c, splitter.lead_surrogates()));
    1733      331776 :     CHECK(!InClass(c, splitter.trail_surrogates()));
    1734      331776 :     CHECK(!InClass(c, splitter.non_bmp()));
    1735             :   }
    1736             :   // Lead surrogates
    1737        6138 :   for (uc32 c = 0xd800; c < 0xdbff; c++) {
    1738        6138 :     CHECK(!InClass(c, splitter.bmp()));
    1739        6138 :     CHECK(InClass(c, splitter.lead_surrogates()));
    1740        6138 :     CHECK(!InClass(c, splitter.trail_surrogates()));
    1741        6138 :     CHECK(!InClass(c, splitter.non_bmp()));
    1742             :   }
    1743             :   // Trail surrogates
    1744        6138 :   for (uc32 c = 0xdc00; c < 0xdfff; c++) {
    1745        6138 :     CHECK(!InClass(c, splitter.bmp()));
    1746        6138 :     CHECK(!InClass(c, splitter.lead_surrogates()));
    1747        6138 :     CHECK(InClass(c, splitter.trail_surrogates()));
    1748        6138 :     CHECK(!InClass(c, splitter.non_bmp()));
    1749             :   }
    1750             :   // BMP
    1751       49146 :   for (uc32 c = 0xe000; c < 0xffff; c++) {
    1752       49146 :     CHECK(InClass(c, splitter.bmp()));
    1753       49146 :     CHECK(!InClass(c, splitter.lead_surrogates()));
    1754       49146 :     CHECK(!InClass(c, splitter.trail_surrogates()));
    1755       49146 :     CHECK(!InClass(c, splitter.non_bmp()));
    1756             :   }
    1757             :   // Non-BMP
    1758     6291450 :   for (uc32 c = 0x10000; c < 0x10ffff; c++) {
    1759     6291450 :     CHECK(!InClass(c, splitter.bmp()));
    1760     6291450 :     CHECK(!InClass(c, splitter.lead_surrogates()));
    1761     6291450 :     CHECK(!InClass(c, splitter.trail_surrogates()));
    1762     6291450 :     CHECK(InClass(c, splitter.non_bmp()));
    1763           6 :   }
    1764           6 : }
    1765             : 
    1766             : 
    1767       23724 : TEST(CanonicalizeCharacterSets) {
    1768           6 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1769          24 :   ZoneList<CharacterRange>* list =
    1770           6 :       new(&zone) ZoneList<CharacterRange>(4, &zone);
    1771             :   CharacterSet set(list);
    1772             : 
    1773           6 :   list->Add(CharacterRange::Range(10, 20), &zone);
    1774           6 :   list->Add(CharacterRange::Range(30, 40), &zone);
    1775           6 :   list->Add(CharacterRange::Range(50, 60), &zone);
    1776           6 :   set.Canonicalize();
    1777           6 :   CHECK_EQ(3, list->length());
    1778           6 :   CHECK_EQ(10, list->at(0).from());
    1779           6 :   CHECK_EQ(20, list->at(0).to());
    1780           6 :   CHECK_EQ(30, list->at(1).from());
    1781           6 :   CHECK_EQ(40, list->at(1).to());
    1782           6 :   CHECK_EQ(50, list->at(2).from());
    1783           6 :   CHECK_EQ(60, list->at(2).to());
    1784             : 
    1785             :   list->Rewind(0);
    1786           6 :   list->Add(CharacterRange::Range(10, 20), &zone);
    1787           6 :   list->Add(CharacterRange::Range(50, 60), &zone);
    1788           6 :   list->Add(CharacterRange::Range(30, 40), &zone);
    1789           6 :   set.Canonicalize();
    1790           6 :   CHECK_EQ(3, list->length());
    1791           6 :   CHECK_EQ(10, list->at(0).from());
    1792           6 :   CHECK_EQ(20, list->at(0).to());
    1793           6 :   CHECK_EQ(30, list->at(1).from());
    1794           6 :   CHECK_EQ(40, list->at(1).to());
    1795           6 :   CHECK_EQ(50, list->at(2).from());
    1796           6 :   CHECK_EQ(60, list->at(2).to());
    1797             : 
    1798             :   list->Rewind(0);
    1799           6 :   list->Add(CharacterRange::Range(30, 40), &zone);
    1800           6 :   list->Add(CharacterRange::Range(10, 20), &zone);
    1801           6 :   list->Add(CharacterRange::Range(25, 25), &zone);
    1802           6 :   list->Add(CharacterRange::Range(100, 100), &zone);
    1803           6 :   list->Add(CharacterRange::Range(1, 1), &zone);
    1804           6 :   set.Canonicalize();
    1805           6 :   CHECK_EQ(5, list->length());
    1806           6 :   CHECK_EQ(1, list->at(0).from());
    1807           6 :   CHECK_EQ(1, list->at(0).to());
    1808           6 :   CHECK_EQ(10, list->at(1).from());
    1809           6 :   CHECK_EQ(20, list->at(1).to());
    1810           6 :   CHECK_EQ(25, list->at(2).from());
    1811           6 :   CHECK_EQ(25, list->at(2).to());
    1812           6 :   CHECK_EQ(30, list->at(3).from());
    1813           6 :   CHECK_EQ(40, list->at(3).to());
    1814           6 :   CHECK_EQ(100, list->at(4).from());
    1815           6 :   CHECK_EQ(100, list->at(4).to());
    1816             : 
    1817             :   list->Rewind(0);
    1818           6 :   list->Add(CharacterRange::Range(10, 19), &zone);
    1819           6 :   list->Add(CharacterRange::Range(21, 30), &zone);
    1820           6 :   list->Add(CharacterRange::Range(20, 20), &zone);
    1821           6 :   set.Canonicalize();
    1822           6 :   CHECK_EQ(1, list->length());
    1823           6 :   CHECK_EQ(10, list->at(0).from());
    1824           6 :   CHECK_EQ(30, list->at(0).to());
    1825           6 : }
    1826             : 
    1827             : 
    1828       23724 : TEST(CharacterRangeMerge) {
    1829           6 :   Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
    1830           6 :   ZoneList<CharacterRange> l1(4, &zone);
    1831           6 :   ZoneList<CharacterRange> l2(4, &zone);
    1832             :   // Create all combinations of intersections of ranges, both singletons and
    1833             :   // longer.
    1834             : 
    1835             :   int offset = 0;
    1836             : 
    1837             :   // The five kinds of singleton intersections:
    1838             :   //     X
    1839             :   //   Y      - outside before
    1840             :   //    Y     - outside touching start
    1841             :   //     Y    - overlap
    1842             :   //      Y   - outside touching end
    1843             :   //       Y  - outside after
    1844             : 
    1845          36 :   for (int i = 0; i < 5; i++) {
    1846          30 :     l1.Add(CharacterRange::Singleton(offset + 2), &zone);
    1847          30 :     l2.Add(CharacterRange::Singleton(offset + i), &zone);
    1848          30 :     offset += 6;
    1849             :   }
    1850             : 
    1851             :   // The seven kinds of singleton/non-singleton intersections:
    1852             :   //    XXX
    1853             :   //  Y        - outside before
    1854             :   //   Y       - outside touching start
    1855             :   //    Y      - inside touching start
    1856             :   //     Y     - entirely inside
    1857             :   //      Y    - inside touching end
    1858             :   //       Y   - outside touching end
    1859             :   //        Y  - disjoint after
    1860             : 
    1861          42 :   for (int i = 0; i < 7; i++) {
    1862          42 :     l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
    1863          42 :     l2.Add(CharacterRange::Singleton(offset + i), &zone);
    1864          42 :     offset += 8;
    1865             :   }
    1866             : 
    1867             :   // The eleven kinds of non-singleton intersections:
    1868             :   //
    1869             :   //       XXXXXXXX
    1870             :   // YYYY                  - outside before.
    1871             :   //   YYYY                - outside touching start.
    1872             :   //     YYYY              - overlapping start
    1873             :   //       YYYY            - inside touching start
    1874             :   //         YYYY          - entirely inside
    1875             :   //           YYYY        - inside touching end
    1876             :   //             YYYY      - overlapping end
    1877             :   //               YYYY    - outside touching end
    1878             :   //                 YYYY  - outside after
    1879             :   //       YYYYYYYY        - identical
    1880             :   //     YYYYYYYYYYYY      - containing entirely.
    1881             : 
    1882          54 :   for (int i = 0; i < 9; i++) {
    1883          54 :     l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);  // Length 8.
    1884          54 :     l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
    1885          54 :     offset += 22;
    1886             :   }
    1887           6 :   l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
    1888           6 :   l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
    1889             :   offset += 22;
    1890           6 :   l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
    1891           6 :   l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone);
    1892           6 :   offset += 22;
    1893             : 
    1894             :   // Different kinds of multi-range overlap:
    1895             :   // XXXXXXXXXXXXXXXXXXXXXX         XXXXXXXXXXXXXXXXXXXXXX
    1896             :   //   YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y  YYYY  Y
    1897             : 
    1898           6 :   l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
    1899           6 :   l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
    1900          42 :   for (int i = 0; i < 6; i++) {
    1901          36 :     l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
    1902          36 :     l2.Add(CharacterRange::Singleton(offset + 8), &zone);
    1903          36 :     offset += 9;
    1904             :   }
    1905             : 
    1906           6 :   CHECK(CharacterRange::IsCanonical(&l1));
    1907           6 :   CHECK(CharacterRange::IsCanonical(&l2));
    1908             : 
    1909           6 :   ZoneList<CharacterRange> first_only(4, &zone);
    1910           6 :   ZoneList<CharacterRange> second_only(4, &zone);
    1911           6 :   ZoneList<CharacterRange> both(4, &zone);
    1912           6 : }
    1913             : 
    1914             : 
    1915       23724 : TEST(Graph) {
    1916           6 :   Execute("\\b\\w+\\b", false, true, true);
    1917           6 : }
    1918             : 
    1919             : 
    1920             : namespace {
    1921             : 
    1922             : int* global_use_counts = nullptr;
    1923             : 
    1924          84 : void MockUseCounterCallback(v8::Isolate* isolate,
    1925             :                             v8::Isolate::UseCounterFeature feature) {
    1926          84 :   ++global_use_counts[feature];
    1927          84 : }
    1928             : }
    1929             : 
    1930             : 
    1931             : // Test that ES2015 RegExp compatibility fixes are in place, that they
    1932             : // are not overly broad, and the appropriate UseCounters are incremented
    1933       23724 : TEST(UseCountRegExp) {
    1934           6 :   v8::Isolate* isolate = CcTest::isolate();
    1935           6 :   v8::HandleScope scope(isolate);
    1936          12 :   LocalContext env;
    1937           6 :   int use_counts[v8::Isolate::kUseCounterFeatureCount] = {};
    1938           6 :   global_use_counts = use_counts;
    1939           6 :   CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback);
    1940             : 
    1941             :   // Compat fix: RegExp.prototype.sticky == undefined; UseCounter tracks it
    1942             :   v8::Local<v8::Value> resultSticky = CompileRun("RegExp.prototype.sticky");
    1943           6 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1944           6 :   CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1945           6 :   CHECK(resultSticky->IsUndefined());
    1946             : 
    1947             :   // re.sticky has appropriate value and doesn't touch UseCounter
    1948             :   v8::Local<v8::Value> resultReSticky = CompileRun("/a/.sticky");
    1949           6 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1950           6 :   CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1951           6 :   CHECK(resultReSticky->IsFalse());
    1952             : 
    1953             :   // When the getter is called on another object, throw an exception
    1954             :   // and don't increment the UseCounter
    1955             :   v8::Local<v8::Value> resultStickyError = CompileRun(
    1956             :       "var exception;"
    1957             :       "try { "
    1958             :       "  Object.getOwnPropertyDescriptor(RegExp.prototype, 'sticky')"
    1959             :       "      .get.call(null);"
    1960             :       "} catch (e) {"
    1961             :       "  exception = e;"
    1962             :       "}"
    1963             :       "exception");
    1964           6 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1965           6 :   CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1966           6 :   CHECK(resultStickyError->IsObject());
    1967             : 
    1968             :   // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix;
    1969             :   // a UseCounter is incremented to track it.
    1970             :   v8::Local<v8::Value> resultToString =
    1971             :       CompileRun("RegExp.prototype.toString().length");
    1972           6 :   CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1973           6 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1974           6 :   CHECK(resultToString->IsInt32());
    1975          12 :   CHECK_EQ(6,
    1976             :            resultToString->Int32Value(isolate->GetCurrentContext()).FromJust());
    1977             : 
    1978             :   // .toString() works on normal RegExps
    1979             :   v8::Local<v8::Value> resultReToString = CompileRun("/a/.toString().length");
    1980           6 :   CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1981           6 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1982           6 :   CHECK(resultReToString->IsInt32());
    1983          12 :   CHECK_EQ(
    1984             :       3, resultReToString->Int32Value(isolate->GetCurrentContext()).FromJust());
    1985             : 
    1986             :   // .toString() throws on non-RegExps that aren't RegExp.prototype
    1987             :   v8::Local<v8::Value> resultToStringError = CompileRun(
    1988             :       "var exception;"
    1989             :       "try { RegExp.prototype.toString.call(null) }"
    1990             :       "catch (e) { exception = e; }"
    1991             :       "exception");
    1992           6 :   CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
    1993           6 :   CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
    1994          12 :   CHECK(resultToStringError->IsObject());
    1995           6 : }
    1996             : 
    1997          18 : class UncachedExternalString
    1998             :     : public v8::String::ExternalOneByteStringResource {
    1999             :  public:
    2000          30 :   const char* data() const override { return "abcdefghijklmnopqrstuvwxyz"; }
    2001          18 :   size_t length() const override { return 26; }
    2002           6 :   bool IsCompressible() const override { return true; }
    2003             : };
    2004             : 
    2005       23724 : TEST(UncachedExternalString) {
    2006           6 :   v8::Isolate* isolate = CcTest::isolate();
    2007           6 :   v8::HandleScope scope(isolate);
    2008          12 :   LocalContext env;
    2009             :   v8::Local<v8::String> external =
    2010           6 :       v8::String::NewExternalOneByte(isolate, new UncachedExternalString())
    2011           6 :           .ToLocalChecked();
    2012           6 :   CHECK(v8::Utils::OpenHandle(*external)->map() ==
    2013             :         CcTest::i_isolate()->heap()->short_external_one_byte_string_map());
    2014           6 :   v8::Local<v8::Object> global = env->Global();
    2015          24 :   global->Set(env.local(), v8_str("external"), external).FromJust();
    2016             :   CompileRun("var re = /y(.)/; re.test('ab');");
    2017          12 :   ExpectString("external.substring(1).match(re)[1]", "z");
    2018           6 : }
    2019             : 
    2020             : }  // namespace test_regexp
    2021             : }  // namespace internal
    2022       71154 : }  // namespace v8

Generated by: LCOV version 1.10