Line data Source code
1 : // Copyright 2012 the V8 project authors. All rights reserved.
2 : // Redistribution and use in source and binary forms, with or without
3 : // modification, are permitted provided that the following conditions are
4 : // met:
5 : //
6 : // * Redistributions of source code must retain the above copyright
7 : // notice, this list of conditions and the following disclaimer.
8 : // * Redistributions in binary form must reproduce the above
9 : // copyright notice, this list of conditions and the following
10 : // disclaimer in the documentation and/or other materials provided
11 : // with the distribution.
12 : // * Neither the name of Google Inc. nor the names of its
13 : // contributors may be used to endorse or promote products derived
14 : // from this software without specific prior written permission.
15 : //
16 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 :
28 : #include <cstdlib>
29 : #include <memory>
30 : #include <sstream>
31 :
32 : #include "include/v8.h"
33 : #include "src/v8.h"
34 :
35 : #include "src/api.h"
36 : #include "src/ast/ast.h"
37 : #include "src/char-predicates-inl.h"
38 : #include "src/objects-inl.h"
39 : #include "src/ostreams.h"
40 : #include "src/regexp/jsregexp.h"
41 : #include "src/regexp/regexp-macro-assembler-irregexp.h"
42 : #include "src/regexp/regexp-macro-assembler.h"
43 : #include "src/regexp/regexp-parser.h"
44 : #include "src/splay-tree-inl.h"
45 : #include "src/string-stream.h"
46 : #include "src/unicode-inl.h"
47 :
48 : #ifdef V8_INTERPRETED_REGEXP
49 : #include "src/regexp/interpreter-irregexp.h"
50 : #else // V8_INTERPRETED_REGEXP
51 : #include "src/macro-assembler.h"
52 : #if V8_TARGET_ARCH_ARM
53 : #include "src/arm/assembler-arm.h" // NOLINT
54 : #include "src/arm/macro-assembler-arm.h"
55 : #include "src/regexp/arm/regexp-macro-assembler-arm.h"
56 : #endif
57 : #if V8_TARGET_ARCH_ARM64
58 : #include "src/arm64/assembler-arm64.h"
59 : #include "src/arm64/macro-assembler-arm64.h"
60 : #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
61 : #endif
62 : #if V8_TARGET_ARCH_S390
63 : #include "src/regexp/s390/regexp-macro-assembler-s390.h"
64 : #include "src/s390/assembler-s390.h"
65 : #include "src/s390/macro-assembler-s390.h"
66 : #endif
67 : #if V8_TARGET_ARCH_PPC
68 : #include "src/ppc/assembler-ppc.h"
69 : #include "src/ppc/macro-assembler-ppc.h"
70 : #include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
71 : #endif
72 : #if V8_TARGET_ARCH_MIPS
73 : #include "src/mips/assembler-mips.h"
74 : #include "src/mips/macro-assembler-mips.h"
75 : #include "src/regexp/mips/regexp-macro-assembler-mips.h"
76 : #endif
77 : #if V8_TARGET_ARCH_MIPS64
78 : #include "src/mips64/assembler-mips64.h"
79 : #include "src/mips64/macro-assembler-mips64.h"
80 : #include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
81 : #endif
82 : #if V8_TARGET_ARCH_X64
83 : #include "src/regexp/x64/regexp-macro-assembler-x64.h"
84 : #include "src/x64/assembler-x64.h"
85 : #include "src/x64/macro-assembler-x64.h"
86 : #endif
87 : #if V8_TARGET_ARCH_IA32
88 : #include "src/ia32/assembler-ia32.h"
89 : #include "src/ia32/macro-assembler-ia32.h"
90 : #include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
91 : #endif
92 : #endif // V8_INTERPRETED_REGEXP
93 : #include "test/cctest/cctest.h"
94 :
95 : namespace v8 {
96 : namespace internal {
97 : namespace test_regexp {
98 :
99 12 : static bool CheckParse(const char* input) {
100 12 : v8::HandleScope scope(CcTest::isolate());
101 24 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
102 12 : FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
103 : RegExpCompileData result;
104 : return v8::internal::RegExpParser::ParseRegExp(
105 24 : CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result);
106 : }
107 :
108 :
109 1812 : static void CheckParseEq(const char* input, const char* expected,
110 : bool unicode = false) {
111 1812 : v8::HandleScope scope(CcTest::isolate());
112 3624 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
113 1812 : FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
114 : RegExpCompileData result;
115 : JSRegExp::Flags flags = JSRegExp::kNone;
116 1812 : if (unicode) flags |= JSRegExp::kUnicode;
117 1812 : CHECK(v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
118 : &reader, flags, &result));
119 1812 : CHECK_NOT_NULL(result.tree);
120 1812 : CHECK(result.error.is_null());
121 3624 : std::ostringstream os;
122 1812 : result.tree->Print(os, &zone);
123 3624 : if (strcmp(expected, os.str().c_str()) != 0) {
124 0 : printf("%s | %s\n", expected, os.str().c_str());
125 : }
126 5436 : CHECK_EQ(0, strcmp(expected, os.str().c_str()));
127 1812 : }
128 :
129 :
130 540 : static bool CheckSimple(const char* input) {
131 540 : v8::HandleScope scope(CcTest::isolate());
132 1080 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
133 540 : FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
134 : RegExpCompileData result;
135 540 : CHECK(v8::internal::RegExpParser::ParseRegExp(
136 : CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
137 540 : CHECK_NOT_NULL(result.tree);
138 540 : CHECK(result.error.is_null());
139 1080 : return result.simple;
140 : }
141 :
// Result of CheckMinMaxMatch: the values reported by the parse tree's
// min_match() and max_match(). Kept as a plain aggregate so that it can be
// brace-initialised in declaration order.
struct MinMaxPair {
  int min_match;  // Value of RegExpTree::min_match() for the parsed pattern.
  int max_match;  // Value of RegExpTree::max_match() for the parsed pattern.
};
146 :
147 :
148 576 : static MinMaxPair CheckMinMaxMatch(const char* input) {
149 576 : v8::HandleScope scope(CcTest::isolate());
150 1152 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
151 576 : FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
152 : RegExpCompileData result;
153 576 : CHECK(v8::internal::RegExpParser::ParseRegExp(
154 : CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
155 576 : CHECK_NOT_NULL(result.tree);
156 576 : CHECK(result.error.is_null());
157 576 : int min_match = result.tree->min_match();
158 576 : int max_match = result.tree->max_match();
159 : MinMaxPair pair = { min_match, max_match };
160 1152 : return pair;
161 : }
162 :
163 :
// Assertion shorthands for the parser tests. Each macro expands to exactly
// one statement without a trailing semicolon, so the caller supplies the ';'
// and the macros compose safely with unbraced if/else.

// Checks that the parser rejects |input|.
#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))

// Checks that CheckSimple(input) returns |simple|.
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input))

// Checks that the parsed tree for |input| reports |min| and |max| as its
// min_match()/max_match() values. The pattern is parsed only once.
#define CHECK_MIN_MAX(input, min, max)              \
  do {                                              \
    MinMaxPair min_max = CheckMinMaxMatch(input);   \
    CHECK_EQ(min, min_max.min_match);               \
    CHECK_EQ(max, min_max.max_match);               \
  } while (false)
171 :
172 :
173 12 : void TestRegExpParser(bool lookbehind) {
174 12 : FLAG_harmony_regexp_lookbehind = lookbehind;
175 :
176 12 : CHECK_PARSE_ERROR("?");
177 :
178 12 : CheckParseEq("abc", "'abc'");
179 12 : CheckParseEq("", "%");
180 12 : CheckParseEq("abc|def", "(| 'abc' 'def')");
181 12 : CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
182 12 : CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
183 12 : CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
184 12 : CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
185 12 : CheckParseEq("a*", "(# 0 - g 'a')");
186 12 : CheckParseEq("a*?", "(# 0 - n 'a')");
187 12 : CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
188 12 : CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
189 12 : CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
190 12 : CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
191 12 : CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
192 12 : CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
193 12 : CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
194 12 : CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
195 12 : CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
196 12 : CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
197 12 : CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
198 12 : CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
199 12 : CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
200 12 : CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
201 12 : CheckParseEq("(?:foo)", "(?: 'foo')");
202 12 : CheckParseEq("(?: foo )", "(?: ' foo ')");
203 12 : CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
204 12 : CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
205 12 : CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
206 12 : CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
207 12 : if (lookbehind) {
208 12 : CheckParseEq("foo(?<=bar)baz", "(: 'foo' (<- + 'bar') 'baz')");
209 12 : CheckParseEq("foo(?<!bar)baz", "(: 'foo' (<- - 'bar') 'baz')");
210 : } else {
211 0 : CHECK_PARSE_ERROR("foo(?<=bar)baz");
212 0 : CHECK_PARSE_ERROR("foo(?<!bar)baz");
213 : }
214 12 : CheckParseEq("()", "(^ %)");
215 12 : CheckParseEq("(?=)", "(-> + %)");
216 12 : CheckParseEq("[]", "^[\\x00-\\u{10ffff}]"); // Doesn't compile on windows
217 12 : CheckParseEq("[^]", "[\\x00-\\u{10ffff}]"); // \uffff isn't in codepage 1252
218 12 : CheckParseEq("[x]", "[x]");
219 12 : CheckParseEq("[xyz]", "[x y z]");
220 12 : CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
221 12 : CheckParseEq("[-123]", "[- 1 2 3]");
222 12 : CheckParseEq("[^123]", "^[1 2 3]");
223 12 : CheckParseEq("]", "']'");
224 12 : CheckParseEq("}", "'}'");
225 12 : CheckParseEq("[a-b-c]", "[a-b - c]");
226 12 : CheckParseEq("[\\d]", "[0-9]");
227 12 : CheckParseEq("[x\\dz]", "[x 0-9 z]");
228 12 : CheckParseEq("[\\d-z]", "[0-9 - z]");
229 12 : CheckParseEq("[\\d-\\d]", "[0-9 0-9 -]");
230 12 : CheckParseEq("[z-\\d]", "[0-9 z -]");
231 : // Control character outside character class.
232 12 : CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
233 12 : CheckParseEq("\\c!", "'\\c!'");
234 12 : CheckParseEq("\\c_", "'\\c_'");
235 12 : CheckParseEq("\\c~", "'\\c~'");
236 12 : CheckParseEq("\\c1", "'\\c1'");
237 : // Control character inside character class.
238 12 : CheckParseEq("[\\c!]", "[\\ c !]");
239 12 : CheckParseEq("[\\c_]", "[\\x1f]");
240 12 : CheckParseEq("[\\c~]", "[\\ c ~]");
241 12 : CheckParseEq("[\\ca]", "[\\x01]");
242 12 : CheckParseEq("[\\cz]", "[\\x1a]");
243 12 : CheckParseEq("[\\cA]", "[\\x01]");
244 12 : CheckParseEq("[\\cZ]", "[\\x1a]");
245 12 : CheckParseEq("[\\c1]", "[\\x11]");
246 :
247 12 : CheckParseEq("[a\\]c]", "[a ] c]");
248 12 : CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
249 12 : CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
250 12 : CheckParseEq("\\0", "'\\x00'");
251 12 : CheckParseEq("\\8", "'8'");
252 12 : CheckParseEq("\\9", "'9'");
253 12 : CheckParseEq("\\11", "'\\x09'");
254 12 : CheckParseEq("\\11a", "'\\x09a'");
255 12 : CheckParseEq("\\011", "'\\x09'");
256 12 : CheckParseEq("\\00011", "'\\x0011'");
257 12 : CheckParseEq("\\118", "'\\x098'");
258 12 : CheckParseEq("\\111", "'I'");
259 12 : CheckParseEq("\\1111", "'I1'");
260 12 : CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
261 12 : CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
262 12 : CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
263 12 : CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
264 : CheckParseEq("(x)(x)(x)\\1*",
265 : "(: (^ 'x') (^ 'x') (^ 'x')"
266 12 : " (# 0 - g (<- 1)))");
267 : CheckParseEq("(x)(x)(x)\\2*",
268 : "(: (^ 'x') (^ 'x') (^ 'x')"
269 12 : " (# 0 - g (<- 2)))");
270 : CheckParseEq("(x)(x)(x)\\3*",
271 : "(: (^ 'x') (^ 'x') (^ 'x')"
272 12 : " (# 0 - g (<- 3)))");
273 : CheckParseEq("(x)(x)(x)\\4*",
274 : "(: (^ 'x') (^ 'x') (^ 'x')"
275 12 : " (# 0 - g '\\x04'))");
276 : CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
277 : "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
278 12 : " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
279 : CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
280 : "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
281 12 : " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
282 12 : CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
283 12 : CheckParseEq("(a\\1)", "(^ 'a')");
284 12 : CheckParseEq("(\\1a)", "(^ 'a')");
285 12 : CheckParseEq("(\\2)(\\1)", "(: (^ (<- 2)) (^ (<- 1)))");
286 12 : CheckParseEq("(?=a)?a", "'a'");
287 12 : CheckParseEq("(?=a){0,10}a", "'a'");
288 12 : CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
289 12 : CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
290 12 : CheckParseEq("(?!a)?a", "'a'");
291 12 : CheckParseEq("\\1(a)", "(: (<- 1) (^ 'a'))");
292 12 : CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
293 : CheckParseEq("(?!\\1(a\\1)\\1)\\1",
294 12 : "(: (-> - (: (<- 1) (^ 'a') (<- 1))) (<- 1))");
295 : CheckParseEq("\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1",
296 12 : "(: (<- 1) (<- 2) (^ (: 'a' (?: (^ 'b')) (<- 2))) (<- 1))");
297 12 : if (lookbehind) {
298 : CheckParseEq("\\1\\2(a(?<=\\1(b\\1\\2))\\2)\\1",
299 12 : "(: (<- 1) (<- 2) (^ (: 'a' (<- + (^ 'b')) (<- 2))) (<- 1))");
300 : }
301 12 : CheckParseEq("[\\0]", "[\\x00]");
302 12 : CheckParseEq("[\\11]", "[\\x09]");
303 12 : CheckParseEq("[\\11a]", "[\\x09 a]");
304 12 : CheckParseEq("[\\011]", "[\\x09]");
305 12 : CheckParseEq("[\\00011]", "[\\x00 1 1]");
306 12 : CheckParseEq("[\\118]", "[\\x09 8]");
307 12 : CheckParseEq("[\\111]", "[I]");
308 12 : CheckParseEq("[\\1111]", "[I 1]");
309 12 : CheckParseEq("\\x34", "'\x34'");
310 12 : CheckParseEq("\\x60", "'\x60'");
311 12 : CheckParseEq("\\x3z", "'x3z'");
312 12 : CheckParseEq("\\c", "'\\c'");
313 12 : CheckParseEq("\\u0034", "'\x34'");
314 12 : CheckParseEq("\\u003z", "'u003z'");
315 12 : CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");
316 12 : CheckParseEq("^^^$$$\\b\\b\\b\\b", "(: @^i @$i @b)");
317 12 : CheckParseEq("\\b\\b\\b\\b\\B\\B\\B\\B\\b\\b\\b\\b", "(: @b @B @b)");
318 12 : CheckParseEq("\\b\\B\\b", "(: @b @B @b)");
319 :
320 : // Unicode regexps
321 12 : CheckParseEq("\\u{12345}", "'\\ud808\\udf45'", true);
322 : CheckParseEq("\\u{12345}\\u{23456}", "(! '\\ud808\\udf45' '\\ud84d\\udc56')",
323 12 : true);
324 : CheckParseEq("\\u{12345}|\\u{23456}", "(| '\\ud808\\udf45' '\\ud84d\\udc56')",
325 12 : true);
326 12 : CheckParseEq("\\u{12345}{3}", "(# 3 3 g '\\ud808\\udf45')", true);
327 12 : CheckParseEq("\\u{12345}*", "(# 0 - g '\\ud808\\udf45')", true);
328 :
329 12 : CheckParseEq("\\ud808\\udf45*", "(# 0 - g '\\ud808\\udf45')", true);
330 : CheckParseEq("[\\ud808\\udf45-\\ud809\\udccc]", "[\\u{012345}-\\u{0124cc}]",
331 12 : true);
332 :
333 24 : CHECK_SIMPLE("", false);
334 12 : CHECK_SIMPLE("a", true);
335 24 : CHECK_SIMPLE("a|b", false);
336 24 : CHECK_SIMPLE("a\\n", false);
337 24 : CHECK_SIMPLE("^a", false);
338 24 : CHECK_SIMPLE("a$", false);
339 24 : CHECK_SIMPLE("a\\b!", false);
340 24 : CHECK_SIMPLE("a\\Bb", false);
341 24 : CHECK_SIMPLE("a*", false);
342 24 : CHECK_SIMPLE("a*?", false);
343 24 : CHECK_SIMPLE("a?", false);
344 24 : CHECK_SIMPLE("a??", false);
345 24 : CHECK_SIMPLE("a{0,1}?", false);
346 24 : CHECK_SIMPLE("a{1,1}?", false);
347 24 : CHECK_SIMPLE("a{1,2}?", false);
348 24 : CHECK_SIMPLE("a+?", false);
349 24 : CHECK_SIMPLE("(a)", false);
350 24 : CHECK_SIMPLE("(a)\\1", false);
351 24 : CHECK_SIMPLE("(\\1a)", false);
352 24 : CHECK_SIMPLE("\\1(a)", false);
353 24 : CHECK_SIMPLE("a\\s", false);
354 24 : CHECK_SIMPLE("a\\S", false);
355 24 : CHECK_SIMPLE("a\\d", false);
356 24 : CHECK_SIMPLE("a\\D", false);
357 24 : CHECK_SIMPLE("a\\w", false);
358 24 : CHECK_SIMPLE("a\\W", false);
359 24 : CHECK_SIMPLE("a.", false);
360 24 : CHECK_SIMPLE("a\\q", false);
361 24 : CHECK_SIMPLE("a[a]", false);
362 24 : CHECK_SIMPLE("a[^a]", false);
363 24 : CHECK_SIMPLE("a[a-z]", false);
364 24 : CHECK_SIMPLE("a[\\q]", false);
365 24 : CHECK_SIMPLE("a(?:b)", false);
366 24 : CHECK_SIMPLE("a(?=b)", false);
367 24 : CHECK_SIMPLE("a(?!b)", false);
368 24 : CHECK_SIMPLE("\\x60", false);
369 24 : CHECK_SIMPLE("\\u0060", false);
370 24 : CHECK_SIMPLE("\\cA", false);
371 24 : CHECK_SIMPLE("\\q", false);
372 24 : CHECK_SIMPLE("\\1112", false);
373 24 : CHECK_SIMPLE("\\0", false);
374 24 : CHECK_SIMPLE("(a)\\1", false);
375 24 : CHECK_SIMPLE("(?=a)?a", false);
376 24 : CHECK_SIMPLE("(?!a)?a\\1", false);
377 24 : CHECK_SIMPLE("(?:(?=a))a\\1", false);
378 :
379 12 : CheckParseEq("a{}", "'a{}'");
380 12 : CheckParseEq("a{,}", "'a{,}'");
381 12 : CheckParseEq("a{", "'a{'");
382 12 : CheckParseEq("a{z}", "'a{z}'");
383 12 : CheckParseEq("a{1z}", "'a{1z}'");
384 12 : CheckParseEq("a{12z}", "'a{12z}'");
385 12 : CheckParseEq("a{12,", "'a{12,'");
386 12 : CheckParseEq("a{12,3b", "'a{12,3b'");
387 12 : CheckParseEq("{}", "'{}'");
388 12 : CheckParseEq("{,}", "'{,}'");
389 12 : CheckParseEq("{", "'{'");
390 12 : CheckParseEq("{z}", "'{z}'");
391 12 : CheckParseEq("{1z}", "'{1z}'");
392 12 : CheckParseEq("{12z}", "'{12z}'");
393 12 : CheckParseEq("{12,", "'{12,'");
394 12 : CheckParseEq("{12,3b", "'{12,3b'");
395 :
396 12 : CHECK_MIN_MAX("a", 1, 1);
397 12 : CHECK_MIN_MAX("abc", 3, 3);
398 12 : CHECK_MIN_MAX("a[bc]d", 3, 3);
399 12 : CHECK_MIN_MAX("a|bc", 1, 2);
400 12 : CHECK_MIN_MAX("ab|c", 1, 2);
401 12 : CHECK_MIN_MAX("a||bc", 0, 2);
402 12 : CHECK_MIN_MAX("|", 0, 0);
403 12 : CHECK_MIN_MAX("(?:ab)", 2, 2);
404 12 : CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
405 12 : CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
406 12 : CHECK_MIN_MAX("(ab)", 2, 2);
407 12 : CHECK_MIN_MAX("(ab|cde)", 2, 3);
408 12 : CHECK_MIN_MAX("(ab)\\1", 2, RegExpTree::kInfinity);
409 12 : CHECK_MIN_MAX("(ab|cde)\\1", 2, RegExpTree::kInfinity);
410 12 : CHECK_MIN_MAX("(?:ab)?", 0, 2);
411 12 : CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
412 12 : CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
413 12 : CHECK_MIN_MAX("a?", 0, 1);
414 12 : CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
415 12 : CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
416 12 : CHECK_MIN_MAX("a??", 0, 1);
417 12 : CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
418 12 : CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
419 12 : CHECK_MIN_MAX("(?:a?)?", 0, 1);
420 12 : CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
421 12 : CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
422 12 : CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
423 12 : CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
424 12 : CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
425 12 : CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
426 12 : CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
427 12 : CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
428 12 : CHECK_MIN_MAX("a{0}", 0, 0);
429 12 : CHECK_MIN_MAX("(?:a+){0}", 0, 0);
430 12 : CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
431 12 : CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
432 12 : CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
433 12 : CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
434 12 : CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
435 12 : CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
436 12 : CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
437 12 : CHECK_MIN_MAX("a\\bc", 2, 2);
438 12 : CHECK_MIN_MAX("a\\Bc", 2, 2);
439 12 : CHECK_MIN_MAX("a\\sc", 3, 3);
440 12 : CHECK_MIN_MAX("a\\Sc", 3, 3);
441 12 : CHECK_MIN_MAX("a(?=b)c", 2, 2);
442 12 : CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
443 12 : CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
444 :
445 12 : FLAG_harmony_regexp_named_captures = true;
446 : CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<a>",
447 12 : "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))", true);
448 : CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<b>",
449 12 : "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))", true);
450 : CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<c>",
451 12 : "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))", true);
452 12 : CheckParseEq("(?<a>a)\\k<a>", "(: (^ 'a') (<- 1))", true);
453 12 : CheckParseEq("(?<a>a\\k<a>)", "(^ 'a')", true);
454 12 : CheckParseEq("(?<a>\\k<a>a)", "(^ 'a')", true);
455 12 : CheckParseEq("(?<a>\\k<b>)(?<b>\\k<a>)", "(: (^ (<- 2)) (^ (<- 1)))", true);
456 12 : CheckParseEq("\\k<a>(?<a>a)", "(: (<- 1) (^ 'a'))", true);
457 :
458 12 : CheckParseEq("(?<\\u{03C0}>a)", "(^ 'a')", true);
459 12 : CheckParseEq("(?<\\u03C0>a)", "(^ 'a')", true);
460 12 : FLAG_harmony_regexp_named_captures = false;
461 12 : }
462 :
463 :
464 23724 : TEST(ParserWithLookbehind) {
465 6 : TestRegExpParser(true); // Lookbehind enabled.
466 6 : }
467 :
468 :
469 23724 : TEST(ParserWithoutLookbehind) {
470 6 : TestRegExpParser(true); // Lookbehind enabled.
471 6 : }
472 :
473 23724 : TEST(ParserRegression) {
474 6 : CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])");
475 6 : CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
476 6 : CheckParseEq("{", "'{'");
477 6 : CheckParseEq("a|", "(| 'a' %)");
478 6 : }
479 :
480 126 : static void ExpectError(const char* input, const char* expected,
481 : bool unicode = false) {
482 126 : v8::HandleScope scope(CcTest::isolate());
483 252 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
484 126 : FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
485 : RegExpCompileData result;
486 : JSRegExp::Flags flags = JSRegExp::kNone;
487 126 : if (unicode) flags |= JSRegExp::kUnicode;
488 126 : CHECK(!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
489 : &reader, flags, &result));
490 126 : CHECK_NULL(result.tree);
491 126 : CHECK(!result.error.is_null());
492 126 : std::unique_ptr<char[]> str = result.error->ToCString(ALLOW_NULLS);
493 252 : CHECK_EQ(0, strcmp(expected, str.get()));
494 126 : }
495 :
496 :
497 23724 : TEST(Errors) {
498 : const char* kEndBackslash = "\\ at end of pattern";
499 6 : ExpectError("\\", kEndBackslash);
500 : const char* kUnterminatedGroup = "Unterminated group";
501 6 : ExpectError("(foo", kUnterminatedGroup);
502 : const char* kInvalidGroup = "Invalid group";
503 6 : ExpectError("(?", kInvalidGroup);
504 : const char* kUnterminatedCharacterClass = "Unterminated character class";
505 6 : ExpectError("[", kUnterminatedCharacterClass);
506 6 : ExpectError("[a-", kUnterminatedCharacterClass);
507 : const char* kNothingToRepeat = "Nothing to repeat";
508 6 : ExpectError("*", kNothingToRepeat);
509 6 : ExpectError("?", kNothingToRepeat);
510 6 : ExpectError("+", kNothingToRepeat);
511 6 : ExpectError("{1}", kNothingToRepeat);
512 6 : ExpectError("{1,2}", kNothingToRepeat);
513 6 : ExpectError("{1,}", kNothingToRepeat);
514 :
515 : // Check that we don't allow more than kMaxCapture captures
516 : const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures.
517 : const char* kTooManyCaptures = "Too many captures";
518 6 : std::ostringstream os;
519 393228 : for (int i = 0; i <= kMaxCaptures; i++) {
520 393222 : os << "()";
521 : }
522 12 : ExpectError(os.str().c_str(), kTooManyCaptures);
523 :
524 6 : FLAG_harmony_regexp_named_captures = true;
525 : const char* kInvalidCaptureName = "Invalid capture group name";
526 6 : ExpectError("(?<>.)", kInvalidCaptureName, true);
527 6 : ExpectError("(?<1>.)", kInvalidCaptureName, true);
528 6 : ExpectError("(?<_%>.)", kInvalidCaptureName, true);
529 6 : ExpectError("\\k<a", kInvalidCaptureName, true);
530 : const char* kDuplicateCaptureName = "Duplicate capture group name";
531 6 : ExpectError("(?<a>.)(?<a>.)", kDuplicateCaptureName, true);
532 : const char* kInvalidUnicodeEscape = "Invalid Unicode escape sequence";
533 6 : ExpectError("(?<\\u{FISK}", kInvalidUnicodeEscape, true);
534 : const char* kInvalidCaptureReferenced = "Invalid named capture referenced";
535 6 : ExpectError("\\k<a>", kInvalidCaptureReferenced, true);
536 6 : ExpectError("(?<b>)\\k<a>", kInvalidCaptureReferenced, true);
537 : const char* kInvalidNamedReference = "Invalid named reference";
538 6 : ExpectError("\\ka", kInvalidNamedReference, true);
539 6 : FLAG_harmony_regexp_named_captures = false;
540 6 : }
541 :
542 :
543 393216 : static bool IsDigit(uc16 c) {
544 786432 : return ('0' <= c && c <= '9');
545 : }
546 :
547 :
548 393216 : static bool NotDigit(uc16 c) {
549 393216 : return !IsDigit(c);
550 : }
551 :
552 :
553 393216 : static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
554 : // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
555 : // WhiteSpace (7.2) and LineTerminator (7.3) values.
556 786432 : return v8::internal::WhiteSpaceOrLineTerminator::Is(c);
557 : }
558 :
559 :
560 393216 : static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
561 393216 : return !IsWhiteSpaceOrLineTerminator(c);
562 : }
563 :
564 :
565 393216 : static bool NotWord(uc16 c) {
566 393216 : return !IsRegExpWord(c);
567 : }
568 :
569 :
570 42 : static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
571 42 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
572 14978166 : ZoneList<CharacterRange>* ranges =
573 42 : new(&zone) ZoneList<CharacterRange>(2, &zone);
574 42 : CharacterRange::AddClassEscape(c, ranges, &zone);
575 2752554 : for (uc32 i = 0; i < (1 << 16); i++) {
576 : bool in_class = false;
577 28776660 : for (int j = 0; !in_class && j < ranges->length(); j++) {
578 26895372 : CharacterRange& range = ranges->at(j);
579 26895372 : in_class = (range.from() <= i && i <= range.to());
580 : }
581 2752512 : CHECK_EQ(pred(i), in_class);
582 42 : }
583 42 : }
584 :
585 :
586 23724 : TEST(CharacterClassEscapes) {
587 6 : TestCharacterClassEscapes('.', IsRegExpNewline);
588 6 : TestCharacterClassEscapes('d', IsDigit);
589 6 : TestCharacterClassEscapes('D', NotDigit);
590 6 : TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
591 6 : TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);
592 6 : TestCharacterClassEscapes('w', IsRegExpWord);
593 6 : TestCharacterClassEscapes('W', NotWord);
594 6 : }
595 :
596 :
597 6 : static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
598 : bool is_one_byte, Zone* zone) {
599 : Isolate* isolate = CcTest::i_isolate();
600 6 : FlatStringReader reader(isolate, CStrVector(input));
601 : RegExpCompileData compile_data;
602 : JSRegExp::Flags flags = JSRegExp::kNone;
603 6 : if (multiline) flags = JSRegExp::kMultiline;
604 6 : if (unicode) flags = JSRegExp::kUnicode;
605 6 : if (!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), zone,
606 6 : &reader, flags, &compile_data))
607 : return nullptr;
608 : Handle<String> pattern = isolate->factory()
609 : ->NewStringFromUtf8(CStrVector(input))
610 12 : .ToHandleChecked();
611 : Handle<String> sample_subject =
612 12 : isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
613 : RegExpEngine::Compile(isolate, zone, &compile_data, flags, pattern,
614 6 : sample_subject, is_one_byte);
615 6 : return compile_data.node;
616 : }
617 :
618 :
619 6 : static void Execute(const char* input, bool multiline, bool unicode,
620 : bool is_one_byte, bool dot_output = false) {
621 6 : v8::HandleScope scope(CcTest::isolate());
622 12 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
623 6 : RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone);
624 6 : USE(node);
625 : #ifdef DEBUG
626 : if (dot_output) {
627 : RegExpEngine::DotPrint(input, node, false);
628 : }
629 : #endif // DEBUG
630 6 : }
631 :
632 :
// Configuration used to instantiate the splay tree in the tests below:
// integer keys and values, compared with a three-way comparison.
class TestConfig {
 public:
  using Key = int;
  using Value = int;
  static const int kNoKey;
  static int NoValue() { return 0; }
  // Returns -1, 0 or 1 when |a| is less than, equal to or greater than |b|.
  static inline int Compare(int a, int b) {
    if (a == b) return 0;
    return (a < b) ? -1 : 1;
  }
};


const int TestConfig::kNoKey = 0;
651 :
652 :
// Deterministic mixing function used to generate test data: XORs the two
// inputs after scaling each by a fixed multiplier.
static unsigned PseudoRandom(int i, int j) {
  const int scaled_i = i * 781;
  const int scaled_j = j * 329;
  return static_cast<unsigned>(scaled_i ^ scaled_j);
}
656 :
657 :
658 23724 : TEST(SplayTreeSimple) {
659 : static const unsigned kLimit = 1000;
660 6 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
661 : ZoneSplayTree<TestConfig> tree(&zone);
662 : bool seen[kLimit];
663 6 : for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
664 : #define CHECK_MAPS_EQUAL() do { \
665 : for (unsigned k = 0; k < kLimit; k++) \
666 : CHECK_EQ(seen[k], tree.Find(k, &loc)); \
667 : } while (false)
668 300 : for (int i = 0; i < 50; i++) {
669 978 : for (int j = 0; j < 50; j++) {
670 1278 : int next = PseudoRandom(i, j) % kLimit;
671 1278 : if (seen[next]) {
672 : // We've already seen this one. Check the value and remove
673 : // it.
674 : ZoneSplayTree<TestConfig>::Locator loc;
675 102 : CHECK(tree.Find(next, &loc));
676 102 : CHECK_EQ(next, loc.key());
677 102 : CHECK_EQ(3 * next, loc.value());
678 102 : tree.Remove(next);
679 102 : seen[next] = false;
680 102102 : CHECK_MAPS_EQUAL();
681 : } else {
682 : // Check that it wasn't there already and then add it.
683 : ZoneSplayTree<TestConfig>::Locator loc;
684 1176 : CHECK(!tree.Find(next, &loc));
685 1176 : CHECK(tree.Insert(next, &loc));
686 1176 : CHECK_EQ(next, loc.key());
687 1176 : loc.set_value(3 * next);
688 1176 : seen[next] = true;
689 1177176 : CHECK_MAPS_EQUAL();
690 : }
691 1278 : int val = PseudoRandom(j, i) % kLimit;
692 1278 : if (seen[val]) {
693 : ZoneSplayTree<TestConfig>::Locator loc;
694 162 : CHECK(tree.FindGreatestLessThan(val, &loc));
695 324 : CHECK_EQ(loc.key(), val);
696 : break;
697 : }
698 2232 : val = PseudoRandom(i + j, i - j) % kLimit;
699 1116 : if (seen[val]) {
700 : ZoneSplayTree<TestConfig>::Locator loc;
701 138 : CHECK(tree.FindLeastGreaterThan(val, &loc));
702 276 : CHECK_EQ(loc.key(), val);
703 : break;
704 : }
705 : }
706 6 : }
707 6 : }
708 :
709 :
710 23724 : TEST(DispatchTableConstruction) {
711 : // Initialize test data.
712 : static const int kLimit = 1000;
713 : static const int kRangeCount = 8;
714 : static const int kRangeSize = 16;
715 : uc16 ranges[kRangeCount][2 * kRangeSize];
716 54 : for (int i = 0; i < kRangeCount; i++) {
717 48 : Vector<uc16> range(ranges[i], 2 * kRangeSize);
718 1584 : for (int j = 0; j < 2 * kRangeSize; j++) {
719 4608 : range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
720 : }
721 : range.Sort();
722 1536 : for (int j = 1; j < 2 * kRangeSize; j++) {
723 4464 : CHECK(range[j-1] <= range[j]);
724 : }
725 : }
726 : // Enter test data into dispatch table.
727 6 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
728 : DispatchTable table(&zone);
729 54 : for (int i = 0; i < kRangeCount; i++) {
730 48 : uc16* range = ranges[i];
731 816 : for (int j = 0; j < 2 * kRangeSize; j += 2)
732 768 : table.AddRange(CharacterRange::Range(range[j], range[j + 1]), i, &zone);
733 : }
734 : // Check that the table looks as we would expect
735 6000 : for (int p = 0; p < kLimit; p++) {
736 6000 : OutSet* outs = table.Get(p);
737 54000 : for (int j = 0; j < kRangeCount; j++) {
738 48000 : uc16* range = ranges[j];
739 : bool is_on = false;
740 622320 : for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
741 574320 : is_on = (range[k] <= p && p <= range[k + 1]);
742 48000 : CHECK_EQ(is_on, outs->Get(j));
743 : }
744 6 : }
745 6 : }
746 :
747 :
// Test of debug-only syntax.
#ifdef DEBUG

// Possessive quantifiers parse only while FLAG_regexp_possessive_quantifier
// is on, and are rejected otherwise. The flag's original value is restored
// before returning.
TEST(ParsePossessiveRepetition) {
  const bool saved_flag = FLAG_regexp_possessive_quantifier;

  static const struct {
    const char* pattern;
    const char* expected_tree;
  } kAccepted[] = {
      {"a*+", "(# 0 - p 'a')"},
      {"a++", "(# 1 - p 'a')"},
      {"a?+", "(# 0 1 p 'a')"},
      {"a{10,20}+", "(# 10 20 p 'a')"},
      {"za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')"},
  };
  static const char* const kRejected[] = {
      "a*+", "a++", "a?+", "a{10,20}+", "a{10,20}+b",
  };

  // Enable possessive quantifier syntax.
  FLAG_regexp_possessive_quantifier = true;
  for (const auto& accepted : kAccepted) {
    CheckParseEq(accepted.pattern, accepted.expected_tree);
  }

  // Disable possessive quantifier syntax.
  FLAG_regexp_possessive_quantifier = false;
  for (const char* rejected : kRejected) {
    CHECK_PARSE_ERROR(rejected);
  }

  FLAG_regexp_possessive_quantifier = saved_flag;
}

#endif
776 :
777 : // Tests of interpreter.
778 :
779 :
780 : #ifndef V8_INTERPRETED_REGEXP
781 :
// ArchRegExpMacroAssembler names the native regexp macro assembler class for
// whichever target architecture this build is configured for.
#if V8_TARGET_ARCH_IA32
using ArchRegExpMacroAssembler = RegExpMacroAssemblerIA32;
#elif V8_TARGET_ARCH_X64
using ArchRegExpMacroAssembler = RegExpMacroAssemblerX64;
#elif V8_TARGET_ARCH_ARM
using ArchRegExpMacroAssembler = RegExpMacroAssemblerARM;
#elif V8_TARGET_ARCH_ARM64
using ArchRegExpMacroAssembler = RegExpMacroAssemblerARM64;
#elif V8_TARGET_ARCH_S390
using ArchRegExpMacroAssembler = RegExpMacroAssemblerS390;
#elif V8_TARGET_ARCH_PPC
using ArchRegExpMacroAssembler = RegExpMacroAssemblerPPC;
#elif V8_TARGET_ARCH_MIPS
using ArchRegExpMacroAssembler = RegExpMacroAssemblerMIPS;
#elif V8_TARGET_ARCH_MIPS64
// The MIPS64 configuration maps to the same class name as MIPS.
using ArchRegExpMacroAssembler = RegExpMacroAssemblerMIPS;
#elif V8_TARGET_ARCH_X87
using ArchRegExpMacroAssembler = RegExpMacroAssemblerX87;
#endif
801 :
802 : class ContextInitializer {
803 : public:
804 66 : ContextInitializer()
805 : : scope_(CcTest::isolate()),
806 132 : env_(v8::Context::New(CcTest::isolate())) {
807 66 : env_->Enter();
808 66 : }
809 66 : ~ContextInitializer() {
810 66 : env_->Exit();
811 : }
812 : private:
813 : v8::HandleScope scope_;
814 : v8::Local<v8::Context> env_;
815 : };
816 :
817 :
818 84 : static ArchRegExpMacroAssembler::Result Execute(Code* code,
819 : String* input,
820 : int start_offset,
821 : const byte* input_start,
822 : const byte* input_end,
823 : int* captures) {
824 : return NativeRegExpMacroAssembler::Execute(
825 : code,
826 : input,
827 : start_offset,
828 : input_start,
829 : input_end,
830 : captures,
831 : 0,
832 84 : CcTest::i_isolate());
833 : }
834 :
835 :
836 23724 : TEST(MacroAssemblerNativeSuccess) {
837 6 : v8::V8::Initialize();
838 6 : ContextInitializer initializer;
839 : Isolate* isolate = CcTest::i_isolate();
840 : Factory* factory = isolate->factory();
841 12 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
842 :
843 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
844 12 : 4);
845 :
846 6 : m.Succeed();
847 :
848 6 : Handle<String> source = factory->NewStringFromStaticChars("");
849 6 : Handle<Object> code_object = m.GetCode(source);
850 : Handle<Code> code = Handle<Code>::cast(code_object);
851 :
852 6 : int captures[4] = {42, 37, 87, 117};
853 6 : Handle<String> input = factory->NewStringFromStaticChars("foofoo");
854 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
855 : const byte* start_adr =
856 6 : reinterpret_cast<const byte*>(seq_input->GetCharsAddress());
857 :
858 : NativeRegExpMacroAssembler::Result result =
859 : Execute(*code,
860 : *input,
861 : 0,
862 : start_adr,
863 : start_adr + seq_input->length(),
864 12 : captures);
865 :
866 6 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
867 6 : CHECK_EQ(-1, captures[0]);
868 6 : CHECK_EQ(-1, captures[1]);
869 6 : CHECK_EQ(-1, captures[2]);
870 6 : CHECK_EQ(-1, captures[3]);
871 6 : }
872 :
873 :
874 23724 : TEST(MacroAssemblerNativeSimple) {
875 6 : v8::V8::Initialize();
876 6 : ContextInitializer initializer;
877 : Isolate* isolate = CcTest::i_isolate();
878 : Factory* factory = isolate->factory();
879 12 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
880 :
881 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
882 12 : 4);
883 :
884 : Label fail, backtrack;
885 6 : m.PushBacktrack(&fail);
886 6 : m.CheckNotAtStart(0, nullptr);
887 6 : m.LoadCurrentCharacter(2, nullptr);
888 6 : m.CheckNotCharacter('o', nullptr);
889 6 : m.LoadCurrentCharacter(1, nullptr, false);
890 6 : m.CheckNotCharacter('o', nullptr);
891 6 : m.LoadCurrentCharacter(0, nullptr, false);
892 6 : m.CheckNotCharacter('f', nullptr);
893 6 : m.WriteCurrentPositionToRegister(0, 0);
894 6 : m.WriteCurrentPositionToRegister(1, 3);
895 6 : m.AdvanceCurrentPosition(3);
896 6 : m.PushBacktrack(&backtrack);
897 6 : m.Succeed();
898 6 : m.Bind(&backtrack);
899 6 : m.Backtrack();
900 6 : m.Bind(&fail);
901 6 : m.Fail();
902 :
903 6 : Handle<String> source = factory->NewStringFromStaticChars("^foo");
904 6 : Handle<Object> code_object = m.GetCode(source);
905 : Handle<Code> code = Handle<Code>::cast(code_object);
906 :
907 6 : int captures[4] = {42, 37, 87, 117};
908 6 : Handle<String> input = factory->NewStringFromStaticChars("foofoo");
909 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
910 6 : Address start_adr = seq_input->GetCharsAddress();
911 :
912 : NativeRegExpMacroAssembler::Result result =
913 : Execute(*code,
914 : *input,
915 : 0,
916 : start_adr,
917 : start_adr + input->length(),
918 12 : captures);
919 :
920 6 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
921 6 : CHECK_EQ(0, captures[0]);
922 6 : CHECK_EQ(3, captures[1]);
923 6 : CHECK_EQ(-1, captures[2]);
924 6 : CHECK_EQ(-1, captures[3]);
925 :
926 6 : input = factory->NewStringFromStaticChars("barbarbar");
927 : seq_input = Handle<SeqOneByteString>::cast(input);
928 6 : start_adr = seq_input->GetCharsAddress();
929 :
930 : result = Execute(*code,
931 : *input,
932 : 0,
933 : start_adr,
934 : start_adr + input->length(),
935 12 : captures);
936 :
937 6 : CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
938 6 : }
939 :
940 :
941 23724 : TEST(MacroAssemblerNativeSimpleUC16) {
942 6 : v8::V8::Initialize();
943 6 : ContextInitializer initializer;
944 : Isolate* isolate = CcTest::i_isolate();
945 : Factory* factory = isolate->factory();
946 12 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
947 :
948 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
949 12 : 4);
950 :
951 : Label fail, backtrack;
952 6 : m.PushBacktrack(&fail);
953 6 : m.CheckNotAtStart(0, nullptr);
954 6 : m.LoadCurrentCharacter(2, nullptr);
955 6 : m.CheckNotCharacter('o', nullptr);
956 6 : m.LoadCurrentCharacter(1, nullptr, false);
957 6 : m.CheckNotCharacter('o', nullptr);
958 6 : m.LoadCurrentCharacter(0, nullptr, false);
959 6 : m.CheckNotCharacter('f', nullptr);
960 6 : m.WriteCurrentPositionToRegister(0, 0);
961 6 : m.WriteCurrentPositionToRegister(1, 3);
962 6 : m.AdvanceCurrentPosition(3);
963 6 : m.PushBacktrack(&backtrack);
964 6 : m.Succeed();
965 6 : m.Bind(&backtrack);
966 6 : m.Backtrack();
967 6 : m.Bind(&fail);
968 6 : m.Fail();
969 :
970 6 : Handle<String> source = factory->NewStringFromStaticChars("^foo");
971 6 : Handle<Object> code_object = m.GetCode(source);
972 : Handle<Code> code = Handle<Code>::cast(code_object);
973 :
974 6 : int captures[4] = {42, 37, 87, 117};
975 : const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
976 6 : static_cast<uc16>(0x2603)};
977 : Handle<String> input = factory->NewStringFromTwoByte(
978 12 : Vector<const uc16>(input_data, 6)).ToHandleChecked();
979 : Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
980 6 : Address start_adr = seq_input->GetCharsAddress();
981 :
982 : NativeRegExpMacroAssembler::Result result =
983 : Execute(*code,
984 : *input,
985 : 0,
986 : start_adr,
987 : start_adr + input->length(),
988 12 : captures);
989 :
990 6 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
991 6 : CHECK_EQ(0, captures[0]);
992 6 : CHECK_EQ(3, captures[1]);
993 6 : CHECK_EQ(-1, captures[2]);
994 6 : CHECK_EQ(-1, captures[3]);
995 :
996 : const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
997 6 : static_cast<uc16>(0x2603)};
998 : input = factory->NewStringFromTwoByte(
999 12 : Vector<const uc16>(input_data2, 9)).ToHandleChecked();
1000 : seq_input = Handle<SeqTwoByteString>::cast(input);
1001 6 : start_adr = seq_input->GetCharsAddress();
1002 :
1003 : result = Execute(*code,
1004 : *input,
1005 : 0,
1006 : start_adr,
1007 6 : start_adr + input->length() * 2,
1008 12 : captures);
1009 :
1010 6 : CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
1011 6 : }
1012 :
1013 :
1014 23724 : TEST(MacroAssemblerNativeBacktrack) {
1015 6 : v8::V8::Initialize();
1016 6 : ContextInitializer initializer;
1017 : Isolate* isolate = CcTest::i_isolate();
1018 : Factory* factory = isolate->factory();
1019 12 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1020 :
1021 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1022 12 : 0);
1023 :
1024 : Label fail;
1025 : Label backtrack;
1026 6 : m.LoadCurrentCharacter(10, &fail);
1027 6 : m.Succeed();
1028 6 : m.Bind(&fail);
1029 6 : m.PushBacktrack(&backtrack);
1030 6 : m.LoadCurrentCharacter(10, nullptr);
1031 6 : m.Succeed();
1032 6 : m.Bind(&backtrack);
1033 6 : m.Fail();
1034 :
1035 6 : Handle<String> source = factory->NewStringFromStaticChars("..........");
1036 6 : Handle<Object> code_object = m.GetCode(source);
1037 : Handle<Code> code = Handle<Code>::cast(code_object);
1038 :
1039 6 : Handle<String> input = factory->NewStringFromStaticChars("foofoo");
1040 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1041 6 : Address start_adr = seq_input->GetCharsAddress();
1042 :
1043 : NativeRegExpMacroAssembler::Result result = Execute(
1044 12 : *code, *input, 0, start_adr, start_adr + input->length(), nullptr);
1045 :
1046 6 : CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
1047 6 : }
1048 :
1049 :
1050 23724 : TEST(MacroAssemblerNativeBackReferenceLATIN1) {
1051 6 : v8::V8::Initialize();
1052 6 : ContextInitializer initializer;
1053 : Isolate* isolate = CcTest::i_isolate();
1054 : Factory* factory = isolate->factory();
1055 12 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1056 :
1057 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1058 12 : 4);
1059 :
1060 6 : m.WriteCurrentPositionToRegister(0, 0);
1061 6 : m.AdvanceCurrentPosition(2);
1062 6 : m.WriteCurrentPositionToRegister(1, 0);
1063 : Label nomatch;
1064 6 : m.CheckNotBackReference(0, false, &nomatch);
1065 6 : m.Fail();
1066 6 : m.Bind(&nomatch);
1067 6 : m.AdvanceCurrentPosition(2);
1068 : Label missing_match;
1069 6 : m.CheckNotBackReference(0, false, &missing_match);
1070 6 : m.WriteCurrentPositionToRegister(2, 0);
1071 6 : m.Succeed();
1072 6 : m.Bind(&missing_match);
1073 6 : m.Fail();
1074 :
1075 6 : Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
1076 6 : Handle<Object> code_object = m.GetCode(source);
1077 : Handle<Code> code = Handle<Code>::cast(code_object);
1078 :
1079 6 : Handle<String> input = factory->NewStringFromStaticChars("fooofo");
1080 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1081 6 : Address start_adr = seq_input->GetCharsAddress();
1082 :
1083 : int output[4];
1084 : NativeRegExpMacroAssembler::Result result =
1085 : Execute(*code,
1086 : *input,
1087 : 0,
1088 : start_adr,
1089 : start_adr + input->length(),
1090 12 : output);
1091 :
1092 6 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1093 6 : CHECK_EQ(0, output[0]);
1094 6 : CHECK_EQ(2, output[1]);
1095 6 : CHECK_EQ(6, output[2]);
1096 6 : CHECK_EQ(-1, output[3]);
1097 6 : }
1098 :
1099 :
1100 23724 : TEST(MacroAssemblerNativeBackReferenceUC16) {
1101 6 : v8::V8::Initialize();
1102 6 : ContextInitializer initializer;
1103 : Isolate* isolate = CcTest::i_isolate();
1104 : Factory* factory = isolate->factory();
1105 12 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1106 :
1107 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
1108 12 : 4);
1109 :
1110 6 : m.WriteCurrentPositionToRegister(0, 0);
1111 6 : m.AdvanceCurrentPosition(2);
1112 6 : m.WriteCurrentPositionToRegister(1, 0);
1113 : Label nomatch;
1114 6 : m.CheckNotBackReference(0, false, &nomatch);
1115 6 : m.Fail();
1116 6 : m.Bind(&nomatch);
1117 6 : m.AdvanceCurrentPosition(2);
1118 : Label missing_match;
1119 6 : m.CheckNotBackReference(0, false, &missing_match);
1120 6 : m.WriteCurrentPositionToRegister(2, 0);
1121 6 : m.Succeed();
1122 6 : m.Bind(&missing_match);
1123 6 : m.Fail();
1124 :
1125 6 : Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
1126 6 : Handle<Object> code_object = m.GetCode(source);
1127 : Handle<Code> code = Handle<Code>::cast(code_object);
1128 :
1129 6 : const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
1130 : Handle<String> input = factory->NewStringFromTwoByte(
1131 12 : Vector<const uc16>(input_data, 6)).ToHandleChecked();
1132 : Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
1133 6 : Address start_adr = seq_input->GetCharsAddress();
1134 :
1135 : int output[4];
1136 : NativeRegExpMacroAssembler::Result result =
1137 : Execute(*code,
1138 : *input,
1139 : 0,
1140 : start_adr,
1141 6 : start_adr + input->length() * 2,
1142 12 : output);
1143 :
1144 6 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1145 6 : CHECK_EQ(0, output[0]);
1146 6 : CHECK_EQ(2, output[1]);
1147 6 : CHECK_EQ(6, output[2]);
1148 6 : CHECK_EQ(-1, output[3]);
1149 6 : }
1150 :
1151 :
1152 :
1153 23724 : TEST(MacroAssemblernativeAtStart) {
1154 6 : v8::V8::Initialize();
1155 6 : ContextInitializer initializer;
1156 : Isolate* isolate = CcTest::i_isolate();
1157 : Factory* factory = isolate->factory();
1158 12 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1159 :
1160 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1161 12 : 0);
1162 :
1163 : Label not_at_start, newline, fail;
1164 6 : m.CheckNotAtStart(0, ¬_at_start);
1165 : // Check that prevchar = '\n' and current = 'f'.
1166 6 : m.CheckCharacter('\n', &newline);
1167 6 : m.Bind(&fail);
1168 6 : m.Fail();
1169 6 : m.Bind(&newline);
1170 6 : m.LoadCurrentCharacter(0, &fail);
1171 6 : m.CheckNotCharacter('f', &fail);
1172 6 : m.Succeed();
1173 :
1174 6 : m.Bind(¬_at_start);
1175 : // Check that prevchar = 'o' and current = 'b'.
1176 : Label prevo;
1177 6 : m.CheckCharacter('o', &prevo);
1178 6 : m.Fail();
1179 6 : m.Bind(&prevo);
1180 6 : m.LoadCurrentCharacter(0, &fail);
1181 6 : m.CheckNotCharacter('b', &fail);
1182 6 : m.Succeed();
1183 :
1184 6 : Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)");
1185 6 : Handle<Object> code_object = m.GetCode(source);
1186 : Handle<Code> code = Handle<Code>::cast(code_object);
1187 :
1188 6 : Handle<String> input = factory->NewStringFromStaticChars("foobar");
1189 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1190 6 : Address start_adr = seq_input->GetCharsAddress();
1191 :
1192 : NativeRegExpMacroAssembler::Result result = Execute(
1193 12 : *code, *input, 0, start_adr, start_adr + input->length(), nullptr);
1194 :
1195 6 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1196 :
1197 : result = Execute(*code, *input, 3, start_adr + 3, start_adr + input->length(),
1198 12 : nullptr);
1199 :
1200 6 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1201 6 : }
1202 :
1203 :
1204 23724 : TEST(MacroAssemblerNativeBackRefNoCase) {
1205 6 : v8::V8::Initialize();
1206 6 : ContextInitializer initializer;
1207 : Isolate* isolate = CcTest::i_isolate();
1208 : Factory* factory = isolate->factory();
1209 12 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1210 :
1211 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1212 12 : 4);
1213 :
1214 : Label fail, succ;
1215 :
1216 6 : m.WriteCurrentPositionToRegister(0, 0);
1217 6 : m.WriteCurrentPositionToRegister(2, 0);
1218 6 : m.AdvanceCurrentPosition(3);
1219 6 : m.WriteCurrentPositionToRegister(3, 0);
1220 6 : m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail); // Match "AbC".
1221 6 : m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail); // Match "ABC".
1222 : Label expected_fail;
1223 6 : m.CheckNotBackReferenceIgnoreCase(2, false, false, &expected_fail);
1224 6 : m.Bind(&fail);
1225 6 : m.Fail();
1226 :
1227 6 : m.Bind(&expected_fail);
1228 6 : m.AdvanceCurrentPosition(3); // Skip "xYz"
1229 6 : m.CheckNotBackReferenceIgnoreCase(2, false, false, &succ);
1230 6 : m.Fail();
1231 :
1232 6 : m.Bind(&succ);
1233 6 : m.WriteCurrentPositionToRegister(1, 0);
1234 6 : m.Succeed();
1235 :
1236 : Handle<String> source =
1237 6 : factory->NewStringFromStaticChars("^(abc)\1\1(?!\1)...(?!\1)");
1238 6 : Handle<Object> code_object = m.GetCode(source);
1239 : Handle<Code> code = Handle<Code>::cast(code_object);
1240 :
1241 6 : Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab");
1242 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1243 6 : Address start_adr = seq_input->GetCharsAddress();
1244 :
1245 : int output[4];
1246 : NativeRegExpMacroAssembler::Result result =
1247 : Execute(*code,
1248 : *input,
1249 : 0,
1250 : start_adr,
1251 : start_adr + input->length(),
1252 12 : output);
1253 :
1254 6 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1255 6 : CHECK_EQ(0, output[0]);
1256 6 : CHECK_EQ(12, output[1]);
1257 6 : CHECK_EQ(0, output[2]);
1258 6 : CHECK_EQ(3, output[3]);
1259 6 : }
1260 :
1261 :
1262 :
1263 23724 : TEST(MacroAssemblerNativeRegisters) {
1264 6 : v8::V8::Initialize();
1265 6 : ContextInitializer initializer;
1266 : Isolate* isolate = CcTest::i_isolate();
1267 : Factory* factory = isolate->factory();
1268 12 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1269 :
1270 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1271 12 : 6);
1272 :
1273 6 : uc16 foo_chars[3] = {'f', 'o', 'o'};
1274 : Vector<const uc16> foo(foo_chars, 3);
1275 :
1276 : enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
1277 : Label fail;
1278 : Label backtrack;
1279 6 : m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
1280 6 : m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1281 6 : m.PushBacktrack(&backtrack);
1282 6 : m.WriteStackPointerToRegister(sp);
1283 : // Fill stack and registers
1284 6 : m.AdvanceCurrentPosition(2);
1285 6 : m.WriteCurrentPositionToRegister(out1, 0);
1286 6 : m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1287 6 : m.PushBacktrack(&fail);
1288 : // Drop backtrack stack frames.
1289 6 : m.ReadStackPointerFromRegister(sp);
1290 : // And take the first backtrack (to &backtrack)
1291 6 : m.Backtrack();
1292 :
1293 6 : m.PushCurrentPosition();
1294 6 : m.AdvanceCurrentPosition(2);
1295 6 : m.PopCurrentPosition();
1296 :
1297 6 : m.Bind(&backtrack);
1298 6 : m.PopRegister(out1);
1299 6 : m.ReadCurrentPositionFromRegister(out1);
1300 6 : m.AdvanceCurrentPosition(3);
1301 6 : m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
1302 :
1303 : Label loop;
1304 6 : m.SetRegister(loop_cnt, 0); // loop counter
1305 6 : m.Bind(&loop);
1306 6 : m.AdvanceRegister(loop_cnt, 1);
1307 6 : m.AdvanceCurrentPosition(1);
1308 6 : m.IfRegisterLT(loop_cnt, 3, &loop);
1309 6 : m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
1310 :
1311 : Label loop2;
1312 6 : m.SetRegister(loop_cnt, 2); // loop counter
1313 6 : m.Bind(&loop2);
1314 6 : m.AdvanceRegister(loop_cnt, -1);
1315 6 : m.AdvanceCurrentPosition(1);
1316 6 : m.IfRegisterGE(loop_cnt, 0, &loop2);
1317 6 : m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
1318 :
1319 : Label loop3;
1320 : Label exit_loop3;
1321 6 : m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1322 6 : m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1323 6 : m.ReadCurrentPositionFromRegister(out3);
1324 6 : m.Bind(&loop3);
1325 6 : m.AdvanceCurrentPosition(1);
1326 6 : m.CheckGreedyLoop(&exit_loop3);
1327 6 : m.GoTo(&loop3);
1328 6 : m.Bind(&exit_loop3);
1329 6 : m.PopCurrentPosition();
1330 6 : m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9,-1]
1331 :
1332 6 : m.Succeed();
1333 :
1334 6 : m.Bind(&fail);
1335 6 : m.Fail();
1336 :
1337 6 : Handle<String> source = factory->NewStringFromStaticChars("<loop test>");
1338 6 : Handle<Object> code_object = m.GetCode(source);
1339 : Handle<Code> code = Handle<Code>::cast(code_object);
1340 :
1341 : // String long enough for test (content doesn't matter).
1342 6 : Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo");
1343 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1344 6 : Address start_adr = seq_input->GetCharsAddress();
1345 :
1346 : int output[6];
1347 : NativeRegExpMacroAssembler::Result result =
1348 : Execute(*code,
1349 : *input,
1350 : 0,
1351 : start_adr,
1352 : start_adr + input->length(),
1353 12 : output);
1354 :
1355 6 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1356 6 : CHECK_EQ(0, output[0]);
1357 6 : CHECK_EQ(3, output[1]);
1358 6 : CHECK_EQ(6, output[2]);
1359 6 : CHECK_EQ(9, output[3]);
1360 6 : CHECK_EQ(9, output[4]);
1361 6 : CHECK_EQ(-1, output[5]);
1362 6 : }
1363 :
1364 :
1365 23724 : TEST(MacroAssemblerStackOverflow) {
1366 6 : v8::V8::Initialize();
1367 6 : ContextInitializer initializer;
1368 : Isolate* isolate = CcTest::i_isolate();
1369 : Factory* factory = isolate->factory();
1370 12 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1371 :
1372 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1373 12 : 0);
1374 :
1375 : Label loop;
1376 6 : m.Bind(&loop);
1377 6 : m.PushBacktrack(&loop);
1378 6 : m.GoTo(&loop);
1379 :
1380 : Handle<String> source =
1381 6 : factory->NewStringFromStaticChars("<stack overflow test>");
1382 6 : Handle<Object> code_object = m.GetCode(source);
1383 : Handle<Code> code = Handle<Code>::cast(code_object);
1384 :
1385 : // String long enough for test (content doesn't matter).
1386 6 : Handle<String> input = factory->NewStringFromStaticChars("dummy");
1387 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1388 6 : Address start_adr = seq_input->GetCharsAddress();
1389 :
1390 : NativeRegExpMacroAssembler::Result result = Execute(
1391 12 : *code, *input, 0, start_adr, start_adr + input->length(), nullptr);
1392 :
1393 6 : CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
1394 6 : CHECK(isolate->has_pending_exception());
1395 : isolate->clear_pending_exception();
1396 6 : }
1397 :
1398 :
1399 23724 : TEST(MacroAssemblerNativeLotsOfRegisters) {
1400 6 : v8::V8::Initialize();
1401 6 : ContextInitializer initializer;
1402 : Isolate* isolate = CcTest::i_isolate();
1403 : Factory* factory = isolate->factory();
1404 12 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1405 :
1406 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1407 12 : 2);
1408 :
1409 : // At least 2048, to ensure the allocated space for registers
1410 : // span one full page.
1411 : const int large_number = 8000;
1412 6 : m.WriteCurrentPositionToRegister(large_number, 42);
1413 6 : m.WriteCurrentPositionToRegister(0, 0);
1414 6 : m.WriteCurrentPositionToRegister(1, 1);
1415 : Label done;
1416 6 : m.CheckNotBackReference(0, false, &done); // Performs a system-stack push.
1417 6 : m.Bind(&done);
1418 6 : m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
1419 6 : m.PopRegister(1);
1420 6 : m.Succeed();
1421 :
1422 : Handle<String> source =
1423 6 : factory->NewStringFromStaticChars("<huge register space test>");
1424 6 : Handle<Object> code_object = m.GetCode(source);
1425 : Handle<Code> code = Handle<Code>::cast(code_object);
1426 :
1427 : // String long enough for test (content doesn't matter).
1428 6 : Handle<String> input = factory->NewStringFromStaticChars("sample text");
1429 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1430 6 : Address start_adr = seq_input->GetCharsAddress();
1431 :
1432 : int captures[2];
1433 : NativeRegExpMacroAssembler::Result result =
1434 : Execute(*code,
1435 : *input,
1436 : 0,
1437 : start_adr,
1438 : start_adr + input->length(),
1439 12 : captures);
1440 :
1441 6 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1442 6 : CHECK_EQ(0, captures[0]);
1443 6 : CHECK_EQ(42, captures[1]);
1444 :
1445 : isolate->clear_pending_exception();
1446 6 : }
1447 :
1448 : #else // V8_INTERPRETED_REGEXP
1449 :
1450 : TEST(MacroAssembler) {
1451 : byte codes[1024];
1452 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1453 : RegExpMacroAssemblerIrregexp m(CcTest::i_isolate(), Vector<byte>(codes, 1024),
1454 : &zone);
1455 : // ^f(o)o.
1456 : Label start, fail, backtrack;
1457 :
1458 : m.SetRegister(4, 42);
1459 : m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
1460 : m.AdvanceRegister(4, 42);
1461 : m.GoTo(&start);
1462 : m.Fail();
1463 : m.Bind(&start);
1464 : m.PushBacktrack(&fail);
1465 : m.CheckNotAtStart(0, nullptr);
1466 : m.LoadCurrentCharacter(0, nullptr);
1467 : m.CheckNotCharacter('f', nullptr);
1468 : m.LoadCurrentCharacter(1, nullptr);
1469 : m.CheckNotCharacter('o', nullptr);
1470 : m.LoadCurrentCharacter(2, nullptr);
1471 : m.CheckNotCharacter('o', nullptr);
1472 : m.WriteCurrentPositionToRegister(0, 0);
1473 : m.WriteCurrentPositionToRegister(1, 3);
1474 : m.WriteCurrentPositionToRegister(2, 1);
1475 : m.WriteCurrentPositionToRegister(3, 2);
1476 : m.AdvanceCurrentPosition(3);
1477 : m.PushBacktrack(&backtrack);
1478 : m.Succeed();
1479 : m.Bind(&backtrack);
1480 : m.ClearRegisters(2, 3);
1481 : m.Backtrack();
1482 : m.Bind(&fail);
1483 : m.PopRegister(0);
1484 : m.Fail();
1485 :
1486 : Isolate* isolate = CcTest::i_isolate();
1487 : Factory* factory = isolate->factory();
1488 : HandleScope scope(isolate);
1489 :
1490 : Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
1491 : Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
1492 : int captures[5];
1493 :
1494 : const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
1495 : Handle<String> f1_16 = factory->NewStringFromTwoByte(
1496 : Vector<const uc16>(str1, 6)).ToHandleChecked();
1497 :
1498 : CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
1499 : CHECK_EQ(0, captures[0]);
1500 : CHECK_EQ(3, captures[1]);
1501 : CHECK_EQ(1, captures[2]);
1502 : CHECK_EQ(2, captures[3]);
1503 : CHECK_EQ(84, captures[4]);
1504 :
1505 : const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
1506 : Handle<String> f2_16 = factory->NewStringFromTwoByte(
1507 : Vector<const uc16>(str2, 6)).ToHandleChecked();
1508 :
1509 : CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
1510 : CHECK_EQ(42, captures[0]);
1511 : }
1512 :
1513 : #endif // V8_INTERPRETED_REGEXP
1514 :
1515 :
1516 23724 : TEST(AddInverseToTable) {
1517 : static const int kLimit = 1000;
1518 : static const int kRangeCount = 16;
1519 66 : for (int t = 0; t < 10; t++) {
1520 60 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1521 : ZoneList<CharacterRange>* ranges =
1522 60 : new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
1523 1020 : for (int i = 0; i < kRangeCount; i++) {
1524 1920 : int from = PseudoRandom(t + 87, i + 25) % kLimit;
1525 1920 : int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
1526 960 : if (to > kLimit) to = kLimit;
1527 960 : ranges->Add(CharacterRange::Range(from, to), &zone);
1528 : }
1529 : DispatchTable table(&zone);
1530 : DispatchTableConstructor cons(&table, false, &zone);
1531 : cons.set_choice_index(0);
1532 60 : cons.AddInverse(ranges);
1533 60060 : for (int i = 0; i < kLimit; i++) {
1534 : bool is_on = false;
1535 810738 : for (int j = 0; !is_on && j < kRangeCount; j++)
1536 : is_on = ranges->at(j).Contains(i);
1537 60000 : OutSet* set = table.Get(i);
1538 60000 : CHECK_EQ(is_on, set->Get(0) == false);
1539 : }
1540 60 : }
1541 6 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1542 : ZoneList<CharacterRange>* ranges =
1543 6 : new(&zone) ZoneList<CharacterRange>(1, &zone);
1544 6 : ranges->Add(CharacterRange::Range(0xFFF0, 0xFFFE), &zone);
1545 : DispatchTable table(&zone);
1546 : DispatchTableConstructor cons(&table, false, &zone);
1547 : cons.set_choice_index(0);
1548 6 : cons.AddInverse(ranges);
1549 6 : CHECK(!table.Get(0xFFFE)->Get(0));
1550 12 : CHECK(table.Get(0xFFFF)->Get(0));
1551 6 : }
1552 :
1553 :
1554 12582456 : static uc32 canonicalize(uc32 c) {
1555 : unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
1556 12582456 : int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, nullptr);
1557 12582456 : if (count == 0) {
1558 : return c;
1559 : } else {
1560 5994 : CHECK_EQ(1, count);
1561 5994 : return canon[0];
1562 : }
1563 : }
1564 :
1565 :
1566 23724 : TEST(LatinCanonicalize) {
1567 : unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1568 156 : for (unibrow::uchar lower = 'a'; lower <= 'z'; lower++) {
1569 156 : unibrow::uchar upper = lower + ('A' - 'a');
1570 156 : CHECK_EQ(canonicalize(lower), canonicalize(upper));
1571 : unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1572 156 : int length = un_canonicalize.get(lower, '\0', uncanon);
1573 156 : CHECK_EQ(2, length);
1574 156 : CHECK_EQ(upper, uncanon[0]);
1575 156 : CHECK_EQ(lower, uncanon[1]);
1576 : }
1577 12582144 : for (uc32 c = 128; c < (1 << 21); c++)
1578 12582144 : CHECK_GE(canonicalize(c), 128);
1579 : #ifndef V8_INTL_SUPPORT
1580 : unibrow::Mapping<unibrow::ToUppercase> to_upper;
1581 : // Canonicalization is only defined for the Basic Multilingual Plane.
1582 : for (uc32 c = 0; c < (1 << 16); c++) {
1583 : unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
1584 : int length = to_upper.get(c, '\0', upper);
1585 : if (length == 0) {
1586 : length = 1;
1587 : upper[0] = c;
1588 : }
1589 : uc32 u = upper[0];
1590 : if (length > 1 || (c >= 128 && u < 128))
1591 : u = c;
1592 : CHECK_EQ(u, canonicalize(c));
1593 : }
1594 : #endif
1595 6 : }
1596 :
1597 :
1598 389196 : static uc32 CanonRangeEnd(uc32 c) {
1599 : unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
1600 389196 : int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, nullptr);
1601 389196 : if (count == 0) {
1602 : return c;
1603 : } else {
1604 264 : CHECK_EQ(1, count);
1605 264 : return canon[0];
1606 : }
1607 : }
1608 :
1609 :
1610 23724 : TEST(RangeCanonicalization) {
1611 : // Check that we arrive at the same result when using the basic
1612 : // range canonicalization primitives as when using immediate
1613 : // canonicalization.
1614 : unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1615 : int block_start = 0;
1616 389202 : while (block_start <= 0xFFFF) {
1617 389196 : uc32 block_end = CanonRangeEnd(block_start);
1618 389196 : unsigned block_length = block_end - block_start + 1;
1619 389196 : if (block_length > 1) {
1620 : unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1621 264 : int first_length = un_canonicalize.get(block_start, '\0', first);
1622 4284 : for (unsigned i = 1; i < block_length; i++) {
1623 : unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1624 4020 : int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
1625 4020 : CHECK_EQ(first_length, succ_length);
1626 8040 : for (int j = 0; j < succ_length; j++) {
1627 8040 : int calc = first[j] + i;
1628 8040 : int found = succ[j];
1629 8040 : CHECK_EQ(calc, found);
1630 : }
1631 : }
1632 : }
1633 389196 : block_start = block_start + block_length;
1634 : }
1635 6 : }
1636 :
1637 :
1638 23724 : TEST(UncanonicalizeEquivalence) {
1639 : unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1640 : unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1641 393216 : for (int i = 0; i < (1 << 16); i++) {
1642 393216 : int length = un_canonicalize.get(i, '\0', chars);
1643 417300 : for (int j = 0; j < length; j++) {
1644 : unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1645 24084 : int length2 = un_canonicalize.get(chars[j], '\0', chars2);
1646 24084 : CHECK_EQ(length, length2);
1647 49116 : for (int k = 0; k < length; k++)
1648 49116 : CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
1649 : }
1650 : }
1651 6 : }
1652 :
1653 :
1654 66 : static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
1655 : Vector<CharacterRange> expected) {
1656 66 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1657 66 : int count = expected.length();
1658 66 : ZoneList<CharacterRange>* list =
1659 66 : new(&zone) ZoneList<CharacterRange>(count, &zone);
1660 66 : list->Add(input, &zone);
1661 66 : CharacterRange::AddCaseEquivalents(isolate, &zone, list, false);
1662 66 : list->Remove(0); // Remove the input before checking results.
1663 66 : CHECK_EQ(count, list->length());
1664 66 : for (int i = 0; i < list->length(); i++) {
1665 132 : CHECK_EQ(expected[i].from(), list->at(i).from());
1666 66 : CHECK_EQ(expected[i].to(), list->at(i).to());
1667 66 : }
1668 66 : }
1669 :
1670 :
1671 66 : static void TestSimpleRangeCaseIndependence(Isolate* isolate,
1672 : CharacterRange input,
1673 : CharacterRange expected) {
1674 : EmbeddedVector<CharacterRange, 1> vector;
1675 66 : vector[0] = expected;
1676 66 : TestRangeCaseIndependence(isolate, input, vector);
1677 66 : }
1678 :
1679 :
1680 23724 : TEST(CharacterRangeCaseIndependence) {
1681 : Isolate* isolate = CcTest::i_isolate();
1682 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('a'),
1683 6 : CharacterRange::Singleton('A'));
1684 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('z'),
1685 6 : CharacterRange::Singleton('Z'));
1686 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'z'),
1687 6 : CharacterRange::Range('A', 'Z'));
1688 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('c', 'f'),
1689 6 : CharacterRange::Range('C', 'F'));
1690 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'b'),
1691 6 : CharacterRange::Range('A', 'B'));
1692 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('y', 'z'),
1693 6 : CharacterRange::Range('Y', 'Z'));
1694 : TestSimpleRangeCaseIndependence(isolate,
1695 : CharacterRange::Range('a' - 1, 'z' + 1),
1696 6 : CharacterRange::Range('A', 'Z'));
1697 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'Z'),
1698 6 : CharacterRange::Range('a', 'z'));
1699 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('C', 'F'),
1700 6 : CharacterRange::Range('c', 'f'));
1701 : TestSimpleRangeCaseIndependence(isolate,
1702 : CharacterRange::Range('A' - 1, 'Z' + 1),
1703 6 : CharacterRange::Range('a', 'z'));
1704 : // Here we need to add [l-z] to complete the case independence of
1705 : // [A-Za-z] but we expect [a-z] to be added since we always add a
1706 : // whole block at a time.
1707 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'k'),
1708 6 : CharacterRange::Range('a', 'z'));
1709 6 : }
1710 :
1711 :
1712 80166630 : static bool InClass(uc32 c, ZoneList<CharacterRange>* ranges) {
1713 26738592 : if (ranges == nullptr) return false;
1714 80117484 : for (int i = 0; i < ranges->length(); i++) {
1715 33374094 : CharacterRange range = ranges->at(i);
1716 33374094 : if (range.from() <= c && c <= range.to())
1717 : return true;
1718 : }
1719 : return false;
1720 : }
1721 :
1722 :
1723 23724 : TEST(UnicodeRangeSplitter) {
1724 6 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1725 : ZoneList<CharacterRange>* base =
1726 6 : new(&zone) ZoneList<CharacterRange>(1, &zone);
1727 6 : base->Add(CharacterRange::Everything(), &zone);
1728 6 : UnicodeRangeSplitter splitter(&zone, base);
1729 : // BMP
1730 331782 : for (uc32 c = 0; c < 0xd800; c++) {
1731 331776 : CHECK(InClass(c, splitter.bmp()));
1732 331776 : CHECK(!InClass(c, splitter.lead_surrogates()));
1733 331776 : CHECK(!InClass(c, splitter.trail_surrogates()));
1734 331776 : CHECK(!InClass(c, splitter.non_bmp()));
1735 : }
1736 : // Lead surrogates
1737 6138 : for (uc32 c = 0xd800; c < 0xdbff; c++) {
1738 6138 : CHECK(!InClass(c, splitter.bmp()));
1739 6138 : CHECK(InClass(c, splitter.lead_surrogates()));
1740 6138 : CHECK(!InClass(c, splitter.trail_surrogates()));
1741 6138 : CHECK(!InClass(c, splitter.non_bmp()));
1742 : }
1743 : // Trail surrogates
1744 6138 : for (uc32 c = 0xdc00; c < 0xdfff; c++) {
1745 6138 : CHECK(!InClass(c, splitter.bmp()));
1746 6138 : CHECK(!InClass(c, splitter.lead_surrogates()));
1747 6138 : CHECK(InClass(c, splitter.trail_surrogates()));
1748 6138 : CHECK(!InClass(c, splitter.non_bmp()));
1749 : }
1750 : // BMP
1751 49146 : for (uc32 c = 0xe000; c < 0xffff; c++) {
1752 49146 : CHECK(InClass(c, splitter.bmp()));
1753 49146 : CHECK(!InClass(c, splitter.lead_surrogates()));
1754 49146 : CHECK(!InClass(c, splitter.trail_surrogates()));
1755 49146 : CHECK(!InClass(c, splitter.non_bmp()));
1756 : }
1757 : // Non-BMP
1758 6291450 : for (uc32 c = 0x10000; c < 0x10ffff; c++) {
1759 6291450 : CHECK(!InClass(c, splitter.bmp()));
1760 6291450 : CHECK(!InClass(c, splitter.lead_surrogates()));
1761 6291450 : CHECK(!InClass(c, splitter.trail_surrogates()));
1762 6291450 : CHECK(InClass(c, splitter.non_bmp()));
1763 6 : }
1764 6 : }
1765 :
1766 :
1767 23724 : TEST(CanonicalizeCharacterSets) {
1768 6 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1769 24 : ZoneList<CharacterRange>* list =
1770 6 : new(&zone) ZoneList<CharacterRange>(4, &zone);
1771 : CharacterSet set(list);
1772 :
1773 6 : list->Add(CharacterRange::Range(10, 20), &zone);
1774 6 : list->Add(CharacterRange::Range(30, 40), &zone);
1775 6 : list->Add(CharacterRange::Range(50, 60), &zone);
1776 6 : set.Canonicalize();
1777 6 : CHECK_EQ(3, list->length());
1778 6 : CHECK_EQ(10, list->at(0).from());
1779 6 : CHECK_EQ(20, list->at(0).to());
1780 6 : CHECK_EQ(30, list->at(1).from());
1781 6 : CHECK_EQ(40, list->at(1).to());
1782 6 : CHECK_EQ(50, list->at(2).from());
1783 6 : CHECK_EQ(60, list->at(2).to());
1784 :
1785 : list->Rewind(0);
1786 6 : list->Add(CharacterRange::Range(10, 20), &zone);
1787 6 : list->Add(CharacterRange::Range(50, 60), &zone);
1788 6 : list->Add(CharacterRange::Range(30, 40), &zone);
1789 6 : set.Canonicalize();
1790 6 : CHECK_EQ(3, list->length());
1791 6 : CHECK_EQ(10, list->at(0).from());
1792 6 : CHECK_EQ(20, list->at(0).to());
1793 6 : CHECK_EQ(30, list->at(1).from());
1794 6 : CHECK_EQ(40, list->at(1).to());
1795 6 : CHECK_EQ(50, list->at(2).from());
1796 6 : CHECK_EQ(60, list->at(2).to());
1797 :
1798 : list->Rewind(0);
1799 6 : list->Add(CharacterRange::Range(30, 40), &zone);
1800 6 : list->Add(CharacterRange::Range(10, 20), &zone);
1801 6 : list->Add(CharacterRange::Range(25, 25), &zone);
1802 6 : list->Add(CharacterRange::Range(100, 100), &zone);
1803 6 : list->Add(CharacterRange::Range(1, 1), &zone);
1804 6 : set.Canonicalize();
1805 6 : CHECK_EQ(5, list->length());
1806 6 : CHECK_EQ(1, list->at(0).from());
1807 6 : CHECK_EQ(1, list->at(0).to());
1808 6 : CHECK_EQ(10, list->at(1).from());
1809 6 : CHECK_EQ(20, list->at(1).to());
1810 6 : CHECK_EQ(25, list->at(2).from());
1811 6 : CHECK_EQ(25, list->at(2).to());
1812 6 : CHECK_EQ(30, list->at(3).from());
1813 6 : CHECK_EQ(40, list->at(3).to());
1814 6 : CHECK_EQ(100, list->at(4).from());
1815 6 : CHECK_EQ(100, list->at(4).to());
1816 :
1817 : list->Rewind(0);
1818 6 : list->Add(CharacterRange::Range(10, 19), &zone);
1819 6 : list->Add(CharacterRange::Range(21, 30), &zone);
1820 6 : list->Add(CharacterRange::Range(20, 20), &zone);
1821 6 : set.Canonicalize();
1822 6 : CHECK_EQ(1, list->length());
1823 6 : CHECK_EQ(10, list->at(0).from());
1824 6 : CHECK_EQ(30, list->at(0).to());
1825 6 : }
1826 :
1827 :
1828 23724 : TEST(CharacterRangeMerge) {
1829 6 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1830 6 : ZoneList<CharacterRange> l1(4, &zone);
1831 6 : ZoneList<CharacterRange> l2(4, &zone);
1832 : // Create all combinations of intersections of ranges, both singletons and
1833 : // longer.
1834 :
1835 : int offset = 0;
1836 :
1837 : // The five kinds of singleton intersections:
1838 : // X
1839 : // Y - outside before
1840 : // Y - outside touching start
1841 : // Y - overlap
1842 : // Y - outside touching end
1843 : // Y - outside after
1844 :
1845 36 : for (int i = 0; i < 5; i++) {
1846 30 : l1.Add(CharacterRange::Singleton(offset + 2), &zone);
1847 30 : l2.Add(CharacterRange::Singleton(offset + i), &zone);
1848 30 : offset += 6;
1849 : }
1850 :
1851 : // The seven kinds of singleton/non-singleton intersections:
1852 : // XXX
1853 : // Y - outside before
1854 : // Y - outside touching start
1855 : // Y - inside touching start
1856 : // Y - entirely inside
1857 : // Y - inside touching end
1858 : // Y - outside touching end
1859 : // Y - disjoint after
1860 :
1861 42 : for (int i = 0; i < 7; i++) {
1862 42 : l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
1863 42 : l2.Add(CharacterRange::Singleton(offset + i), &zone);
1864 42 : offset += 8;
1865 : }
1866 :
1867 : // The eleven kinds of non-singleton intersections:
1868 : //
1869 : // XXXXXXXX
1870 : // YYYY - outside before.
1871 : // YYYY - outside touching start.
1872 : // YYYY - overlapping start
1873 : // YYYY - inside touching start
1874 : // YYYY - entirely inside
1875 : // YYYY - inside touching end
1876 : // YYYY - overlapping end
1877 : // YYYY - outside touching end
1878 : // YYYY - outside after
1879 : // YYYYYYYY - identical
1880 : // YYYYYYYYYYYY - containing entirely.
1881 :
1882 54 : for (int i = 0; i < 9; i++) {
1883 54 : l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); // Length 8.
1884 54 : l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
1885 54 : offset += 22;
1886 : }
1887 6 : l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1888 6 : l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1889 : offset += 22;
1890 6 : l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1891 6 : l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone);
1892 6 : offset += 22;
1893 :
1894 : // Different kinds of multi-range overlap:
1895 : // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX
1896 : // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y
1897 :
1898 6 : l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
1899 6 : l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
1900 42 : for (int i = 0; i < 6; i++) {
1901 36 : l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
1902 36 : l2.Add(CharacterRange::Singleton(offset + 8), &zone);
1903 36 : offset += 9;
1904 : }
1905 :
1906 6 : CHECK(CharacterRange::IsCanonical(&l1));
1907 6 : CHECK(CharacterRange::IsCanonical(&l2));
1908 :
1909 6 : ZoneList<CharacterRange> first_only(4, &zone);
1910 6 : ZoneList<CharacterRange> second_only(4, &zone);
1911 6 : ZoneList<CharacterRange> both(4, &zone);
1912 6 : }
1913 :
1914 :
1915 23724 : TEST(Graph) {
1916 6 : Execute("\\b\\w+\\b", false, true, true);
1917 6 : }
1918 :
1919 :
1920 : namespace {
1921 :
1922 : int* global_use_counts = nullptr;
1923 :
1924 84 : void MockUseCounterCallback(v8::Isolate* isolate,
1925 : v8::Isolate::UseCounterFeature feature) {
1926 84 : ++global_use_counts[feature];
1927 84 : }
1928 : }
1929 :
1930 :
1931 : // Test that ES2015 RegExp compatibility fixes are in place, that they
1932 : // are not overly broad, and the appropriate UseCounters are incremented
1933 23724 : TEST(UseCountRegExp) {
1934 6 : v8::Isolate* isolate = CcTest::isolate();
1935 6 : v8::HandleScope scope(isolate);
1936 12 : LocalContext env;
1937 6 : int use_counts[v8::Isolate::kUseCounterFeatureCount] = {};
1938 6 : global_use_counts = use_counts;
1939 6 : CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback);
1940 :
1941 : // Compat fix: RegExp.prototype.sticky == undefined; UseCounter tracks it
1942 : v8::Local<v8::Value> resultSticky = CompileRun("RegExp.prototype.sticky");
1943 6 : CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1944 6 : CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1945 6 : CHECK(resultSticky->IsUndefined());
1946 :
1947 : // re.sticky has approriate value and doesn't touch UseCounter
1948 : v8::Local<v8::Value> resultReSticky = CompileRun("/a/.sticky");
1949 6 : CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1950 6 : CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1951 6 : CHECK(resultReSticky->IsFalse());
1952 :
1953 : // When the getter is caleld on another object, throw an exception
1954 : // and don't increment the UseCounter
1955 : v8::Local<v8::Value> resultStickyError = CompileRun(
1956 : "var exception;"
1957 : "try { "
1958 : " Object.getOwnPropertyDescriptor(RegExp.prototype, 'sticky')"
1959 : " .get.call(null);"
1960 : "} catch (e) {"
1961 : " exception = e;"
1962 : "}"
1963 : "exception");
1964 6 : CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1965 6 : CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1966 6 : CHECK(resultStickyError->IsObject());
1967 :
1968 : // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix;
1969 : // a UseCounter is incremented to track it.
1970 : v8::Local<v8::Value> resultToString =
1971 : CompileRun("RegExp.prototype.toString().length");
1972 6 : CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1973 6 : CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1974 6 : CHECK(resultToString->IsInt32());
1975 12 : CHECK_EQ(6,
1976 : resultToString->Int32Value(isolate->GetCurrentContext()).FromJust());
1977 :
1978 : // .toString() works on normal RegExps
1979 : v8::Local<v8::Value> resultReToString = CompileRun("/a/.toString().length");
1980 6 : CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1981 6 : CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1982 6 : CHECK(resultReToString->IsInt32());
1983 12 : CHECK_EQ(
1984 : 3, resultReToString->Int32Value(isolate->GetCurrentContext()).FromJust());
1985 :
1986 : // .toString() throws on non-RegExps that aren't RegExp.prototype
1987 : v8::Local<v8::Value> resultToStringError = CompileRun(
1988 : "var exception;"
1989 : "try { RegExp.prototype.toString.call(null) }"
1990 : "catch (e) { exception = e; }"
1991 : "exception");
1992 6 : CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1993 6 : CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1994 12 : CHECK(resultToStringError->IsObject());
1995 6 : }
1996 :
1997 18 : class UncachedExternalString
1998 : : public v8::String::ExternalOneByteStringResource {
1999 : public:
2000 30 : const char* data() const override { return "abcdefghijklmnopqrstuvwxyz"; }
2001 18 : size_t length() const override { return 26; }
2002 6 : bool IsCompressible() const override { return true; }
2003 : };
2004 :
2005 23724 : TEST(UncachedExternalString) {
2006 6 : v8::Isolate* isolate = CcTest::isolate();
2007 6 : v8::HandleScope scope(isolate);
2008 12 : LocalContext env;
2009 : v8::Local<v8::String> external =
2010 6 : v8::String::NewExternalOneByte(isolate, new UncachedExternalString())
2011 6 : .ToLocalChecked();
2012 6 : CHECK(v8::Utils::OpenHandle(*external)->map() ==
2013 : CcTest::i_isolate()->heap()->short_external_one_byte_string_map());
2014 6 : v8::Local<v8::Object> global = env->Global();
2015 24 : global->Set(env.local(), v8_str("external"), external).FromJust();
2016 : CompileRun("var re = /y(.)/; re.test('ab');");
2017 12 : ExpectString("external.substring(1).match(re)[1]", "z");
2018 6 : }
2019 :
2020 : } // namespace test_regexp
2021 : } // namespace internal
2022 71154 : } // namespace v8
|