Line data Source code
1 : // Copyright 2012 the V8 project authors. All rights reserved.
2 : // Redistribution and use in source and binary forms, with or without
3 : // modification, are permitted provided that the following conditions are
4 : // met:
5 : //
6 : // * Redistributions of source code must retain the above copyright
7 : // notice, this list of conditions and the following disclaimer.
8 : // * Redistributions in binary form must reproduce the above
9 : // copyright notice, this list of conditions and the following
10 : // disclaimer in the documentation and/or other materials provided
11 : // with the distribution.
12 : // * Neither the name of Google Inc. nor the names of its
13 : // contributors may be used to endorse or promote products derived
14 : // from this software without specific prior written permission.
15 : //
16 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 :
28 : #include <cstdlib>
29 : #include <memory>
30 : #include <sstream>
31 :
32 : #include "include/v8.h"
33 : #include "src/api-inl.h"
34 : #include "src/assembler-arch.h"
35 : #include "src/ast/ast.h"
36 : #include "src/char-predicates-inl.h"
37 : #include "src/objects-inl.h"
38 : #include "src/ostreams.h"
39 : #include "src/regexp/jsregexp.h"
40 : #include "src/regexp/regexp-macro-assembler-irregexp.h"
41 : #include "src/regexp/regexp-macro-assembler.h"
42 : #include "src/regexp/regexp-parser.h"
43 : #include "src/splay-tree-inl.h"
44 : #include "src/string-stream.h"
45 : #include "src/unicode-inl.h"
46 : #include "src/v8.h"
47 :
48 : #ifdef V8_INTERPRETED_REGEXP
49 : #include "src/regexp/interpreter-irregexp.h"
50 : #else // V8_INTERPRETED_REGEXP
51 : #include "src/macro-assembler.h"
52 : #if V8_TARGET_ARCH_ARM
53 : #include "src/regexp/arm/regexp-macro-assembler-arm.h"
54 : #endif
55 : #if V8_TARGET_ARCH_ARM64
56 : #include "src/regexp/arm64/regexp-macro-assembler-arm64.h"
57 : #endif
58 : #if V8_TARGET_ARCH_S390
59 : #include "src/regexp/s390/regexp-macro-assembler-s390.h"
60 : #endif
61 : #if V8_TARGET_ARCH_PPC
62 : #include "src/regexp/ppc/regexp-macro-assembler-ppc.h"
63 : #endif
64 : #if V8_TARGET_ARCH_MIPS
65 : #include "src/regexp/mips/regexp-macro-assembler-mips.h"
66 : #endif
67 : #if V8_TARGET_ARCH_MIPS64
68 : #include "src/regexp/mips64/regexp-macro-assembler-mips64.h"
69 : #endif
70 : #if V8_TARGET_ARCH_X64
71 : #include "src/regexp/x64/regexp-macro-assembler-x64.h"
72 : #endif
73 : #if V8_TARGET_ARCH_IA32
74 : #include "src/regexp/ia32/regexp-macro-assembler-ia32.h"
75 : #endif
76 : #endif // V8_INTERPRETED_REGEXP
77 : #include "test/cctest/cctest.h"
78 :
79 : namespace v8 {
80 : namespace internal {
81 : namespace test_regexp {
82 :
83 5 : static bool CheckParse(const char* input) {
84 5 : v8::HandleScope scope(CcTest::isolate());
85 10 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
86 5 : FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
87 : RegExpCompileData result;
88 : return v8::internal::RegExpParser::ParseRegExp(
89 10 : CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result);
90 : }
91 :
92 :
93 765 : static void CheckParseEq(const char* input, const char* expected,
94 : bool unicode = false) {
95 765 : v8::HandleScope scope(CcTest::isolate());
96 1530 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
97 765 : FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
98 : RegExpCompileData result;
99 : JSRegExp::Flags flags = JSRegExp::kNone;
100 765 : if (unicode) flags |= JSRegExp::kUnicode;
101 765 : CHECK(v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
102 : &reader, flags, &result));
103 765 : CHECK_NOT_NULL(result.tree);
104 765 : CHECK(result.error.is_null());
105 1530 : std::ostringstream os;
106 765 : result.tree->Print(os, &zone);
107 1530 : if (strcmp(expected, os.str().c_str()) != 0) {
108 0 : printf("%s | %s\n", expected, os.str().c_str());
109 : }
110 2295 : CHECK_EQ(0, strcmp(expected, os.str().c_str()));
111 765 : }
112 :
113 :
114 225 : static bool CheckSimple(const char* input) {
115 225 : v8::HandleScope scope(CcTest::isolate());
116 450 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
117 225 : FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
118 : RegExpCompileData result;
119 225 : CHECK(v8::internal::RegExpParser::ParseRegExp(
120 : CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
121 225 : CHECK_NOT_NULL(result.tree);
122 225 : CHECK(result.error.is_null());
123 450 : return result.simple;
124 : }
125 :
126 : struct MinMaxPair {
127 : int min_match;
128 : int max_match;
129 : };
130 :
131 :
132 240 : static MinMaxPair CheckMinMaxMatch(const char* input) {
133 240 : v8::HandleScope scope(CcTest::isolate());
134 480 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
135 240 : FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
136 : RegExpCompileData result;
137 240 : CHECK(v8::internal::RegExpParser::ParseRegExp(
138 : CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
139 240 : CHECK_NOT_NULL(result.tree);
140 240 : CHECK(result.error.is_null());
141 240 : int min_match = result.tree->min_match();
142 240 : int max_match = result.tree->max_match();
143 : MinMaxPair pair = { min_match, max_match };
144 480 : return pair;
145 : }
146 :
147 :
// Assertion helpers built on CheckParse / CheckSimple / CheckMinMaxMatch.
// Each macro expands to exactly one statement and expects the caller to
// write the terminating semicolon, so they are safe in unbraced if/else.
//
// CHECK_PARSE_ERROR(input)      - |input| must be rejected by the parser.
// CHECK_SIMPLE(input, simple)   - the parser's `simple` flag must equal
//                                 |simple|.
// CHECK_MIN_MAX(input, min, max) - the parsed tree's min/max match lengths
//                                 must equal |min| and |max|.
#define CHECK_PARSE_ERROR(input) CHECK(!CheckParse(input))
#define CHECK_SIMPLE(input, simple) CHECK_EQ(simple, CheckSimple(input))
#define CHECK_MIN_MAX(input, min, max)                \
  do {                                                \
    MinMaxPair min_max = CheckMinMaxMatch(input);     \
    CHECK_EQ(min, min_max.min_match);                 \
    CHECK_EQ(max, min_max.max_match);                 \
  } while (false)
155 :
// Exercises the regexp parser on a broad set of patterns. Four kinds of
// checks are used:
//  - CHECK_PARSE_ERROR: the pattern must be rejected.
//  - CheckParseEq: the printed parse tree must equal the expected string
//    (a trailing `true` argument parses with the unicode flag set).
//  - CHECK_SIMPLE: the `simple` flag stored by the parser must match.
//  - CHECK_MIN_MAX: the tree's min_match()/max_match() must match.
156 28342 : TEST(RegExpParser) {
157 5 : CHECK_PARSE_ERROR("?");
158 :
// Printed parse trees for patterns parsed without flags.
159 5 : CheckParseEq("abc", "'abc'");
160 5 : CheckParseEq("", "%");
161 5 : CheckParseEq("abc|def", "(| 'abc' 'def')");
162 5 : CheckParseEq("abc|def|ghi", "(| 'abc' 'def' 'ghi')");
163 5 : CheckParseEq("^xxx$", "(: @^i 'xxx' @$i)");
164 5 : CheckParseEq("ab\\b\\d\\bcd", "(: 'ab' @b [0-9] @b 'cd')");
165 5 : CheckParseEq("\\w|\\d", "(| [0-9 A-Z _ a-z] [0-9])");
166 5 : CheckParseEq("a*", "(# 0 - g 'a')");
167 5 : CheckParseEq("a*?", "(# 0 - n 'a')");
168 5 : CheckParseEq("abc+", "(: 'ab' (# 1 - g 'c'))");
169 5 : CheckParseEq("abc+?", "(: 'ab' (# 1 - n 'c'))");
170 5 : CheckParseEq("xyz?", "(: 'xy' (# 0 1 g 'z'))");
171 5 : CheckParseEq("xyz??", "(: 'xy' (# 0 1 n 'z'))");
172 5 : CheckParseEq("xyz{0,1}", "(: 'xy' (# 0 1 g 'z'))");
173 5 : CheckParseEq("xyz{0,1}?", "(: 'xy' (# 0 1 n 'z'))");
174 5 : CheckParseEq("xyz{93}", "(: 'xy' (# 93 93 g 'z'))");
175 5 : CheckParseEq("xyz{93}?", "(: 'xy' (# 93 93 n 'z'))");
176 5 : CheckParseEq("xyz{1,32}", "(: 'xy' (# 1 32 g 'z'))");
177 5 : CheckParseEq("xyz{1,32}?", "(: 'xy' (# 1 32 n 'z'))");
178 5 : CheckParseEq("xyz{1,}", "(: 'xy' (# 1 - g 'z'))");
179 5 : CheckParseEq("xyz{1,}?", "(: 'xy' (# 1 - n 'z'))");
180 5 : CheckParseEq("a\\fb\\nc\\rd\\te\\vf", "'a\\x0cb\\x0ac\\x0dd\\x09e\\x0bf'");
181 5 : CheckParseEq("a\\nb\\bc", "(: 'a\\x0ab' @b 'c')");
182 5 : CheckParseEq("(?:foo)", "(?: 'foo')");
183 5 : CheckParseEq("(?: foo )", "(?: ' foo ')");
184 5 : CheckParseEq("(foo|bar|baz)", "(^ (| 'foo' 'bar' 'baz'))");
185 5 : CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
186 5 : CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
187 5 : CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
188 5 : CheckParseEq("foo(?<=bar)baz", "(: 'foo' (<- + 'bar') 'baz')");
189 5 : CheckParseEq("foo(?<!bar)baz", "(: 'foo' (<- - 'bar') 'baz')");
190 5 : CheckParseEq("()", "(^ %)");
191 5 : CheckParseEq("(?=)", "(-> + %)");
192 5 : CheckParseEq("[]", "^[\\x00-\\u{10ffff}]"); // Doesn't compile on windows
193 5 : CheckParseEq("[^]", "[\\x00-\\u{10ffff}]"); // \uffff isn't in codepage 1252
194 5 : CheckParseEq("[x]", "[x]");
195 5 : CheckParseEq("[xyz]", "[x y z]");
196 5 : CheckParseEq("[a-zA-Z0-9]", "[a-z A-Z 0-9]");
197 5 : CheckParseEq("[-123]", "[- 1 2 3]");
198 5 : CheckParseEq("[^123]", "^[1 2 3]");
199 5 : CheckParseEq("]", "']'");
200 5 : CheckParseEq("}", "'}'");
201 5 : CheckParseEq("[a-b-c]", "[a-b - c]");
202 5 : CheckParseEq("[\\d]", "[0-9]");
203 5 : CheckParseEq("[x\\dz]", "[x 0-9 z]");
204 5 : CheckParseEq("[\\d-z]", "[0-9 - z]");
205 5 : CheckParseEq("[\\d-\\d]", "[0-9 0-9 -]");
206 5 : CheckParseEq("[z-\\d]", "[0-9 z -]");
207 : // Control character outside character class.
208 5 : CheckParseEq("\\cj\\cJ\\ci\\cI\\ck\\cK", "'\\x0a\\x0a\\x09\\x09\\x0b\\x0b'");
209 5 : CheckParseEq("\\c!", "'\\c!'");
210 5 : CheckParseEq("\\c_", "'\\c_'");
211 5 : CheckParseEq("\\c~", "'\\c~'");
212 5 : CheckParseEq("\\c1", "'\\c1'");
213 : // Control character inside character class.
214 5 : CheckParseEq("[\\c!]", "[\\ c !]");
215 5 : CheckParseEq("[\\c_]", "[\\x1f]");
216 5 : CheckParseEq("[\\c~]", "[\\ c ~]");
217 5 : CheckParseEq("[\\ca]", "[\\x01]");
218 5 : CheckParseEq("[\\cz]", "[\\x1a]");
219 5 : CheckParseEq("[\\cA]", "[\\x01]");
220 5 : CheckParseEq("[\\cZ]", "[\\x1a]");
221 5 : CheckParseEq("[\\c1]", "[\\x11]");
222 :
// Escaped punctuation, numeric escapes and numbered back references.
223 5 : CheckParseEq("[a\\]c]", "[a ] c]");
224 5 : CheckParseEq("\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ", "'[]{}()%^# '");
225 5 : CheckParseEq("[\\[\\]\\{\\}\\(\\)\\%\\^\\#\\ ]", "[[ ] { } ( ) % ^ # ]");
226 5 : CheckParseEq("\\0", "'\\x00'");
227 5 : CheckParseEq("\\8", "'8'");
228 5 : CheckParseEq("\\9", "'9'");
229 5 : CheckParseEq("\\11", "'\\x09'");
230 5 : CheckParseEq("\\11a", "'\\x09a'");
231 5 : CheckParseEq("\\011", "'\\x09'");
232 5 : CheckParseEq("\\00011", "'\\x0011'");
233 5 : CheckParseEq("\\118", "'\\x098'");
234 5 : CheckParseEq("\\111", "'I'");
235 5 : CheckParseEq("\\1111", "'I1'");
236 5 : CheckParseEq("(x)(x)(x)\\1", "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))");
237 5 : CheckParseEq("(x)(x)(x)\\2", "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))");
238 5 : CheckParseEq("(x)(x)(x)\\3", "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))");
239 5 : CheckParseEq("(x)(x)(x)\\4", "(: (^ 'x') (^ 'x') (^ 'x') '\\x04')");
240 : CheckParseEq("(x)(x)(x)\\1*",
241 : "(: (^ 'x') (^ 'x') (^ 'x')"
242 5 : " (# 0 - g (<- 1)))");
243 : CheckParseEq("(x)(x)(x)\\2*",
244 : "(: (^ 'x') (^ 'x') (^ 'x')"
245 5 : " (# 0 - g (<- 2)))");
246 : CheckParseEq("(x)(x)(x)\\3*",
247 : "(: (^ 'x') (^ 'x') (^ 'x')"
248 5 : " (# 0 - g (<- 3)))");
249 : CheckParseEq("(x)(x)(x)\\4*",
250 : "(: (^ 'x') (^ 'x') (^ 'x')"
251 5 : " (# 0 - g '\\x04'))");
252 : CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\10",
253 : "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
254 5 : " (^ 'x') (^ 'x') (^ 'x') (^ 'x') (<- 10))");
255 : CheckParseEq("(x)(x)(x)(x)(x)(x)(x)(x)(x)(x)\\11",
256 : "(: (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x') (^ 'x')"
257 5 : " (^ 'x') (^ 'x') (^ 'x') (^ 'x') '\\x09')");
258 5 : CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
259 5 : CheckParseEq("(a\\1)", "(^ 'a')");
260 5 : CheckParseEq("(\\1a)", "(^ 'a')");
261 5 : CheckParseEq("(\\2)(\\1)", "(: (^ (<- 2)) (^ (<- 1)))");
262 5 : CheckParseEq("(?=a)?a", "'a'");
263 5 : CheckParseEq("(?=a){0,10}a", "'a'");
264 5 : CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
265 5 : CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
266 5 : CheckParseEq("(?!a)?a", "'a'");
267 5 : CheckParseEq("\\1(a)", "(: (<- 1) (^ 'a'))");
268 5 : CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
269 : CheckParseEq("(?!\\1(a\\1)\\1)\\1",
270 5 : "(: (-> - (: (<- 1) (^ 'a') (<- 1))) (<- 1))");
271 : CheckParseEq("\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1",
272 5 : "(: (<- 1) (<- 2) (^ (: 'a' (?: (^ 'b')) (<- 2))) (<- 1))");
273 : CheckParseEq("\\1\\2(a(?<=\\1(b\\1\\2))\\2)\\1",
274 5 : "(: (<- 1) (<- 2) (^ (: 'a' (<- + (^ 'b')) (<- 2))) (<- 1))");
275 5 : CheckParseEq("[\\0]", "[\\x00]");
276 5 : CheckParseEq("[\\11]", "[\\x09]");
277 5 : CheckParseEq("[\\11a]", "[\\x09 a]");
278 5 : CheckParseEq("[\\011]", "[\\x09]");
279 5 : CheckParseEq("[\\00011]", "[\\x00 1 1]");
280 5 : CheckParseEq("[\\118]", "[\\x09 8]");
281 5 : CheckParseEq("[\\111]", "[I]");
282 5 : CheckParseEq("[\\1111]", "[I 1]");
283 5 : CheckParseEq("\\x34", "'\x34'");
284 5 : CheckParseEq("\\x60", "'\x60'");
285 5 : CheckParseEq("\\x3z", "'x3z'");
286 5 : CheckParseEq("\\c", "'\\c'");
287 5 : CheckParseEq("\\u0034", "'\x34'");
288 5 : CheckParseEq("\\u003z", "'u003z'");
289 5 : CheckParseEq("foo[z]*", "(: 'foo' (# 0 - g [z]))");
290 5 : CheckParseEq("^^^$$$\\b\\b\\b\\b", "(: @^i @$i @b)");
291 5 : CheckParseEq("\\b\\b\\b\\b\\B\\B\\B\\B\\b\\b\\b\\b", "(: @b @B @b)");
292 5 : CheckParseEq("\\b\\B\\b", "(: @b @B @b)");
293 :
294 : // Unicode regexps
295 5 : CheckParseEq("\\u{12345}", "'\\ud808\\udf45'", true);
296 : CheckParseEq("\\u{12345}\\u{23456}", "(! '\\ud808\\udf45' '\\ud84d\\udc56')",
297 5 : true);
298 : CheckParseEq("\\u{12345}|\\u{23456}", "(| '\\ud808\\udf45' '\\ud84d\\udc56')",
299 5 : true);
300 5 : CheckParseEq("\\u{12345}{3}", "(# 3 3 g '\\ud808\\udf45')", true);
301 5 : CheckParseEq("\\u{12345}*", "(# 0 - g '\\ud808\\udf45')", true);
302 :
303 5 : CheckParseEq("\\ud808\\udf45*", "(# 0 - g '\\ud808\\udf45')", true);
304 : CheckParseEq("[\\ud808\\udf45-\\ud809\\udccc]", "[\\u{012345}-\\u{0124cc}]",
305 5 : true);
306 :
// Value of the `simple` flag the parser records for each pattern; in this
// list only the single-literal pattern "a" is expected to be simple.
307 10 : CHECK_SIMPLE("", false);
308 5 : CHECK_SIMPLE("a", true);
309 10 : CHECK_SIMPLE("a|b", false);
310 10 : CHECK_SIMPLE("a\\n", false);
311 10 : CHECK_SIMPLE("^a", false);
312 10 : CHECK_SIMPLE("a$", false);
313 10 : CHECK_SIMPLE("a\\b!", false);
314 10 : CHECK_SIMPLE("a\\Bb", false);
315 10 : CHECK_SIMPLE("a*", false);
316 10 : CHECK_SIMPLE("a*?", false);
317 10 : CHECK_SIMPLE("a?", false);
318 10 : CHECK_SIMPLE("a??", false);
319 10 : CHECK_SIMPLE("a{0,1}?", false);
320 10 : CHECK_SIMPLE("a{1,1}?", false);
321 10 : CHECK_SIMPLE("a{1,2}?", false);
322 10 : CHECK_SIMPLE("a+?", false);
323 10 : CHECK_SIMPLE("(a)", false);
324 10 : CHECK_SIMPLE("(a)\\1", false);
325 10 : CHECK_SIMPLE("(\\1a)", false);
326 10 : CHECK_SIMPLE("\\1(a)", false);
327 10 : CHECK_SIMPLE("a\\s", false);
328 10 : CHECK_SIMPLE("a\\S", false);
329 10 : CHECK_SIMPLE("a\\d", false);
330 10 : CHECK_SIMPLE("a\\D", false);
331 10 : CHECK_SIMPLE("a\\w", false);
332 10 : CHECK_SIMPLE("a\\W", false);
333 10 : CHECK_SIMPLE("a.", false);
334 10 : CHECK_SIMPLE("a\\q", false);
335 10 : CHECK_SIMPLE("a[a]", false);
336 10 : CHECK_SIMPLE("a[^a]", false);
337 10 : CHECK_SIMPLE("a[a-z]", false);
338 10 : CHECK_SIMPLE("a[\\q]", false);
339 10 : CHECK_SIMPLE("a(?:b)", false);
340 10 : CHECK_SIMPLE("a(?=b)", false);
341 10 : CHECK_SIMPLE("a(?!b)", false);
342 10 : CHECK_SIMPLE("\\x60", false);
343 10 : CHECK_SIMPLE("\\u0060", false);
344 10 : CHECK_SIMPLE("\\cA", false);
345 10 : CHECK_SIMPLE("\\q", false);
346 10 : CHECK_SIMPLE("\\1112", false);
347 10 : CHECK_SIMPLE("\\0", false);
348 10 : CHECK_SIMPLE("(a)\\1", false);
349 10 : CHECK_SIMPLE("(?=a)?a", false);
350 10 : CHECK_SIMPLE("(?!a)?a\\1", false);
351 10 : CHECK_SIMPLE("(?:(?=a))a\\1", false);
352 :
// Brace sequences that do not form a complete quantifier are expected to
// print as literal text.
353 5 : CheckParseEq("a{}", "'a{}'");
354 5 : CheckParseEq("a{,}", "'a{,}'");
355 5 : CheckParseEq("a{", "'a{'");
356 5 : CheckParseEq("a{z}", "'a{z}'");
357 5 : CheckParseEq("a{1z}", "'a{1z}'");
358 5 : CheckParseEq("a{12z}", "'a{12z}'");
359 5 : CheckParseEq("a{12,", "'a{12,'");
360 5 : CheckParseEq("a{12,3b", "'a{12,3b'");
361 5 : CheckParseEq("{}", "'{}'");
362 5 : CheckParseEq("{,}", "'{,}'");
363 5 : CheckParseEq("{", "'{'");
364 5 : CheckParseEq("{z}", "'{z}'");
365 5 : CheckParseEq("{1z}", "'{1z}'");
366 5 : CheckParseEq("{12z}", "'{12z}'");
367 5 : CheckParseEq("{12,", "'{12,'");
368 5 : CheckParseEq("{12,3b", "'{12,3b'");
369 :
// Minimum and maximum match lengths reported by the parsed tree;
// RegExpTree::kInfinity is the expected maximum for unbounded patterns.
370 5 : CHECK_MIN_MAX("a", 1, 1);
371 5 : CHECK_MIN_MAX("abc", 3, 3);
372 5 : CHECK_MIN_MAX("a[bc]d", 3, 3);
373 5 : CHECK_MIN_MAX("a|bc", 1, 2);
374 5 : CHECK_MIN_MAX("ab|c", 1, 2);
375 5 : CHECK_MIN_MAX("a||bc", 0, 2);
376 5 : CHECK_MIN_MAX("|", 0, 0);
377 5 : CHECK_MIN_MAX("(?:ab)", 2, 2);
378 5 : CHECK_MIN_MAX("(?:ab|cde)", 2, 3);
379 5 : CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
380 5 : CHECK_MIN_MAX("(ab)", 2, 2);
381 5 : CHECK_MIN_MAX("(ab|cde)", 2, 3);
382 5 : CHECK_MIN_MAX("(ab)\\1", 2, RegExpTree::kInfinity);
383 5 : CHECK_MIN_MAX("(ab|cde)\\1", 2, RegExpTree::kInfinity);
384 5 : CHECK_MIN_MAX("(?:ab)?", 0, 2);
385 5 : CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
386 5 : CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
387 5 : CHECK_MIN_MAX("a?", 0, 1);
388 5 : CHECK_MIN_MAX("a*", 0, RegExpTree::kInfinity);
389 5 : CHECK_MIN_MAX("a+", 1, RegExpTree::kInfinity);
390 5 : CHECK_MIN_MAX("a??", 0, 1);
391 5 : CHECK_MIN_MAX("a*?", 0, RegExpTree::kInfinity);
392 5 : CHECK_MIN_MAX("a+?", 1, RegExpTree::kInfinity);
393 5 : CHECK_MIN_MAX("(?:a?)?", 0, 1);
394 5 : CHECK_MIN_MAX("(?:a*)?", 0, RegExpTree::kInfinity);
395 5 : CHECK_MIN_MAX("(?:a+)?", 0, RegExpTree::kInfinity);
396 5 : CHECK_MIN_MAX("(?:a?)+", 0, RegExpTree::kInfinity);
397 5 : CHECK_MIN_MAX("(?:a*)+", 0, RegExpTree::kInfinity);
398 5 : CHECK_MIN_MAX("(?:a+)+", 1, RegExpTree::kInfinity);
399 5 : CHECK_MIN_MAX("(?:a?)*", 0, RegExpTree::kInfinity);
400 5 : CHECK_MIN_MAX("(?:a*)*", 0, RegExpTree::kInfinity);
401 5 : CHECK_MIN_MAX("(?:a+)*", 0, RegExpTree::kInfinity);
402 5 : CHECK_MIN_MAX("a{0}", 0, 0);
403 5 : CHECK_MIN_MAX("(?:a+){0}", 0, 0);
404 5 : CHECK_MIN_MAX("(?:a+){0,0}", 0, 0);
405 5 : CHECK_MIN_MAX("a*b", 1, RegExpTree::kInfinity);
406 5 : CHECK_MIN_MAX("a+b", 2, RegExpTree::kInfinity);
407 5 : CHECK_MIN_MAX("a*b|c", 1, RegExpTree::kInfinity);
408 5 : CHECK_MIN_MAX("a+b|c", 1, RegExpTree::kInfinity);
409 5 : CHECK_MIN_MAX("(?:a{5,1000000}){3,1000000}", 15, RegExpTree::kInfinity);
410 5 : CHECK_MIN_MAX("(?:ab){4,7}", 8, 14);
411 5 : CHECK_MIN_MAX("a\\bc", 2, 2);
412 5 : CHECK_MIN_MAX("a\\Bc", 2, 2);
413 5 : CHECK_MIN_MAX("a\\sc", 3, 3);
414 5 : CHECK_MIN_MAX("a\\Sc", 3, 3);
415 5 : CHECK_MIN_MAX("a(?=b)c", 2, 2);
416 5 : CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
417 5 : CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
418 :
// Named capture groups and \k<name> references, parsed with the unicode
// flag set; the expected trees use numbered back references.
419 : CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<a>",
420 5 : "(: (^ 'x') (^ 'x') (^ 'x') (<- 1))", true);
421 : CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<b>",
422 5 : "(: (^ 'x') (^ 'x') (^ 'x') (<- 2))", true);
423 : CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<c>",
424 5 : "(: (^ 'x') (^ 'x') (^ 'x') (<- 3))", true);
425 5 : CheckParseEq("(?<a>a)\\k<a>", "(: (^ 'a') (<- 1))", true);
426 5 : CheckParseEq("(?<a>a\\k<a>)", "(^ 'a')", true);
427 5 : CheckParseEq("(?<a>\\k<a>a)", "(^ 'a')", true);
428 5 : CheckParseEq("(?<a>\\k<b>)(?<b>\\k<a>)", "(: (^ (<- 2)) (^ (<- 1)))", true);
429 5 : CheckParseEq("\\k<a>(?<a>a)", "(: (<- 1) (^ 'a'))", true);
430 :
// Capture group names written with Unicode escapes.
431 5 : CheckParseEq("(?<\\u{03C0}>a)", "(^ 'a')", true);
432 5 : CheckParseEq("(?<\\u03C0>a)", "(^ 'a')", true);
433 5 : }
434 :
435 28342 : TEST(ParserRegression) {
436 5 : CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])");
437 5 : CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
438 5 : CheckParseEq("{", "'{'");
439 5 : CheckParseEq("a|", "(| 'a' %)");
440 5 : }
441 :
442 105 : static void ExpectError(const char* input, const char* expected,
443 : bool unicode = false) {
444 105 : Isolate* isolate = CcTest::i_isolate();
445 :
446 105 : v8::HandleScope scope(CcTest::isolate());
447 210 : Zone zone(isolate->allocator(), ZONE_NAME);
448 105 : FlatStringReader reader(isolate, CStrVector(input));
449 : RegExpCompileData result;
450 : JSRegExp::Flags flags = JSRegExp::kNone;
451 105 : if (unicode) flags |= JSRegExp::kUnicode;
452 105 : CHECK(!v8::internal::RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
453 : &result));
454 105 : CHECK_NULL(result.tree);
455 105 : CHECK(!result.error.is_null());
456 105 : std::unique_ptr<char[]> str = result.error->ToCString(ALLOW_NULLS);
457 210 : CHECK_EQ(0, strcmp(expected, str.get()));
458 105 : }
459 :
460 :
461 28342 : TEST(Errors) {
462 : const char* kEndBackslash = "\\ at end of pattern";
463 5 : ExpectError("\\", kEndBackslash);
464 : const char* kUnterminatedGroup = "Unterminated group";
465 5 : ExpectError("(foo", kUnterminatedGroup);
466 : const char* kInvalidGroup = "Invalid group";
467 5 : ExpectError("(?", kInvalidGroup);
468 : const char* kUnterminatedCharacterClass = "Unterminated character class";
469 5 : ExpectError("[", kUnterminatedCharacterClass);
470 5 : ExpectError("[a-", kUnterminatedCharacterClass);
471 : const char* kNothingToRepeat = "Nothing to repeat";
472 5 : ExpectError("*", kNothingToRepeat);
473 5 : ExpectError("?", kNothingToRepeat);
474 5 : ExpectError("+", kNothingToRepeat);
475 5 : ExpectError("{1}", kNothingToRepeat);
476 5 : ExpectError("{1,2}", kNothingToRepeat);
477 5 : ExpectError("{1,}", kNothingToRepeat);
478 :
479 : // Check that we don't allow more than kMaxCapture captures
480 : const int kMaxCaptures = 1 << 16; // Must match RegExpParser::kMaxCaptures.
481 : const char* kTooManyCaptures = "Too many captures";
482 5 : std::ostringstream os;
483 327690 : for (int i = 0; i <= kMaxCaptures; i++) {
484 327685 : os << "()";
485 : }
486 10 : ExpectError(os.str().c_str(), kTooManyCaptures);
487 :
488 : const char* kInvalidCaptureName = "Invalid capture group name";
489 5 : ExpectError("(?<>.)", kInvalidCaptureName, true);
490 5 : ExpectError("(?<1>.)", kInvalidCaptureName, true);
491 5 : ExpectError("(?<_%>.)", kInvalidCaptureName, true);
492 5 : ExpectError("\\k<a", kInvalidCaptureName, true);
493 : const char* kDuplicateCaptureName = "Duplicate capture group name";
494 5 : ExpectError("(?<a>.)(?<a>.)", kDuplicateCaptureName, true);
495 : const char* kInvalidUnicodeEscape = "Invalid Unicode escape sequence";
496 5 : ExpectError("(?<\\u{FISK}", kInvalidUnicodeEscape, true);
497 : const char* kInvalidCaptureReferenced = "Invalid named capture referenced";
498 5 : ExpectError("\\k<a>", kInvalidCaptureReferenced, true);
499 5 : ExpectError("(?<b>)\\k<a>", kInvalidCaptureReferenced, true);
500 : const char* kInvalidNamedReference = "Invalid named reference";
501 5 : ExpectError("\\ka", kInvalidNamedReference, true);
502 5 : }
503 :
504 :
505 327680 : static bool IsDigit(uc16 c) {
506 655360 : return ('0' <= c && c <= '9');
507 : }
508 :
509 :
510 327680 : static bool NotDigit(uc16 c) {
511 327680 : return !IsDigit(c);
512 : }
513 :
514 :
515 327680 : static bool IsWhiteSpaceOrLineTerminator(uc16 c) {
516 : // According to ECMA 5.1, 15.10.2.12 the CharacterClassEscape \s includes
517 : // WhiteSpace (7.2) and LineTerminator (7.3) values.
518 983040 : return v8::internal::IsWhiteSpaceOrLineTerminator(c);
519 : }
520 :
521 :
522 327680 : static bool NotWhiteSpaceNorLineTermiantor(uc16 c) {
523 327680 : return !IsWhiteSpaceOrLineTerminator(c);
524 : }
525 :
526 :
527 327680 : static bool NotWord(uc16 c) {
528 327680 : return !IsRegExpWord(c);
529 : }
530 :
531 :
532 35 : static void TestCharacterClassEscapes(uc16 c, bool (pred)(uc16 c)) {
533 35 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
534 12481805 : ZoneList<CharacterRange>* ranges =
535 35 : new(&zone) ZoneList<CharacterRange>(2, &zone);
536 35 : CharacterRange::AddClassEscape(c, ranges, &zone);
537 2293795 : for (uc32 i = 0; i < (1 << 16); i++) {
538 : bool in_class = false;
539 23980550 : for (int j = 0; !in_class && j < ranges->length(); j++) {
540 22412810 : CharacterRange& range = ranges->at(j);
541 22412810 : in_class = (range.from() <= i && i <= range.to());
542 : }
543 2293760 : CHECK_EQ(pred(i), in_class);
544 35 : }
545 35 : }
546 :
547 :
548 28342 : TEST(CharacterClassEscapes) {
549 5 : TestCharacterClassEscapes('.', IsRegExpNewline);
550 5 : TestCharacterClassEscapes('d', IsDigit);
551 5 : TestCharacterClassEscapes('D', NotDigit);
552 5 : TestCharacterClassEscapes('s', IsWhiteSpaceOrLineTerminator);
553 5 : TestCharacterClassEscapes('S', NotWhiteSpaceNorLineTermiantor);
554 5 : TestCharacterClassEscapes('w', IsRegExpWord);
555 5 : TestCharacterClassEscapes('W', NotWord);
556 5 : }
557 :
558 :
559 5 : static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
560 : bool is_one_byte, Zone* zone) {
561 : Isolate* isolate = CcTest::i_isolate();
562 5 : FlatStringReader reader(isolate, CStrVector(input));
563 : RegExpCompileData compile_data;
564 : JSRegExp::Flags flags = JSRegExp::kNone;
565 5 : if (multiline) flags = JSRegExp::kMultiline;
566 5 : if (unicode) flags = JSRegExp::kUnicode;
567 5 : if (!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), zone,
568 5 : &reader, flags, &compile_data))
569 : return nullptr;
570 : Handle<String> pattern = isolate->factory()
571 : ->NewStringFromUtf8(CStrVector(input))
572 10 : .ToHandleChecked();
573 : Handle<String> sample_subject =
574 10 : isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
575 : RegExpEngine::Compile(isolate, zone, &compile_data, flags, pattern,
576 5 : sample_subject, is_one_byte);
577 5 : return compile_data.node;
578 : }
579 :
580 :
581 5 : static void Execute(const char* input, bool multiline, bool unicode,
582 : bool is_one_byte, bool dot_output = false) {
583 5 : v8::HandleScope scope(CcTest::isolate());
584 10 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
585 5 : RegExpNode* node = Compile(input, multiline, unicode, is_one_byte, &zone);
586 5 : USE(node);
587 : #ifdef DEBUG
588 : if (dot_output) {
589 : RegExpEngine::DotPrint(input, node, false);
590 : }
591 : #endif // DEBUG
592 5 : }
593 :
594 :
// Configuration type handed to ZoneSplayTree in the tests below: int keys
// and int values, with a three-way integer comparison.
class TestConfig {
 public:
  using Key = int;
  using Value = int;
  static const int kNoKey;
  static int NoValue() { return 0; }
  // Returns -1, 0 or 1 when |a| is less than, equal to, or greater than |b|.
  static inline int Compare(int a, int b) {
    if (a == b) return 0;
    return (a < b) ? -1 : 1;
  }
};


const int TestConfig::kNoKey = 0;
613 :
614 :
// Deterministic mixing function used to generate test data: multiplies |i|
// by 781 and |j| by 329 and XORs the products.
//
// The arithmetic is done on unsigned values so that large or negative
// arguments wrap instead of triggering signed-overflow undefined behaviour;
// for arguments whose products fit in an int the result is the same bit
// pattern the signed computation would produce.
static unsigned PseudoRandom(int i, int j) {
  const unsigned mixed_i = static_cast<unsigned>(i) * 781u;
  const unsigned mixed_j = static_cast<unsigned>(j) * 329u;
  return mixed_i ^ mixed_j;
}
618 :
619 :
620 28342 : TEST(SplayTreeSimple) {
621 : static const unsigned kLimit = 1000;
622 5 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
623 : ZoneSplayTree<TestConfig> tree(&zone);
624 : bool seen[kLimit];
625 5 : for (unsigned i = 0; i < kLimit; i++) seen[i] = false;
626 : #define CHECK_MAPS_EQUAL() do { \
627 : for (unsigned k = 0; k < kLimit; k++) \
628 : CHECK_EQ(seen[k], tree.Find(k, &loc)); \
629 : } while (false)
630 250 : for (int i = 0; i < 50; i++) {
631 815 : for (int j = 0; j < 50; j++) {
632 1065 : int next = PseudoRandom(i, j) % kLimit;
633 1065 : if (seen[next]) {
634 : // We've already seen this one. Check the value and remove
635 : // it.
636 : ZoneSplayTree<TestConfig>::Locator loc;
637 85 : CHECK(tree.Find(next, &loc));
638 85 : CHECK_EQ(next, loc.key());
639 85 : CHECK_EQ(3 * next, loc.value());
640 85 : tree.Remove(next);
641 85 : seen[next] = false;
642 85085 : CHECK_MAPS_EQUAL();
643 : } else {
644 : // Check that it wasn't there already and then add it.
645 : ZoneSplayTree<TestConfig>::Locator loc;
646 980 : CHECK(!tree.Find(next, &loc));
647 980 : CHECK(tree.Insert(next, &loc));
648 980 : CHECK_EQ(next, loc.key());
649 980 : loc.set_value(3 * next);
650 980 : seen[next] = true;
651 980980 : CHECK_MAPS_EQUAL();
652 : }
653 1065 : int val = PseudoRandom(j, i) % kLimit;
654 1065 : if (seen[val]) {
655 : ZoneSplayTree<TestConfig>::Locator loc;
656 135 : CHECK(tree.FindGreatestLessThan(val, &loc));
657 270 : CHECK_EQ(loc.key(), val);
658 : break;
659 : }
660 1860 : val = PseudoRandom(i + j, i - j) % kLimit;
661 930 : if (seen[val]) {
662 : ZoneSplayTree<TestConfig>::Locator loc;
663 115 : CHECK(tree.FindLeastGreaterThan(val, &loc));
664 230 : CHECK_EQ(loc.key(), val);
665 : break;
666 : }
667 : }
668 5 : }
669 5 : }
670 :
671 :
672 28342 : TEST(DispatchTableConstruction) {
673 : // Initialize test data.
674 : static const int kLimit = 1000;
675 : static const int kRangeCount = 8;
676 : static const int kRangeSize = 16;
677 : uc16 ranges[kRangeCount][2 * kRangeSize];
678 45 : for (int i = 0; i < kRangeCount; i++) {
679 40 : Vector<uc16> range(ranges[i], 2 * kRangeSize);
680 1320 : for (int j = 0; j < 2 * kRangeSize; j++) {
681 3840 : range[j] = PseudoRandom(i + 25, j + 87) % kLimit;
682 : }
683 : range.Sort();
684 1280 : for (int j = 1; j < 2 * kRangeSize; j++) {
685 3720 : CHECK(range[j-1] <= range[j]);
686 : }
687 : }
688 : // Enter test data into dispatch table.
689 5 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
690 : DispatchTable table(&zone);
691 45 : for (int i = 0; i < kRangeCount; i++) {
692 40 : uc16* range = ranges[i];
693 680 : for (int j = 0; j < 2 * kRangeSize; j += 2)
694 640 : table.AddRange(CharacterRange::Range(range[j], range[j + 1]), i, &zone);
695 : }
696 : // Check that the table looks as we would expect
697 5000 : for (int p = 0; p < kLimit; p++) {
698 5000 : OutSet* outs = table.Get(p);
699 45000 : for (int j = 0; j < kRangeCount; j++) {
700 40000 : uc16* range = ranges[j];
701 : bool is_on = false;
702 518600 : for (int k = 0; !is_on && (k < 2 * kRangeSize); k += 2)
703 478600 : is_on = (range[k] <= p && p <= range[k + 1]);
704 40000 : CHECK_EQ(is_on, outs->Get(j));
705 : }
706 5 : }
707 5 : }
708 :
709 :
710 : // Test of debug-only syntax.
711 : #ifdef DEBUG
712 :
713 : TEST(ParsePossessiveRepetition) {
714 : bool old_flag_value = FLAG_regexp_possessive_quantifier;
715 :
716 : // Enable possessive quantifier syntax.
717 : FLAG_regexp_possessive_quantifier = true;
718 :
719 : CheckParseEq("a*+", "(# 0 - p 'a')");
720 : CheckParseEq("a++", "(# 1 - p 'a')");
721 : CheckParseEq("a?+", "(# 0 1 p 'a')");
722 : CheckParseEq("a{10,20}+", "(# 10 20 p 'a')");
723 : CheckParseEq("za{10,20}+b", "(: 'z' (# 10 20 p 'a') 'b')");
724 :
725 : // Disable possessive quantifier syntax.
726 : FLAG_regexp_possessive_quantifier = false;
727 :
728 : CHECK_PARSE_ERROR("a*+");
729 : CHECK_PARSE_ERROR("a++");
730 : CHECK_PARSE_ERROR("a?+");
731 : CHECK_PARSE_ERROR("a{10,20}+");
732 : CHECK_PARSE_ERROR("a{10,20}+b");
733 :
734 : FLAG_regexp_possessive_quantifier = old_flag_value;
735 : }
736 :
737 : #endif
738 :
739 : // Tests of interpreter.
740 :
741 :
742 : #ifndef V8_INTERPRETED_REGEXP
743 :
// ArchRegExpMacroAssembler names the native regexp macro assembler class for
// the architecture this build targets.
#if V8_TARGET_ARCH_IA32
using ArchRegExpMacroAssembler = RegExpMacroAssemblerIA32;
#elif V8_TARGET_ARCH_X64
using ArchRegExpMacroAssembler = RegExpMacroAssemblerX64;
#elif V8_TARGET_ARCH_ARM
using ArchRegExpMacroAssembler = RegExpMacroAssemblerARM;
#elif V8_TARGET_ARCH_ARM64
using ArchRegExpMacroAssembler = RegExpMacroAssemblerARM64;
#elif V8_TARGET_ARCH_S390
using ArchRegExpMacroAssembler = RegExpMacroAssemblerS390;
#elif V8_TARGET_ARCH_PPC
using ArchRegExpMacroAssembler = RegExpMacroAssemblerPPC;
#elif V8_TARGET_ARCH_MIPS
using ArchRegExpMacroAssembler = RegExpMacroAssemblerMIPS;
#elif V8_TARGET_ARCH_MIPS64
// Same class name as the MIPS branch.
using ArchRegExpMacroAssembler = RegExpMacroAssemblerMIPS;
#elif V8_TARGET_ARCH_X87
using ArchRegExpMacroAssembler = RegExpMacroAssemblerX87;
#endif
763 :
764 : class ContextInitializer {
765 : public:
766 55 : ContextInitializer()
767 : : scope_(CcTest::isolate()),
768 110 : env_(v8::Context::New(CcTest::isolate())) {
769 55 : env_->Enter();
770 55 : }
771 55 : ~ContextInitializer() {
772 55 : env_->Exit();
773 : }
774 : private:
775 : v8::HandleScope scope_;
776 : v8::Local<v8::Context> env_;
777 : };
778 :
779 70 : static ArchRegExpMacroAssembler::Result Execute(Code code, String input,
780 : int start_offset,
781 : Address input_start,
782 : Address input_end,
783 : int* captures) {
784 : return NativeRegExpMacroAssembler::Execute(
785 : code, input, start_offset, reinterpret_cast<byte*>(input_start),
786 70 : reinterpret_cast<byte*>(input_end), captures, 0, CcTest::i_isolate());
787 : }
788 :
789 28342 : TEST(MacroAssemblerNativeSuccess) {
790 5 : v8::V8::Initialize();
791 5 : ContextInitializer initializer;
792 : Isolate* isolate = CcTest::i_isolate();
793 : Factory* factory = isolate->factory();
794 10 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
795 :
796 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
797 10 : 4);
798 :
799 5 : m.Succeed();
800 :
801 5 : Handle<String> source = factory->NewStringFromStaticChars("");
802 5 : Handle<Object> code_object = m.GetCode(source);
803 5 : Handle<Code> code = Handle<Code>::cast(code_object);
804 :
805 5 : int captures[4] = {42, 37, 87, 117};
806 5 : Handle<String> input = factory->NewStringFromStaticChars("foofoo");
807 5 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
808 : Address start_adr = seq_input->GetCharsAddress();
809 :
810 : NativeRegExpMacroAssembler::Result result =
811 : Execute(*code,
812 : *input,
813 : 0,
814 : start_adr,
815 : start_adr + seq_input->length(),
816 10 : captures);
817 :
818 5 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
819 5 : CHECK_EQ(-1, captures[0]);
820 5 : CHECK_EQ(-1, captures[1]);
821 5 : CHECK_EQ(-1, captures[2]);
822 5 : CHECK_EQ(-1, captures[3]);
823 5 : }
824 :
825 :
826 28342 : TEST(MacroAssemblerNativeSimple) {
827 5 : v8::V8::Initialize();
828 5 : ContextInitializer initializer;
829 : Isolate* isolate = CcTest::i_isolate();
830 : Factory* factory = isolate->factory();
831 10 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
832 :
833 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
834 10 : 4);
835 :
836 5 : Label fail, backtrack;
837 5 : m.PushBacktrack(&fail);
838 5 : m.CheckNotAtStart(0, nullptr);
839 5 : m.LoadCurrentCharacter(2, nullptr);
840 5 : m.CheckNotCharacter('o', nullptr);
841 5 : m.LoadCurrentCharacter(1, nullptr, false);
842 5 : m.CheckNotCharacter('o', nullptr);
843 5 : m.LoadCurrentCharacter(0, nullptr, false);
844 5 : m.CheckNotCharacter('f', nullptr);
845 5 : m.WriteCurrentPositionToRegister(0, 0);
846 5 : m.WriteCurrentPositionToRegister(1, 3);
847 5 : m.AdvanceCurrentPosition(3);
848 5 : m.PushBacktrack(&backtrack);
849 5 : m.Succeed();
850 5 : m.Bind(&backtrack);
851 5 : m.Backtrack();
852 5 : m.Bind(&fail);
853 5 : m.Fail();
854 :
855 5 : Handle<String> source = factory->NewStringFromStaticChars("^foo");
856 5 : Handle<Object> code_object = m.GetCode(source);
857 5 : Handle<Code> code = Handle<Code>::cast(code_object);
858 :
859 5 : int captures[4] = {42, 37, 87, 117};
860 5 : Handle<String> input = factory->NewStringFromStaticChars("foofoo");
861 5 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
862 : Address start_adr = seq_input->GetCharsAddress();
863 :
864 : NativeRegExpMacroAssembler::Result result =
865 : Execute(*code,
866 : *input,
867 : 0,
868 : start_adr,
869 : start_adr + input->length(),
870 10 : captures);
871 :
872 5 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
873 5 : CHECK_EQ(0, captures[0]);
874 5 : CHECK_EQ(3, captures[1]);
875 5 : CHECK_EQ(-1, captures[2]);
876 5 : CHECK_EQ(-1, captures[3]);
877 :
878 5 : input = factory->NewStringFromStaticChars("barbarbar");
879 5 : seq_input = Handle<SeqOneByteString>::cast(input);
880 : start_adr = seq_input->GetCharsAddress();
881 :
882 : result = Execute(*code,
883 : *input,
884 : 0,
885 : start_adr,
886 : start_adr + input->length(),
887 10 : captures);
888 :
889 5 : CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
890 5 : }
891 :
892 :
893 28342 : TEST(MacroAssemblerNativeSimpleUC16) {
894 5 : v8::V8::Initialize();
895 5 : ContextInitializer initializer;
896 : Isolate* isolate = CcTest::i_isolate();
897 : Factory* factory = isolate->factory();
898 10 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
899 :
900 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
901 10 : 4);
902 :
903 5 : Label fail, backtrack;
904 5 : m.PushBacktrack(&fail);
905 5 : m.CheckNotAtStart(0, nullptr);
906 5 : m.LoadCurrentCharacter(2, nullptr);
907 5 : m.CheckNotCharacter('o', nullptr);
908 5 : m.LoadCurrentCharacter(1, nullptr, false);
909 5 : m.CheckNotCharacter('o', nullptr);
910 5 : m.LoadCurrentCharacter(0, nullptr, false);
911 5 : m.CheckNotCharacter('f', nullptr);
912 5 : m.WriteCurrentPositionToRegister(0, 0);
913 5 : m.WriteCurrentPositionToRegister(1, 3);
914 5 : m.AdvanceCurrentPosition(3);
915 5 : m.PushBacktrack(&backtrack);
916 5 : m.Succeed();
917 5 : m.Bind(&backtrack);
918 5 : m.Backtrack();
919 5 : m.Bind(&fail);
920 5 : m.Fail();
921 :
922 5 : Handle<String> source = factory->NewStringFromStaticChars("^foo");
923 5 : Handle<Object> code_object = m.GetCode(source);
924 5 : Handle<Code> code = Handle<Code>::cast(code_object);
925 :
926 5 : int captures[4] = {42, 37, 87, 117};
927 : const uc16 input_data[6] = {'f', 'o', 'o', 'f', 'o',
928 5 : static_cast<uc16>(0x2603)};
929 : Handle<String> input = factory->NewStringFromTwoByte(
930 10 : Vector<const uc16>(input_data, 6)).ToHandleChecked();
931 5 : Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
932 : Address start_adr = seq_input->GetCharsAddress();
933 :
934 : NativeRegExpMacroAssembler::Result result =
935 : Execute(*code,
936 : *input,
937 : 0,
938 : start_adr,
939 : start_adr + input->length(),
940 10 : captures);
941 :
942 5 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
943 5 : CHECK_EQ(0, captures[0]);
944 5 : CHECK_EQ(3, captures[1]);
945 5 : CHECK_EQ(-1, captures[2]);
946 5 : CHECK_EQ(-1, captures[3]);
947 :
948 : const uc16 input_data2[9] = {'b', 'a', 'r', 'b', 'a', 'r', 'b', 'a',
949 5 : static_cast<uc16>(0x2603)};
950 : input = factory->NewStringFromTwoByte(
951 10 : Vector<const uc16>(input_data2, 9)).ToHandleChecked();
952 5 : seq_input = Handle<SeqTwoByteString>::cast(input);
953 : start_adr = seq_input->GetCharsAddress();
954 :
955 : result = Execute(*code,
956 : *input,
957 : 0,
958 : start_adr,
959 5 : start_adr + input->length() * 2,
960 10 : captures);
961 :
962 5 : CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
963 5 : }
964 :
965 :
966 28342 : TEST(MacroAssemblerNativeBacktrack) {
967 5 : v8::V8::Initialize();
968 5 : ContextInitializer initializer;
969 : Isolate* isolate = CcTest::i_isolate();
970 : Factory* factory = isolate->factory();
971 10 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
972 :
973 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
974 10 : 0);
975 :
976 5 : Label fail;
977 5 : Label backtrack;
978 5 : m.LoadCurrentCharacter(10, &fail);
979 5 : m.Succeed();
980 5 : m.Bind(&fail);
981 5 : m.PushBacktrack(&backtrack);
982 5 : m.LoadCurrentCharacter(10, nullptr);
983 5 : m.Succeed();
984 5 : m.Bind(&backtrack);
985 5 : m.Fail();
986 :
987 5 : Handle<String> source = factory->NewStringFromStaticChars("..........");
988 5 : Handle<Object> code_object = m.GetCode(source);
989 5 : Handle<Code> code = Handle<Code>::cast(code_object);
990 :
991 5 : Handle<String> input = factory->NewStringFromStaticChars("foofoo");
992 5 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
993 : Address start_adr = seq_input->GetCharsAddress();
994 :
995 : NativeRegExpMacroAssembler::Result result = Execute(
996 10 : *code, *input, 0, start_adr, start_adr + input->length(), nullptr);
997 :
998 5 : CHECK_EQ(NativeRegExpMacroAssembler::FAILURE, result);
999 5 : }
1000 :
1001 :
1002 28342 : TEST(MacroAssemblerNativeBackReferenceLATIN1) {
1003 5 : v8::V8::Initialize();
1004 5 : ContextInitializer initializer;
1005 : Isolate* isolate = CcTest::i_isolate();
1006 : Factory* factory = isolate->factory();
1007 10 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1008 :
1009 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1010 10 : 4);
1011 :
1012 5 : m.WriteCurrentPositionToRegister(0, 0);
1013 5 : m.AdvanceCurrentPosition(2);
1014 5 : m.WriteCurrentPositionToRegister(1, 0);
1015 5 : Label nomatch;
1016 5 : m.CheckNotBackReference(0, false, &nomatch);
1017 5 : m.Fail();
1018 5 : m.Bind(&nomatch);
1019 5 : m.AdvanceCurrentPosition(2);
1020 5 : Label missing_match;
1021 5 : m.CheckNotBackReference(0, false, &missing_match);
1022 5 : m.WriteCurrentPositionToRegister(2, 0);
1023 5 : m.Succeed();
1024 5 : m.Bind(&missing_match);
1025 5 : m.Fail();
1026 :
1027 5 : Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
1028 5 : Handle<Object> code_object = m.GetCode(source);
1029 5 : Handle<Code> code = Handle<Code>::cast(code_object);
1030 :
1031 5 : Handle<String> input = factory->NewStringFromStaticChars("fooofo");
1032 5 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1033 : Address start_adr = seq_input->GetCharsAddress();
1034 :
1035 : int output[4];
1036 : NativeRegExpMacroAssembler::Result result =
1037 : Execute(*code,
1038 : *input,
1039 : 0,
1040 : start_adr,
1041 : start_adr + input->length(),
1042 10 : output);
1043 :
1044 5 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1045 5 : CHECK_EQ(0, output[0]);
1046 5 : CHECK_EQ(2, output[1]);
1047 5 : CHECK_EQ(6, output[2]);
1048 5 : CHECK_EQ(-1, output[3]);
1049 5 : }
1050 :
1051 :
1052 28342 : TEST(MacroAssemblerNativeBackReferenceUC16) {
1053 5 : v8::V8::Initialize();
1054 5 : ContextInitializer initializer;
1055 : Isolate* isolate = CcTest::i_isolate();
1056 : Factory* factory = isolate->factory();
1057 10 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1058 :
1059 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::UC16,
1060 10 : 4);
1061 :
1062 5 : m.WriteCurrentPositionToRegister(0, 0);
1063 5 : m.AdvanceCurrentPosition(2);
1064 5 : m.WriteCurrentPositionToRegister(1, 0);
1065 5 : Label nomatch;
1066 5 : m.CheckNotBackReference(0, false, &nomatch);
1067 5 : m.Fail();
1068 5 : m.Bind(&nomatch);
1069 5 : m.AdvanceCurrentPosition(2);
1070 5 : Label missing_match;
1071 5 : m.CheckNotBackReference(0, false, &missing_match);
1072 5 : m.WriteCurrentPositionToRegister(2, 0);
1073 5 : m.Succeed();
1074 5 : m.Bind(&missing_match);
1075 5 : m.Fail();
1076 :
1077 5 : Handle<String> source = factory->NewStringFromStaticChars("^(..)..\1");
1078 5 : Handle<Object> code_object = m.GetCode(source);
1079 5 : Handle<Code> code = Handle<Code>::cast(code_object);
1080 :
1081 5 : const uc16 input_data[6] = {'f', 0x2028, 'o', 'o', 'f', 0x2028};
1082 : Handle<String> input = factory->NewStringFromTwoByte(
1083 10 : Vector<const uc16>(input_data, 6)).ToHandleChecked();
1084 5 : Handle<SeqTwoByteString> seq_input = Handle<SeqTwoByteString>::cast(input);
1085 : Address start_adr = seq_input->GetCharsAddress();
1086 :
1087 : int output[4];
1088 : NativeRegExpMacroAssembler::Result result =
1089 : Execute(*code,
1090 : *input,
1091 : 0,
1092 : start_adr,
1093 5 : start_adr + input->length() * 2,
1094 10 : output);
1095 :
1096 5 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1097 5 : CHECK_EQ(0, output[0]);
1098 5 : CHECK_EQ(2, output[1]);
1099 5 : CHECK_EQ(6, output[2]);
1100 5 : CHECK_EQ(-1, output[3]);
1101 5 : }
1102 :
1103 :
1104 :
1105 28342 : TEST(MacroAssemblernativeAtStart) {
1106 5 : v8::V8::Initialize();
1107 5 : ContextInitializer initializer;
1108 : Isolate* isolate = CcTest::i_isolate();
1109 : Factory* factory = isolate->factory();
1110 10 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1111 :
1112 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1113 10 : 0);
1114 :
1115 5 : Label not_at_start, newline, fail;
1116 5 : m.CheckNotAtStart(0, ¬_at_start);
1117 : // Check that prevchar = '\n' and current = 'f'.
1118 5 : m.CheckCharacter('\n', &newline);
1119 5 : m.Bind(&fail);
1120 5 : m.Fail();
1121 5 : m.Bind(&newline);
1122 5 : m.LoadCurrentCharacter(0, &fail);
1123 5 : m.CheckNotCharacter('f', &fail);
1124 5 : m.Succeed();
1125 :
1126 5 : m.Bind(¬_at_start);
1127 : // Check that prevchar = 'o' and current = 'b'.
1128 5 : Label prevo;
1129 5 : m.CheckCharacter('o', &prevo);
1130 5 : m.Fail();
1131 5 : m.Bind(&prevo);
1132 5 : m.LoadCurrentCharacter(0, &fail);
1133 5 : m.CheckNotCharacter('b', &fail);
1134 5 : m.Succeed();
1135 :
1136 5 : Handle<String> source = factory->NewStringFromStaticChars("(^f|ob)");
1137 5 : Handle<Object> code_object = m.GetCode(source);
1138 5 : Handle<Code> code = Handle<Code>::cast(code_object);
1139 :
1140 5 : Handle<String> input = factory->NewStringFromStaticChars("foobar");
1141 5 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1142 : Address start_adr = seq_input->GetCharsAddress();
1143 :
1144 : NativeRegExpMacroAssembler::Result result = Execute(
1145 10 : *code, *input, 0, start_adr, start_adr + input->length(), nullptr);
1146 :
1147 5 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1148 :
1149 : result = Execute(*code, *input, 3, start_adr + 3, start_adr + input->length(),
1150 10 : nullptr);
1151 :
1152 5 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1153 5 : }
1154 :
1155 :
1156 28342 : TEST(MacroAssemblerNativeBackRefNoCase) {
1157 5 : v8::V8::Initialize();
1158 5 : ContextInitializer initializer;
1159 : Isolate* isolate = CcTest::i_isolate();
1160 : Factory* factory = isolate->factory();
1161 10 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1162 :
1163 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1164 10 : 4);
1165 :
1166 5 : Label fail, succ;
1167 :
1168 5 : m.WriteCurrentPositionToRegister(0, 0);
1169 5 : m.WriteCurrentPositionToRegister(2, 0);
1170 5 : m.AdvanceCurrentPosition(3);
1171 5 : m.WriteCurrentPositionToRegister(3, 0);
1172 5 : m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail); // Match "AbC".
1173 5 : m.CheckNotBackReferenceIgnoreCase(2, false, false, &fail); // Match "ABC".
1174 5 : Label expected_fail;
1175 5 : m.CheckNotBackReferenceIgnoreCase(2, false, false, &expected_fail);
1176 5 : m.Bind(&fail);
1177 5 : m.Fail();
1178 :
1179 5 : m.Bind(&expected_fail);
1180 5 : m.AdvanceCurrentPosition(3); // Skip "xYz"
1181 5 : m.CheckNotBackReferenceIgnoreCase(2, false, false, &succ);
1182 5 : m.Fail();
1183 :
1184 5 : m.Bind(&succ);
1185 5 : m.WriteCurrentPositionToRegister(1, 0);
1186 5 : m.Succeed();
1187 :
1188 : Handle<String> source =
1189 5 : factory->NewStringFromStaticChars("^(abc)\1\1(?!\1)...(?!\1)");
1190 5 : Handle<Object> code_object = m.GetCode(source);
1191 5 : Handle<Code> code = Handle<Code>::cast(code_object);
1192 :
1193 5 : Handle<String> input = factory->NewStringFromStaticChars("aBcAbCABCxYzab");
1194 5 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1195 : Address start_adr = seq_input->GetCharsAddress();
1196 :
1197 : int output[4];
1198 : NativeRegExpMacroAssembler::Result result =
1199 : Execute(*code,
1200 : *input,
1201 : 0,
1202 : start_adr,
1203 : start_adr + input->length(),
1204 10 : output);
1205 :
1206 5 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1207 5 : CHECK_EQ(0, output[0]);
1208 5 : CHECK_EQ(12, output[1]);
1209 5 : CHECK_EQ(0, output[2]);
1210 5 : CHECK_EQ(3, output[3]);
1211 5 : }
1212 :
1213 :
1214 :
1215 28342 : TEST(MacroAssemblerNativeRegisters) {
1216 5 : v8::V8::Initialize();
1217 5 : ContextInitializer initializer;
1218 : Isolate* isolate = CcTest::i_isolate();
1219 : Factory* factory = isolate->factory();
1220 10 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1221 :
1222 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1223 10 : 6);
1224 :
1225 5 : uc16 foo_chars[3] = {'f', 'o', 'o'};
1226 : Vector<const uc16> foo(foo_chars, 3);
1227 :
1228 : enum registers { out1, out2, out3, out4, out5, out6, sp, loop_cnt };
1229 5 : Label fail;
1230 5 : Label backtrack;
1231 5 : m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
1232 5 : m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1233 5 : m.PushBacktrack(&backtrack);
1234 5 : m.WriteStackPointerToRegister(sp);
1235 : // Fill stack and registers
1236 5 : m.AdvanceCurrentPosition(2);
1237 5 : m.WriteCurrentPositionToRegister(out1, 0);
1238 5 : m.PushRegister(out1, RegExpMacroAssembler::kNoStackLimitCheck);
1239 5 : m.PushBacktrack(&fail);
1240 : // Drop backtrack stack frames.
1241 5 : m.ReadStackPointerFromRegister(sp);
1242 : // And take the first backtrack (to &backtrack)
1243 5 : m.Backtrack();
1244 :
1245 5 : m.PushCurrentPosition();
1246 5 : m.AdvanceCurrentPosition(2);
1247 5 : m.PopCurrentPosition();
1248 :
1249 5 : m.Bind(&backtrack);
1250 5 : m.PopRegister(out1);
1251 5 : m.ReadCurrentPositionFromRegister(out1);
1252 5 : m.AdvanceCurrentPosition(3);
1253 5 : m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
1254 :
1255 5 : Label loop;
1256 5 : m.SetRegister(loop_cnt, 0); // loop counter
1257 5 : m.Bind(&loop);
1258 5 : m.AdvanceRegister(loop_cnt, 1);
1259 5 : m.AdvanceCurrentPosition(1);
1260 5 : m.IfRegisterLT(loop_cnt, 3, &loop);
1261 5 : m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
1262 :
1263 5 : Label loop2;
1264 5 : m.SetRegister(loop_cnt, 2); // loop counter
1265 5 : m.Bind(&loop2);
1266 5 : m.AdvanceRegister(loop_cnt, -1);
1267 5 : m.AdvanceCurrentPosition(1);
1268 5 : m.IfRegisterGE(loop_cnt, 0, &loop2);
1269 5 : m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
1270 :
1271 5 : Label loop3;
1272 5 : Label exit_loop3;
1273 5 : m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1274 5 : m.PushRegister(out4, RegExpMacroAssembler::kNoStackLimitCheck);
1275 5 : m.ReadCurrentPositionFromRegister(out3);
1276 5 : m.Bind(&loop3);
1277 5 : m.AdvanceCurrentPosition(1);
1278 5 : m.CheckGreedyLoop(&exit_loop3);
1279 5 : m.GoTo(&loop3);
1280 5 : m.Bind(&exit_loop3);
1281 5 : m.PopCurrentPosition();
1282 5 : m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9,-1]
1283 :
1284 5 : m.Succeed();
1285 :
1286 5 : m.Bind(&fail);
1287 5 : m.Fail();
1288 :
1289 5 : Handle<String> source = factory->NewStringFromStaticChars("<loop test>");
1290 5 : Handle<Object> code_object = m.GetCode(source);
1291 5 : Handle<Code> code = Handle<Code>::cast(code_object);
1292 :
1293 : // String long enough for test (content doesn't matter).
1294 5 : Handle<String> input = factory->NewStringFromStaticChars("foofoofoofoofoo");
1295 5 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1296 : Address start_adr = seq_input->GetCharsAddress();
1297 :
1298 : int output[6];
1299 : NativeRegExpMacroAssembler::Result result =
1300 : Execute(*code,
1301 : *input,
1302 : 0,
1303 : start_adr,
1304 : start_adr + input->length(),
1305 10 : output);
1306 :
1307 5 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1308 5 : CHECK_EQ(0, output[0]);
1309 5 : CHECK_EQ(3, output[1]);
1310 5 : CHECK_EQ(6, output[2]);
1311 5 : CHECK_EQ(9, output[3]);
1312 5 : CHECK_EQ(9, output[4]);
1313 5 : CHECK_EQ(-1, output[5]);
1314 5 : }
1315 :
1316 :
1317 28342 : TEST(MacroAssemblerStackOverflow) {
1318 5 : v8::V8::Initialize();
1319 5 : ContextInitializer initializer;
1320 : Isolate* isolate = CcTest::i_isolate();
1321 : Factory* factory = isolate->factory();
1322 10 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1323 :
1324 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1325 10 : 0);
1326 :
1327 5 : Label loop;
1328 5 : m.Bind(&loop);
1329 5 : m.PushBacktrack(&loop);
1330 5 : m.GoTo(&loop);
1331 :
1332 : Handle<String> source =
1333 5 : factory->NewStringFromStaticChars("<stack overflow test>");
1334 5 : Handle<Object> code_object = m.GetCode(source);
1335 5 : Handle<Code> code = Handle<Code>::cast(code_object);
1336 :
1337 : // String long enough for test (content doesn't matter).
1338 5 : Handle<String> input = factory->NewStringFromStaticChars("dummy");
1339 5 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1340 : Address start_adr = seq_input->GetCharsAddress();
1341 :
1342 : NativeRegExpMacroAssembler::Result result = Execute(
1343 10 : *code, *input, 0, start_adr, start_adr + input->length(), nullptr);
1344 :
1345 5 : CHECK_EQ(NativeRegExpMacroAssembler::EXCEPTION, result);
1346 5 : CHECK(isolate->has_pending_exception());
1347 5 : isolate->clear_pending_exception();
1348 5 : }
1349 :
1350 :
1351 28342 : TEST(MacroAssemblerNativeLotsOfRegisters) {
1352 5 : v8::V8::Initialize();
1353 5 : ContextInitializer initializer;
1354 : Isolate* isolate = CcTest::i_isolate();
1355 : Factory* factory = isolate->factory();
1356 10 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1357 :
1358 : ArchRegExpMacroAssembler m(isolate, &zone, NativeRegExpMacroAssembler::LATIN1,
1359 10 : 2);
1360 :
1361 : // At least 2048, to ensure the allocated space for registers
1362 : // span one full page.
1363 : const int large_number = 8000;
1364 5 : m.WriteCurrentPositionToRegister(large_number, 42);
1365 5 : m.WriteCurrentPositionToRegister(0, 0);
1366 5 : m.WriteCurrentPositionToRegister(1, 1);
1367 5 : Label done;
1368 5 : m.CheckNotBackReference(0, false, &done); // Performs a system-stack push.
1369 5 : m.Bind(&done);
1370 5 : m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
1371 5 : m.PopRegister(1);
1372 5 : m.Succeed();
1373 :
1374 : Handle<String> source =
1375 5 : factory->NewStringFromStaticChars("<huge register space test>");
1376 5 : Handle<Object> code_object = m.GetCode(source);
1377 5 : Handle<Code> code = Handle<Code>::cast(code_object);
1378 :
1379 : // String long enough for test (content doesn't matter).
1380 5 : Handle<String> input = factory->NewStringFromStaticChars("sample text");
1381 5 : Handle<SeqOneByteString> seq_input = Handle<SeqOneByteString>::cast(input);
1382 : Address start_adr = seq_input->GetCharsAddress();
1383 :
1384 : int captures[2];
1385 : NativeRegExpMacroAssembler::Result result =
1386 : Execute(*code,
1387 : *input,
1388 : 0,
1389 : start_adr,
1390 : start_adr + input->length(),
1391 10 : captures);
1392 :
1393 5 : CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
1394 5 : CHECK_EQ(0, captures[0]);
1395 5 : CHECK_EQ(42, captures[1]);
1396 :
1397 5 : isolate->clear_pending_exception();
1398 5 : }
1399 :
1400 : #else // V8_INTERPRETED_REGEXP
1401 :
1402 : TEST(MacroAssembler) {
1403 : byte codes[1024];
1404 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1405 : RegExpMacroAssemblerIrregexp m(CcTest::i_isolate(), Vector<byte>(codes, 1024),
1406 : &zone);
1407 : // ^f(o)o.
1408 : Label start, fail, backtrack;
1409 :
1410 : m.SetRegister(4, 42);
1411 : m.PushRegister(4, RegExpMacroAssembler::kNoStackLimitCheck);
1412 : m.AdvanceRegister(4, 42);
1413 : m.GoTo(&start);
1414 : m.Fail();
1415 : m.Bind(&start);
1416 : m.PushBacktrack(&fail);
1417 : m.CheckNotAtStart(0, nullptr);
1418 : m.LoadCurrentCharacter(0, nullptr);
1419 : m.CheckNotCharacter('f', nullptr);
1420 : m.LoadCurrentCharacter(1, nullptr);
1421 : m.CheckNotCharacter('o', nullptr);
1422 : m.LoadCurrentCharacter(2, nullptr);
1423 : m.CheckNotCharacter('o', nullptr);
1424 : m.WriteCurrentPositionToRegister(0, 0);
1425 : m.WriteCurrentPositionToRegister(1, 3);
1426 : m.WriteCurrentPositionToRegister(2, 1);
1427 : m.WriteCurrentPositionToRegister(3, 2);
1428 : m.AdvanceCurrentPosition(3);
1429 : m.PushBacktrack(&backtrack);
1430 : m.Succeed();
1431 : m.Bind(&backtrack);
1432 : m.ClearRegisters(2, 3);
1433 : m.Backtrack();
1434 : m.Bind(&fail);
1435 : m.PopRegister(0);
1436 : m.Fail();
1437 :
1438 : Isolate* isolate = CcTest::i_isolate();
1439 : Factory* factory = isolate->factory();
1440 : HandleScope scope(isolate);
1441 :
1442 : Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
1443 : Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
1444 : int captures[5];
1445 :
1446 : const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
1447 : Handle<String> f1_16 = factory->NewStringFromTwoByte(
1448 : Vector<const uc16>(str1, 6)).ToHandleChecked();
1449 :
1450 : CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
1451 : CHECK_EQ(0, captures[0]);
1452 : CHECK_EQ(3, captures[1]);
1453 : CHECK_EQ(1, captures[2]);
1454 : CHECK_EQ(2, captures[3]);
1455 : CHECK_EQ(84, captures[4]);
1456 :
1457 : const uc16 str2[] = {'b', 'a', 'r', 'f', 'o', 'o'};
1458 : Handle<String> f2_16 = factory->NewStringFromTwoByte(
1459 : Vector<const uc16>(str2, 6)).ToHandleChecked();
1460 :
1461 : CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
1462 : CHECK_EQ(42, captures[0]);
1463 : }
1464 :
1465 : #endif // V8_INTERPRETED_REGEXP
1466 :
1467 :
1468 28342 : TEST(AddInverseToTable) {
1469 : static const int kLimit = 1000;
1470 : static const int kRangeCount = 16;
1471 55 : for (int t = 0; t < 10; t++) {
1472 50 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1473 : ZoneList<CharacterRange>* ranges =
1474 50 : new(&zone) ZoneList<CharacterRange>(kRangeCount, &zone);
1475 850 : for (int i = 0; i < kRangeCount; i++) {
1476 1600 : int from = PseudoRandom(t + 87, i + 25) % kLimit;
1477 1600 : int to = from + (PseudoRandom(i + 87, t + 25) % (kLimit / 20));
1478 800 : if (to > kLimit) to = kLimit;
1479 800 : ranges->Add(CharacterRange::Range(from, to), &zone);
1480 : }
1481 : DispatchTable table(&zone);
1482 : DispatchTableConstructor cons(&table, false, &zone);
1483 : cons.set_choice_index(0);
1484 50 : cons.AddInverse(ranges);
1485 50050 : for (int i = 0; i < kLimit; i++) {
1486 : bool is_on = false;
1487 675615 : for (int j = 0; !is_on && j < kRangeCount; j++)
1488 : is_on = ranges->at(j).Contains(i);
1489 50000 : OutSet* set = table.Get(i);
1490 50000 : CHECK_EQ(is_on, set->Get(0) == false);
1491 : }
1492 50 : }
1493 5 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1494 : ZoneList<CharacterRange>* ranges =
1495 5 : new(&zone) ZoneList<CharacterRange>(1, &zone);
1496 5 : ranges->Add(CharacterRange::Range(0xFFF0, 0xFFFE), &zone);
1497 : DispatchTable table(&zone);
1498 : DispatchTableConstructor cons(&table, false, &zone);
1499 : cons.set_choice_index(0);
1500 5 : cons.AddInverse(ranges);
1501 5 : CHECK(!table.Get(0xFFFE)->Get(0));
1502 10 : CHECK(table.Get(0xFFFF)->Get(0));
1503 5 : }
1504 :
1505 :
1506 10485380 : static uc32 canonicalize(uc32 c) {
1507 : unibrow::uchar canon[unibrow::Ecma262Canonicalize::kMaxWidth];
1508 10485380 : int count = unibrow::Ecma262Canonicalize::Convert(c, '\0', canon, nullptr);
1509 10485380 : if (count == 0) {
1510 : return c;
1511 : } else {
1512 4995 : CHECK_EQ(1, count);
1513 4995 : return canon[0];
1514 : }
1515 : }
1516 :
1517 :
1518 28342 : TEST(LatinCanonicalize) {
1519 : unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1520 130 : for (unibrow::uchar lower = 'a'; lower <= 'z'; lower++) {
1521 130 : unibrow::uchar upper = lower + ('A' - 'a');
1522 130 : CHECK_EQ(canonicalize(lower), canonicalize(upper));
1523 : unibrow::uchar uncanon[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1524 130 : int length = un_canonicalize.get(lower, '\0', uncanon);
1525 130 : CHECK_EQ(2, length);
1526 130 : CHECK_EQ(upper, uncanon[0]);
1527 130 : CHECK_EQ(lower, uncanon[1]);
1528 : }
1529 10485120 : for (uc32 c = 128; c < (1 << 21); c++)
1530 10485120 : CHECK_GE(canonicalize(c), 128);
1531 : #ifndef V8_INTL_SUPPORT
1532 : unibrow::Mapping<unibrow::ToUppercase> to_upper;
1533 : // Canonicalization is only defined for the Basic Multilingual Plane.
1534 : for (uc32 c = 0; c < (1 << 16); c++) {
1535 : unibrow::uchar upper[unibrow::ToUppercase::kMaxWidth];
1536 : int length = to_upper.get(c, '\0', upper);
1537 : if (length == 0) {
1538 : length = 1;
1539 : upper[0] = c;
1540 : }
1541 : uc32 u = upper[0];
1542 : if (length > 1 || (c >= 128 && u < 128))
1543 : u = c;
1544 : CHECK_EQ(u, canonicalize(c));
1545 : }
1546 : #endif
1547 5 : }
1548 :
1549 :
1550 324330 : static uc32 CanonRangeEnd(uc32 c) {
1551 : unibrow::uchar canon[unibrow::CanonicalizationRange::kMaxWidth];
1552 324330 : int count = unibrow::CanonicalizationRange::Convert(c, '\0', canon, nullptr);
1553 324330 : if (count == 0) {
1554 : return c;
1555 : } else {
1556 220 : CHECK_EQ(1, count);
1557 220 : return canon[0];
1558 : }
1559 : }
1560 :
1561 :
1562 28342 : TEST(RangeCanonicalization) {
1563 : // Check that we arrive at the same result when using the basic
1564 : // range canonicalization primitives as when using immediate
1565 : // canonicalization.
1566 : unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1567 : int block_start = 0;
1568 324335 : while (block_start <= 0xFFFF) {
1569 324330 : uc32 block_end = CanonRangeEnd(block_start);
1570 324330 : unsigned block_length = block_end - block_start + 1;
1571 324330 : if (block_length > 1) {
1572 : unibrow::uchar first[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1573 220 : int first_length = un_canonicalize.get(block_start, '\0', first);
1574 3570 : for (unsigned i = 1; i < block_length; i++) {
1575 : unibrow::uchar succ[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1576 3350 : int succ_length = un_canonicalize.get(block_start + i, '\0', succ);
1577 3350 : CHECK_EQ(first_length, succ_length);
1578 6700 : for (int j = 0; j < succ_length; j++) {
1579 6700 : int calc = first[j] + i;
1580 6700 : int found = succ[j];
1581 6700 : CHECK_EQ(calc, found);
1582 : }
1583 : }
1584 : }
1585 324330 : block_start = block_start + block_length;
1586 : }
1587 5 : }
1588 :
1589 :
1590 28342 : TEST(UncanonicalizeEquivalence) {
1591 : unibrow::Mapping<unibrow::Ecma262UnCanonicalize> un_canonicalize;
1592 : unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1593 327680 : for (int i = 0; i < (1 << 16); i++) {
1594 327680 : int length = un_canonicalize.get(i, '\0', chars);
1595 347750 : for (int j = 0; j < length; j++) {
1596 : unibrow::uchar chars2[unibrow::Ecma262UnCanonicalize::kMaxWidth];
1597 20070 : int length2 = un_canonicalize.get(chars[j], '\0', chars2);
1598 20070 : CHECK_EQ(length, length2);
1599 40930 : for (int k = 0; k < length; k++)
1600 40930 : CHECK_EQ(static_cast<int>(chars[k]), static_cast<int>(chars2[k]));
1601 : }
1602 : }
1603 5 : }
1604 :
1605 :
1606 55 : static void TestRangeCaseIndependence(Isolate* isolate, CharacterRange input,
1607 : Vector<CharacterRange> expected) {
1608 55 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1609 55 : int count = expected.length();
1610 55 : ZoneList<CharacterRange>* list =
1611 55 : new(&zone) ZoneList<CharacterRange>(count, &zone);
1612 55 : list->Add(input, &zone);
1613 55 : CharacterRange::AddCaseEquivalents(isolate, &zone, list, false);
1614 55 : list->Remove(0); // Remove the input before checking results.
1615 55 : CHECK_EQ(count, list->length());
1616 55 : for (int i = 0; i < list->length(); i++) {
1617 110 : CHECK_EQ(expected[i].from(), list->at(i).from());
1618 55 : CHECK_EQ(expected[i].to(), list->at(i).to());
1619 55 : }
1620 55 : }
1621 :
1622 :
1623 55 : static void TestSimpleRangeCaseIndependence(Isolate* isolate,
1624 : CharacterRange input,
1625 : CharacterRange expected) {
1626 : EmbeddedVector<CharacterRange, 1> vector;
1627 55 : vector[0] = expected;
1628 55 : TestRangeCaseIndependence(isolate, input, vector);
1629 55 : }
1630 :
1631 :
1632 28342 : TEST(CharacterRangeCaseIndependence) {
1633 : Isolate* isolate = CcTest::i_isolate();
1634 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('a'),
1635 5 : CharacterRange::Singleton('A'));
1636 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Singleton('z'),
1637 5 : CharacterRange::Singleton('Z'));
1638 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'z'),
1639 5 : CharacterRange::Range('A', 'Z'));
1640 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('c', 'f'),
1641 5 : CharacterRange::Range('C', 'F'));
1642 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('a', 'b'),
1643 5 : CharacterRange::Range('A', 'B'));
1644 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('y', 'z'),
1645 5 : CharacterRange::Range('Y', 'Z'));
1646 : TestSimpleRangeCaseIndependence(isolate,
1647 : CharacterRange::Range('a' - 1, 'z' + 1),
1648 5 : CharacterRange::Range('A', 'Z'));
1649 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'Z'),
1650 5 : CharacterRange::Range('a', 'z'));
1651 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('C', 'F'),
1652 5 : CharacterRange::Range('c', 'f'));
1653 : TestSimpleRangeCaseIndependence(isolate,
1654 : CharacterRange::Range('A' - 1, 'Z' + 1),
1655 5 : CharacterRange::Range('a', 'z'));
1656 : // Here we need to add [l-z] to complete the case independence of
1657 : // [A-Za-z] but we expect [a-z] to be added since we always add a
1658 : // whole block at a time.
1659 : TestSimpleRangeCaseIndependence(isolate, CharacterRange::Range('A', 'k'),
1660 5 : CharacterRange::Range('a', 'z'));
1661 5 : }
1662 :
1663 :
1664 66805525 : static bool InClass(uc32 c, ZoneList<CharacterRange>* ranges) {
1665 22282160 : if (ranges == nullptr) return false;
1666 66764570 : for (int i = 0; i < ranges->length(); i++) {
1667 27811745 : CharacterRange range = ranges->at(i);
1668 27811745 : if (range.from() <= c && c <= range.to())
1669 : return true;
1670 : }
1671 : return false;
1672 : }
1673 :
1674 :
1675 28342 : TEST(UnicodeRangeSplitter) {
1676 5 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1677 : ZoneList<CharacterRange>* base =
1678 5 : new(&zone) ZoneList<CharacterRange>(1, &zone);
1679 5 : base->Add(CharacterRange::Everything(), &zone);
1680 5 : UnicodeRangeSplitter splitter(&zone, base);
1681 : // BMP
1682 276485 : for (uc32 c = 0; c < 0xD800; c++) {
1683 276480 : CHECK(InClass(c, splitter.bmp()));
1684 276480 : CHECK(!InClass(c, splitter.lead_surrogates()));
1685 276480 : CHECK(!InClass(c, splitter.trail_surrogates()));
1686 276480 : CHECK(!InClass(c, splitter.non_bmp()));
1687 : }
1688 : // Lead surrogates
1689 5115 : for (uc32 c = 0xD800; c < 0xDBFF; c++) {
1690 5115 : CHECK(!InClass(c, splitter.bmp()));
1691 5115 : CHECK(InClass(c, splitter.lead_surrogates()));
1692 5115 : CHECK(!InClass(c, splitter.trail_surrogates()));
1693 5115 : CHECK(!InClass(c, splitter.non_bmp()));
1694 : }
1695 : // Trail surrogates
1696 5115 : for (uc32 c = 0xDC00; c < 0xDFFF; c++) {
1697 5115 : CHECK(!InClass(c, splitter.bmp()));
1698 5115 : CHECK(!InClass(c, splitter.lead_surrogates()));
1699 5115 : CHECK(InClass(c, splitter.trail_surrogates()));
1700 5115 : CHECK(!InClass(c, splitter.non_bmp()));
1701 : }
1702 : // BMP
1703 40955 : for (uc32 c = 0xE000; c < 0xFFFF; c++) {
1704 40955 : CHECK(InClass(c, splitter.bmp()));
1705 40955 : CHECK(!InClass(c, splitter.lead_surrogates()));
1706 40955 : CHECK(!InClass(c, splitter.trail_surrogates()));
1707 40955 : CHECK(!InClass(c, splitter.non_bmp()));
1708 : }
1709 : // Non-BMP
1710 5242875 : for (uc32 c = 0x10000; c < 0x10FFFF; c++) {
1711 5242875 : CHECK(!InClass(c, splitter.bmp()));
1712 5242875 : CHECK(!InClass(c, splitter.lead_surrogates()));
1713 5242875 : CHECK(!InClass(c, splitter.trail_surrogates()));
1714 5242875 : CHECK(InClass(c, splitter.non_bmp()));
1715 5 : }
1716 5 : }
1717 :
1718 :
1719 28342 : TEST(CanonicalizeCharacterSets) {
1720 5 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1721 20 : ZoneList<CharacterRange>* list =
1722 5 : new(&zone) ZoneList<CharacterRange>(4, &zone);
1723 : CharacterSet set(list);
1724 :
1725 5 : list->Add(CharacterRange::Range(10, 20), &zone);
1726 5 : list->Add(CharacterRange::Range(30, 40), &zone);
1727 5 : list->Add(CharacterRange::Range(50, 60), &zone);
1728 5 : set.Canonicalize();
1729 5 : CHECK_EQ(3, list->length());
1730 5 : CHECK_EQ(10, list->at(0).from());
1731 5 : CHECK_EQ(20, list->at(0).to());
1732 5 : CHECK_EQ(30, list->at(1).from());
1733 5 : CHECK_EQ(40, list->at(1).to());
1734 5 : CHECK_EQ(50, list->at(2).from());
1735 5 : CHECK_EQ(60, list->at(2).to());
1736 :
1737 : list->Rewind(0);
1738 5 : list->Add(CharacterRange::Range(10, 20), &zone);
1739 5 : list->Add(CharacterRange::Range(50, 60), &zone);
1740 5 : list->Add(CharacterRange::Range(30, 40), &zone);
1741 5 : set.Canonicalize();
1742 5 : CHECK_EQ(3, list->length());
1743 5 : CHECK_EQ(10, list->at(0).from());
1744 5 : CHECK_EQ(20, list->at(0).to());
1745 5 : CHECK_EQ(30, list->at(1).from());
1746 5 : CHECK_EQ(40, list->at(1).to());
1747 5 : CHECK_EQ(50, list->at(2).from());
1748 5 : CHECK_EQ(60, list->at(2).to());
1749 :
1750 : list->Rewind(0);
1751 5 : list->Add(CharacterRange::Range(30, 40), &zone);
1752 5 : list->Add(CharacterRange::Range(10, 20), &zone);
1753 5 : list->Add(CharacterRange::Range(25, 25), &zone);
1754 5 : list->Add(CharacterRange::Range(100, 100), &zone);
1755 5 : list->Add(CharacterRange::Range(1, 1), &zone);
1756 5 : set.Canonicalize();
1757 5 : CHECK_EQ(5, list->length());
1758 5 : CHECK_EQ(1, list->at(0).from());
1759 5 : CHECK_EQ(1, list->at(0).to());
1760 5 : CHECK_EQ(10, list->at(1).from());
1761 5 : CHECK_EQ(20, list->at(1).to());
1762 5 : CHECK_EQ(25, list->at(2).from());
1763 5 : CHECK_EQ(25, list->at(2).to());
1764 5 : CHECK_EQ(30, list->at(3).from());
1765 5 : CHECK_EQ(40, list->at(3).to());
1766 5 : CHECK_EQ(100, list->at(4).from());
1767 5 : CHECK_EQ(100, list->at(4).to());
1768 :
1769 : list->Rewind(0);
1770 5 : list->Add(CharacterRange::Range(10, 19), &zone);
1771 5 : list->Add(CharacterRange::Range(21, 30), &zone);
1772 5 : list->Add(CharacterRange::Range(20, 20), &zone);
1773 5 : set.Canonicalize();
1774 5 : CHECK_EQ(1, list->length());
1775 5 : CHECK_EQ(10, list->at(0).from());
1776 5 : CHECK_EQ(30, list->at(0).to());
1777 5 : }
1778 :
1779 :
1780 28342 : TEST(CharacterRangeMerge) {
1781 5 : Zone zone(CcTest::i_isolate()->allocator(), ZONE_NAME);
1782 5 : ZoneList<CharacterRange> l1(4, &zone);
1783 5 : ZoneList<CharacterRange> l2(4, &zone);
1784 : // Create all combinations of intersections of ranges, both singletons and
1785 : // longer.
1786 :
1787 : int offset = 0;
1788 :
1789 : // The five kinds of singleton intersections:
1790 : // X
1791 : // Y - outside before
1792 : // Y - outside touching start
1793 : // Y - overlap
1794 : // Y - outside touching end
1795 : // Y - outside after
1796 :
1797 30 : for (int i = 0; i < 5; i++) {
1798 25 : l1.Add(CharacterRange::Singleton(offset + 2), &zone);
1799 25 : l2.Add(CharacterRange::Singleton(offset + i), &zone);
1800 25 : offset += 6;
1801 : }
1802 :
1803 : // The seven kinds of singleton/non-singleton intersections:
1804 : // XXX
1805 : // Y - outside before
1806 : // Y - outside touching start
1807 : // Y - inside touching start
1808 : // Y - entirely inside
1809 : // Y - inside touching end
1810 : // Y - outside touching end
1811 : // Y - disjoint after
1812 :
1813 35 : for (int i = 0; i < 7; i++) {
1814 35 : l1.Add(CharacterRange::Range(offset + 2, offset + 4), &zone);
1815 35 : l2.Add(CharacterRange::Singleton(offset + i), &zone);
1816 35 : offset += 8;
1817 : }
1818 :
1819 : // The eleven kinds of non-singleton intersections:
1820 : //
1821 : // XXXXXXXX
1822 : // YYYY - outside before.
1823 : // YYYY - outside touching start.
1824 : // YYYY - overlapping start
1825 : // YYYY - inside touching start
1826 : // YYYY - entirely inside
1827 : // YYYY - inside touching end
1828 : // YYYY - overlapping end
1829 : // YYYY - outside touching end
1830 : // YYYY - outside after
1831 : // YYYYYYYY - identical
1832 : // YYYYYYYYYYYY - containing entirely.
1833 :
1834 45 : for (int i = 0; i < 9; i++) {
1835 45 : l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone); // Length 8.
1836 45 : l2.Add(CharacterRange::Range(offset + 2 * i, offset + 2 * i + 3), &zone);
1837 45 : offset += 22;
1838 : }
1839 5 : l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1840 5 : l2.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1841 : offset += 22;
1842 5 : l1.Add(CharacterRange::Range(offset + 6, offset + 15), &zone);
1843 5 : l2.Add(CharacterRange::Range(offset + 4, offset + 17), &zone);
1844 5 : offset += 22;
1845 :
1846 : // Different kinds of multi-range overlap:
1847 : // XXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXX
1848 : // YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y YYYY Y
1849 :
1850 5 : l1.Add(CharacterRange::Range(offset, offset + 21), &zone);
1851 5 : l1.Add(CharacterRange::Range(offset + 31, offset + 52), &zone);
1852 35 : for (int i = 0; i < 6; i++) {
1853 30 : l2.Add(CharacterRange::Range(offset + 2, offset + 5), &zone);
1854 30 : l2.Add(CharacterRange::Singleton(offset + 8), &zone);
1855 30 : offset += 9;
1856 : }
1857 :
1858 5 : CHECK(CharacterRange::IsCanonical(&l1));
1859 5 : CHECK(CharacterRange::IsCanonical(&l2));
1860 :
1861 5 : ZoneList<CharacterRange> first_only(4, &zone);
1862 5 : ZoneList<CharacterRange> second_only(4, &zone);
1863 5 : ZoneList<CharacterRange> both(4, &zone);
1864 5 : }
1865 :
1866 :
1867 28342 : TEST(Graph) {
1868 5 : Execute("\\b\\w+\\b", false, true, true);
1869 5 : }
1870 :
1871 :
1872 : namespace {
1873 :
1874 : int* global_use_counts = nullptr;
1875 :
1876 46 : void MockUseCounterCallback(v8::Isolate* isolate,
1877 : v8::Isolate::UseCounterFeature feature) {
1878 46 : ++global_use_counts[feature];
1879 46 : }
1880 : }
1881 :
1882 :
1883 : // Test that ES2015 RegExp compatibility fixes are in place, that they
1884 : // are not overly broad, and the appropriate UseCounters are incremented
1885 28342 : TEST(UseCountRegExp) {
1886 5 : v8::Isolate* isolate = CcTest::isolate();
1887 5 : v8::HandleScope scope(isolate);
1888 10 : LocalContext env;
1889 5 : int use_counts[v8::Isolate::kUseCounterFeatureCount] = {};
1890 5 : global_use_counts = use_counts;
1891 5 : CcTest::isolate()->SetUseCounterCallback(MockUseCounterCallback);
1892 :
1893 : // Compat fix: RegExp.prototype.sticky == undefined; UseCounter tracks it
1894 : v8::Local<v8::Value> resultSticky = CompileRun("RegExp.prototype.sticky");
1895 5 : CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1896 5 : CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1897 5 : CHECK(resultSticky->IsUndefined());
1898 :
1899 : // re.sticky has approriate value and doesn't touch UseCounter
1900 : v8::Local<v8::Value> resultReSticky = CompileRun("/a/.sticky");
1901 5 : CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1902 5 : CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1903 5 : CHECK(resultReSticky->IsFalse());
1904 :
1905 : // When the getter is caleld on another object, throw an exception
1906 : // and don't increment the UseCounter
1907 : v8::Local<v8::Value> resultStickyError = CompileRun(
1908 : "var exception;"
1909 : "try { "
1910 : " Object.getOwnPropertyDescriptor(RegExp.prototype, 'sticky')"
1911 : " .get.call(null);"
1912 : "} catch (e) {"
1913 : " exception = e;"
1914 : "}"
1915 : "exception");
1916 5 : CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1917 5 : CHECK_EQ(0, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1918 5 : CHECK(resultStickyError->IsObject());
1919 :
1920 : // RegExp.prototype.toString() returns '/(?:)/' as a compatibility fix;
1921 : // a UseCounter is incremented to track it.
1922 : v8::Local<v8::Value> resultToString =
1923 : CompileRun("RegExp.prototype.toString().length");
1924 5 : CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1925 5 : CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1926 5 : CHECK(resultToString->IsInt32());
1927 10 : CHECK_EQ(6,
1928 : resultToString->Int32Value(isolate->GetCurrentContext()).FromJust());
1929 :
1930 : // .toString() works on normal RegExps
1931 : v8::Local<v8::Value> resultReToString = CompileRun("/a/.toString().length");
1932 5 : CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1933 5 : CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1934 5 : CHECK(resultReToString->IsInt32());
1935 10 : CHECK_EQ(
1936 : 3, resultReToString->Int32Value(isolate->GetCurrentContext()).FromJust());
1937 :
1938 : // .toString() throws on non-RegExps that aren't RegExp.prototype
1939 : v8::Local<v8::Value> resultToStringError = CompileRun(
1940 : "var exception;"
1941 : "try { RegExp.prototype.toString.call(null) }"
1942 : "catch (e) { exception = e; }"
1943 : "exception");
1944 5 : CHECK_EQ(2, use_counts[v8::Isolate::kRegExpPrototypeStickyGetter]);
1945 5 : CHECK_EQ(1, use_counts[v8::Isolate::kRegExpPrototypeToString]);
1946 10 : CHECK(resultToStringError->IsObject());
1947 5 : }
1948 :
1949 15 : class UncachedExternalString
1950 : : public v8::String::ExternalOneByteStringResource {
1951 : public:
1952 26 : const char* data() const override { return "abcdefghijklmnopqrstuvwxyz"; }
1953 20 : size_t length() const override { return 26; }
1954 5 : bool IsCacheable() const override { return false; }
1955 : };
1956 :
1957 28342 : TEST(UncachedExternalString) {
1958 5 : v8::Isolate* isolate = CcTest::isolate();
1959 5 : v8::HandleScope scope(isolate);
1960 10 : LocalContext env;
1961 : v8::Local<v8::String> external =
1962 5 : v8::String::NewExternalOneByte(isolate, new UncachedExternalString())
1963 5 : .ToLocalChecked();
1964 5 : CHECK(v8::Utils::OpenHandle(*external)->map() ==
1965 : ReadOnlyRoots(CcTest::i_isolate())
1966 : .uncached_external_one_byte_string_map());
1967 5 : v8::Local<v8::Object> global = env->Global();
1968 20 : global->Set(env.local(), v8_str("external"), external).FromJust();
1969 : CompileRun("var re = /y(.)/; re.test('ab');");
1970 10 : ExpectString("external.substring(1).match(re)[1]", "z");
1971 5 : }
1972 :
1973 : } // namespace test_regexp
1974 : } // namespace internal
1975 85011 : } // namespace v8
|