Line data Source code
1 : // Copyright 2011 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_CHAR_PREDICATES_H_
6 : #define V8_CHAR_PREDICATES_H_
7 :
8 : #include "src/globals.h"
9 : #include "src/unicode.h"
10 :
11 : namespace v8 {
12 : namespace internal {
13 :
14 : // Unicode character predicates as defined by ECMA-262, 3rd,
15 : // used for lexical analysis.
16 :
17 : inline int AsciiAlphaToLower(uc32 c);
18 : inline bool IsCarriageReturn(uc32 c);
19 : inline bool IsLineFeed(uc32 c);
20 : inline bool IsAsciiIdentifier(uc32 c);
21 : inline bool IsAlphaNumeric(uc32 c);
22 : inline bool IsDecimalDigit(uc32 c);
23 : inline bool IsHexDigit(uc32 c);
24 : inline bool IsOctalDigit(uc32 c);
25 : inline bool IsBinaryDigit(uc32 c);
26 : inline bool IsRegExpWord(uc32 c);
27 : inline bool IsRegExpNewline(uc32 c);
28 :
29 : struct V8_EXPORT_PRIVATE SupplementaryPlanes {
30 : static bool IsIDStart(uc32 c);
31 : static bool IsIDPart(uc32 c);
32 : };
33 :
34 :
35 : // ES6 draft section 11.6
36 : // This includes '_', '$' and '\', and ID_Start according to
37 : // http://www.unicode.org/reports/tr31/, which consists of categories
38 : // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties
39 : // 'Pattern_Syntax' or 'Pattern_White_Space'.
40 : // For code points in the SMPs, we can resort to ICU (if available).
41 : struct IdentifierStart {
42 1636657 : static inline bool Is(uc32 c) {
43 1636657 : if (c > 0xFFFF) return SupplementaryPlanes::IsIDStart(c);
44 1636553 : return unibrow::ID_Start::Is(c);
45 : }
46 : };
47 :
48 :
49 : // ES6 draft section 11.6
50 : // This includes \u200c and \u200d, and ID_Continue according to
51 : // http://www.unicode.org/reports/tr31/, which consists of ID_Start,
52 : // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties
53 : // 'Pattern_Syntax' or 'Pattern_White_Space'.
54 : // For code points in the SMPs, we can resort to ICU (if available).
55 : struct IdentifierPart {
56 28419 : static inline bool Is(uc32 c) {
57 28419 : if (c > 0xFFFF) return SupplementaryPlanes::IsIDPart(c);
58 28228 : return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c);
59 : }
60 : };
61 :
62 :
63 : // ES6 draft section 11.2
64 : // This includes all code points of Unicode category 'Zs'.
65 : // \u180e stops being one as of Unicode 6.3.0, but ES6 adheres to Unicode 5.1,
66 : // so it is also included.
67 : // Further included are \u0009, \u000b, \u0020, \u00a0, \u000c, and \ufeff.
68 : // There are no category 'Zs' code points in the SMPs.
69 : struct WhiteSpace {
70 1158628 : static inline bool Is(uc32 c) { return unibrow::WhiteSpace::Is(c); }
71 : };
72 :
73 :
74 : // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3
75 : // This consists of \000a, \000d, \u2028, and \u2029.
76 : struct WhiteSpaceOrLineTerminator {
77 1031204 : static inline bool Is(uc32 c) {
78 1031204 : return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c);
79 : }
80 : };
81 :
82 : } // namespace internal
83 : } // namespace v8
84 :
85 : #endif // V8_CHAR_PREDICATES_H_
|