Line data Source code
1 : // Copyright 2011 the V8 project authors. All rights reserved.
2 : // Use of this source code is governed by a BSD-style license that can be
3 : // found in the LICENSE file.
4 :
5 : #ifndef V8_CHAR_PREDICATES_H_
6 : #define V8_CHAR_PREDICATES_H_
7 :
8 : #include "src/globals.h"
9 : #include "src/unicode.h"
10 :
11 : namespace v8 {
12 : namespace internal {
13 :
// Unicode character predicates as defined by ECMA-262, 3rd edition,
// used for lexical analysis.
16 :
// Forward declarations of the constexpr single-code-point helpers. Each takes
// one uc32 code point; all return bool except AsciiAlphaToLower, which
// returns int. Only the signatures appear in this header -- the definitions
// are not visible here (presumably in a companion -inl header; TODO confirm).
inline constexpr int AsciiAlphaToLower(uc32 c);
inline constexpr bool IsCarriageReturn(uc32 c);
inline constexpr bool IsLineFeed(uc32 c);
inline constexpr bool IsAsciiIdentifier(uc32 c);
inline constexpr bool IsAlphaNumeric(uc32 c);
inline constexpr bool IsDecimalDigit(uc32 c);
inline constexpr bool IsHexDigit(uc32 c);
inline constexpr bool IsOctalDigit(uc32 c);
inline constexpr bool IsBinaryDigit(uc32 c);
inline constexpr bool IsRegExpWord(uc32 c);
inline constexpr bool IsRegExpNewline(uc32 c);
28 :
29 : // ES#sec-names-and-keywords
30 : // This includes '_', '$' and '\', and ID_Start according to
31 : // http://www.unicode.org/reports/tr31/, which consists of categories
32 : // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties
33 : // 'Pattern_Syntax' or 'Pattern_White_Space'.
34 : inline bool IsIdentifierStart(uc32 c);
35 : #ifdef V8_INTL_SUPPORT
36 : V8_EXPORT_PRIVATE bool IsIdentifierStartSlow(uc32 c);
37 : #else
38 : inline bool IsIdentifierStartSlow(uc32 c) {
39 : // Non-BMP characters are not supported without I18N.
40 : return (c <= 0xFFFF) ? unibrow::ID_Start::Is(c) : false;
41 : }
42 : #endif
43 :
44 : // ES#sec-names-and-keywords
45 : // This includes \u200c and \u200d, and ID_Continue according to
46 : // http://www.unicode.org/reports/tr31/, which consists of ID_Start,
47 : // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties
48 : // 'Pattern_Syntax' or 'Pattern_White_Space'.
49 : inline bool IsIdentifierPart(uc32 c);
50 : #ifdef V8_INTL_SUPPORT
51 : V8_EXPORT_PRIVATE bool IsIdentifierPartSlow(uc32 c);
52 : #else
53 : inline bool IsIdentifierPartSlow(uc32 c) {
54 : // Non-BMP charaacters are not supported without I18N.
55 : if (c <= 0xFFFF) {
56 : return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c);
57 : }
58 : return false;
59 : }
60 : #endif
61 :
62 : // ES6 draft section 11.2
63 : // This includes all code points of Unicode category 'Zs'.
64 : // Further included are \u0009, \u000b, \u000c, and \ufeff.
65 : inline bool IsWhiteSpace(uc32 c);
66 : #ifdef V8_INTL_SUPPORT
67 : V8_EXPORT_PRIVATE bool IsWhiteSpaceSlow(uc32 c);
68 : #else
69 : inline bool IsWhiteSpaceSlow(uc32 c) { return unibrow::WhiteSpace::Is(c); }
70 : #endif
71 :
72 : // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3
73 : // This includes all the characters with Unicode category 'Z' (= Zs+Zl+Zp)
74 : // as well as \u0009 - \u000d and \ufeff.
75 : inline bool IsWhiteSpaceOrLineTerminator(uc32 c);
76 3434851 : inline bool IsWhiteSpaceOrLineTerminatorSlow(uc32 c) {
77 6857403 : return IsWhiteSpaceSlow(c) || unibrow::IsLineTerminator(c);
78 : }
79 :
// Declaration only; the definition is not visible in this header.
// NOTE(review): presumably reports whether `c`, taken together with the
// following code point `next`, forms a line terminator sequence -- confirm
// against the definition.
inline bool IsLineTerminatorSequence(uc32 c, uc32 next);
81 :
82 : } // namespace internal
83 : } // namespace v8
84 :
85 : #endif // V8_CHAR_PREDICATES_H_
|