/src/skia/third_party/externals/icu/source/common/rbbiscan.h
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | // |
4 | | // rbbiscan.h |
5 | | // |
6 | | // Copyright (C) 2002-2016, International Business Machines Corporation and others. |
7 | | // All Rights Reserved. |
8 | | // |
9 | | // This file contains declarations for class RBBIRuleScanner |
10 | | // |
11 | | |
12 | | |
13 | | #ifndef RBBISCAN_H |
14 | | #define RBBISCAN_H |
15 | | |
16 | | #include "unicode/utypes.h" |
17 | | #include "unicode/uobject.h" |
18 | | #include "unicode/rbbi.h" |
19 | | #include "unicode/uniset.h" |
20 | | #include "unicode/parseerr.h" |
21 | | #include "uhash.h" |
22 | | #include "uvector.h" |
23 | | #include "unicode/symtable.h"// For UnicodeSet parsing, this is the interface that |
24 | | // looks up references to $variables within a set. |
25 | | #include "rbbinode.h" |
26 | | #include "rbbirpt.h" |
27 | | |
28 | | U_NAMESPACE_BEGIN |
29 | | |
30 | | class RBBIRuleBuilder; |
31 | | class RBBISymbolTable; |
32 | | |
33 | | |
34 | | //-------------------------------------------------------------------------------- |
35 | | // |
36 | | // class RBBIRuleScanner does the lowest level, character-at-a-time |
37 | | // scanning of break iterator rules. |
38 | | // |
39 | | // The output of the scanner is parse trees for |
40 | | // the rule expressions and a list of all Unicode Sets |
41 | | // encountered. |
42 | | // |
43 | | //-------------------------------------------------------------------------------- |
44 | | |
45 | | class RBBIRuleScanner : public UMemory { |
46 | | public: |
47 | | |
48 | | enum { |
49 | | kStackSize = 100 // The size of the state stack for |
50 | | }; // rules parsing. Corresponds roughly |
51 | | // to the depth of parentheses nesting |
52 | | // that is allowed in the rules. |
53 | | |
54 | | struct RBBIRuleChar { |
55 | | UChar32 fChar; |
56 | | UBool fEscaped; |
57 | 0 | RBBIRuleChar() : fChar(0), fEscaped(false) {} |
58 | | }; |
59 | | |
60 | | RBBIRuleScanner(RBBIRuleBuilder *rb); |
61 | | |
62 | | |
63 | | virtual ~RBBIRuleScanner(); |
64 | | |
65 | | void nextChar(RBBIRuleChar &c); // Get the next char from the input stream into c. |
66 | | // NOTE(review): declared void, so nothing is returned; end of input is presumably reported through c - confirm. |
67 | | |
68 | | UBool push(const RBBIRuleChar &c); // Push (unget) one character. |
69 | | // Only a single character may be pushed. |
70 | | |
71 | | void parse(); // Parse the rules, generating two parse |
72 | | // trees, one each for the forward and |
73 | | // reverse rules, |
74 | | // and a list of UnicodeSets encountered. |
75 | | |
76 | | int32_t numRules(); // Return the number of rules that have been seen. |
77 | | |
78 | | /** |
79 | | * Return a rules string without unnecessary |
80 | | * characters. |
81 | | */ |
82 | | static UnicodeString stripRules(const UnicodeString &rules); |
83 | | private: |
84 | | |
85 | | UBool doParseActions(int32_t a); |
86 | | void error(UErrorCode e); // error reporting convenience function. |
87 | | void fixOpStack(RBBINode::OpPrecedence p); |
88 | | // findSetFor: setToAdopt is optional and defaults to NULL. |
89 | | void findSetFor(const UnicodeString &s, RBBINode *node, UnicodeSet *setToAdopt = NULL); |
90 | | |
91 | | UChar32 nextCharLL(); |
92 | | #ifdef RBBI_DEBUG |
93 | | void printNodeStack(const char *title); |
94 | | #endif |
95 | | RBBINode *pushNewNode(RBBINode::NodeType t); |
96 | | void scanSet(); |
97 | | |
98 | | |
99 | | RBBIRuleBuilder *fRB; // The rule builder that we are part of. |
100 | | |
101 | | int32_t fScanIndex; // Index of current character being processed |
102 | | // in the rule input string. |
103 | | int32_t fNextIndex; // Index of the next character, which |
104 | | // is the first character not yet scanned. |
105 | | UBool fQuoteMode; // Scan is in a 'quoted region' |
106 | | int32_t fLineNum; // Line number in input file. |
107 | | int32_t fCharNum; // Char position within the line. |
108 | | UChar32 fLastChar; // Previous char, needed to count CR-LF |
109 | | // as a single line, not two. |
110 | | |
111 | | RBBIRuleChar fC; // Current char for parse state machine |
112 | | // processing. |
113 | | UnicodeString fVarName; // $variableName, valid when we've just |
114 | | // scanned one. |
115 | | |
116 | | RBBIRuleTableEl **fStateTable; // State Transition Table for RBBI Rule |
117 | | // parsing. Indexed as p[state][char-class]. |
118 | | |
119 | | uint16_t fStack[kStackSize]; // State stack, holds state pushes |
120 | | int32_t fStackPtr; // and pops as specified in the state |
121 | | // transition rules. |
122 | | |
123 | | RBBINode *fNodeStack[kStackSize]; // Node stack, holds nodes created |
124 | | // during the parse of a rule |
125 | | int32_t fNodeStackPtr; |
126 | | |
127 | | |
128 | | UBool fReverseRule; // True if the rule currently being scanned |
129 | | // is a reverse direction rule (if it |
130 | | // starts with a '!') |
131 | | |
132 | | UBool fLookAheadRule; // True if the rule includes a '/' |
133 | | // somewhere within it. |
134 | | |
135 | | UBool fNoChainInRule; // True if the current rule starts with a '^'. |
136 | | |
137 | | RBBISymbolTable *fSymbolTable; // symbol table, holds definitions of |
138 | | // $variable symbols. |
139 | | |
140 | | UHashtable *fSetTable; // UnicodeSet hash table, holds indexes to |
141 | | // the sets created while parsing rules. |
142 | | // The key is the string used for creating |
143 | | // the set. |
144 | | |
145 | | UnicodeSet fRuleSets[10]; // Unicode Sets that are needed during |
146 | | // the scanning of RBBI rules. The |
147 | | // indices for these are assigned by the |
148 | | // perl script that builds the state tables. |
149 | | // See rbbirpt.h. |
150 | | |
151 | | int32_t fRuleNum; // Counts each rule as it is scanned. |
152 | | |
153 | | int32_t fOptionStart; // Input index of start of a !!option |
154 | | // keyword, while being scanned. |
155 | | |
156 | | UnicodeSet *gRuleSet_rule_char; |
157 | | UnicodeSet *gRuleSet_white_space; |
158 | | UnicodeSet *gRuleSet_name_char; |
159 | | UnicodeSet *gRuleSet_name_start_char; |
160 | | |
161 | | RBBIRuleScanner(const RBBIRuleScanner &other); // forbid copying of this class |
162 | | RBBIRuleScanner &operator=(const RBBIRuleScanner &other); // forbid copying of this class |
163 | | }; |
164 | | |
165 | | U_NAMESPACE_END |
166 | | |
167 | | #endif |