/src/skia/third_party/externals/icu/source/common/rbbistbl.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | // |
4 | | // file: rbbistbl.cpp Implementation of the ICU RBBISymbolTable class |
5 | | // |
6 | | /* |
7 | | *************************************************************************** |
8 | | * Copyright (C) 2002-2014 International Business Machines Corporation |
9 | | * and others. All rights reserved. |
10 | | *************************************************************************** |
11 | | */ |
12 | | |
13 | | #include "unicode/utypes.h" |
14 | | |
15 | | #if !UCONFIG_NO_BREAK_ITERATION |
16 | | |
17 | | #include "unicode/unistr.h" |
18 | | #include "unicode/uniset.h" |
19 | | #include "unicode/uchar.h" |
20 | | #include "unicode/parsepos.h" |
21 | | |
22 | | #include "cstr.h" |
23 | | #include "rbbinode.h" |
24 | | #include "rbbirb.h" |
25 | | #include "umutex.h" |
26 | | |
27 | | |
28 | | // |
29 | | // RBBISymbolTableEntry_deleter Used by the UHashTable to delete the contents |
30 | | // when the hash table is deleted. Registered as the value deleter in the RBBISymbolTable constructor. |
31 | | // p is cast back to RBBISymbolTableEntry and deleted, which runs ~RBBISymbolTableEntry. |
32 | | U_CDECL_BEGIN |
33 | 0 | static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) { |
34 | 0 | icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p; |
35 | 0 | delete px; |
36 | 0 | } |
37 | | U_CDECL_END |
38 | | |
39 | | |
40 | | |
41 | | U_NAMESPACE_BEGIN |
42 | | |
43 | | RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status) | // Constructor: stores rules and the scanner, then opens an empty hash table keyed by UnicodeString.
44 | | :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff)) | // ffffString is the one-character U+FFFF string that lookup() returns as the stand-in for a set
45 | 0 | { |
46 | 0 | fHashTable = NULL; |
47 | 0 | fCachedSetLookup = NULL; |
48 | | |
49 | 0 | fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status); |
50 | | // uhash_open checks status itself, so status is only tested after the call; on failure return without registering the value deleter. |
51 | 0 | if (U_FAILURE(status)) { |
52 | 0 | return; |
53 | 0 | } |
54 | 0 | uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter); |
55 | 0 | } |
56 | | |
57 | | |
58 | | |
59 | | RBBISymbolTable::~RBBISymbolTable() | // Destructor: closes the hash table opened by the constructor.
60 | 0 | { |
61 | 0 | uhash_close(fHashTable); | // NOTE(review): presumably this also releases every entry through RBBISymbolTableEntry_deleter - confirm against uhash_close
62 | 0 | } |
63 | | |
64 | | |
65 | | // |
66 | | // RBBISymbolTable::lookup This function from the abstract symbol table interface |
67 | | // looks up a variable name and returns a UnicodeString |
68 | | // containing the substitution text, or NULL if the name is not in the table. |
69 | | // The result points at ffffString or at the expression node's fText; it is not a new allocation. |
70 | | // The variable name does NOT include the leading $. |
71 | | // Side effect: fCachedSetLookup is set (set-only variable) or cleared (anything else) for the lookupMatcher() call that follows. |
72 | | const UnicodeString *RBBISymbolTable::lookup(const UnicodeString& s) const |
73 | 0 | { |
74 | 0 | RBBISymbolTableEntry *el; |
75 | 0 | RBBINode *varRefNode; |
76 | 0 | RBBINode *exprNode; |
77 | 0 | RBBINode *usetNode; |
78 | 0 | const UnicodeString *retString; |
79 | 0 | RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const so fCachedSetLookup can be updated from this const method |
80 | 
|
81 | 0 | el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s); |
82 | 0 | if (el == NULL) { |
83 | 0 | return NULL; |
84 | 0 | } |
85 | | |
86 | 0 | varRefNode = el->val; |
87 | 0 | exprNode = varRefNode->fLeftChild; // Root node of expression for variable. NOTE(review): neither varRefNode nor exprNode is NULL-checked before use - confirm callers guarantee both |
88 | 0 | if (exprNode->fType == RBBINode::setRef) { |
89 | | // The $variable refers to a single UnicodeSet. |
90 | | // Return the ffffString, which will subsequently be interpreted as a |
91 | | // stand-in character for the set by RBBISymbolTable::lookupMatcher(); the set itself is remembered in fCachedSetLookup. |
92 | 0 | usetNode = exprNode->fLeftChild; |
93 | 0 | This->fCachedSetLookup = usetNode->fInputSet; |
94 | 0 | retString = &ffffString; |
95 | 0 | } |
96 | 0 | else |
97 | 0 | { |
98 | | // The variable refers to something other than just a set. |
99 | | // Return the original source string for the expression and drop any previously cached set. |
100 | 0 | retString = &exprNode->fText; |
101 | 0 | This->fCachedSetLookup = NULL; |
102 | 0 | } |
103 | 0 | return retString; |
104 | 0 | } |
105 | | |
106 | | |
107 | | |
108 | | // |
109 | | // RBBISymbolTable::lookupMatcher This function from the abstract symbol table |
110 | | // interface maps a single stand-in character to a |
111 | | // pointer to a Unicode Set. The Unicode Set code uses this |
112 | | // mechanism to get all references to the same $variable |
113 | | // name to refer to a single common Unicode Set instance. |
114 | | // Returns the set cached by the preceding lookup() when ch is 0xffff, and NULL for any other character. |
115 | | // This implementation cheats a little, and does not maintain a map of stand-in chars |
116 | | // to sets. Instead, it takes advantage of the fact that the UnicodeSet |
117 | | // constructor will always call this function right after calling lookup(), |
118 | | // and we just need to remember what set to return between these two calls. The cached pointer is cleared once it has been returned. |
119 | | const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const |
120 | 0 | { |
121 | 0 | UnicodeSet *retVal = NULL; |
122 | 0 | RBBISymbolTable *This = (RBBISymbolTable *)this; // cast off const so the cached set can be cleared from this const method |
123 | 0 | if (ch == 0xffff) { |
124 | 0 | retVal = fCachedSetLookup; |
125 | 0 | This->fCachedSetLookup = 0; |
126 | 0 | } |
127 | 0 | return retVal; |
128 | 0 | } |
129 | | |
130 | | // |
131 | | // RBBISymbolTable::parseReference This function from the abstract symbol table interface |
132 | | // looks for a $variable name in the source text. |
133 | | // It does not look it up, only scans for it. |
134 | | // It is used by the UnicodeSet parser. |
135 | | // Scanning starts at pos and stops before limit; on success pos is advanced past the name, which is returned. |
136 | | // This implementation is lifted pretty much verbatim |
137 | | // from the rules based transliterator implementation. |
138 | | // I didn't see an obvious way of sharing it. |
139 | | // If no identifier starts at pos, an empty string is returned and pos is left unchanged. |
140 | | UnicodeString RBBISymbolTable::parseReference(const UnicodeString& text, |
141 | | ParsePosition& pos, int32_t limit) const |
142 | 0 | { |
143 | 0 | int32_t start = pos.getIndex(); |
144 | 0 | int32_t i = start; |
145 | 0 | UnicodeString result; |
146 | 0 | while (i < limit) { |
147 | 0 | UChar c = text.charAt(i); | // NOTE(review): examines one UChar at a time - presumably supplementary characters are not treated as whole code points; confirm
148 | 0 | if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { | // first character must be an ID start, every character must be an ID part
149 | 0 | break; |
150 | 0 | } |
151 | 0 | ++i; |
152 | 0 | } |
153 | 0 | if (i == start) { // No valid name chars were consumed |
154 | 0 | return result; // Indicate failure with empty string; pos has not been modified |
155 | 0 | } |
156 | 0 | pos.setIndex(i); |
157 | 0 | text.extractBetween(start, i, result); |
158 | 0 | return result; |
159 | 0 | } |
160 | | |
161 | | |
162 | | |
163 | | // |
164 | | // RBBISymbolTable::lookupNode Given a key (a variable name), return the |
165 | | // corresponding RBBI Node. If there is no entry |
166 | | // in the table for this name, return NULL. |
167 | | // The node remains stored in its table entry; unlike lookup(), fCachedSetLookup is not touched. |
168 | 0 | RBBINode *RBBISymbolTable::lookupNode(const UnicodeString &key) const{ |
169 | 
|
170 | 0 | RBBINode *retNode = NULL; |
171 | 0 | RBBISymbolTableEntry *el; |
172 | 
|
173 | 0 | el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); |
174 | 0 | if (el != NULL) { |
175 | 0 | retNode = el->val; |
176 | 0 | } |
177 | 0 | return retNode; |
178 | 0 | } |
179 | | |
180 | | |
181 | | // |
182 | | // RBBISymbolTable::addEntry Add a new entry to the symbol table, mapping key to the node val. |
183 | | // Sets err to U_BRK_VARIABLE_REDFINITION if the name already exists - |
184 | | // this will only occur in the case of duplicate variable assignments - |
185 | | // and to U_MEMORY_ALLOCATION_ERROR if the entry cannot be allocated. |
186 | | // The new entry holds its own copy of key and the val pointer; ~RBBISymbolTableEntry deletes val. |
187 | 0 | void RBBISymbolTable::addEntry (const UnicodeString &key, RBBINode *val, UErrorCode &err) { |
188 | 0 | RBBISymbolTableEntry *e; |
189 | | /* do nothing if an earlier error is already recorded in err */ |
190 | 0 | if (U_FAILURE(err)) { |
191 | 0 | return; |
192 | 0 | } |
193 | 0 | e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key); |
194 | 0 | if (e != NULL) { |
195 | 0 | err = U_BRK_VARIABLE_REDFINITION; |
196 | 0 | return; |
197 | 0 | } |
198 | | |
199 | 0 | e = new RBBISymbolTableEntry; |
200 | 0 | if (e == NULL) { |
201 | 0 | err = U_MEMORY_ALLOCATION_ERROR; |
202 | 0 | return; |
203 | 0 | } |
204 | 0 | e->key = key; |
205 | 0 | e->val = val; |
206 | 0 | uhash_put( fHashTable, &e->key, e, &err); | // the hash key points at the entry's own copy of the name; NOTE(review): what happens to e if uhash_put fails is not visible here - confirm
207 | 0 | } |
208 | | |
209 | | |
210 | 0 | RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {} | // Creates an empty entry: default-constructed key and a NULL val, to be filled in by the caller.
211 | | |
212 | 0 | RBBISymbolTableEntry::~RBBISymbolTableEntry() { |
213 | | // The "val" of a symbol table entry is a variable reference node. |
214 | | // The left child of the val is the rhs expression from the assignment. |
215 | | // Unlike other node types, children of variable reference nodes are not |
216 | | // automatically recursively deleted. We do it manually here. NOTE(review): val is dereferenced without a NULL check although the default constructor sets it to NULL - confirm every entry receives a node before destruction. |
217 | 0 | delete val->fLeftChild; |
218 | 0 | val->fLeftChild = NULL; |
219 | 
|
220 | 0 | delete val; |
221 | | |
222 | | // Note: the key UnicodeString is destructed by virtue of being in the object by value. |
223 | 0 | } |
224 | | |
225 | | |
226 | | // |
227 | | // RBBISymbolTable::rbbiSymtablePrint Debugging function (compiled only when RBBI_DEBUG is defined), dump out the symbol table contents. |
228 | | // Pass 1 prints name, node address, serial number and source text for each entry; pass 2 prints each variable's node trees. |
229 | | #ifdef RBBI_DEBUG |
230 | | void RBBISymbolTable::rbbiSymtablePrint() const { |
231 | | RBBIDebugPrintf("Variable Definitions Symbol Table\n" |
232 | | "Name Node serial String Val\n" |
233 | | "-------------------------------------------------------------------\n"); |
234 | | |
235 | | int32_t pos = UHASH_FIRST; |
236 | | const UHashElement *e = NULL; |
237 | | for (;;) { |
238 | | e = uhash_nextElement(fHashTable, &pos); |
239 | | if (e == NULL ) { |
240 | | break; |
241 | | } |
242 | | RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; |
243 | | |
244 | | RBBIDebugPrintf("%-19s %8p %7d ", CStr(s->key)(), (void *)s->val, s->val->fSerialNum); |
245 | | RBBIDebugPrintf(" %s\n", CStr(s->val->fLeftChild->fText)()); |
246 | | } |
247 | | |
248 | | RBBIDebugPrintf("\nParsed Variable Definitions\n"); |
249 | | pos = -1; | // restart the iteration for the second pass; NOTE(review): the first pass used UHASH_FIRST - presumably the same value, confirm
250 | | for (;;) { |
251 | | e = uhash_nextElement(fHashTable, &pos); |
252 | | if (e == NULL ) { |
253 | | break; |
254 | | } |
255 | | RBBISymbolTableEntry *s = (RBBISymbolTableEntry *)e->value.pointer; |
256 | | RBBIDebugPrintf("%s\n", CStr(s->key)()); |
257 | | RBBINode::printTree(s->val, TRUE); |
258 | | RBBINode::printTree(s->val->fLeftChild, FALSE); |
259 | | RBBIDebugPrintf("\n"); |
260 | | } |
261 | | } |
262 | | #endif |
263 | | |
264 | | |
265 | | |
266 | | |
267 | | |
268 | | U_NAMESPACE_END |
269 | | |
270 | | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |