/src/icu/source/common/rbbistbl.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | //  | 
4  |  | //  file:  rbbistbl.cpp    Implementation of the ICU RBBISymbolTable class  | 
5  |  | //  | 
6  |  | /*  | 
7  |  | ***************************************************************************  | 
8  |  | *   Copyright (C) 2002-2014 International Business Machines Corporation  | 
9  |  | *   and others. All rights reserved.  | 
10  |  | ***************************************************************************  | 
11  |  | */  | 
12  |  |  | 
13  |  | #include "unicode/utypes.h"  | 
14  |  |  | 
15  |  | #if !UCONFIG_NO_BREAK_ITERATION  | 
16  |  |  | 
17  |  | #include "unicode/unistr.h"  | 
18  |  | #include "unicode/uniset.h"  | 
19  |  | #include "unicode/uchar.h"  | 
20  |  | #include "unicode/parsepos.h"  | 
21  |  |  | 
22  |  | #include "cstr.h"  | 
23  |  | #include "rbbinode.h"  | 
24  |  | #include "rbbirb.h"  | 
25  |  | #include "umutex.h"  | 
26  |  |  | 
27  |  |  | 
28  |  | //  | 
29  |  | //  RBBISymbolTableEntry_deleter    Used by the UHashTable to delete the contents  | 
30  |  | //                                  when the hash table is deleted.  | 
31  |  | //  | 
32  |  | U_CDECL_BEGIN  | 
33  | 0  | static void U_CALLCONV RBBISymbolTableEntry_deleter(void *p) { | 
34  | 0  |     icu::RBBISymbolTableEntry *px = (icu::RBBISymbolTableEntry *)p;  | 
35  | 0  |     delete px;  | 
36  | 0  | }  | 
37  |  | U_CDECL_END  | 
38  |  |  | 
39  |  |  | 
40  |  |  | 
41  |  | U_NAMESPACE_BEGIN  | 
42  |  |  | 
43  |  | RBBISymbolTable::RBBISymbolTable(RBBIRuleScanner *rs, const UnicodeString &rules, UErrorCode &status)  | 
44  | 0  |     :fRules(rules), fRuleScanner(rs), ffffString(UChar(0xffff))  | 
45  | 0  | { | 
46  | 0  |     fHashTable       = NULL;  | 
47  | 0  |     fCachedSetLookup = NULL;  | 
48  |  |       | 
49  | 0  |     fHashTable = uhash_open(uhash_hashUnicodeString, uhash_compareUnicodeString, NULL, &status);  | 
50  |  |     // uhash_open checks status  | 
51  | 0  |     if (U_FAILURE(status)) { | 
52  | 0  |         return;  | 
53  | 0  |     }  | 
54  | 0  |     uhash_setValueDeleter(fHashTable, RBBISymbolTableEntry_deleter);  | 
55  | 0  | }  | 
56  |  |  | 
57  |  |  | 
58  |  |  | 
59  |  | RBBISymbolTable::~RBBISymbolTable()  | 
60  | 0  | { | 
61  | 0  |     uhash_close(fHashTable);  | 
62  | 0  | }  | 
63  |  |  | 
64  |  |  | 
65  |  | //  | 
66  |  | //  RBBISymbolTable::lookup       This function from the abstract symbol table interface  | 
67  |  | //                                looks up a variable name and returns a UnicodeString  | 
68  |  | //                                containing the substitution text.  | 
69  |  | //  | 
70  |  | //                                The variable name does NOT include the leading $.  | 
71  |  | //  | 
72  |  | const UnicodeString  *RBBISymbolTable::lookup(const UnicodeString& s) const  | 
73  | 0  | { | 
74  | 0  |     RBBISymbolTableEntry  *el;  | 
75  | 0  |     RBBINode              *varRefNode;  | 
76  | 0  |     RBBINode              *exprNode;  | 
77  | 0  |     RBBINode              *usetNode;  | 
78  | 0  |     const UnicodeString   *retString;  | 
79  | 0  |     RBBISymbolTable       *This = (RBBISymbolTable *)this;   // cast off const  | 
80  |  | 
  | 
81  | 0  |     el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &s);  | 
82  | 0  |     if (el == NULL) { | 
83  | 0  |         return NULL;  | 
84  | 0  |     }  | 
85  |  |  | 
86  | 0  |     varRefNode = el->val;  | 
87  | 0  |     exprNode   = varRefNode->fLeftChild;     // Root node of expression for variable  | 
88  | 0  |     if (exprNode->fType == RBBINode::setRef) { | 
89  |  |         // The $variable refers to a single UnicodeSet  | 
90  |  |         //   return the ffffString, which will subsequently be interpreted as a  | 
91  |  |         //   stand-in character for the set by RBBISymbolTable::lookupMatcher()  | 
92  | 0  |         usetNode = exprNode->fLeftChild;  | 
93  | 0  |         This->fCachedSetLookup = usetNode->fInputSet;  | 
94  | 0  |         retString = &ffffString;  | 
95  | 0  |     }  | 
96  | 0  |     else  | 
97  | 0  |     { | 
98  |  |         // The variable refers to something other than just a set.  | 
99  |  |         // return the original source string for the expression  | 
100  | 0  |         retString = &exprNode->fText;  | 
101  | 0  |         This->fCachedSetLookup = NULL;  | 
102  | 0  |     }  | 
103  | 0  |     return retString;  | 
104  | 0  | }  | 
105  |  |  | 
106  |  |  | 
107  |  |  | 
108  |  | //  | 
109  |  | //  RBBISymbolTable::lookupMatcher   This function from the abstract symbol table  | 
110  |  | //                                   interface maps a single stand-in character to a  | 
111  |  | //                                   pointer to a Unicode Set.   The Unicode Set code uses this  | 
112  |  | //                                   mechanism to get all references to the same $variable  | 
113  |  | //                                   name to refer to a single common Unicode Set instance.  | 
114  |  | //  | 
115  |  | //    This implementation cheats a little, and does not maintain a map of stand-in chars  | 
116  |  | //    to sets.  Instead, it takes advantage of the fact that  the UnicodeSet  | 
117  |  | //    constructor will always call this function right after calling lookup(),  | 
118  |  | //    and we just need to remember what set to return between these two calls.  | 
119  |  | const UnicodeFunctor *RBBISymbolTable::lookupMatcher(UChar32 ch) const  | 
120  | 0  | { | 
121  | 0  |     UnicodeSet *retVal = NULL;  | 
122  | 0  |     RBBISymbolTable *This = (RBBISymbolTable *)this;   // cast off const  | 
123  | 0  |     if (ch == 0xffff) { | 
124  | 0  |         retVal = fCachedSetLookup;  | 
125  | 0  |         This->fCachedSetLookup = 0;  | 
126  | 0  |     }  | 
127  | 0  |     return retVal;  | 
128  | 0  | }  | 
129  |  |  | 
130  |  | //  | 
131  |  | // RBBISymbolTable::parseReference   This function from the abstract symbol table interface  | 
132  |  | //                                   looks for a $variable name in the source text.  | 
133  |  | //                                   It does not look it up, only scans for it.  | 
134  |  | //                                   It is used by the UnicodeSet parser.  | 
135  |  | //  | 
136  |  | //                                   This implementation is lifted pretty much verbatim  | 
137  |  | //                                   from the rules based transliterator implementation.  | 
138  |  | //                                   I didn't see an obvious way of sharing it.  | 
139  |  | //  | 
140  |  | UnicodeString   RBBISymbolTable::parseReference(const UnicodeString& text,  | 
141  |  |                                                 ParsePosition& pos, int32_t limit) const  | 
142  | 0  | { | 
143  | 0  |     int32_t start = pos.getIndex();  | 
144  | 0  |     int32_t i = start;  | 
145  | 0  |     UnicodeString result;  | 
146  | 0  |     while (i < limit) { | 
147  | 0  |         UChar c = text.charAt(i);  | 
148  | 0  |         if ((i==start && !u_isIDStart(c)) || !u_isIDPart(c)) { | 
149  | 0  |             break;  | 
150  | 0  |         }  | 
151  | 0  |         ++i;  | 
152  | 0  |     }  | 
153  | 0  |     if (i == start) { // No valid name chars | 
154  | 0  |         return result; // Indicate failure with empty string  | 
155  | 0  |     }  | 
156  | 0  |     pos.setIndex(i);  | 
157  | 0  |     text.extractBetween(start, i, result);  | 
158  | 0  |     return result;  | 
159  | 0  | }  | 
160  |  |  | 
161  |  |  | 
162  |  |  | 
163  |  | //  | 
164  |  | // RBBISymbolTable::lookupNode      Given a key (a variable name), return the  | 
165  |  | //                                  corresponding RBBI Node.  If there is no entry  | 
166  |  | //                                  in the table for this name, return NULL.  | 
167  |  | //  | 
168  | 0  | RBBINode       *RBBISymbolTable::lookupNode(const UnicodeString &key) const{ | 
169  |  | 
  | 
170  | 0  |     RBBINode             *retNode = NULL;  | 
171  | 0  |     RBBISymbolTableEntry *el;  | 
172  |  | 
  | 
173  | 0  |     el = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);  | 
174  | 0  |     if (el != NULL) { | 
175  | 0  |         retNode = el->val;  | 
176  | 0  |     }  | 
177  | 0  |     return retNode;  | 
178  | 0  | }  | 
179  |  |  | 
180  |  |  | 
181  |  | //  | 
182  |  | //    RBBISymbolTable::addEntry     Add a new entry to the symbol table.  | 
183  |  | //                                  Indicate an error if the name already exists -  | 
184  |  | //                                    this will only occur in the case of duplicate  | 
185  |  | //                                    variable assignments.  | 
186  |  | //  | 
187  | 0  | void            RBBISymbolTable::addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err) { | 
188  | 0  |     RBBISymbolTableEntry *e;  | 
189  |  |     /* test for buffer overflows */  | 
190  | 0  |     if (U_FAILURE(err)) { | 
191  | 0  |         return;  | 
192  | 0  |     }  | 
193  | 0  |     e = (RBBISymbolTableEntry *)uhash_get(fHashTable, &key);  | 
194  | 0  |     if (e != NULL) { | 
195  | 0  |         err = U_BRK_VARIABLE_REDFINITION;  | 
196  | 0  |         return;  | 
197  | 0  |     }  | 
198  |  |  | 
199  | 0  |     e = new RBBISymbolTableEntry;  | 
200  | 0  |     if (e == NULL) { | 
201  | 0  |         err = U_MEMORY_ALLOCATION_ERROR;  | 
202  | 0  |         return;  | 
203  | 0  |     }  | 
204  | 0  |     e->key = key;  | 
205  | 0  |     e->val = val;  | 
206  | 0  |     uhash_put( fHashTable, &e->key, e, &err);  | 
207  | 0  | }  | 
208  |  |  | 
209  |  |  | 
210  | 0  | RBBISymbolTableEntry::RBBISymbolTableEntry() : UMemory(), key(), val(NULL) {} | 
211  |  |  | 
212  | 0  | RBBISymbolTableEntry::~RBBISymbolTableEntry() { | 
213  |  |     // The "val" of a symbol table entry is a variable reference node.  | 
214  |  |     // The l. child of the val is the rhs expression from the assignment.  | 
215  |  |     // Unlike other node types, children of variable reference nodes are not  | 
216  |  |     //    automatically recursively deleted.  We do it manually here.  | 
217  | 0  |     delete val->fLeftChild;  | 
218  | 0  |     val->fLeftChild = NULL;  | 
219  |  | 
  | 
220  | 0  |     delete  val;  | 
221  |  |  | 
222  |  |     // Note: the key UnicodeString is destructed by virtue of being in the object by value.  | 
223  | 0  | }  | 
224  |  |  | 
225  |  |  | 
226  |  | //  | 
227  |  | //  RBBISymbolTable::rbbiSymtablePrint    Debugging function, dump out the symbol table contents.  | 
228  |  | //  | 
229  |  | #ifdef RBBI_DEBUG  | 
230  |  | void RBBISymbolTable::rbbiSymtablePrint() const { | 
231  |  |     RBBIDebugPrintf("Variable Definitions Symbol Table\n" | 
232  |  |            "Name                  Node         serial  String Val\n"  | 
233  |  |            "-------------------------------------------------------------------\n");  | 
234  |  |  | 
235  |  |     int32_t pos = UHASH_FIRST;  | 
236  |  |     const UHashElement  *e   = NULL;  | 
237  |  |     for (;;) { | 
238  |  |         e = uhash_nextElement(fHashTable,  &pos);  | 
239  |  |         if (e == NULL ) { | 
240  |  |             break;  | 
241  |  |         }  | 
242  |  |         RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;  | 
243  |  |  | 
244  |  |         RBBIDebugPrintf("%-19s   %8p %7d ", CStr(s->key)(), (void *)s->val, s->val->fSerialNum); | 
245  |  |         RBBIDebugPrintf(" %s\n", CStr(s->val->fLeftChild->fText)()); | 
246  |  |     }  | 
247  |  |  | 
248  |  |     RBBIDebugPrintf("\nParsed Variable Definitions\n"); | 
249  |  |     pos = -1;  | 
250  |  |     for (;;) { | 
251  |  |         e = uhash_nextElement(fHashTable,  &pos);  | 
252  |  |         if (e == NULL ) { | 
253  |  |             break;  | 
254  |  |         }  | 
255  |  |         RBBISymbolTableEntry  *s   = (RBBISymbolTableEntry *)e->value.pointer;  | 
256  |  |         RBBIDebugPrintf("%s\n", CStr(s->key)()); | 
257  |  |         RBBINode::printTree(s->val, TRUE);  | 
258  |  |         RBBINode::printTree(s->val->fLeftChild, FALSE);  | 
259  |  |         RBBIDebugPrintf("\n"); | 
260  |  |     }  | 
261  |  | }  | 
262  |  | #endif  | 
263  |  |  | 
264  |  |  | 
265  |  |  | 
266  |  |  | 
267  |  |  | 
268  |  | U_NAMESPACE_END  | 
269  |  |  | 
270  |  | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */  |