/src/icu/source/common/rbbinode.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | *************************************************************************** |
5 | | * Copyright (C) 2002-2016 International Business Machines Corporation * |
6 | | * and others. All rights reserved. * |
7 | | *************************************************************************** |
8 | | */ |
9 | | |
10 | | // |
11 | | // File: rbbinode.cpp |
12 | | // |
13 | | // Implementation of class RBBINode, which represents a node in the |
14 | | // tree generated when parsing the Rules Based Break Iterator rules. |
15 | | // |
16 | | // This "Class" is actually closer to a struct. |
17 | | // Code using it is expected to directly access fields much of the time. |
18 | | // |
19 | | |
20 | | #include "unicode/utypes.h" |
21 | | |
22 | | #if !UCONFIG_NO_BREAK_ITERATION |
23 | | |
24 | | #include "unicode/unistr.h" |
25 | | #include "unicode/uniset.h" |
26 | | #include "unicode/uchar.h" |
27 | | #include "unicode/parsepos.h" |
28 | | |
29 | | #include "cstr.h" |
30 | | #include "uvector.h" |
31 | | |
32 | | #include "rbbirb.h" |
33 | | #include "rbbinode.h" |
34 | | |
35 | | #include "uassert.h" |
36 | | |
37 | | |
38 | | U_NAMESPACE_BEGIN |
39 | | |
40 | | #ifdef RBBI_DEBUG |
41 | | static int gLastSerial = 0; |
42 | | #endif |
43 | | |
44 | | |
45 | | //------------------------------------------------------------------------- |
46 | | // |
47 | | // Constructor. Just set the fields to reasonable default values. |
48 | | // |
49 | | //------------------------------------------------------------------------- |
50 | 0 | RBBINode::RBBINode(NodeType t) : UMemory() { |
51 | | #ifdef RBBI_DEBUG |
52 | | fSerialNum = ++gLastSerial; |
53 | | #endif |
54 | 0 | fType = t; |
55 | 0 | fParent = NULL; |
56 | 0 | fLeftChild = NULL; |
57 | 0 | fRightChild = NULL; |
58 | 0 | fInputSet = NULL; |
59 | 0 | fFirstPos = 0; |
60 | 0 | fLastPos = 0; |
61 | 0 | fNullable = FALSE; |
62 | 0 | fLookAheadEnd = FALSE; |
63 | 0 | fRuleRoot = FALSE; |
64 | 0 | fChainIn = FALSE; |
65 | 0 | fVal = 0; |
66 | 0 | fPrecedence = precZero; |
67 | |
|
68 | 0 | UErrorCode status = U_ZERO_ERROR; |
69 | 0 | fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere |
70 | 0 | fLastPosSet = new UVector(status); |
71 | 0 | fFollowPos = new UVector(status); |
72 | 0 | if (t==opCat) {fPrecedence = precOpCat;} |
73 | 0 | else if (t==opOr) {fPrecedence = precOpOr;} |
74 | 0 | else if (t==opStart) {fPrecedence = precStart;} |
75 | 0 | else if (t==opLParen) {fPrecedence = precLParen;} |
76 | |
|
77 | 0 | } |
78 | | |
79 | | |
80 | 0 | RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { |
81 | | #ifdef RBBI_DEBUG |
82 | | fSerialNum = ++gLastSerial; |
83 | | #endif |
84 | 0 | fType = other.fType; |
85 | 0 | fParent = NULL; |
86 | 0 | fLeftChild = NULL; |
87 | 0 | fRightChild = NULL; |
88 | 0 | fInputSet = other.fInputSet; |
89 | 0 | fPrecedence = other.fPrecedence; |
90 | 0 | fText = other.fText; |
91 | 0 | fFirstPos = other.fFirstPos; |
92 | 0 | fLastPos = other.fLastPos; |
93 | 0 | fNullable = other.fNullable; |
94 | 0 | fVal = other.fVal; |
95 | 0 | fRuleRoot = FALSE; |
96 | 0 | fChainIn = other.fChainIn; |
97 | 0 | UErrorCode status = U_ZERO_ERROR; |
98 | 0 | fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere |
99 | 0 | fLastPosSet = new UVector(status); |
100 | 0 | fFollowPos = new UVector(status); |
101 | 0 | } |
102 | | |
103 | | |
104 | | //------------------------------------------------------------------------- |
105 | | // |
106 | | // Destructor. Deletes both this node AND any child nodes, |
107 | | // except in the case of variable reference nodes. For |
108 | | // these, the l. child points back to the definition, which |
109 | | // is common for all references to the variable, meaning |
110 | | // it can't be deleted here. |
111 | | // |
112 | | //------------------------------------------------------------------------- |
113 | 0 | RBBINode::~RBBINode() { |
114 | | // printf("deleting node %8x serial %4d\n", this, this->fSerialNum); |
115 | 0 | delete fInputSet; |
116 | 0 | fInputSet = NULL; |
117 | |
|
118 | 0 | switch (this->fType) { |
119 | 0 | case varRef: |
120 | 0 | case setRef: |
121 | | // for these node types, multiple instances point to the same "children" |
122 | | // Storage ownership of children handled elsewhere. Don't delete here. |
123 | 0 | break; |
124 | | |
125 | 0 | default: |
126 | 0 | delete fLeftChild; |
127 | 0 | fLeftChild = NULL; |
128 | 0 | delete fRightChild; |
129 | 0 | fRightChild = NULL; |
130 | 0 | } |
131 | | |
132 | | |
133 | 0 | delete fFirstPosSet; |
134 | 0 | delete fLastPosSet; |
135 | 0 | delete fFollowPos; |
136 | |
|
137 | 0 | } |
138 | | |
139 | | |
140 | | //------------------------------------------------------------------------- |
141 | | // |
142 | | // cloneTree Make a copy of the subtree rooted at this node. |
143 | | // Discard any variable references encountered along the way, |
144 | | // and replace with copies of the variable's definitions. |
145 | | // Used to replicate the expression underneath variable |
146 | | // references in preparation for generating the DFA tables. |
147 | | // |
148 | | //------------------------------------------------------------------------- |
149 | 0 | RBBINode *RBBINode::cloneTree() { |
150 | 0 | RBBINode *n; |
151 | |
|
152 | 0 | if (fType == RBBINode::varRef) { |
153 | | // If the current node is a variable reference, skip over it |
154 | | // and clone the definition of the variable instead. |
155 | 0 | n = fLeftChild->cloneTree(); |
156 | 0 | } else if (fType == RBBINode::uset) { |
157 | 0 | n = this; |
158 | 0 | } else { |
159 | 0 | n = new RBBINode(*this); |
160 | | // Check for null pointer. |
161 | 0 | if (n != NULL) { |
162 | 0 | if (fLeftChild != NULL) { |
163 | 0 | n->fLeftChild = fLeftChild->cloneTree(); |
164 | 0 | n->fLeftChild->fParent = n; |
165 | 0 | } |
166 | 0 | if (fRightChild != NULL) { |
167 | 0 | n->fRightChild = fRightChild->cloneTree(); |
168 | 0 | n->fRightChild->fParent = n; |
169 | 0 | } |
170 | 0 | } |
171 | 0 | } |
172 | 0 | return n; |
173 | 0 | } |
174 | | |
175 | | |
176 | | |
177 | | //------------------------------------------------------------------------- |
178 | | // |
179 | | // flattenVariables Walk a parse tree, replacing any variable |
180 | | // references with a copy of the variable's definition. |
181 | | // Aside from variables, the tree is not changed. |
182 | | // |
183 | | // Return the root of the tree. If the root was not a variable |
184 | | // reference, it remains unchanged - the root we started with |
185 | | // is the root we return. If, however, the root was a variable |
186 | | // reference, the root of the newly cloned replacement tree will |
187 | | // be returned, and the original tree deleted. |
188 | | // |
189 | | // This function works by recursively walking the tree |
190 | | // without doing anything until a variable reference is |
191 | | // found, then calling cloneTree() at that point. Any |
192 | | // nested references are handled by cloneTree(), not here. |
193 | | // |
194 | | //------------------------------------------------------------------------- |
195 | 0 | RBBINode *RBBINode::flattenVariables() { |
196 | 0 | if (fType == varRef) { |
197 | 0 | RBBINode *retNode = fLeftChild->cloneTree(); |
198 | 0 | if (retNode != NULL) { |
199 | 0 | retNode->fRuleRoot = this->fRuleRoot; |
200 | 0 | retNode->fChainIn = this->fChainIn; |
201 | 0 | } |
202 | 0 | delete this; // TODO: undefined behavior. Fix. |
203 | 0 | return retNode; |
204 | 0 | } |
205 | | |
206 | 0 | if (fLeftChild != NULL) { |
207 | 0 | fLeftChild = fLeftChild->flattenVariables(); |
208 | 0 | fLeftChild->fParent = this; |
209 | 0 | } |
210 | 0 | if (fRightChild != NULL) { |
211 | 0 | fRightChild = fRightChild->flattenVariables(); |
212 | 0 | fRightChild->fParent = this; |
213 | 0 | } |
214 | 0 | return this; |
215 | 0 | } |
216 | | |
217 | | |
218 | | //------------------------------------------------------------------------- |
219 | | // |
220 | | // flattenSets Walk the parse tree, replacing any nodes of type setRef |
221 | | // with a copy of the expression tree for the set. A set's |
222 | | // equivalent expression tree is precomputed and saved as |
223 | | // the left child of the uset node. |
224 | | // |
225 | | //------------------------------------------------------------------------- |
226 | 0 | void RBBINode::flattenSets() { |
227 | 0 | U_ASSERT(fType != setRef); |
228 | |
|
229 | 0 | if (fLeftChild != NULL) { |
230 | 0 | if (fLeftChild->fType==setRef) { |
231 | 0 | RBBINode *setRefNode = fLeftChild; |
232 | 0 | RBBINode *usetNode = setRefNode->fLeftChild; |
233 | 0 | RBBINode *replTree = usetNode->fLeftChild; |
234 | 0 | fLeftChild = replTree->cloneTree(); |
235 | 0 | fLeftChild->fParent = this; |
236 | 0 | delete setRefNode; |
237 | 0 | } else { |
238 | 0 | fLeftChild->flattenSets(); |
239 | 0 | } |
240 | 0 | } |
241 | |
|
242 | 0 | if (fRightChild != NULL) { |
243 | 0 | if (fRightChild->fType==setRef) { |
244 | 0 | RBBINode *setRefNode = fRightChild; |
245 | 0 | RBBINode *usetNode = setRefNode->fLeftChild; |
246 | 0 | RBBINode *replTree = usetNode->fLeftChild; |
247 | 0 | fRightChild = replTree->cloneTree(); |
248 | 0 | fRightChild->fParent = this; |
249 | 0 | delete setRefNode; |
250 | 0 | } else { |
251 | 0 | fRightChild->flattenSets(); |
252 | 0 | } |
253 | 0 | } |
254 | 0 | } |
255 | | |
256 | | |
257 | | |
258 | | //------------------------------------------------------------------------- |
259 | | // |
260 | | // findNodes() Locate all the nodes of the specified type, starting |
261 | | // at the specified root. |
262 | | // |
263 | | //------------------------------------------------------------------------- |
264 | 0 | void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) { |
265 | | /* test for buffer overflows */ |
266 | 0 | if (U_FAILURE(status)) { |
267 | 0 | return; |
268 | 0 | } |
269 | 0 | if (fType == kind) { |
270 | 0 | dest->addElementX(this, status); |
271 | 0 | } |
272 | 0 | if (fLeftChild != NULL) { |
273 | 0 | fLeftChild->findNodes(dest, kind, status); |
274 | 0 | } |
275 | 0 | if (fRightChild != NULL) { |
276 | 0 | fRightChild->findNodes(dest, kind, status); |
277 | 0 | } |
278 | 0 | } |
279 | | |
280 | | |
281 | | //------------------------------------------------------------------------- |
282 | | // |
283 | | // print. Print out a single node, for debugging. |
284 | | // |
285 | | //------------------------------------------------------------------------- |
286 | | #ifdef RBBI_DEBUG |
287 | | |
288 | | static int32_t serial(const RBBINode *node) { |
289 | | return (node == NULL? -1 : node->fSerialNum); |
290 | | } |
291 | | |
292 | | |
293 | | void RBBINode::printNode(const RBBINode *node) { |
294 | | static const char * const nodeTypeNames[] = { |
295 | | "setRef", |
296 | | "uset", |
297 | | "varRef", |
298 | | "leafChar", |
299 | | "lookAhead", |
300 | | "tag", |
301 | | "endMark", |
302 | | "opStart", |
303 | | "opCat", |
304 | | "opOr", |
305 | | "opStar", |
306 | | "opPlus", |
307 | | "opQuestion", |
308 | | "opBreak", |
309 | | "opReverse", |
310 | | "opLParen" |
311 | | }; |
312 | | |
313 | | if (node==NULL) { |
314 | | RBBIDebugPrintf("%10p", (void *)node); |
315 | | } else { |
316 | | RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ", |
317 | | (void *)node, node->fSerialNum, nodeTypeNames[node->fType], |
318 | | node->fRuleRoot?'R':' ', node->fChainIn?'C':' ', |
319 | | serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent), |
320 | | node->fFirstPos, node->fVal); |
321 | | if (node->fType == varRef) { |
322 | | RBBI_DEBUG_printUnicodeString(node->fText); |
323 | | } |
324 | | } |
325 | | RBBIDebugPrintf("\n"); |
326 | | } |
327 | | #endif |
328 | | |
329 | | |
330 | | #ifdef RBBI_DEBUG |
331 | | U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) { |
332 | | RBBIDebugPrintf("%*s", minWidth, CStr(s)()); |
333 | | } |
334 | | #endif |
335 | | |
336 | | |
337 | | //------------------------------------------------------------------------- |
338 | | // |
339 | | // print. Print out the tree of nodes rooted at "this" |
340 | | // |
341 | | //------------------------------------------------------------------------- |
342 | | #ifdef RBBI_DEBUG |
343 | | void RBBINode::printNodeHeader() { |
344 | | RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n"); |
345 | | } |
346 | | |
347 | | void RBBINode::printTree(const RBBINode *node, UBool printHeading) { |
348 | | if (printHeading) { |
349 | | printNodeHeader(); |
350 | | } |
351 | | printNode(node); |
352 | | if (node != NULL) { |
353 | | // Only dump the definition under a variable reference if asked to. |
354 | | // Unconditionally dump children of all other node types. |
355 | | if (node->fType != varRef) { |
356 | | if (node->fLeftChild != NULL) { |
357 | | printTree(node->fLeftChild, FALSE); |
358 | | } |
359 | | |
360 | | if (node->fRightChild != NULL) { |
361 | | printTree(node->fRightChild, FALSE); |
362 | | } |
363 | | } |
364 | | } |
365 | | } |
366 | | #endif |
367 | | |
368 | | |
369 | | |
370 | | U_NAMESPACE_END |
371 | | |
372 | | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |