/src/icu/source/common/rbbitblb.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | **********************************************************************  | 
5  |  | *   Copyright (c) 2002-2016, International Business Machines  | 
6  |  | *   Corporation and others.  All Rights Reserved.  | 
7  |  | **********************************************************************  | 
8  |  | */  | 
9  |  | //  | 
10  |  | //  rbbitblb.cpp  | 
11  |  | //  | 
12  |  |  | 
13  |  |  | 
14  |  | #include "unicode/utypes.h"  | 
15  |  |  | 
16  |  | #if !UCONFIG_NO_BREAK_ITERATION  | 
17  |  |  | 
18  |  | #include "unicode/unistr.h"  | 
19  |  | #include "rbbitblb.h"  | 
20  |  | #include "rbbirb.h"  | 
21  |  | #include "rbbiscan.h"  | 
22  |  | #include "rbbisetb.h"  | 
23  |  | #include "rbbidata.h"  | 
24  |  | #include "cstring.h"  | 
25  |  | #include "uassert.h"  | 
26  |  | #include "uvectr32.h"  | 
27  |  | #include "cmemory.h"  | 
28  |  |  | 
29  |  | U_NAMESPACE_BEGIN  | 
30  |  |  | 
31  |  | const int32_t kMaxStateFor8BitsTable = 255;  | 
32  |  |  | 
33  |  | RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status) :  | 
34  | 0  |         fRB(rb),  | 
35  | 0  |         fTree(*rootNode),  | 
36  | 0  |         fStatus(&status),  | 
37  | 0  |         fDStates(nullptr),  | 
38  | 0  |         fSafeTable(nullptr) { | 
39  | 0  |     if (U_FAILURE(status)) { | 
40  | 0  |         return;  | 
41  | 0  |     }  | 
42  |  |     // fDStates is UVector<RBBIStateDescriptor *>  | 
43  | 0  |     fDStates = new UVector(status);  | 
44  | 0  |     if (U_SUCCESS(status) && fDStates == nullptr ) { | 
45  | 0  |         status = U_MEMORY_ALLOCATION_ERROR;  | 
46  | 0  |     }  | 
47  | 0  | }  | 
48  |  |  | 
49  |  |  | 
50  |  |  | 
51  | 0  | RBBITableBuilder::~RBBITableBuilder() { | 
52  | 0  |     int i;  | 
53  | 0  |     for (i=0; i<fDStates->size(); i++) { | 
54  | 0  |         delete (RBBIStateDescriptor *)fDStates->elementAt(i);  | 
55  | 0  |     }  | 
56  | 0  |     delete fDStates;  | 
57  | 0  |     delete fSafeTable;  | 
58  | 0  |     delete fLookAheadRuleMap;  | 
59  | 0  | }  | 
60  |  |  | 
61  |  |  | 
62  |  | //-----------------------------------------------------------------------------  | 
63  |  | //  | 
64  |  | //   RBBITableBuilder::buildForwardTable  -  This is the main function for building  | 
65  |  | //                               the DFA state transition table from the RBBI rules parse tree.  | 
66  |  | //  | 
67  |  | //-----------------------------------------------------------------------------  | 
68  | 0  | void  RBBITableBuilder::buildForwardTable() { | 
69  |  | 
  | 
70  | 0  |     if (U_FAILURE(*fStatus)) { | 
71  | 0  |         return;  | 
72  | 0  |     }  | 
73  |  |  | 
74  |  |     // If there were no rules, just return.  This situation can easily arise  | 
75  |  |     //   for the reverse rules.  | 
76  | 0  |     if (fTree==NULL) { | 
77  | 0  |         return;  | 
78  | 0  |     }  | 
79  |  |  | 
80  |  |     //  | 
81  |  |     // Walk through the tree, replacing any references to $variables with a copy of the  | 
82  |  |     //   parse tree for the substitution expression.  | 
83  |  |     //  | 
84  | 0  |     fTree = fTree->flattenVariables();  | 
85  |  | #ifdef RBBI_DEBUG  | 
86  |  |     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) { | 
87  |  |         RBBIDebugPuts("\nParse tree after flattening variable references."); | 
88  |  |         RBBINode::printTree(fTree, TRUE);  | 
89  |  |     }  | 
90  |  | #endif  | 
91  |  |  | 
92  |  |     //  | 
93  |  |     // If the rules contained any references to {bof}  | 
94  |  |     //   add a {bof} <cat> <former root of tree> to the | 
95  |  |     //   tree.  Means that all matches must start out with the   | 
96  |  |     //   {bof} fake character. | 
97  |  |     //   | 
98  | 0  |     if (fRB->fSetBuilder->sawBOF()) { | 
99  | 0  |         RBBINode *bofTop    = new RBBINode(RBBINode::opCat);  | 
100  | 0  |         RBBINode *bofLeaf   = new RBBINode(RBBINode::leafChar);  | 
101  |  |         // Delete and exit if memory allocation failed.  | 
102  | 0  |         if (bofTop == NULL || bofLeaf == NULL) { | 
103  | 0  |             *fStatus = U_MEMORY_ALLOCATION_ERROR;  | 
104  | 0  |             delete bofTop;  | 
105  | 0  |             delete bofLeaf;  | 
106  | 0  |             return;  | 
107  | 0  |         }  | 
108  | 0  |         bofTop->fLeftChild  = bofLeaf;  | 
109  | 0  |         bofTop->fRightChild = fTree;  | 
110  | 0  |         bofLeaf->fParent    = bofTop;  | 
111  | 0  |         bofLeaf->fVal       = 2;      // Reserved value for {bof}. | 
112  | 0  |         fTree               = bofTop;  | 
113  | 0  |     }  | 
114  |  |  | 
115  |  |     //  | 
116  |  |     // Add a unique right-end marker to the expression.  | 
117  |  |     //   Appears as a cat-node, left child being the original tree,  | 
118  |  |     //   right child being the end marker.  | 
119  |  |     //  | 
120  | 0  |     RBBINode *cn = new RBBINode(RBBINode::opCat);  | 
121  |  |     // Exit if memory allocation failed.  | 
122  | 0  |     if (cn == NULL) { | 
123  | 0  |         *fStatus = U_MEMORY_ALLOCATION_ERROR;  | 
124  | 0  |         return;  | 
125  | 0  |     }  | 
126  | 0  |     cn->fLeftChild = fTree;  | 
127  | 0  |     fTree->fParent = cn;  | 
128  | 0  |     RBBINode *endMarkerNode = cn->fRightChild = new RBBINode(RBBINode::endMark);  | 
129  |  |     // Delete and exit if memory allocation failed.  | 
130  | 0  |     if (cn->fRightChild == NULL) { | 
131  | 0  |         *fStatus = U_MEMORY_ALLOCATION_ERROR;  | 
132  | 0  |         delete cn;  | 
133  | 0  |         return;  | 
134  | 0  |     }  | 
135  | 0  |     cn->fRightChild->fParent = cn;  | 
136  | 0  |     fTree = cn;  | 
137  |  |  | 
138  |  |     //  | 
139  |  |     //  Replace all references to UnicodeSets with the tree for the equivalent  | 
140  |  |     //      expression.  | 
141  |  |     //  | 
142  | 0  |     fTree->flattenSets();  | 
143  |  | #ifdef RBBI_DEBUG  | 
144  |  |     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) { | 
145  |  |         RBBIDebugPuts("\nParse tree after flattening Unicode Set references."); | 
146  |  |         RBBINode::printTree(fTree, TRUE);  | 
147  |  |     }  | 
148  |  | #endif  | 
149  |  |  | 
150  |  |  | 
151  |  |     //  | 
152  |  |     // calculate the functions nullable, firstpos, lastpos and followpos on  | 
153  |  |     // nodes in the parse tree.  | 
154  |  |     //    See the algorithm description in Aho.  | 
155  |  |     //    Understanding how this works by looking at the code alone will be  | 
156  |  |     //       nearly impossible.  | 
157  |  |     //  | 
158  | 0  |     calcNullable(fTree);  | 
159  | 0  |     calcFirstPos(fTree);  | 
160  | 0  |     calcLastPos(fTree);  | 
161  | 0  |     calcFollowPos(fTree);  | 
162  | 0  |     if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) { | 
163  | 0  |         RBBIDebugPuts("\n"); | 
164  | 0  |         printPosSets(fTree);  | 
165  | 0  |     }  | 
166  |  |  | 
167  |  |     //  | 
168  |  |     //  For "chained" rules, modify the followPos sets  | 
169  |  |     //  | 
170  | 0  |     if (fRB->fChainRules) { | 
171  | 0  |         calcChainedFollowPos(fTree, endMarkerNode);  | 
172  | 0  |     }  | 
173  |  |  | 
174  |  |     //  | 
175  |  |     //  BOF (start of input) test fixup.  | 
176  |  |     //  | 
177  | 0  |     if (fRB->fSetBuilder->sawBOF()) { | 
178  | 0  |         bofFixup();  | 
179  | 0  |     }  | 
180  |  |  | 
181  |  |     //  | 
182  |  |     // Build the DFA state transition tables.  | 
183  |  |     //  | 
184  | 0  |     buildStateTable();  | 
185  | 0  |     mapLookAheadRules();  | 
186  | 0  |     flagAcceptingStates();  | 
187  | 0  |     flagLookAheadStates();  | 
188  | 0  |     flagTaggedStates();  | 
189  |  |  | 
190  |  |     //  | 
191  |  |     // Update the global table of rule status {tag} values | 
192  |  |     // The rule builder has a global vector of status values that are common  | 
193  |  |     //    for all tables.  Merge the ones from this table into the global set.  | 
194  |  |     //  | 
195  | 0  |     mergeRuleStatusVals();  | 
196  | 0  | }  | 
197  |  |  | 
198  |  |  | 
199  |  |  | 
200  |  | //-----------------------------------------------------------------------------  | 
201  |  | //  | 
202  |  | //   calcNullable.    Impossible to explain succinctly.  See Aho, section 3.9  | 
203  |  | //  | 
204  |  | //-----------------------------------------------------------------------------  | 
205  | 0  | void RBBITableBuilder::calcNullable(RBBINode *n) { | 
206  | 0  |     if (n == NULL) { | 
207  | 0  |         return;  | 
208  | 0  |     }  | 
209  | 0  |     if (n->fType == RBBINode::setRef ||  | 
210  | 0  |         n->fType == RBBINode::endMark ) { | 
211  |  |         // These are non-empty leaf node types.  | 
212  | 0  |         n->fNullable = FALSE;  | 
213  | 0  |         return;  | 
214  | 0  |     }  | 
215  |  |  | 
216  | 0  |     if (n->fType == RBBINode::lookAhead || n->fType == RBBINode::tag) { | 
217  |  |         // Lookahead marker node.  It's a leaf, so no recursion on children.  | 
218  |  |         // It's nullable because it does not match any literal text from the input stream.  | 
219  | 0  |         n->fNullable = TRUE;  | 
220  | 0  |         return;  | 
221  | 0  |     }  | 
222  |  |  | 
223  |  |  | 
224  |  |     // The node is not a leaf.  | 
225  |  |     //  Calculate nullable on its children.  | 
226  | 0  |     calcNullable(n->fLeftChild);  | 
227  | 0  |     calcNullable(n->fRightChild);  | 
228  |  |  | 
229  |  |     // Apply functions from table 3.40 in Aho  | 
230  | 0  |     if (n->fType == RBBINode::opOr) { | 
231  | 0  |         n->fNullable = n->fLeftChild->fNullable || n->fRightChild->fNullable;  | 
232  | 0  |     }  | 
233  | 0  |     else if (n->fType == RBBINode::opCat) { | 
234  | 0  |         n->fNullable = n->fLeftChild->fNullable && n->fRightChild->fNullable;  | 
235  | 0  |     }  | 
236  | 0  |     else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opQuestion) { | 
237  | 0  |         n->fNullable = TRUE;  | 
238  | 0  |     }  | 
239  | 0  |     else { | 
240  | 0  |         n->fNullable = FALSE;  | 
241  | 0  |     }  | 
242  | 0  | }  | 
243  |  |  | 
244  |  |  | 
245  |  |  | 
246  |  |  | 
247  |  | //-----------------------------------------------------------------------------  | 
248  |  | //  | 
249  |  | //   calcFirstPos.    Impossible to explain succinctly.  See Aho, section 3.9  | 
250  |  | //  | 
251  |  | //-----------------------------------------------------------------------------  | 
252  | 0  | void RBBITableBuilder::calcFirstPos(RBBINode *n) { | 
253  | 0  |     if (n == NULL) { | 
254  | 0  |         return;  | 
255  | 0  |     }  | 
256  | 0  |     if (n->fType == RBBINode::leafChar  ||  | 
257  | 0  |         n->fType == RBBINode::endMark   ||  | 
258  | 0  |         n->fType == RBBINode::lookAhead ||  | 
259  | 0  |         n->fType == RBBINode::tag) { | 
260  |  |         // These are non-empty leaf node types.  | 
261  |  |         // Note: In order to maintain the sort invariant on the set,  | 
262  |  |         // this function should only be called on a node whose set is  | 
263  |  |         // empty to start with.  | 
264  | 0  |         n->fFirstPosSet->addElementX(n, *fStatus);  | 
265  | 0  |         return;  | 
266  | 0  |     }  | 
267  |  |  | 
268  |  |     // The node is not a leaf.  | 
269  |  |     //  Calculate firstPos on its children.  | 
270  | 0  |     calcFirstPos(n->fLeftChild);  | 
271  | 0  |     calcFirstPos(n->fRightChild);  | 
272  |  |  | 
273  |  |     // Apply functions from table 3.40 in Aho  | 
274  | 0  |     if (n->fType == RBBINode::opOr) { | 
275  | 0  |         setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);  | 
276  | 0  |         setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);  | 
277  | 0  |     }  | 
278  | 0  |     else if (n->fType == RBBINode::opCat) { | 
279  | 0  |         setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);  | 
280  | 0  |         if (n->fLeftChild->fNullable) { | 
281  | 0  |             setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);  | 
282  | 0  |         }  | 
283  | 0  |     }  | 
284  | 0  |     else if (n->fType == RBBINode::opStar ||  | 
285  | 0  |              n->fType == RBBINode::opQuestion ||  | 
286  | 0  |              n->fType == RBBINode::opPlus) { | 
287  | 0  |         setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);  | 
288  | 0  |     }  | 
289  | 0  | }  | 
290  |  |  | 
291  |  |  | 
292  |  |  | 
293  |  | //-----------------------------------------------------------------------------  | 
294  |  | //  | 
295  |  | //   calcLastPos.    Impossible to explain succinctly.  See Aho, section 3.9  | 
296  |  | //  | 
297  |  | //-----------------------------------------------------------------------------  | 
298  | 0  | void RBBITableBuilder::calcLastPos(RBBINode *n) { | 
299  | 0  |     if (n == NULL) { | 
300  | 0  |         return;  | 
301  | 0  |     }  | 
302  | 0  |     if (n->fType == RBBINode::leafChar  ||  | 
303  | 0  |         n->fType == RBBINode::endMark   ||  | 
304  | 0  |         n->fType == RBBINode::lookAhead ||  | 
305  | 0  |         n->fType == RBBINode::tag) { | 
306  |  |         // These are non-empty leaf node types.  | 
307  |  |         // Note: In order to maintain the sort invariant on the set,  | 
308  |  |         // this function should only be called on a node whose set is  | 
309  |  |         // empty to start with.  | 
310  | 0  |         n->fLastPosSet->addElementX(n, *fStatus);  | 
311  | 0  |         return;  | 
312  | 0  |     }  | 
313  |  |  | 
314  |  |     // The node is not a leaf.  | 
315  |  |     //  Calculate lastPos on its children.  | 
316  | 0  |     calcLastPos(n->fLeftChild);  | 
317  | 0  |     calcLastPos(n->fRightChild);  | 
318  |  |  | 
319  |  |     // Apply functions from table 3.40 in Aho  | 
320  | 0  |     if (n->fType == RBBINode::opOr) { | 
321  | 0  |         setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);  | 
322  | 0  |         setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);  | 
323  | 0  |     }  | 
324  | 0  |     else if (n->fType == RBBINode::opCat) { | 
325  | 0  |         setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);  | 
326  | 0  |         if (n->fRightChild->fNullable) { | 
327  | 0  |             setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);  | 
328  | 0  |         }  | 
329  | 0  |     }  | 
330  | 0  |     else if (n->fType == RBBINode::opStar     ||  | 
331  | 0  |              n->fType == RBBINode::opQuestion ||  | 
332  | 0  |              n->fType == RBBINode::opPlus) { | 
333  | 0  |         setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);  | 
334  | 0  |     }  | 
335  | 0  | }  | 
336  |  |  | 
337  |  |  | 
338  |  |  | 
339  |  | //-----------------------------------------------------------------------------  | 
340  |  | //  | 
341  |  | //   calcFollowPos.    Impossible to explain succinctly.  See Aho, section 3.9  | 
342  |  | //  | 
343  |  | //-----------------------------------------------------------------------------  | 
344  | 0  | void RBBITableBuilder::calcFollowPos(RBBINode *n) { | 
345  | 0  |     if (n == NULL ||  | 
346  | 0  |         n->fType == RBBINode::leafChar ||  | 
347  | 0  |         n->fType == RBBINode::endMark) { | 
348  | 0  |         return;  | 
349  | 0  |     }  | 
350  |  |  | 
351  | 0  |     calcFollowPos(n->fLeftChild);  | 
352  | 0  |     calcFollowPos(n->fRightChild);  | 
353  |  |  | 
354  |  |     // Aho rule #1  | 
355  | 0  |     if (n->fType == RBBINode::opCat) { | 
356  | 0  |         RBBINode *i;   // is 'i' in Aho's description  | 
357  | 0  |         uint32_t     ix;  | 
358  |  | 
  | 
359  | 0  |         UVector *LastPosOfLeftChild = n->fLeftChild->fLastPosSet;  | 
360  |  | 
  | 
361  | 0  |         for (ix=0; ix<(uint32_t)LastPosOfLeftChild->size(); ix++) { | 
362  | 0  |             i = (RBBINode *)LastPosOfLeftChild->elementAt(ix);  | 
363  | 0  |             setAdd(i->fFollowPos, n->fRightChild->fFirstPosSet);  | 
364  | 0  |         }  | 
365  | 0  |     }  | 
366  |  |  | 
367  |  |     // Aho rule #2  | 
368  | 0  |     if (n->fType == RBBINode::opStar ||  | 
369  | 0  |         n->fType == RBBINode::opPlus) { | 
370  | 0  |         RBBINode   *i;  // again, n and i are the names from Aho's description.  | 
371  | 0  |         uint32_t    ix;  | 
372  |  | 
  | 
373  | 0  |         for (ix=0; ix<(uint32_t)n->fLastPosSet->size(); ix++) { | 
374  | 0  |             i = (RBBINode *)n->fLastPosSet->elementAt(ix);  | 
375  | 0  |             setAdd(i->fFollowPos, n->fFirstPosSet);  | 
376  | 0  |         }  | 
377  | 0  |     }  | 
378  |  |  | 
379  |  |  | 
380  |  | 
  | 
381  | 0  | }  | 
382  |  |  | 
383  |  | //-----------------------------------------------------------------------------  | 
384  |  | //  | 
385  |  | //    addRuleRootNodes    Recursively walk a parse tree, adding all nodes flagged  | 
386  |  | //                        as roots of a rule to a destination vector.  | 
387  |  | //  | 
388  |  | //-----------------------------------------------------------------------------  | 
389  | 0  | void RBBITableBuilder::addRuleRootNodes(UVector *dest, RBBINode *node) { | 
390  | 0  |     if (node == NULL || U_FAILURE(*fStatus)) { | 
391  | 0  |         return;  | 
392  | 0  |     }  | 
393  | 0  |     if (node->fRuleRoot) { | 
394  | 0  |         dest->addElementX(node, *fStatus);  | 
395  |  |         // Note: rules cannot nest. If we found a rule start node,  | 
396  |  |         //       no child node can also be a start node.  | 
397  | 0  |         return;  | 
398  | 0  |     }  | 
399  | 0  |     addRuleRootNodes(dest, node->fLeftChild);  | 
400  | 0  |     addRuleRootNodes(dest, node->fRightChild);  | 
401  | 0  | }  | 
402  |  |  | 
403  |  | //-----------------------------------------------------------------------------  | 
404  |  | //  | 
405  |  | //   calcChainedFollowPos.    Modify the previously calculated followPos sets  | 
406  |  | //                            to implement rule chaining.  NOT described by Aho  | 
407  |  | //  | 
408  |  | //-----------------------------------------------------------------------------  | 
409  | 0  | void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree, RBBINode *endMarkNode) { | 
410  |  | 
  | 
411  | 0  |     UVector         leafNodes(*fStatus);  | 
412  | 0  |     if (U_FAILURE(*fStatus)) { | 
413  | 0  |         return;  | 
414  | 0  |     }  | 
415  |  |  | 
416  |  |     // get a list all leaf nodes  | 
417  | 0  |     tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus);  | 
418  | 0  |     if (U_FAILURE(*fStatus)) { | 
419  | 0  |         return;  | 
420  | 0  |     }  | 
421  |  |  | 
422  |  |     // Collect all leaf nodes that can start matches for rules  | 
423  |  |     // with inbound chaining enabled, which is the union of the   | 
424  |  |     // firstPosition sets from each of the rule root nodes.  | 
425  |  |       | 
426  | 0  |     UVector ruleRootNodes(*fStatus);  | 
427  | 0  |     addRuleRootNodes(&ruleRootNodes, tree);  | 
428  |  | 
  | 
429  | 0  |     UVector matchStartNodes(*fStatus);  | 
430  | 0  |     for (int j=0; j<ruleRootNodes.size(); ++j) { | 
431  | 0  |         RBBINode *node = static_cast<RBBINode *>(ruleRootNodes.elementAt(j));  | 
432  | 0  |         if (node->fChainIn) { | 
433  | 0  |             setAdd(&matchStartNodes, node->fFirstPosSet);  | 
434  | 0  |         }  | 
435  | 0  |     }  | 
436  | 0  |     if (U_FAILURE(*fStatus)) { | 
437  | 0  |         return;  | 
438  | 0  |     }  | 
439  |  |  | 
440  | 0  |     int32_t  endNodeIx;  | 
441  | 0  |     int32_t  startNodeIx;  | 
442  |  | 
  | 
443  | 0  |     for (endNodeIx=0; endNodeIx<leafNodes.size(); endNodeIx++) { | 
444  | 0  |         RBBINode *endNode   = (RBBINode *)leafNodes.elementAt(endNodeIx);  | 
445  |  |  | 
446  |  |         // Identify leaf nodes that correspond to overall rule match positions.  | 
447  |  |         // These include the endMarkNode in their followPos sets.  | 
448  |  |         //  | 
449  |  |         // Note: do not consider other end marker nodes, those that are added to  | 
450  |  |         //       look-ahead rules. These can't chain; a match immediately stops  | 
451  |  |         //       further matching. This leaves exactly one end marker node, the one  | 
452  |  |         //       at the end of the complete tree.  | 
453  |  | 
  | 
454  | 0  |         if (!endNode->fFollowPos->contains(endMarkNode)) { | 
455  | 0  |             continue;  | 
456  | 0  |         }  | 
457  |  |  | 
458  |  |         // We've got a node that can end a match.  | 
459  |  |  | 
460  |  |         // !!LBCMNoChain implementation:  If this node's val correspond to  | 
461  |  |         // the Line Break $CM char class, don't chain from it.  | 
462  |  |         // TODO:  Remove this. !!LBCMNoChain is deprecated, and is not used  | 
463  |  |         //        by any of the standard ICU rules.  | 
464  | 0  |         if (fRB->fLBCMNoChain) { | 
465  | 0  |             UChar32 c = this->fRB->fSetBuilder->getFirstChar(endNode->fVal);  | 
466  | 0  |             if (c != -1) { | 
467  |  |                 // c == -1 occurs with sets containing only the {eof} marker string. | 
468  | 0  |                 ULineBreak cLBProp = (ULineBreak)u_getIntPropertyValue(c, UCHAR_LINE_BREAK);  | 
469  | 0  |                 if (cLBProp == U_LB_COMBINING_MARK) { | 
470  | 0  |                     continue;  | 
471  | 0  |                 }  | 
472  | 0  |             }  | 
473  | 0  |         }  | 
474  |  |  | 
475  |  |         // Now iterate over the nodes that can start a match, looking for ones  | 
476  |  |         //   with the same char class as our ending node.  | 
477  | 0  |         RBBINode *startNode;  | 
478  | 0  |         for (startNodeIx = 0; startNodeIx<matchStartNodes.size(); startNodeIx++) { | 
479  | 0  |             startNode = (RBBINode *)matchStartNodes.elementAt(startNodeIx);  | 
480  | 0  |             if (startNode->fType != RBBINode::leafChar) { | 
481  | 0  |                 continue;  | 
482  | 0  |             }  | 
483  |  |  | 
484  | 0  |             if (endNode->fVal == startNode->fVal) { | 
485  |  |                 // The end val (character class) of one possible match is the  | 
486  |  |                 //   same as the start of another.  | 
487  |  |  | 
488  |  |                 // Add all nodes from the followPos of the start node to the  | 
489  |  |                 //  followPos set of the end node, which will have the effect of  | 
490  |  |                 //  letting matches transition from a match state at endNode  | 
491  |  |                 //  to the second char of a match starting with startNode.  | 
492  | 0  |                 setAdd(endNode->fFollowPos, startNode->fFollowPos);  | 
493  | 0  |             }  | 
494  | 0  |         }  | 
495  | 0  |     }  | 
496  | 0  | }  | 
497  |  |  | 
498  |  |  | 
499  |  | //-----------------------------------------------------------------------------  | 
500  |  | //  | 
501  |  | //   bofFixup.    Fixup for state tables that include {bof} beginning of input testing. | 
502  |  | //                Do an swizzle similar to chaining, modifying the followPos set of  | 
503  |  | //                the bofNode to include the followPos nodes from other {bot} nodes | 
504  |  | //                scattered through the tree.  | 
505  |  | //  | 
506  |  | //                This function has much in common with calcChainedFollowPos().  | 
507  |  | //  | 
508  |  | //-----------------------------------------------------------------------------  | 
509  | 0  | void RBBITableBuilder::bofFixup() { | 
510  |  | 
  | 
511  | 0  |     if (U_FAILURE(*fStatus)) { | 
512  | 0  |         return;  | 
513  | 0  |     }  | 
514  |  |  | 
515  |  |     //   The parse tree looks like this ...  | 
516  |  |     //         fTree root  --->       <cat>  | 
517  |  |     //                               /     \       .  | 
518  |  |     //                            <cat>   <#end node>  | 
519  |  |     //                           /     \  .  | 
520  |  |     //                     <bofNode>   rest  | 
521  |  |     //                               of tree  | 
522  |  |     //  | 
523  |  |     //    We will be adding things to the followPos set of the <bofNode>  | 
524  |  |     //  | 
525  | 0  |     RBBINode  *bofNode = fTree->fLeftChild->fLeftChild;  | 
526  | 0  |     U_ASSERT(bofNode->fType == RBBINode::leafChar);  | 
527  | 0  |     U_ASSERT(bofNode->fVal == 2);  | 
528  |  |  | 
529  |  |     // Get all nodes that can be the start a match of the user-written rules  | 
530  |  |     //  (excluding the fake bofNode)  | 
531  |  |     //  We want the nodes that can start a match in the  | 
532  |  |     //     part labeled "rest of tree"  | 
533  |  |     //   | 
534  | 0  |     UVector *matchStartNodes = fTree->fLeftChild->fRightChild->fFirstPosSet;  | 
535  |  | 
  | 
536  | 0  |     RBBINode *startNode;  | 
537  | 0  |     int       startNodeIx;  | 
538  | 0  |     for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) { | 
539  | 0  |         startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx);  | 
540  | 0  |         if (startNode->fType != RBBINode::leafChar) { | 
541  | 0  |             continue;  | 
542  | 0  |         }  | 
543  |  |  | 
544  | 0  |         if (startNode->fVal == bofNode->fVal) { | 
545  |  |             //  We found a leaf node corresponding to a {bof} that was | 
546  |  |             //    explicitly written into a rule.  | 
547  |  |             //  Add everything from the followPos set of this node to the  | 
548  |  |             //    followPos set of the fake bofNode at the start of the tree.  | 
549  |  |             //    | 
550  | 0  |             setAdd(bofNode->fFollowPos, startNode->fFollowPos);  | 
551  | 0  |         }  | 
552  | 0  |     }  | 
553  | 0  | }  | 
554  |  |  | 
555  |  | //-----------------------------------------------------------------------------  | 
556  |  | //  | 
557  |  | //   buildStateTable()    Determine the set of runtime DFA states and the  | 
558  |  | //                        transition tables for these states, by the algorithm  | 
559  |  | //                        of fig. 3.44 in Aho.  | 
560  |  | //  | 
561  |  | //                        Most of the comments are quotes of Aho's psuedo-code.  | 
562  |  | //  | 
563  |  | //-----------------------------------------------------------------------------  | 
564  | 0  | void RBBITableBuilder::buildStateTable() { | 
565  | 0  |     if (U_FAILURE(*fStatus)) { | 
566  | 0  |         return;  | 
567  | 0  |     }  | 
568  | 0  |     RBBIStateDescriptor *failState;  | 
569  |  |     // Set it to NULL to avoid uninitialized warning  | 
570  | 0  |     RBBIStateDescriptor *initialState = NULL;   | 
571  |  |     //  | 
572  |  |     // Add a dummy state 0 - the stop state.  Not from Aho.  | 
573  | 0  |     int      lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1;  | 
574  | 0  |     failState = new RBBIStateDescriptor(lastInputSymbol, fStatus);  | 
575  | 0  |     if (failState == NULL) { | 
576  | 0  |         *fStatus = U_MEMORY_ALLOCATION_ERROR;  | 
577  | 0  |         goto ExitBuildSTdeleteall;  | 
578  | 0  |     }  | 
579  | 0  |     failState->fPositions = new UVector(*fStatus);  | 
580  | 0  |     if (failState->fPositions == NULL) { | 
581  | 0  |         *fStatus = U_MEMORY_ALLOCATION_ERROR;  | 
582  | 0  |     }  | 
583  | 0  |     if (failState->fPositions == NULL || U_FAILURE(*fStatus)) { | 
584  | 0  |         goto ExitBuildSTdeleteall;  | 
585  | 0  |     }  | 
586  | 0  |     fDStates->addElementX(failState, *fStatus);  | 
587  | 0  |     if (U_FAILURE(*fStatus)) { | 
588  | 0  |         goto ExitBuildSTdeleteall;  | 
589  | 0  |     }  | 
590  |  |  | 
591  |  |     // initially, the only unmarked state in Dstates is firstpos(root),  | 
592  |  |     //       where toot is the root of the syntax tree for (r)#;  | 
593  | 0  |     initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus);  | 
594  | 0  |     if (initialState == NULL) { | 
595  | 0  |         *fStatus = U_MEMORY_ALLOCATION_ERROR;  | 
596  | 0  |     }  | 
597  | 0  |     if (U_FAILURE(*fStatus)) { | 
598  | 0  |         goto ExitBuildSTdeleteall;  | 
599  | 0  |     }  | 
600  | 0  |     initialState->fPositions = new UVector(*fStatus);  | 
601  | 0  |     if (initialState->fPositions == NULL) { | 
602  | 0  |         *fStatus = U_MEMORY_ALLOCATION_ERROR;  | 
603  | 0  |     }  | 
604  | 0  |     if (U_FAILURE(*fStatus)) { | 
605  | 0  |         goto ExitBuildSTdeleteall;  | 
606  | 0  |     }  | 
607  | 0  |     setAdd(initialState->fPositions, fTree->fFirstPosSet);  | 
608  | 0  |     fDStates->addElementX(initialState, *fStatus);  | 
609  | 0  |     if (U_FAILURE(*fStatus)) { | 
610  | 0  |         goto ExitBuildSTdeleteall;  | 
611  | 0  |     }  | 
612  |  |  | 
613  |  |     // while there is an unmarked state T in Dstates do begin  | 
614  | 0  |     for (;;) { | 
615  | 0  |         RBBIStateDescriptor *T = NULL;  | 
616  | 0  |         int32_t              tx;  | 
617  | 0  |         for (tx=1; tx<fDStates->size(); tx++) { | 
618  | 0  |             RBBIStateDescriptor *temp;  | 
619  | 0  |             temp = (RBBIStateDescriptor *)fDStates->elementAt(tx);  | 
620  | 0  |             if (temp->fMarked == FALSE) { | 
621  | 0  |                 T = temp;  | 
622  | 0  |                 break;  | 
623  | 0  |             }  | 
624  | 0  |         }  | 
625  | 0  |         if (T == NULL) { | 
626  | 0  |             break;  | 
627  | 0  |         }  | 
628  |  |  | 
629  |  |         // mark T;  | 
630  | 0  |         T->fMarked = TRUE;  | 
631  |  |  | 
632  |  |         // for each input symbol a do begin  | 
633  | 0  |         int32_t  a;  | 
634  | 0  |         for (a = 1; a<=lastInputSymbol; a++) { | 
635  |  |             // let U be the set of positions that are in followpos(p)  | 
636  |  |             //    for some position p in T  | 
637  |  |             //    such that the symbol at position p is a;  | 
638  | 0  |             UVector    *U = NULL;  | 
639  | 0  |             RBBINode   *p;  | 
640  | 0  |             int32_t     px;  | 
641  | 0  |             for (px=0; px<T->fPositions->size(); px++) { | 
642  | 0  |                 p = (RBBINode *)T->fPositions->elementAt(px);  | 
643  | 0  |                 if ((p->fType == RBBINode::leafChar) &&  (p->fVal == a)) { | 
644  | 0  |                     if (U == NULL) { | 
645  | 0  |                         U = new UVector(*fStatus);  | 
646  | 0  |                         if (U == NULL) { | 
647  | 0  |                           *fStatus = U_MEMORY_ALLOCATION_ERROR;  | 
648  | 0  |                           goto ExitBuildSTdeleteall;  | 
649  | 0  |                         }  | 
650  | 0  |                     }  | 
651  | 0  |                     setAdd(U, p->fFollowPos);  | 
652  | 0  |                 }  | 
653  | 0  |             }  | 
654  |  |  | 
655  |  |             // if U is not empty and not in DStates then  | 
656  | 0  |             int32_t  ux = 0;  | 
657  | 0  |             UBool    UinDstates = FALSE;  | 
658  | 0  |             if (U != NULL) { | 
659  | 0  |                 U_ASSERT(U->size() > 0);  | 
660  | 0  |                 int  ix;  | 
661  | 0  |                 for (ix=0; ix<fDStates->size(); ix++) { | 
662  | 0  |                     RBBIStateDescriptor *temp2;  | 
663  | 0  |                     temp2 = (RBBIStateDescriptor *)fDStates->elementAt(ix);  | 
664  | 0  |                     if (setEquals(U, temp2->fPositions)) { | 
665  | 0  |                         delete U;  | 
666  | 0  |                         U  = temp2->fPositions;  | 
667  | 0  |                         ux = ix;  | 
668  | 0  |                         UinDstates = TRUE;  | 
669  | 0  |                         break;  | 
670  | 0  |                     }  | 
671  | 0  |                 }  | 
672  |  |  | 
673  |  |                 // Add U as an unmarked state to Dstates  | 
674  | 0  |                 if (!UinDstates)  | 
675  | 0  |                 { | 
676  | 0  |                     RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus);  | 
677  | 0  |                     if (newState == NULL) { | 
678  | 0  |                       *fStatus = U_MEMORY_ALLOCATION_ERROR;  | 
679  | 0  |                     }  | 
680  | 0  |                     if (U_FAILURE(*fStatus)) { | 
681  | 0  |                         goto ExitBuildSTdeleteall;  | 
682  | 0  |                     }  | 
683  | 0  |                     newState->fPositions = U;  | 
684  | 0  |                     fDStates->addElementX(newState, *fStatus);  | 
685  | 0  |                     if (U_FAILURE(*fStatus)) { | 
686  | 0  |                         return;  | 
687  | 0  |                     }  | 
688  | 0  |                     ux = fDStates->size()-1;  | 
689  | 0  |                 }  | 
690  |  |  | 
691  |  |                 // Dtran[T, a] := U;  | 
692  | 0  |                 T->fDtran->setElementAt(ux, a);  | 
693  | 0  |             }  | 
694  | 0  |         }  | 
695  | 0  |     }  | 
696  | 0  |     return;  | 
697  |  |     // delete local pointers only if error occurred.  | 
698  | 0  | ExitBuildSTdeleteall:  | 
699  | 0  |     delete initialState;  | 
700  | 0  |     delete failState;  | 
701  | 0  | }  | 
702  |  |  | 
703  |  |  | 
704  |  | /**  | 
705  |  |  * mapLookAheadRules  | 
706  |  |  *  | 
707  |  |  */  | 
708  | 0  | void RBBITableBuilder::mapLookAheadRules() { | 
709  | 0  |     fLookAheadRuleMap =  new UVector32(fRB->fScanner->numRules() + 1, *fStatus);  | 
710  | 0  |     if (fLookAheadRuleMap == nullptr) { | 
711  | 0  |         *fStatus = U_MEMORY_ALLOCATION_ERROR;  | 
712  | 0  |     }  | 
713  | 0  |     if (U_FAILURE(*fStatus)) { | 
714  | 0  |         return;  | 
715  | 0  |     }  | 
716  | 0  |     fLookAheadRuleMap->setSize(fRB->fScanner->numRules() + 1);  | 
717  |  | 
  | 
718  | 0  |     for (int32_t n=0; n<fDStates->size(); n++) { | 
719  | 0  |         RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);  | 
720  | 0  |         int32_t laSlotForState = 0;  | 
721  |  |  | 
722  |  |         // Establish the look-ahead slot for this state, if the state covers  | 
723  |  |         // any look-ahead nodes - corresponding to the '/' in look-ahead rules.  | 
724  |  |  | 
725  |  |         // If any of the look-ahead nodes already have a slot assigned, use it,  | 
726  |  |         // otherwise assign a new one.  | 
727  |  | 
  | 
728  | 0  |         bool sawLookAheadNode = false;  | 
729  | 0  |         for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) { | 
730  | 0  |             RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos));  | 
731  | 0  |             if (node->fType != RBBINode::NodeType::lookAhead) { | 
732  | 0  |                 continue;  | 
733  | 0  |             }  | 
734  | 0  |             sawLookAheadNode = true;  | 
735  | 0  |             int32_t ruleNum = node->fVal;     // Set when rule was originally parsed.  | 
736  | 0  |             U_ASSERT(ruleNum < fLookAheadRuleMap->size());  | 
737  | 0  |             U_ASSERT(ruleNum > 0);  | 
738  | 0  |             int32_t laSlot = fLookAheadRuleMap->elementAti(ruleNum);  | 
739  | 0  |             if (laSlot != 0) { | 
740  | 0  |                 if (laSlotForState == 0) { | 
741  | 0  |                     laSlotForState = laSlot;  | 
742  | 0  |                 } else { | 
743  |  |                     // TODO: figure out if this can fail, change to setting an error code if so.  | 
744  | 0  |                     U_ASSERT(laSlot == laSlotForState);  | 
745  | 0  |                 }  | 
746  | 0  |             }  | 
747  | 0  |         }  | 
748  | 0  |         if (!sawLookAheadNode) { | 
749  | 0  |             continue;  | 
750  | 0  |         }  | 
751  |  |  | 
752  | 0  |         if (laSlotForState == 0) { | 
753  | 0  |             laSlotForState = ++fLASlotsInUse;  | 
754  | 0  |         }  | 
755  |  |  | 
756  |  |         // For each look ahead node covered by this state,  | 
757  |  |         // set the mapping from the node's rule number to the look ahead slot.  | 
758  |  |         // There can be multiple nodes/rule numbers going to the same la slot.  | 
759  |  | 
  | 
760  | 0  |         for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) { | 
761  | 0  |             RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos));  | 
762  | 0  |             if (node->fType != RBBINode::NodeType::lookAhead) { | 
763  | 0  |                 continue;  | 
764  | 0  |             }  | 
765  | 0  |             int32_t ruleNum = node->fVal;     // Set when rule was originally parsed.  | 
766  | 0  |             int32_t existingVal = fLookAheadRuleMap->elementAti(ruleNum);  | 
767  | 0  |             (void)existingVal;  | 
768  | 0  |             U_ASSERT(existingVal == 0 || existingVal == laSlotForState);  | 
769  | 0  |             fLookAheadRuleMap->setElementAt(laSlotForState, ruleNum);  | 
770  | 0  |         }  | 
771  | 0  |     }  | 
772  |  | 
  | 
773  | 0  | }  | 
774  |  |  | 
775  |  | //-----------------------------------------------------------------------------  | 
776  |  | //  | 
777  |  | //   flagAcceptingStates    Identify accepting states.  | 
778  |  | //                          First get a list of all of the end marker nodes.  | 
779  |  | //                          Then, for each state s,  | 
780  |  | //                              if s contains one of the end marker nodes in its list of tree positions then  | 
781  |  | //                                  s is an accepting state.  | 
782  |  | //  | 
783  |  | //-----------------------------------------------------------------------------  | 
784  | 0  | void     RBBITableBuilder::flagAcceptingStates() { | 
785  | 0  |     if (U_FAILURE(*fStatus)) { | 
786  | 0  |         return;  | 
787  | 0  |     }  | 
788  | 0  |     UVector     endMarkerNodes(*fStatus);  | 
789  | 0  |     RBBINode    *endMarker;  | 
790  | 0  |     int32_t     i;  | 
791  | 0  |     int32_t     n;  | 
792  |  | 
  | 
793  | 0  |     if (U_FAILURE(*fStatus)) { | 
794  | 0  |         return;  | 
795  | 0  |     }  | 
796  |  |  | 
797  | 0  |     fTree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus);  | 
798  | 0  |     if (U_FAILURE(*fStatus)) { | 
799  | 0  |         return;  | 
800  | 0  |     }  | 
801  |  |  | 
802  | 0  |     for (i=0; i<endMarkerNodes.size(); i++) { | 
803  | 0  |         endMarker = (RBBINode *)endMarkerNodes.elementAt(i);  | 
804  | 0  |         for (n=0; n<fDStates->size(); n++) { | 
805  | 0  |             RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);  | 
806  | 0  |             if (sd->fPositions->indexOf(endMarker) >= 0) { | 
807  |  |                 // Any non-zero value for fAccepting means this is an accepting node.  | 
808  |  |                 // The value is what will be returned to the user as the break status.  | 
809  |  |                 // If no other value was specified, force it to ACCEPTING_UNCONDITIONAL (1).  | 
810  |  | 
  | 
811  | 0  |                 if (sd->fAccepting==0) { | 
812  |  |                     // State hasn't been marked as accepting yet.  Do it now.  | 
813  | 0  |                     sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);  | 
814  | 0  |                     if (sd->fAccepting == 0) { | 
815  | 0  |                         sd->fAccepting = ACCEPTING_UNCONDITIONAL;  | 
816  | 0  |                     }  | 
817  | 0  |                 }  | 
818  | 0  |                 if (sd->fAccepting==ACCEPTING_UNCONDITIONAL && endMarker->fVal != 0) { | 
819  |  |                     // Both lookahead and non-lookahead accepting for this state.  | 
820  |  |                     // Favor the look-ahead, because a look-ahead match needs to  | 
821  |  |                     // immediately stop the run-time engine. First match, not longest.  | 
822  | 0  |                     sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);  | 
823  | 0  |                 }  | 
824  |  |                 // implicit else:  | 
825  |  |                 // if sd->fAccepting already had a value other than 0 or 1, leave it be.  | 
826  | 0  |             }  | 
827  | 0  |         }  | 
828  | 0  |     }  | 
829  | 0  | }  | 
830  |  |  | 
831  |  |  | 
832  |  | //-----------------------------------------------------------------------------  | 
833  |  | //  | 
834  |  | //    flagLookAheadStates   Very similar to flagAcceptingStates, above.  | 
835  |  | //  | 
836  |  | //-----------------------------------------------------------------------------  | 
837  | 0  | void     RBBITableBuilder::flagLookAheadStates() { | 
838  | 0  |     if (U_FAILURE(*fStatus)) { | 
839  | 0  |         return;  | 
840  | 0  |     }  | 
841  | 0  |     UVector     lookAheadNodes(*fStatus);  | 
842  | 0  |     RBBINode    *lookAheadNode;  | 
843  | 0  |     int32_t     i;  | 
844  | 0  |     int32_t     n;  | 
845  |  | 
  | 
846  | 0  |     fTree->findNodes(&lookAheadNodes, RBBINode::lookAhead, *fStatus);  | 
847  | 0  |     if (U_FAILURE(*fStatus)) { | 
848  | 0  |         return;  | 
849  | 0  |     }  | 
850  | 0  |     for (i=0; i<lookAheadNodes.size(); i++) { | 
851  | 0  |         lookAheadNode = (RBBINode *)lookAheadNodes.elementAt(i);  | 
852  | 0  |         U_ASSERT(lookAheadNode->fType == RBBINode::NodeType::lookAhead);  | 
853  |  | 
  | 
854  | 0  |         for (n=0; n<fDStates->size(); n++) { | 
855  | 0  |             RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);  | 
856  | 0  |             int32_t positionsIdx = sd->fPositions->indexOf(lookAheadNode);  | 
857  | 0  |             if (positionsIdx >= 0) { | 
858  | 0  |                 U_ASSERT(lookAheadNode == sd->fPositions->elementAt(positionsIdx));  | 
859  | 0  |                 uint32_t lookaheadSlot = fLookAheadRuleMap->elementAti(lookAheadNode->fVal);  | 
860  | 0  |                 U_ASSERT(sd->fLookAhead == 0 || sd->fLookAhead == lookaheadSlot);  | 
861  |  |                 // if (sd->fLookAhead != 0 && sd->fLookAhead != lookaheadSlot) { | 
862  |  |                 //     printf("%s:%d Bingo. sd->fLookAhead:%d   lookaheadSlot:%d\n", | 
863  |  |                 //            __FILE__, __LINE__, sd->fLookAhead, lookaheadSlot);  | 
864  |  |                 // }  | 
865  | 0  |                 sd->fLookAhead = lookaheadSlot;  | 
866  | 0  |             }  | 
867  | 0  |         }  | 
868  | 0  |     }  | 
869  | 0  | }  | 
870  |  |  | 
871  |  |  | 
872  |  |  | 
873  |  |  | 
874  |  | //-----------------------------------------------------------------------------  | 
875  |  | //  | 
876  |  | //    flagTaggedStates  | 
877  |  | //  | 
878  |  | //-----------------------------------------------------------------------------  | 
879  | 0  | void     RBBITableBuilder::flagTaggedStates() { | 
880  | 0  |     if (U_FAILURE(*fStatus)) { | 
881  | 0  |         return;  | 
882  | 0  |     }  | 
883  | 0  |     UVector     tagNodes(*fStatus);  | 
884  | 0  |     RBBINode    *tagNode;  | 
885  | 0  |     int32_t     i;  | 
886  | 0  |     int32_t     n;  | 
887  |  | 
  | 
888  | 0  |     if (U_FAILURE(*fStatus)) { | 
889  | 0  |         return;  | 
890  | 0  |     }  | 
891  | 0  |     fTree->findNodes(&tagNodes, RBBINode::tag, *fStatus);  | 
892  | 0  |     if (U_FAILURE(*fStatus)) { | 
893  | 0  |         return;  | 
894  | 0  |     }  | 
895  | 0  |     for (i=0; i<tagNodes.size(); i++) {                   // For each tag node t (all of 'em) | 
896  | 0  |         tagNode = (RBBINode *)tagNodes.elementAt(i);  | 
897  |  | 
  | 
898  | 0  |         for (n=0; n<fDStates->size(); n++) {              //    For each state  s (row in the state table) | 
899  | 0  |             RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);  | 
900  | 0  |             if (sd->fPositions->indexOf(tagNode) >= 0) {  //       if  s include the tag node t | 
901  | 0  |                 sortedAdd(&sd->fTagVals, tagNode->fVal);  | 
902  | 0  |             }  | 
903  | 0  |         }  | 
904  | 0  |     }  | 
905  | 0  | }  | 
906  |  |  | 
907  |  |  | 
908  |  |  | 
909  |  |  | 
910  |  | //-----------------------------------------------------------------------------  | 
911  |  | //  | 
912  |  | //  mergeRuleStatusVals  | 
913  |  | //  | 
914  |  | //      Update the global table of rule status {tag} values | 
915  |  | //      The rule builder has a global vector of status values that are common  | 
916  |  | //      for all tables.  Merge the ones from this table into the global set.  | 
917  |  | //  | 
918  |  | //-----------------------------------------------------------------------------  | 
919  | 0  | void  RBBITableBuilder::mergeRuleStatusVals() { | 
920  |  |     //  | 
921  |  |     //  The basic outline of what happens here is this...  | 
922  |  |     //  | 
923  |  |     //    for each state in this state table  | 
924  |  |     //       if the status tag list for this state is in the global statuses list  | 
925  |  |     //           record where and  | 
926  |  |     //           continue with the next state  | 
927  |  |     //       else  | 
928  |  |     //           add the tag list for this state to the global list.  | 
929  |  |     //  | 
930  | 0  |     int i;  | 
931  | 0  |     int n;  | 
932  |  |  | 
933  |  |     // Pre-set a single tag of {0} into the table. | 
934  |  |     //   We will need this as a default, for rule sets with no explicit tagging.  | 
935  | 0  |     if (fRB->fRuleStatusVals->size() == 0) { | 
936  | 0  |         fRB->fRuleStatusVals->addElement(1, *fStatus);  // Num of statuses in group  | 
937  | 0  |         fRB->fRuleStatusVals->addElement((int32_t)0, *fStatus);  //   and our single status of zero  | 
938  | 0  |     }  | 
939  |  |  | 
940  |  |     //    For each state  | 
941  | 0  |     for (n=0; n<fDStates->size(); n++) { | 
942  | 0  |         RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);  | 
943  | 0  |         UVector *thisStatesTagValues = sd->fTagVals;  | 
944  | 0  |         if (thisStatesTagValues == NULL) { | 
945  |  |             // No tag values are explicitly associated with this state.  | 
946  |  |             //   Set the default tag value.  | 
947  | 0  |             sd->fTagsIdx = 0;  | 
948  | 0  |             continue;  | 
949  | 0  |         }  | 
950  |  |  | 
951  |  |         // There are tag(s) associated with this state.  | 
952  |  |         //   fTagsIdx will be the index into the global tag list for this state's tag values.  | 
953  |  |         //   Initial value of -1 flags that we haven't got it set yet.  | 
954  | 0  |         sd->fTagsIdx = -1;  | 
955  | 0  |         int32_t  thisTagGroupStart = 0;   // indexes into the global rule status vals list  | 
956  | 0  |         int32_t  nextTagGroupStart = 0;  | 
957  |  |  | 
958  |  |         // Loop runs once per group of tags in the global list  | 
959  | 0  |         while (nextTagGroupStart < fRB->fRuleStatusVals->size()) { | 
960  | 0  |             thisTagGroupStart = nextTagGroupStart;  | 
961  | 0  |             nextTagGroupStart += fRB->fRuleStatusVals->elementAti(thisTagGroupStart) + 1;  | 
962  | 0  |             if (thisStatesTagValues->size() != fRB->fRuleStatusVals->elementAti(thisTagGroupStart)) { | 
963  |  |                 // The number of tags for this state is different from  | 
964  |  |                 //    the number of tags in this group from the global list.  | 
965  |  |                 //    Continue with the next group from the global list.  | 
966  | 0  |                 continue;  | 
967  | 0  |             }  | 
968  |  |             // The lengths match, go ahead and compare the actual tag values  | 
969  |  |             //    between this state and the group from the global list.  | 
970  | 0  |             for (i=0; i<thisStatesTagValues->size(); i++) { | 
971  | 0  |                 if (thisStatesTagValues->elementAti(i) !=  | 
972  | 0  |                     fRB->fRuleStatusVals->elementAti(thisTagGroupStart + 1 + i) ) { | 
973  |  |                     // Mismatch.  | 
974  | 0  |                     break;  | 
975  | 0  |                 }  | 
976  | 0  |             }  | 
977  |  | 
  | 
978  | 0  |             if (i == thisStatesTagValues->size()) { | 
979  |  |                 // We found a set of tag values in the global list that match  | 
980  |  |                 //   those for this state.  Use them.  | 
981  | 0  |                 sd->fTagsIdx = thisTagGroupStart;  | 
982  | 0  |                 break;  | 
983  | 0  |             }  | 
984  | 0  |         }  | 
985  |  | 
  | 
986  | 0  |         if (sd->fTagsIdx == -1) { | 
987  |  |             // No suitable entry in the global tag list already.  Add one  | 
988  | 0  |             sd->fTagsIdx = fRB->fRuleStatusVals->size();  | 
989  | 0  |             fRB->fRuleStatusVals->addElement(thisStatesTagValues->size(), *fStatus);  | 
990  | 0  |             for (i=0; i<thisStatesTagValues->size(); i++) { | 
991  | 0  |                 fRB->fRuleStatusVals->addElement(thisStatesTagValues->elementAti(i), *fStatus);  | 
992  | 0  |             }  | 
993  | 0  |         }  | 
994  | 0  |     }  | 
995  | 0  | }  | 
996  |  |  | 
997  |  |  | 
998  |  |  | 
999  |  |  | 
1000  |  |  | 
1001  |  |  | 
1002  |  |  | 
1003  |  | //-----------------------------------------------------------------------------  | 
1004  |  | //  | 
1005  |  | //  sortedAdd  Add a value to a vector of sorted values (ints).  | 
1006  |  | //             Do not replicate entries; if the value is already there, do not  | 
1007  |  | //                add a second one.  | 
1008  |  | //             Lazily create the vector if it does not already exist.  | 
1009  |  | //  | 
1010  |  | //-----------------------------------------------------------------------------  | 
1011  | 0  | void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) { | 
1012  | 0  |     int32_t i;  | 
1013  |  | 
  | 
1014  | 0  |     if (*vector == NULL) { | 
1015  | 0  |         *vector = new UVector(*fStatus);  | 
1016  | 0  |     }  | 
1017  | 0  |     if (*vector == NULL || U_FAILURE(*fStatus)) { | 
1018  | 0  |         return;  | 
1019  | 0  |     }  | 
1020  | 0  |     UVector *vec = *vector;  | 
1021  | 0  |     int32_t  vSize = vec->size();  | 
1022  | 0  |     for (i=0; i<vSize; i++) { | 
1023  | 0  |         int32_t valAtI = vec->elementAti(i);  | 
1024  | 0  |         if (valAtI == val) { | 
1025  |  |             // The value is already in the vector.  Don't add it again.  | 
1026  | 0  |             return;  | 
1027  | 0  |         }  | 
1028  | 0  |         if (valAtI > val) { | 
1029  | 0  |             break;  | 
1030  | 0  |         }  | 
1031  | 0  |     }  | 
1032  | 0  |     vec->insertElementAt(val, i, *fStatus);  | 
1033  | 0  | }  | 
1034  |  |  | 
1035  |  |  | 
1036  |  |  | 
1037  |  | //-----------------------------------------------------------------------------  | 
1038  |  | //  | 
1039  |  | //  setAdd     Set operation on UVector  | 
1040  |  | //             dest = dest union source  | 
1041  |  | //             Elements may only appear once and must be sorted.  | 
1042  |  | //  | 
1043  |  | //-----------------------------------------------------------------------------  | 
1044  | 0  | void RBBITableBuilder::setAdd(UVector *dest, UVector *source) { | 
1045  | 0  |     int32_t destOriginalSize = dest->size();  | 
1046  | 0  |     int32_t sourceSize       = source->size();  | 
1047  | 0  |     int32_t di           = 0;  | 
1048  | 0  |     MaybeStackArray<void *, 16> destArray, sourceArray;  // Handle small cases without malloc  | 
1049  | 0  |     void **destPtr, **sourcePtr;  | 
1050  | 0  |     void **destLim, **sourceLim;  | 
1051  |  | 
  | 
1052  | 0  |     if (destOriginalSize > destArray.getCapacity()) { | 
1053  | 0  |         if (destArray.resize(destOriginalSize) == NULL) { | 
1054  | 0  |             return;  | 
1055  | 0  |         }  | 
1056  | 0  |     }  | 
1057  | 0  |     destPtr = destArray.getAlias();  | 
1058  | 0  |     destLim = destPtr + destOriginalSize;  // destArray.getArrayLimit()?  | 
1059  |  | 
  | 
1060  | 0  |     if (sourceSize > sourceArray.getCapacity()) { | 
1061  | 0  |         if (sourceArray.resize(sourceSize) == NULL) { | 
1062  | 0  |             return;  | 
1063  | 0  |         }  | 
1064  | 0  |     }  | 
1065  | 0  |     sourcePtr = sourceArray.getAlias();  | 
1066  | 0  |     sourceLim = sourcePtr + sourceSize;  // sourceArray.getArrayLimit()?  | 
1067  |  |  | 
1068  |  |     // Avoid multiple "get element" calls by getting the contents into arrays  | 
1069  | 0  |     (void) dest->toArray(destPtr);  | 
1070  | 0  |     (void) source->toArray(sourcePtr);  | 
1071  |  | 
  | 
1072  | 0  |     dest->setSize(sourceSize+destOriginalSize, *fStatus);  | 
1073  |  | 
  | 
1074  | 0  |     while (sourcePtr < sourceLim && destPtr < destLim) { | 
1075  | 0  |         if (*destPtr == *sourcePtr) { | 
1076  | 0  |             dest->setElementAt(*sourcePtr++, di++);  | 
1077  | 0  |             destPtr++;  | 
1078  | 0  |         }  | 
1079  |  |         // This check is required for machines with segmented memory, like i5/OS.  | 
1080  |  |         // Direct pointer comparison is not recommended.  | 
1081  | 0  |         else if (uprv_memcmp(destPtr, sourcePtr, sizeof(void *)) < 0) { | 
1082  | 0  |             dest->setElementAt(*destPtr++, di++);  | 
1083  | 0  |         }  | 
1084  | 0  |         else { /* *sourcePtr < *destPtr */ | 
1085  | 0  |             dest->setElementAt(*sourcePtr++, di++);  | 
1086  | 0  |         }  | 
1087  | 0  |     }  | 
1088  |  |  | 
1089  |  |     // At most one of these two cleanup loops will execute  | 
1090  | 0  |     while (destPtr < destLim) { | 
1091  | 0  |         dest->setElementAt(*destPtr++, di++);  | 
1092  | 0  |     }  | 
1093  | 0  |     while (sourcePtr < sourceLim) { | 
1094  | 0  |         dest->setElementAt(*sourcePtr++, di++);  | 
1095  | 0  |     }  | 
1096  |  | 
  | 
1097  | 0  |     dest->setSize(di, *fStatus);  | 
1098  | 0  | }  | 
1099  |  |  | 
1100  |  |  | 
1101  |  |  | 
1102  |  | //-----------------------------------------------------------------------------  | 
1103  |  | //  | 
1104  |  | //  setEqual    Set operation on UVector.  | 
1105  |  | //              Compare for equality.  | 
1106  |  | //              Elements must be sorted.  | 
1107  |  | //  | 
1108  |  | //-----------------------------------------------------------------------------  | 
1109  | 0  | UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) { | 
1110  | 0  |     return a->equals(*b);  | 
1111  | 0  | }  | 
1112  |  |  | 
1113  |  |  | 
1114  |  | //-----------------------------------------------------------------------------  | 
1115  |  | //  | 
1116  |  | //  printPosSets   Debug function.  Dump Nullable, firstpos, lastpos and followpos  | 
1117  |  | //                 for each node in the tree.  | 
1118  |  | //  | 
1119  |  | //-----------------------------------------------------------------------------  | 
1120  |  | #ifdef RBBI_DEBUG  | 
1121  |  | void RBBITableBuilder::printPosSets(RBBINode *n) { | 
1122  |  |     if (n==NULL) { | 
1123  |  |         return;  | 
1124  |  |     }  | 
1125  |  |     printf("\n"); | 
1126  |  |     RBBINode::printNodeHeader();  | 
1127  |  |     RBBINode::printNode(n);  | 
1128  |  |     RBBIDebugPrintf("         Nullable:  %s\n", n->fNullable?"TRUE":"FALSE"); | 
1129  |  |  | 
1130  |  |     RBBIDebugPrintf("         firstpos:  "); | 
1131  |  |     printSet(n->fFirstPosSet);  | 
1132  |  |  | 
1133  |  |     RBBIDebugPrintf("         lastpos:   "); | 
1134  |  |     printSet(n->fLastPosSet);  | 
1135  |  |  | 
1136  |  |     RBBIDebugPrintf("         followpos: "); | 
1137  |  |     printSet(n->fFollowPos);  | 
1138  |  |  | 
1139  |  |     printPosSets(n->fLeftChild);  | 
1140  |  |     printPosSets(n->fRightChild);  | 
1141  |  | }  | 
1142  |  | #endif  | 
1143  |  |  | 
1144  |  | //  | 
1145  |  | //    findDuplCharClassFrom()  | 
1146  |  | //  | 
1147  | 0  | bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) { | 
1148  | 0  |     int32_t numStates = fDStates->size();  | 
1149  | 0  |     int32_t numCols = fRB->fSetBuilder->getNumCharCategories();  | 
1150  |  | 
  | 
1151  | 0  |     for (; categories->first < numCols-1; categories->first++) { | 
1152  |  |         // Note: dictionary & non-dictionary columns cannot be merged.  | 
1153  |  |         //       The limitSecond value prevents considering mixed pairs.  | 
1154  |  |         //       Dictionary categories are >= DictCategoriesStart.  | 
1155  |  |         //       Non dict categories are   <  DictCategoriesStart.  | 
1156  | 0  |         int limitSecond = categories->first < fRB->fSetBuilder->getDictCategoriesStart() ?  | 
1157  | 0  |             fRB->fSetBuilder->getDictCategoriesStart() : numCols;  | 
1158  | 0  |         for (categories->second=categories->first+1; categories->second < limitSecond; categories->second++) { | 
1159  |  |             // Initialized to different values to prevent returning true if numStates = 0 (implies no duplicates).  | 
1160  | 0  |             uint16_t table_base = 0;  | 
1161  | 0  |             uint16_t table_dupl = 1;  | 
1162  | 0  |             for (int32_t state=0; state<numStates; state++) { | 
1163  | 0  |                 RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);  | 
1164  | 0  |                 table_base = (uint16_t)sd->fDtran->elementAti(categories->first);  | 
1165  | 0  |                 table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second);  | 
1166  | 0  |                 if (table_base != table_dupl) { | 
1167  | 0  |                     break;  | 
1168  | 0  |                 }  | 
1169  | 0  |             }  | 
1170  | 0  |             if (table_base == table_dupl) { | 
1171  | 0  |                 return true;  | 
1172  | 0  |             }  | 
1173  | 0  |         }  | 
1174  | 0  |     }  | 
1175  | 0  |     return false;  | 
1176  | 0  | }  | 
1177  |  |  | 
1178  |  |  | 
1179  |  | //  | 
1180  |  | //    removeColumn()  | 
1181  |  | //  | 
1182  | 0  | void RBBITableBuilder::removeColumn(int32_t column) { | 
1183  | 0  |     int32_t numStates = fDStates->size();  | 
1184  | 0  |     for (int32_t state=0; state<numStates; state++) { | 
1185  | 0  |         RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);  | 
1186  | 0  |         U_ASSERT(column < sd->fDtran->size());  | 
1187  | 0  |         sd->fDtran->removeElementAt(column);  | 
1188  | 0  |     }  | 
1189  | 0  | }  | 
1190  |  |  | 
1191  |  | /*  | 
1192  |  |  * findDuplicateState  | 
1193  |  |  */  | 
1194  | 0  | bool RBBITableBuilder::findDuplicateState(IntPair *states) { | 
1195  | 0  |     int32_t numStates = fDStates->size();  | 
1196  | 0  |     int32_t numCols = fRB->fSetBuilder->getNumCharCategories();  | 
1197  |  | 
  | 
1198  | 0  |     for (; states->first<numStates-1; states->first++) { | 
1199  | 0  |         RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(states->first);  | 
1200  | 0  |         for (states->second=states->first+1; states->second<numStates; states->second++) { | 
1201  | 0  |             RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(states->second);  | 
1202  | 0  |             if (firstSD->fAccepting != duplSD->fAccepting ||  | 
1203  | 0  |                 firstSD->fLookAhead != duplSD->fLookAhead ||  | 
1204  | 0  |                 firstSD->fTagsIdx   != duplSD->fTagsIdx) { | 
1205  | 0  |                 continue;  | 
1206  | 0  |             }  | 
1207  | 0  |             bool rowsMatch = true;  | 
1208  | 0  |             for (int32_t col=0; col < numCols; ++col) { | 
1209  | 0  |                 int32_t firstVal = firstSD->fDtran->elementAti(col);  | 
1210  | 0  |                 int32_t duplVal = duplSD->fDtran->elementAti(col);  | 
1211  | 0  |                 if (!((firstVal == duplVal) ||  | 
1212  | 0  |                         ((firstVal == states->first || firstVal == states->second) &&  | 
1213  | 0  |                         (duplVal  == states->first || duplVal  == states->second)))) { | 
1214  | 0  |                     rowsMatch = false;  | 
1215  | 0  |                     break;  | 
1216  | 0  |                 }  | 
1217  | 0  |             }  | 
1218  | 0  |             if (rowsMatch) { | 
1219  | 0  |                 return true;  | 
1220  | 0  |             }  | 
1221  | 0  |         }  | 
1222  | 0  |     }  | 
1223  | 0  |     return false;  | 
1224  | 0  | }  | 
1225  |  |  | 
1226  |  |  | 
1227  | 0  | bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) { | 
1228  | 0  |     int32_t numStates = fSafeTable->size();  | 
1229  |  | 
  | 
1230  | 0  |     for (; states->first<numStates-1; states->first++) { | 
1231  | 0  |         UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->first));  | 
1232  | 0  |         for (states->second=states->first+1; states->second<numStates; states->second++) { | 
1233  | 0  |             UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->second));  | 
1234  | 0  |             bool rowsMatch = true;  | 
1235  | 0  |             int32_t numCols = firstRow->length();  | 
1236  | 0  |             for (int32_t col=0; col < numCols; ++col) { | 
1237  | 0  |                 int32_t firstVal = firstRow->charAt(col);  | 
1238  | 0  |                 int32_t duplVal = duplRow->charAt(col);  | 
1239  | 0  |                 if (!((firstVal == duplVal) ||  | 
1240  | 0  |                         ((firstVal == states->first || firstVal == states->second) &&  | 
1241  | 0  |                         (duplVal  == states->first || duplVal  == states->second)))) { | 
1242  | 0  |                     rowsMatch = false;  | 
1243  | 0  |                     break;  | 
1244  | 0  |                 }  | 
1245  | 0  |             }  | 
1246  | 0  |             if (rowsMatch) { | 
1247  | 0  |                 return true;  | 
1248  | 0  |             }  | 
1249  | 0  |         }  | 
1250  | 0  |     }  | 
1251  | 0  |     return false;  | 
1252  | 0  | }  | 
1253  |  |  | 
1254  |  |  | 
1255  | 0  | void RBBITableBuilder::removeState(IntPair duplStates) { | 
1256  | 0  |     const int32_t keepState = duplStates.first;  | 
1257  | 0  |     const int32_t duplState = duplStates.second;  | 
1258  | 0  |     U_ASSERT(keepState < duplState);  | 
1259  | 0  |     U_ASSERT(duplState < fDStates->size());  | 
1260  |  | 
  | 
1261  | 0  |     RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);  | 
1262  | 0  |     fDStates->removeElementAt(duplState);  | 
1263  | 0  |     delete duplSD;  | 
1264  |  | 
  | 
1265  | 0  |     int32_t numStates = fDStates->size();  | 
1266  | 0  |     int32_t numCols = fRB->fSetBuilder->getNumCharCategories();  | 
1267  | 0  |     for (int32_t state=0; state<numStates; ++state) { | 
1268  | 0  |         RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);  | 
1269  | 0  |         for (int32_t col=0; col<numCols; col++) { | 
1270  | 0  |             int32_t existingVal = sd->fDtran->elementAti(col);  | 
1271  | 0  |             int32_t newVal = existingVal;  | 
1272  | 0  |             if (existingVal == duplState) { | 
1273  | 0  |                 newVal = keepState;  | 
1274  | 0  |             } else if (existingVal > duplState) { | 
1275  | 0  |                 newVal = existingVal - 1;  | 
1276  | 0  |             }  | 
1277  | 0  |             sd->fDtran->setElementAt(newVal, col);  | 
1278  | 0  |         }  | 
1279  | 0  |     }  | 
1280  | 0  | }  | 
1281  |  |  | 
1282  | 0  | void RBBITableBuilder::removeSafeState(IntPair duplStates) { | 
1283  | 0  |     const int32_t keepState = duplStates.first;  | 
1284  | 0  |     const int32_t duplState = duplStates.second;  | 
1285  | 0  |     U_ASSERT(keepState < duplState);  | 
1286  | 0  |     U_ASSERT(duplState < fSafeTable->size());  | 
1287  |  | 
  | 
1288  | 0  |     fSafeTable->removeElementAt(duplState);   // Note that fSafeTable has a deleter function  | 
1289  |  |                                               // and will auto-delete the removed element.  | 
1290  | 0  |     int32_t numStates = fSafeTable->size();  | 
1291  | 0  |     for (int32_t state=0; state<numStates; ++state) { | 
1292  | 0  |         UnicodeString *sd = (UnicodeString *)fSafeTable->elementAt(state);  | 
1293  | 0  |         int32_t numCols = sd->length();  | 
1294  | 0  |         for (int32_t col=0; col<numCols; col++) { | 
1295  | 0  |             int32_t existingVal = sd->charAt(col);  | 
1296  | 0  |             int32_t newVal = existingVal;  | 
1297  | 0  |             if (existingVal == duplState) { | 
1298  | 0  |                 newVal = keepState;  | 
1299  | 0  |             } else if (existingVal > duplState) { | 
1300  | 0  |                 newVal = existingVal - 1;  | 
1301  | 0  |             }  | 
1302  | 0  |             sd->setCharAt(col, static_cast<char16_t>(newVal));  | 
1303  | 0  |         }  | 
1304  | 0  |     }  | 
1305  | 0  | }  | 
1306  |  |  | 
1307  |  |  | 
1308  |  | /*  | 
1309  |  |  * RemoveDuplicateStates  | 
1310  |  |  */  | 
1311  | 0  | int32_t RBBITableBuilder::removeDuplicateStates() { | 
1312  | 0  |     IntPair dupls = {3, 0}; | 
1313  | 0  |     int32_t numStatesRemoved = 0;  | 
1314  |  | 
  | 
1315  | 0  |     while (findDuplicateState(&dupls)) { | 
1316  |  |         // printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second); | 
1317  | 0  |         removeState(dupls);  | 
1318  | 0  |         ++numStatesRemoved;  | 
1319  | 0  |     }  | 
1320  | 0  |     return numStatesRemoved;  | 
1321  | 0  | }  | 
1322  |  |  | 
1323  |  |  | 
1324  |  | //-----------------------------------------------------------------------------  | 
1325  |  | //  | 
1326  |  | //   getTableSize()    Calculate the size of the runtime form of this  | 
1327  |  | //                     state transition table.  | 
1328  |  | //  | 
1329  |  | //-----------------------------------------------------------------------------  | 
1330  | 0  | int32_t  RBBITableBuilder::getTableSize() const { | 
1331  | 0  |     int32_t    size = 0;  | 
1332  | 0  |     int32_t    numRows;  | 
1333  | 0  |     int32_t    numCols;  | 
1334  | 0  |     int32_t    rowSize;  | 
1335  |  | 
  | 
1336  | 0  |     if (fTree == NULL) { | 
1337  | 0  |         return 0;  | 
1338  | 0  |     }  | 
1339  |  |  | 
1340  | 0  |     size    = offsetof(RBBIStateTable, fTableData);    // The header, with no rows to the table.  | 
1341  |  | 
  | 
1342  | 0  |     numRows = fDStates->size();  | 
1343  | 0  |     numCols = fRB->fSetBuilder->getNumCharCategories();  | 
1344  |  | 
  | 
1345  | 0  |     if (use8BitsForTable()) { | 
1346  | 0  |         rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols;  | 
1347  | 0  |     } else { | 
1348  | 0  |         rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols;  | 
1349  | 0  |     }  | 
1350  | 0  |     size   += numRows * rowSize;  | 
1351  | 0  |     return size;  | 
1352  | 0  | }  | 
1353  |  |  | 
1354  | 0  | bool RBBITableBuilder::use8BitsForTable() const { | 
1355  | 0  |     return fDStates->size() <= kMaxStateFor8BitsTable;  | 
1356  | 0  | }  | 
1357  |  |  | 
1358  |  | //-----------------------------------------------------------------------------  | 
1359  |  | //  | 
1360  |  | //   exportTable()    export the state transition table in the format required  | 
1361  |  | //                    by the runtime engine.  getTableSize() bytes of memory  | 
1362  |  | //                    must be available at the output address "where".  | 
1363  |  | //  | 
1364  |  | //-----------------------------------------------------------------------------  | 
1365  | 0  | void RBBITableBuilder::exportTable(void *where) { | 
1366  | 0  |     RBBIStateTable    *table = (RBBIStateTable *)where;  | 
1367  | 0  |     uint32_t           state;  | 
1368  | 0  |     int                col;  | 
1369  |  | 
  | 
1370  | 0  |     if (U_FAILURE(*fStatus) || fTree == NULL) { | 
1371  | 0  |         return;  | 
1372  | 0  |     }  | 
1373  |  |  | 
1374  | 0  |     int32_t catCount = fRB->fSetBuilder->getNumCharCategories();  | 
1375  | 0  |     if (catCount > 0x7fff ||  | 
1376  | 0  |         fDStates->size() > 0x7fff) { | 
1377  | 0  |         *fStatus = U_BRK_INTERNAL_ERROR;  | 
1378  | 0  |         return;  | 
1379  | 0  |     }  | 
1380  |  |  | 
1381  | 0  |     table->fNumStates = fDStates->size();  | 
1382  | 0  |     table->fDictCategoriesStart = fRB->fSetBuilder->getDictCategoriesStart();  | 
1383  | 0  |     table->fLookAheadResultsSize = fLASlotsInUse == ACCEPTING_UNCONDITIONAL ? 0 : fLASlotsInUse + 1;  | 
1384  | 0  |     table->fFlags     = 0;  | 
1385  | 0  |     if (use8BitsForTable()) { | 
1386  | 0  |         table->fRowLen    = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;  | 
1387  | 0  |         table->fFlags  |= RBBI_8BITS_ROWS;  | 
1388  | 0  |     } else { | 
1389  | 0  |         table->fRowLen    = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;  | 
1390  | 0  |     }  | 
1391  | 0  |     if (fRB->fLookAheadHardBreak) { | 
1392  | 0  |         table->fFlags  |= RBBI_LOOKAHEAD_HARD_BREAK;  | 
1393  | 0  |     }  | 
1394  | 0  |     if (fRB->fSetBuilder->sawBOF()) { | 
1395  | 0  |         table->fFlags  |= RBBI_BOF_REQUIRED;  | 
1396  | 0  |     }  | 
1397  |  | 
  | 
1398  | 0  |     for (state=0; state<table->fNumStates; state++) { | 
1399  | 0  |         RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);  | 
1400  | 0  |         RBBIStateTableRow   *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);  | 
1401  | 0  |         if (use8BitsForTable()) { | 
1402  | 0  |             U_ASSERT (sd->fAccepting <= 255);  | 
1403  | 0  |             U_ASSERT (sd->fLookAhead <= 255);  | 
1404  | 0  |             U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 255);  | 
1405  | 0  |             RBBIStateTableRow8 *r8 = (RBBIStateTableRow8*)row;  | 
1406  | 0  |             r8->fAccepting = sd->fAccepting;  | 
1407  | 0  |             r8->fLookAhead = sd->fLookAhead;  | 
1408  | 0  |             r8->fTagsIdx   = sd->fTagsIdx;  | 
1409  | 0  |             for (col=0; col<catCount; col++) { | 
1410  | 0  |                 U_ASSERT (sd->fDtran->elementAti(col) <= kMaxStateFor8BitsTable);  | 
1411  | 0  |                 r8->fNextState[col] = sd->fDtran->elementAti(col);  | 
1412  | 0  |             }  | 
1413  | 0  |         } else { | 
1414  | 0  |             U_ASSERT (sd->fAccepting <= 0xffff);  | 
1415  | 0  |             U_ASSERT (sd->fLookAhead <= 0xffff);  | 
1416  | 0  |             U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 0xffff);  | 
1417  | 0  |             row->r16.fAccepting = sd->fAccepting;  | 
1418  | 0  |             row->r16.fLookAhead = sd->fLookAhead;  | 
1419  | 0  |             row->r16.fTagsIdx   = sd->fTagsIdx;  | 
1420  | 0  |             for (col=0; col<catCount; col++) { | 
1421  | 0  |                 row->r16.fNextState[col] = sd->fDtran->elementAti(col);  | 
1422  | 0  |             }  | 
1423  | 0  |         }  | 
1424  | 0  |     }  | 
1425  | 0  | }  | 
1426  |  |  | 
1427  |  |  | 
1428  |  | /**  | 
1429  |  |  *   Synthesize a safe state table from the main state table.  | 
1430  |  |  */  | 
1431  | 0  | void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) { | 
1432  |  |     // The safe table creation has three steps:  | 
1433  |  |  | 
1434  |  |     // 1. Identify pairs of character classes that are "safe." Safe means that boundaries  | 
1435  |  |     // following the pair do not depend on context or state before the pair. To test  | 
1436  |  |     // whether a pair is safe, run it through the main forward state table, starting  | 
1437  |  |     // from each state. If the the final state is the same, no matter what the starting state,  | 
1438  |  |     // the pair is safe.  | 
1439  |  |     //  | 
1440  |  |     // 2. Build a state table that recognizes the safe pairs. It's similar to their  | 
1441  |  |     // forward table, with a column for each input character [class], and a row for  | 
1442  |  |     // each state. Row 1 is the start state, and row 0 is the stop state. Initially  | 
1443  |  |     // create an additional state for each input character category; being in  | 
1444  |  |     // one of these states means that the character has been seen, and is potentially  | 
1445  |  |     // the first of a pair. In each of these rows, the entry for the second character  | 
1446  |  |     // of a safe pair is set to the stop state (0), indicating that a match was found.  | 
1447  |  |     // All other table entries are set to the state corresponding the current input  | 
1448  |  |     // character, allowing that character to be the of a start following pair.  | 
1449  |  |     //  | 
1450  |  |     // Because the safe rules are to be run in reverse, moving backwards in the text,  | 
1451  |  |     // the first and second pair categories are swapped when building the table.  | 
1452  |  |     //  | 
1453  |  |     // 3. Compress the table. There are typically many rows (states) that are  | 
1454  |  |     // equivalent - that have zeroes (match completed) in the same columns -  | 
1455  |  |     // and can be folded together.  | 
1456  |  |  | 
1457  |  |     // Each safe pair is stored as two UChars in the safePair string.  | 
1458  | 0  |     UnicodeString safePairs;  | 
1459  |  | 
  | 
1460  | 0  |     int32_t numCharClasses = fRB->fSetBuilder->getNumCharCategories();  | 
1461  | 0  |     int32_t numStates = fDStates->size();  | 
1462  |  | 
  | 
1463  | 0  |     for (int32_t c1=0; c1<numCharClasses; ++c1) { | 
1464  | 0  |         for (int32_t c2=0; c2 < numCharClasses; ++c2) { | 
1465  | 0  |             int32_t wantedEndState = -1;  | 
1466  | 0  |             int32_t endState = 0;  | 
1467  | 0  |             for (int32_t startState = 1; startState < numStates; ++startState) { | 
1468  | 0  |                 RBBIStateDescriptor *startStateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(startState));  | 
1469  | 0  |                 int32_t s2 = startStateD->fDtran->elementAti(c1);  | 
1470  | 0  |                 RBBIStateDescriptor *s2StateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(s2));  | 
1471  | 0  |                 endState = s2StateD->fDtran->elementAti(c2);  | 
1472  | 0  |                 if (wantedEndState < 0) { | 
1473  | 0  |                     wantedEndState = endState;  | 
1474  | 0  |                 } else { | 
1475  | 0  |                     if (wantedEndState != endState) { | 
1476  | 0  |                         break;  | 
1477  | 0  |                     }  | 
1478  | 0  |                 }  | 
1479  | 0  |             }  | 
1480  | 0  |             if (wantedEndState == endState) { | 
1481  | 0  |                 safePairs.append((char16_t)c1);  | 
1482  | 0  |                 safePairs.append((char16_t)c2);  | 
1483  |  |                 // printf("(%d, %d) ", c1, c2); | 
1484  | 0  |             }  | 
1485  | 0  |         }  | 
1486  |  |         // printf("\n"); | 
1487  | 0  |     }  | 
1488  |  |  | 
1489  |  |     // Populate the initial safe table.  | 
1490  |  |     // The table as a whole is UVector<UnicodeString>  | 
1491  |  |     // Each row is represented by a UnicodeString, being used as a Vector<int16>.  | 
1492  |  |     // Row 0 is the stop state.  | 
1493  |  |     // Row 1 is the start state.  | 
1494  |  |     // Row 2 and beyond are other states, initially one per char class, but  | 
1495  |  |     //   after initial construction, many of the states will be combined, compacting the table.  | 
1496  |  |     // The String holds the nextState data only. The four leading fields of a row, fAccepting,  | 
1497  |  |     // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.  | 
1498  |  | 
  | 
1499  | 0  |     U_ASSERT(fSafeTable == nullptr);  | 
1500  | 0  |     fSafeTable = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, numCharClasses + 2, status);  | 
1501  | 0  |     for (int32_t row=0; row<numCharClasses + 2; ++row) { | 
1502  | 0  |         fSafeTable->addElementX(new UnicodeString(numCharClasses, 0, numCharClasses+4), status);  | 
1503  | 0  |     }  | 
1504  |  |  | 
1505  |  |     // From the start state, each input char class transitions to the state for that input.  | 
1506  | 0  |     UnicodeString &startState = *static_cast<UnicodeString *>(fSafeTable->elementAt(1));  | 
1507  | 0  |     for (int32_t charClass=0; charClass < numCharClasses; ++charClass) { | 
1508  |  |         // Note: +2 for the start & stop state.  | 
1509  | 0  |         startState.setCharAt(charClass, static_cast<char16_t>(charClass+2));  | 
1510  | 0  |     }  | 
1511  |  |  | 
1512  |  |     // Initially make every other state table row look like the start state row,  | 
1513  | 0  |     for (int32_t row=2; row<numCharClasses+2; ++row) { | 
1514  | 0  |         UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(row));  | 
1515  | 0  |         rowState = startState;   // UnicodeString assignment, copies contents.  | 
1516  | 0  |     }  | 
1517  |  |  | 
1518  |  |     // Run through the safe pairs, set the next state to zero when pair has been seen.  | 
1519  |  |     // Zero being the stop state, meaning we found a safe point.  | 
1520  | 0  |     for (int32_t pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) { | 
1521  | 0  |         int32_t c1 = safePairs.charAt(pairIdx);  | 
1522  | 0  |         int32_t c2 = safePairs.charAt(pairIdx + 1);  | 
1523  |  | 
  | 
1524  | 0  |         UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(c2 + 2));  | 
1525  | 0  |         rowState.setCharAt(c1, 0);  | 
1526  | 0  |     }  | 
1527  |  |  | 
1528  |  |     // Remove duplicate or redundant rows from the table.  | 
1529  | 0  |     IntPair states = {1, 0}; | 
1530  | 0  |     while (findDuplicateSafeState(&states)) { | 
1531  |  |         // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second); | 
1532  | 0  |         removeSafeState(states);  | 
1533  | 0  |     }  | 
1534  | 0  | }  | 
1535  |  |  | 
1536  |  |  | 
1537  |  | //-----------------------------------------------------------------------------  | 
1538  |  | //  | 
1539  |  | //   getSafeTableSize()    Calculate the size of the runtime form of this  | 
1540  |  | //                         safe state table.  | 
1541  |  | //  | 
1542  |  | //-----------------------------------------------------------------------------  | 
1543  | 0  | int32_t  RBBITableBuilder::getSafeTableSize() const { | 
1544  | 0  |     int32_t    size = 0;  | 
1545  | 0  |     int32_t    numRows;  | 
1546  | 0  |     int32_t    numCols;  | 
1547  | 0  |     int32_t    rowSize;  | 
1548  |  | 
  | 
1549  | 0  |     if (fSafeTable == nullptr) { | 
1550  | 0  |         return 0;  | 
1551  | 0  |     }  | 
1552  |  |  | 
1553  | 0  |     size    = offsetof(RBBIStateTable, fTableData);    // The header, with no rows to the table.  | 
1554  |  | 
  | 
1555  | 0  |     numRows = fSafeTable->size();  | 
1556  | 0  |     numCols = fRB->fSetBuilder->getNumCharCategories();  | 
1557  |  | 
  | 
1558  | 0  |     if (use8BitsForSafeTable()) { | 
1559  | 0  |         rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols;  | 
1560  | 0  |     } else { | 
1561  | 0  |         rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols;  | 
1562  | 0  |     }  | 
1563  | 0  |     size   += numRows * rowSize;  | 
1564  | 0  |     return size;  | 
1565  | 0  | }  | 
1566  |  |  | 
1567  | 0  | bool RBBITableBuilder::use8BitsForSafeTable() const { | 
1568  | 0  |     return fSafeTable->size() <= kMaxStateFor8BitsTable;  | 
1569  | 0  | }  | 
1570  |  |  | 
1571  |  | //-----------------------------------------------------------------------------  | 
1572  |  | //  | 
1573  |  | //   exportSafeTable()   export the state transition table in the format required  | 
1574  |  | //                       by the runtime engine.  getTableSize() bytes of memory  | 
1575  |  | //                       must be available at the output address "where".  | 
1576  |  | //  | 
1577  |  | //-----------------------------------------------------------------------------  | 
1578  | 0  | void RBBITableBuilder::exportSafeTable(void *where) { | 
1579  | 0  |     RBBIStateTable    *table = (RBBIStateTable *)where;  | 
1580  | 0  |     uint32_t           state;  | 
1581  | 0  |     int                col;  | 
1582  |  | 
  | 
1583  | 0  |     if (U_FAILURE(*fStatus) || fSafeTable == nullptr) { | 
1584  | 0  |         return;  | 
1585  | 0  |     }  | 
1586  |  |  | 
1587  | 0  |     int32_t catCount = fRB->fSetBuilder->getNumCharCategories();  | 
1588  | 0  |     if (catCount > 0x7fff ||  | 
1589  | 0  |             fSafeTable->size() > 0x7fff) { | 
1590  | 0  |         *fStatus = U_BRK_INTERNAL_ERROR;  | 
1591  | 0  |         return;  | 
1592  | 0  |     }  | 
1593  |  |  | 
1594  | 0  |     table->fNumStates = fSafeTable->size();  | 
1595  | 0  |     table->fFlags     = 0;  | 
1596  | 0  |     if (use8BitsForSafeTable()) { | 
1597  | 0  |         table->fRowLen    = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;  | 
1598  | 0  |         table->fFlags  |= RBBI_8BITS_ROWS;  | 
1599  | 0  |     } else { | 
1600  | 0  |         table->fRowLen    = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;  | 
1601  | 0  |     }  | 
1602  |  | 
  | 
1603  | 0  |     for (state=0; state<table->fNumStates; state++) { | 
1604  | 0  |         UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(state);  | 
1605  | 0  |         RBBIStateTableRow   *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);  | 
1606  | 0  |         if (use8BitsForSafeTable()) { | 
1607  | 0  |             RBBIStateTableRow8 *r8 = (RBBIStateTableRow8*)row;  | 
1608  | 0  |             r8->fAccepting = 0;  | 
1609  | 0  |             r8->fLookAhead = 0;  | 
1610  | 0  |             r8->fTagsIdx    = 0;  | 
1611  | 0  |             for (col=0; col<catCount; col++) { | 
1612  | 0  |                 U_ASSERT(rowString->charAt(col) <= kMaxStateFor8BitsTable);  | 
1613  | 0  |                 r8->fNextState[col] = static_cast<uint8_t>(rowString->charAt(col));  | 
1614  | 0  |             }  | 
1615  | 0  |         } else { | 
1616  | 0  |             row->r16.fAccepting = 0;  | 
1617  | 0  |             row->r16.fLookAhead = 0;  | 
1618  | 0  |             row->r16.fTagsIdx    = 0;  | 
1619  | 0  |             for (col=0; col<catCount; col++) { | 
1620  | 0  |                 row->r16.fNextState[col] = rowString->charAt(col);  | 
1621  | 0  |             }  | 
1622  | 0  |         }  | 
1623  | 0  |     }  | 
1624  | 0  | }  | 
1625  |  |  | 
1626  |  |  | 
1627  |  |  | 
1628  |  |  | 
1629  |  | //-----------------------------------------------------------------------------  | 
1630  |  | //  | 
1631  |  | //   printSet    Debug function.   Print the contents of a UVector  | 
1632  |  | //  | 
1633  |  | //-----------------------------------------------------------------------------  | 
1634  |  | #ifdef RBBI_DEBUG  | 
1635  |  | void RBBITableBuilder::printSet(UVector *s) { | 
1636  |  |     int32_t  i;  | 
1637  |  |     for (i=0; i<s->size(); i++) { | 
1638  |  |         const RBBINode *v = static_cast<const RBBINode *>(s->elementAt(i));  | 
1639  |  |         RBBIDebugPrintf("%5d", v==NULL? -1 : v->fSerialNum); | 
1640  |  |     }  | 
1641  |  |     RBBIDebugPrintf("\n"); | 
1642  |  | }  | 
1643  |  | #endif  | 
1644  |  |  | 
1645  |  |  | 
1646  |  | //-----------------------------------------------------------------------------  | 
1647  |  | //  | 
1648  |  | //   printStates    Debug Function.  Dump the fully constructed state transition table.  | 
1649  |  | //  | 
1650  |  | //-----------------------------------------------------------------------------  | 
1651  |  | #ifdef RBBI_DEBUG  | 
1652  |  | void RBBITableBuilder::printStates() { | 
1653  |  |     int     c;    // input "character"  | 
1654  |  |     int     n;    // state number  | 
1655  |  |  | 
1656  |  |     RBBIDebugPrintf("state |           i n p u t     s y m b o l s \n"); | 
1657  |  |     RBBIDebugPrintf("      | Acc  LA    Tag"); | 
1658  |  |     for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { | 
1659  |  |         RBBIDebugPrintf(" %3d", c); | 
1660  |  |     }  | 
1661  |  |     RBBIDebugPrintf("\n"); | 
1662  |  |     RBBIDebugPrintf("      |---------------"); | 
1663  |  |     for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { | 
1664  |  |         RBBIDebugPrintf("----"); | 
1665  |  |     }  | 
1666  |  |     RBBIDebugPrintf("\n"); | 
1667  |  |  | 
1668  |  |     for (n=0; n<fDStates->size(); n++) { | 
1669  |  |         RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);  | 
1670  |  |         RBBIDebugPrintf("  %3d | " , n); | 
1671  |  |         RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagsIdx); | 
1672  |  |         for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { | 
1673  |  |             RBBIDebugPrintf(" %3d", sd->fDtran->elementAti(c)); | 
1674  |  |         }  | 
1675  |  |         RBBIDebugPrintf("\n"); | 
1676  |  |     }  | 
1677  |  |     RBBIDebugPrintf("\n\n"); | 
1678  |  | }  | 
1679  |  | #endif  | 
1680  |  |  | 
1681  |  |  | 
1682  |  | //-----------------------------------------------------------------------------  | 
1683  |  | //  | 
1684  |  | //   printSafeTable    Debug Function.  Dump the fully constructed safe table.  | 
1685  |  | //  | 
1686  |  | //-----------------------------------------------------------------------------  | 
1687  |  | #ifdef RBBI_DEBUG  | 
1688  |  | void RBBITableBuilder::printReverseTable() { | 
1689  |  |     int     c;    // input "character"  | 
1690  |  |     int     n;    // state number  | 
1691  |  |  | 
1692  |  |     RBBIDebugPrintf("    Safe Reverse Table \n"); | 
1693  |  |     if (fSafeTable == nullptr) { | 
1694  |  |         RBBIDebugPrintf("   --- nullptr ---\n"); | 
1695  |  |         return;  | 
1696  |  |     }  | 
1697  |  |     RBBIDebugPrintf("state |           i n p u t     s y m b o l s \n"); | 
1698  |  |     RBBIDebugPrintf("      | Acc  LA    Tag"); | 
1699  |  |     for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { | 
1700  |  |         RBBIDebugPrintf(" %2d", c); | 
1701  |  |     }  | 
1702  |  |     RBBIDebugPrintf("\n"); | 
1703  |  |     RBBIDebugPrintf("      |---------------"); | 
1704  |  |     for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { | 
1705  |  |         RBBIDebugPrintf("---"); | 
1706  |  |     }  | 
1707  |  |     RBBIDebugPrintf("\n"); | 
1708  |  |  | 
1709  |  |     for (n=0; n<fSafeTable->size(); n++) { | 
1710  |  |         UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(n);  | 
1711  |  |         RBBIDebugPrintf("  %3d | " , n); | 
1712  |  |         RBBIDebugPrintf("%3d %3d %5d ", 0, 0, 0);  // Accepting, LookAhead, Tags | 
1713  |  |         for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) { | 
1714  |  |             RBBIDebugPrintf(" %2d", rowString->charAt(c)); | 
1715  |  |         }  | 
1716  |  |         RBBIDebugPrintf("\n"); | 
1717  |  |     }  | 
1718  |  |     RBBIDebugPrintf("\n\n"); | 
1719  |  | }  | 
1720  |  | #endif  | 
1721  |  |  | 
1722  |  |  | 
1723  |  |  | 
1724  |  | //-----------------------------------------------------------------------------  | 
1725  |  | //  | 
1726  |  | //   printRuleStatusTable    Debug Function.  Dump the common rule status table  | 
1727  |  | //  | 
1728  |  | //-----------------------------------------------------------------------------  | 
1729  |  | #ifdef RBBI_DEBUG  | 
1730  |  | void RBBITableBuilder::printRuleStatusTable() { | 
1731  |  |     int32_t  thisRecord = 0;  | 
1732  |  |     int32_t  nextRecord = 0;  | 
1733  |  |     int      i;  | 
1734  |  |     UVector  *tbl = fRB->fRuleStatusVals;  | 
1735  |  |  | 
1736  |  |     RBBIDebugPrintf("index |  tags \n"); | 
1737  |  |     RBBIDebugPrintf("-------------------\n"); | 
1738  |  |  | 
1739  |  |     while (nextRecord < tbl->size()) { | 
1740  |  |         thisRecord = nextRecord;  | 
1741  |  |         nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1;  | 
1742  |  |         RBBIDebugPrintf("%4d   ", thisRecord); | 
1743  |  |         for (i=thisRecord+1; i<nextRecord; i++) { | 
1744  |  |             RBBIDebugPrintf("  %5d", tbl->elementAti(i)); | 
1745  |  |         }  | 
1746  |  |         RBBIDebugPrintf("\n"); | 
1747  |  |     }  | 
1748  |  |     RBBIDebugPrintf("\n\n"); | 
1749  |  | }  | 
1750  |  | #endif  | 
1751  |  |  | 
1752  |  |  | 
1753  |  | //-----------------------------------------------------------------------------  | 
1754  |  | //  | 
1755  |  | //   RBBIStateDescriptor     Methods.  This is a very struct-like class  | 
1756  |  | //                           Most access is directly to the fields.  | 
1757  |  | //  | 
1758  |  | //-----------------------------------------------------------------------------  | 
1759  |  |  | 
1760  | 0  | RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatus) { | 
1761  | 0  |     fMarked    = FALSE;  | 
1762  | 0  |     fAccepting = 0;  | 
1763  | 0  |     fLookAhead = 0;  | 
1764  | 0  |     fTagsIdx   = 0;  | 
1765  | 0  |     fTagVals   = NULL;  | 
1766  | 0  |     fPositions = NULL;  | 
1767  | 0  |     fDtran     = NULL;  | 
1768  |  | 
  | 
1769  | 0  |     fDtran     = new UVector32(lastInputSymbol+1, *fStatus);  | 
1770  | 0  |     if (U_FAILURE(*fStatus)) { | 
1771  | 0  |         return;  | 
1772  | 0  |     }  | 
1773  | 0  |     if (fDtran == NULL) { | 
1774  | 0  |         *fStatus = U_MEMORY_ALLOCATION_ERROR;  | 
1775  | 0  |         return;  | 
1776  | 0  |     }  | 
1777  | 0  |     fDtran->setSize(lastInputSymbol+1);    // fDtran needs to be pre-sized.  | 
1778  |  |                                            //   It is indexed by input symbols, and will  | 
1779  |  |                                            //   hold  the next state number for each  | 
1780  |  |                                            //   symbol.  | 
1781  | 0  | }  | 
1782  |  |  | 
1783  |  |  | 
1784  | 0  | RBBIStateDescriptor::~RBBIStateDescriptor() { | 
1785  | 0  |     delete       fPositions;  | 
1786  | 0  |     delete       fDtran;  | 
1787  | 0  |     delete       fTagVals;  | 
1788  | 0  |     fPositions = NULL;  | 
1789  | 0  |     fDtran     = NULL;  | 
1790  | 0  |     fTagVals   = NULL;  | 
1791  | 0  | }  | 
1792  |  |  | 
1793  |  | U_NAMESPACE_END  | 
1794  |  |  | 
1795  |  | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */  |