Coverage Report

Created: 2021-08-22 09:07

/src/skia/third_party/externals/icu/source/common/rbbitblb.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (c) 2002-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*/
9
//
10
//  rbbitblb.cpp
11
//
12
13
14
#include "unicode/utypes.h"
15
16
#if !UCONFIG_NO_BREAK_ITERATION
17
18
#include "unicode/unistr.h"
19
#include "rbbitblb.h"
20
#include "rbbirb.h"
21
#include "rbbiscan.h"
22
#include "rbbisetb.h"
23
#include "rbbidata.h"
24
#include "cstring.h"
25
#include "uassert.h"
26
#include "uvectr32.h"
27
#include "cmemory.h"
28
29
U_NAMESPACE_BEGIN
30
31
const int32_t kMaxStateFor8BitsTable = 255;
32
33
RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status) :
34
        fRB(rb),
35
        fTree(*rootNode),
36
        fStatus(&status),
37
        fDStates(nullptr),
38
0
        fSafeTable(nullptr) {
39
0
    if (U_FAILURE(status)) {
40
0
        return;
41
0
    }
42
    // fDStates is UVector<RBBIStateDescriptor *>
43
0
    fDStates = new UVector(status);
44
0
    if (U_SUCCESS(status) && fDStates == nullptr ) {
45
0
        status = U_MEMORY_ALLOCATION_ERROR;
46
0
    }
47
0
}
48
49
50
51
0
RBBITableBuilder::~RBBITableBuilder() {
52
0
    int i;
53
0
    for (i=0; i<fDStates->size(); i++) {
54
0
        delete (RBBIStateDescriptor *)fDStates->elementAt(i);
55
0
    }
56
0
    delete fDStates;
57
0
    delete fSafeTable;
58
0
    delete fLookAheadRuleMap;
59
0
}
60
61
62
//-----------------------------------------------------------------------------
63
//
64
//   RBBITableBuilder::buildForwardTable  -  This is the main function for building
65
//                               the DFA state transition table from the RBBI rules parse tree.
66
//
67
//-----------------------------------------------------------------------------
68
0
void  RBBITableBuilder::buildForwardTable() {
69
70
0
    if (U_FAILURE(*fStatus)) {
71
0
        return;
72
0
    }
73
74
    // If there were no rules, just return.  This situation can easily arise
75
    //   for the reverse rules.
76
0
    if (fTree==NULL) {
77
0
        return;
78
0
    }
79
80
    //
81
    // Walk through the tree, replacing any references to $variables with a copy of the
82
    //   parse tree for the substition expression.
83
    //
84
0
    fTree = fTree->flattenVariables();
85
#ifdef RBBI_DEBUG
86
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) {
87
        RBBIDebugPuts("\nParse tree after flattening variable references.");
88
        RBBINode::printTree(fTree, TRUE);
89
    }
90
#endif
91
92
    //
93
    // If the rules contained any references to {bof} 
94
    //   add a {bof} <cat> <former root of tree> to the
95
    //   tree.  Means that all matches must start out with the 
96
    //   {bof} fake character.
97
    // 
98
0
    if (fRB->fSetBuilder->sawBOF()) {
99
0
        RBBINode *bofTop    = new RBBINode(RBBINode::opCat);
100
0
        RBBINode *bofLeaf   = new RBBINode(RBBINode::leafChar);
101
        // Delete and exit if memory allocation failed.
102
0
        if (bofTop == NULL || bofLeaf == NULL) {
103
0
            *fStatus = U_MEMORY_ALLOCATION_ERROR;
104
0
            delete bofTop;
105
0
            delete bofLeaf;
106
0
            return;
107
0
        }
108
0
        bofTop->fLeftChild  = bofLeaf;
109
0
        bofTop->fRightChild = fTree;
110
0
        bofLeaf->fParent    = bofTop;
111
0
        bofLeaf->fVal       = 2;      // Reserved value for {bof}.
112
0
        fTree               = bofTop;
113
0
    }
114
115
    //
116
    // Add a unique right-end marker to the expression.
117
    //   Appears as a cat-node, left child being the original tree,
118
    //   right child being the end marker.
119
    //
120
0
    RBBINode *cn = new RBBINode(RBBINode::opCat);
121
    // Exit if memory allocation failed.
122
0
    if (cn == NULL) {
123
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
124
0
        return;
125
0
    }
126
0
    cn->fLeftChild = fTree;
127
0
    fTree->fParent = cn;
128
0
    RBBINode *endMarkerNode = cn->fRightChild = new RBBINode(RBBINode::endMark);
129
    // Delete and exit if memory allocation failed.
130
0
    if (cn->fRightChild == NULL) {
131
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
132
0
        delete cn;
133
0
        return;
134
0
    }
135
0
    cn->fRightChild->fParent = cn;
136
0
    fTree = cn;
137
138
    //
139
    //  Replace all references to UnicodeSets with the tree for the equivalent
140
    //      expression.
141
    //
142
0
    fTree->flattenSets();
143
#ifdef RBBI_DEBUG
144
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) {
145
        RBBIDebugPuts("\nParse tree after flattening Unicode Set references.");
146
        RBBINode::printTree(fTree, TRUE);
147
    }
148
#endif
149
150
151
    //
152
    // calculate the functions nullable, firstpos, lastpos and followpos on
153
    // nodes in the parse tree.
154
    //    See the algorithm description in Aho.
155
    //    Understanding how this works by looking at the code alone will be
156
    //       nearly impossible.
157
    //
158
0
    calcNullable(fTree);
159
0
    calcFirstPos(fTree);
160
0
    calcLastPos(fTree);
161
0
    calcFollowPos(fTree);
162
0
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) {
163
0
        RBBIDebugPuts("\n");
164
0
        printPosSets(fTree);
165
0
    }
166
167
    //
168
    //  For "chained" rules, modify the followPos sets
169
    //
170
0
    if (fRB->fChainRules) {
171
0
        calcChainedFollowPos(fTree, endMarkerNode);
172
0
    }
173
174
    //
175
    //  BOF (start of input) test fixup.
176
    //
177
0
    if (fRB->fSetBuilder->sawBOF()) {
178
0
        bofFixup();
179
0
    }
180
181
    //
182
    // Build the DFA state transition tables.
183
    //
184
0
    buildStateTable();
185
0
    mapLookAheadRules();
186
0
    flagAcceptingStates();
187
0
    flagLookAheadStates();
188
0
    flagTaggedStates();
189
190
    //
191
    // Update the global table of rule status {tag} values
192
    // The rule builder has a global vector of status values that are common
193
    //    for all tables.  Merge the ones from this table into the global set.
194
    //
195
0
    mergeRuleStatusVals();
196
0
}
197
198
199
200
//-----------------------------------------------------------------------------
201
//
202
//   calcNullable.    Impossible to explain succinctly.  See Aho, section 3.9
203
//
204
//-----------------------------------------------------------------------------
205
0
void RBBITableBuilder::calcNullable(RBBINode *n) {
206
0
    if (n == NULL) {
207
0
        return;
208
0
    }
209
0
    if (n->fType == RBBINode::setRef ||
210
0
        n->fType == RBBINode::endMark ) {
211
        // These are non-empty leaf node types.
212
0
        n->fNullable = FALSE;
213
0
        return;
214
0
    }
215
216
0
    if (n->fType == RBBINode::lookAhead || n->fType == RBBINode::tag) {
217
        // Lookahead marker node.  It's a leaf, so no recursion on children.
218
        // It's nullable because it does not match any literal text from the input stream.
219
0
        n->fNullable = TRUE;
220
0
        return;
221
0
    }
222
223
224
    // The node is not a leaf.
225
    //  Calculate nullable on its children.
226
0
    calcNullable(n->fLeftChild);
227
0
    calcNullable(n->fRightChild);
228
229
    // Apply functions from table 3.40 in Aho
230
0
    if (n->fType == RBBINode::opOr) {
231
0
        n->fNullable = n->fLeftChild->fNullable || n->fRightChild->fNullable;
232
0
    }
233
0
    else if (n->fType == RBBINode::opCat) {
234
0
        n->fNullable = n->fLeftChild->fNullable && n->fRightChild->fNullable;
235
0
    }
236
0
    else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opQuestion) {
237
0
        n->fNullable = TRUE;
238
0
    }
239
0
    else {
240
0
        n->fNullable = FALSE;
241
0
    }
242
0
}
243
244
245
246
247
//-----------------------------------------------------------------------------
248
//
249
//   calcFirstPos.    Impossible to explain succinctly.  See Aho, section 3.9
250
//
251
//-----------------------------------------------------------------------------
252
0
void RBBITableBuilder::calcFirstPos(RBBINode *n) {
253
0
    if (n == NULL) {
254
0
        return;
255
0
    }
256
0
    if (n->fType == RBBINode::leafChar  ||
257
0
        n->fType == RBBINode::endMark   ||
258
0
        n->fType == RBBINode::lookAhead ||
259
0
        n->fType == RBBINode::tag) {
260
        // These are non-empty leaf node types.
261
        // Note: In order to maintain the sort invariant on the set,
262
        // this function should only be called on a node whose set is
263
        // empty to start with.
264
0
        n->fFirstPosSet->addElement(n, *fStatus);
265
0
        return;
266
0
    }
267
268
    // The node is not a leaf.
269
    //  Calculate firstPos on its children.
270
0
    calcFirstPos(n->fLeftChild);
271
0
    calcFirstPos(n->fRightChild);
272
273
    // Apply functions from table 3.40 in Aho
274
0
    if (n->fType == RBBINode::opOr) {
275
0
        setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
276
0
        setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);
277
0
    }
278
0
    else if (n->fType == RBBINode::opCat) {
279
0
        setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
280
0
        if (n->fLeftChild->fNullable) {
281
0
            setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);
282
0
        }
283
0
    }
284
0
    else if (n->fType == RBBINode::opStar ||
285
0
             n->fType == RBBINode::opQuestion ||
286
0
             n->fType == RBBINode::opPlus) {
287
0
        setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
288
0
    }
289
0
}
290
291
292
293
//-----------------------------------------------------------------------------
294
//
295
//   calcLastPos.    Impossible to explain succinctly.  See Aho, section 3.9
296
//
297
//-----------------------------------------------------------------------------
298
0
void RBBITableBuilder::calcLastPos(RBBINode *n) {
299
0
    if (n == NULL) {
300
0
        return;
301
0
    }
302
0
    if (n->fType == RBBINode::leafChar  ||
303
0
        n->fType == RBBINode::endMark   ||
304
0
        n->fType == RBBINode::lookAhead ||
305
0
        n->fType == RBBINode::tag) {
306
        // These are non-empty leaf node types.
307
        // Note: In order to maintain the sort invariant on the set,
308
        // this function should only be called on a node whose set is
309
        // empty to start with.
310
0
        n->fLastPosSet->addElement(n, *fStatus);
311
0
        return;
312
0
    }
313
314
    // The node is not a leaf.
315
    //  Calculate lastPos on its children.
316
0
    calcLastPos(n->fLeftChild);
317
0
    calcLastPos(n->fRightChild);
318
319
    // Apply functions from table 3.40 in Aho
320
0
    if (n->fType == RBBINode::opOr) {
321
0
        setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
322
0
        setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);
323
0
    }
324
0
    else if (n->fType == RBBINode::opCat) {
325
0
        setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);
326
0
        if (n->fRightChild->fNullable) {
327
0
            setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
328
0
        }
329
0
    }
330
0
    else if (n->fType == RBBINode::opStar     ||
331
0
             n->fType == RBBINode::opQuestion ||
332
0
             n->fType == RBBINode::opPlus) {
333
0
        setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
334
0
    }
335
0
}
336
337
338
339
//-----------------------------------------------------------------------------
340
//
341
//   calcFollowPos.    Impossible to explain succinctly.  See Aho, section 3.9
342
//
343
//-----------------------------------------------------------------------------
344
0
void RBBITableBuilder::calcFollowPos(RBBINode *n) {
345
0
    if (n == NULL ||
346
0
        n->fType == RBBINode::leafChar ||
347
0
        n->fType == RBBINode::endMark) {
348
0
        return;
349
0
    }
350
351
0
    calcFollowPos(n->fLeftChild);
352
0
    calcFollowPos(n->fRightChild);
353
354
    // Aho rule #1
355
0
    if (n->fType == RBBINode::opCat) {
356
0
        RBBINode *i;   // is 'i' in Aho's description
357
0
        uint32_t     ix;
358
359
0
        UVector *LastPosOfLeftChild = n->fLeftChild->fLastPosSet;
360
361
0
        for (ix=0; ix<(uint32_t)LastPosOfLeftChild->size(); ix++) {
362
0
            i = (RBBINode *)LastPosOfLeftChild->elementAt(ix);
363
0
            setAdd(i->fFollowPos, n->fRightChild->fFirstPosSet);
364
0
        }
365
0
    }
366
367
    // Aho rule #2
368
0
    if (n->fType == RBBINode::opStar ||
369
0
        n->fType == RBBINode::opPlus) {
370
0
        RBBINode   *i;  // again, n and i are the names from Aho's description.
371
0
        uint32_t    ix;
372
373
0
        for (ix=0; ix<(uint32_t)n->fLastPosSet->size(); ix++) {
374
0
            i = (RBBINode *)n->fLastPosSet->elementAt(ix);
375
0
            setAdd(i->fFollowPos, n->fFirstPosSet);
376
0
        }
377
0
    }
378
379
380
381
0
}
382
383
//-----------------------------------------------------------------------------
384
//
385
//    addRuleRootNodes    Recursively walk a parse tree, adding all nodes flagged
386
//                        as roots of a rule to a destination vector.
387
//
388
//-----------------------------------------------------------------------------
389
0
void RBBITableBuilder::addRuleRootNodes(UVector *dest, RBBINode *node) {
390
0
    if (node == NULL || U_FAILURE(*fStatus)) {
391
0
        return;
392
0
    }
393
0
    if (node->fRuleRoot) {
394
0
        dest->addElement(node, *fStatus);
395
        // Note: rules cannot nest. If we found a rule start node,
396
        //       no child node can also be a start node.
397
0
        return;
398
0
    }
399
0
    addRuleRootNodes(dest, node->fLeftChild);
400
0
    addRuleRootNodes(dest, node->fRightChild);
401
0
}
402
403
//-----------------------------------------------------------------------------
404
//
405
//   calcChainedFollowPos.    Modify the previously calculated followPos sets
406
//                            to implement rule chaining.  NOT described by Aho
407
//
408
//-----------------------------------------------------------------------------
409
0
void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree, RBBINode *endMarkNode) {
410
411
0
    UVector         leafNodes(*fStatus);
412
0
    if (U_FAILURE(*fStatus)) {
413
0
        return;
414
0
    }
415
416
    // get a list all leaf nodes
417
0
    tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus);
418
0
    if (U_FAILURE(*fStatus)) {
419
0
        return;
420
0
    }
421
422
    // Collect all leaf nodes that can start matches for rules
423
    // with inbound chaining enabled, which is the union of the 
424
    // firstPosition sets from each of the rule root nodes.
425
    
426
0
    UVector ruleRootNodes(*fStatus);
427
0
    addRuleRootNodes(&ruleRootNodes, tree);
428
429
0
    UVector matchStartNodes(*fStatus);
430
0
    for (int j=0; j<ruleRootNodes.size(); ++j) {
431
0
        RBBINode *node = static_cast<RBBINode *>(ruleRootNodes.elementAt(j));
432
0
        if (node->fChainIn) {
433
0
            setAdd(&matchStartNodes, node->fFirstPosSet);
434
0
        }
435
0
    }
436
0
    if (U_FAILURE(*fStatus)) {
437
0
        return;
438
0
    }
439
440
0
    int32_t  endNodeIx;
441
0
    int32_t  startNodeIx;
442
443
0
    for (endNodeIx=0; endNodeIx<leafNodes.size(); endNodeIx++) {
444
0
        RBBINode *endNode   = (RBBINode *)leafNodes.elementAt(endNodeIx);
445
446
        // Identify leaf nodes that correspond to overall rule match positions.
447
        // These include the endMarkNode in their followPos sets.
448
        //
449
        // Note: do not consider other end marker nodes, those that are added to
450
        //       look-ahead rules. These can't chain; a match immediately stops
451
        //       further matching. This leaves exactly one end marker node, the one
452
        //       at the end of the complete tree.
453
454
0
        if (!endNode->fFollowPos->contains(endMarkNode)) {
455
0
            continue;
456
0
        }
457
458
        // We've got a node that can end a match.
459
460
        // !!LBCMNoChain implementation:  If this node's val correspond to
461
        // the Line Break $CM char class, don't chain from it.
462
        // TODO:  Remove this. !!LBCMNoChain is deprecated, and is not used
463
        //        by any of the standard ICU rules.
464
0
        if (fRB->fLBCMNoChain) {
465
0
            UChar32 c = this->fRB->fSetBuilder->getFirstChar(endNode->fVal);
466
0
            if (c != -1) {
467
                // c == -1 occurs with sets containing only the {eof} marker string.
468
0
                ULineBreak cLBProp = (ULineBreak)u_getIntPropertyValue(c, UCHAR_LINE_BREAK);
469
0
                if (cLBProp == U_LB_COMBINING_MARK) {
470
0
                    continue;
471
0
                }
472
0
            }
473
0
        }
474
475
        // Now iterate over the nodes that can start a match, looking for ones
476
        //   with the same char class as our ending node.
477
0
        RBBINode *startNode;
478
0
        for (startNodeIx = 0; startNodeIx<matchStartNodes.size(); startNodeIx++) {
479
0
            startNode = (RBBINode *)matchStartNodes.elementAt(startNodeIx);
480
0
            if (startNode->fType != RBBINode::leafChar) {
481
0
                continue;
482
0
            }
483
484
0
            if (endNode->fVal == startNode->fVal) {
485
                // The end val (character class) of one possible match is the
486
                //   same as the start of another.
487
488
                // Add all nodes from the followPos of the start node to the
489
                //  followPos set of the end node, which will have the effect of
490
                //  letting matches transition from a match state at endNode
491
                //  to the second char of a match starting with startNode.
492
0
                setAdd(endNode->fFollowPos, startNode->fFollowPos);
493
0
            }
494
0
        }
495
0
    }
496
0
}
497
498
499
//-----------------------------------------------------------------------------
500
//
501
//   bofFixup.    Fixup for state tables that include {bof} beginning of input testing.
502
//                Do an swizzle similar to chaining, modifying the followPos set of
503
//                the bofNode to include the followPos nodes from other {bot} nodes
504
//                scattered through the tree.
505
//
506
//                This function has much in common with calcChainedFollowPos().
507
//
508
//-----------------------------------------------------------------------------
509
0
void RBBITableBuilder::bofFixup() {
510
511
0
    if (U_FAILURE(*fStatus)) {
512
0
        return;
513
0
    }
514
515
    //   The parse tree looks like this ...
516
    //         fTree root  --->       <cat>
517
    //                               /     \       .
518
    //                            <cat>   <#end node>
519
    //                           /     \  .
520
    //                     <bofNode>   rest
521
    //                               of tree
522
    //
523
    //    We will be adding things to the followPos set of the <bofNode>
524
    //
525
0
    RBBINode  *bofNode = fTree->fLeftChild->fLeftChild;
526
0
    U_ASSERT(bofNode->fType == RBBINode::leafChar);
527
0
    U_ASSERT(bofNode->fVal == 2);
528
529
    // Get all nodes that can be the start a match of the user-written rules
530
    //  (excluding the fake bofNode)
531
    //  We want the nodes that can start a match in the
532
    //     part labeled "rest of tree"
533
    // 
534
0
    UVector *matchStartNodes = fTree->fLeftChild->fRightChild->fFirstPosSet;
535
536
0
    RBBINode *startNode;
537
0
    int       startNodeIx;
538
0
    for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) {
539
0
        startNode = (RBBINode *)matchStartNodes->elementAt(startNodeIx);
540
0
        if (startNode->fType != RBBINode::leafChar) {
541
0
            continue;
542
0
        }
543
544
0
        if (startNode->fVal == bofNode->fVal) {
545
            //  We found a leaf node corresponding to a {bof} that was
546
            //    explicitly written into a rule.
547
            //  Add everything from the followPos set of this node to the
548
            //    followPos set of the fake bofNode at the start of the tree.
549
            //  
550
0
            setAdd(bofNode->fFollowPos, startNode->fFollowPos);
551
0
        }
552
0
    }
553
0
}
554
555
//-----------------------------------------------------------------------------
556
//
557
//   buildStateTable()    Determine the set of runtime DFA states and the
558
//                        transition tables for these states, by the algorithm
559
//                        of fig. 3.44 in Aho.
560
//
561
//                        Most of the comments are quotes of Aho's psuedo-code.
562
//
563
//-----------------------------------------------------------------------------
564
0
void RBBITableBuilder::buildStateTable() {
565
0
    if (U_FAILURE(*fStatus)) {
566
0
        return;
567
0
    }
568
0
    RBBIStateDescriptor *failState;
569
    // Set it to NULL to avoid uninitialized warning
570
0
    RBBIStateDescriptor *initialState = NULL; 
571
    //
572
    // Add a dummy state 0 - the stop state.  Not from Aho.
573
0
    int      lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1;
574
0
    failState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
575
0
    if (failState == NULL) {
576
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
577
0
        goto ExitBuildSTdeleteall;
578
0
    }
579
0
    failState->fPositions = new UVector(*fStatus);
580
0
    if (failState->fPositions == NULL) {
581
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
582
0
    }
583
0
    if (failState->fPositions == NULL || U_FAILURE(*fStatus)) {
584
0
        goto ExitBuildSTdeleteall;
585
0
    }
586
0
    fDStates->addElement(failState, *fStatus);
587
0
    if (U_FAILURE(*fStatus)) {
588
0
        goto ExitBuildSTdeleteall;
589
0
    }
590
591
    // initially, the only unmarked state in Dstates is firstpos(root),
592
    //       where toot is the root of the syntax tree for (r)#;
593
0
    initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
594
0
    if (initialState == NULL) {
595
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
596
0
    }
597
0
    if (U_FAILURE(*fStatus)) {
598
0
        goto ExitBuildSTdeleteall;
599
0
    }
600
0
    initialState->fPositions = new UVector(*fStatus);
601
0
    if (initialState->fPositions == NULL) {
602
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
603
0
    }
604
0
    if (U_FAILURE(*fStatus)) {
605
0
        goto ExitBuildSTdeleteall;
606
0
    }
607
0
    setAdd(initialState->fPositions, fTree->fFirstPosSet);
608
0
    fDStates->addElement(initialState, *fStatus);
609
0
    if (U_FAILURE(*fStatus)) {
610
0
        goto ExitBuildSTdeleteall;
611
0
    }
612
613
    // while there is an unmarked state T in Dstates do begin
614
0
    for (;;) {
615
0
        RBBIStateDescriptor *T = NULL;
616
0
        int32_t              tx;
617
0
        for (tx=1; tx<fDStates->size(); tx++) {
618
0
            RBBIStateDescriptor *temp;
619
0
            temp = (RBBIStateDescriptor *)fDStates->elementAt(tx);
620
0
            if (temp->fMarked == FALSE) {
621
0
                T = temp;
622
0
                break;
623
0
            }
624
0
        }
625
0
        if (T == NULL) {
626
0
            break;
627
0
        }
628
629
        // mark T;
630
0
        T->fMarked = TRUE;
631
632
        // for each input symbol a do begin
633
0
        int32_t  a;
634
0
        for (a = 1; a<=lastInputSymbol; a++) {
635
            // let U be the set of positions that are in followpos(p)
636
            //    for some position p in T
637
            //    such that the symbol at position p is a;
638
0
            UVector    *U = NULL;
639
0
            RBBINode   *p;
640
0
            int32_t     px;
641
0
            for (px=0; px<T->fPositions->size(); px++) {
642
0
                p = (RBBINode *)T->fPositions->elementAt(px);
643
0
                if ((p->fType == RBBINode::leafChar) &&  (p->fVal == a)) {
644
0
                    if (U == NULL) {
645
0
                        U = new UVector(*fStatus);
646
0
                        if (U == NULL) {
647
0
                          *fStatus = U_MEMORY_ALLOCATION_ERROR;
648
0
                          goto ExitBuildSTdeleteall;
649
0
                        }
650
0
                    }
651
0
                    setAdd(U, p->fFollowPos);
652
0
                }
653
0
            }
654
655
            // if U is not empty and not in DStates then
656
0
            int32_t  ux = 0;
657
0
            UBool    UinDstates = FALSE;
658
0
            if (U != NULL) {
659
0
                U_ASSERT(U->size() > 0);
660
0
                int  ix;
661
0
                for (ix=0; ix<fDStates->size(); ix++) {
662
0
                    RBBIStateDescriptor *temp2;
663
0
                    temp2 = (RBBIStateDescriptor *)fDStates->elementAt(ix);
664
0
                    if (setEquals(U, temp2->fPositions)) {
665
0
                        delete U;
666
0
                        U  = temp2->fPositions;
667
0
                        ux = ix;
668
0
                        UinDstates = TRUE;
669
0
                        break;
670
0
                    }
671
0
                }
672
673
                // Add U as an unmarked state to Dstates
674
0
                if (!UinDstates)
675
0
                {
676
0
                    RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
677
0
                    if (newState == NULL) {
678
0
                      *fStatus = U_MEMORY_ALLOCATION_ERROR;
679
0
                    }
680
0
                    if (U_FAILURE(*fStatus)) {
681
0
                        goto ExitBuildSTdeleteall;
682
0
                    }
683
0
                    newState->fPositions = U;
684
0
                    fDStates->addElement(newState, *fStatus);
685
0
                    if (U_FAILURE(*fStatus)) {
686
0
                        return;
687
0
                    }
688
0
                    ux = fDStates->size()-1;
689
0
                }
690
691
                // Dtran[T, a] := U;
692
0
                T->fDtran->setElementAt(ux, a);
693
0
            }
694
0
        }
695
0
    }
696
0
    return;
697
    // delete local pointers only if error occured.
698
0
ExitBuildSTdeleteall:
699
0
    delete initialState;
700
0
    delete failState;
701
0
}
702
703
704
/**
705
 * mapLookAheadRules
706
 *
707
 */
708
0
void RBBITableBuilder::mapLookAheadRules() {
709
0
    fLookAheadRuleMap =  new UVector32(fRB->fScanner->numRules() + 1, *fStatus);
710
0
    if (fLookAheadRuleMap == nullptr) {
711
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
712
0
    }
713
0
    if (U_FAILURE(*fStatus)) {
714
0
        return;
715
0
    }
716
0
    fLookAheadRuleMap->setSize(fRB->fScanner->numRules() + 1);
717
718
0
    for (int32_t n=0; n<fDStates->size(); n++) {
719
0
        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
720
0
        int32_t laSlotForState = 0;
721
722
        // Establish the look-ahead slot for this state, if the state covers
723
        // any look-ahead nodes - corresponding to the '/' in look-ahead rules.
724
725
        // If any of the look-ahead nodes already have a slot assigned, use it,
726
        // otherwise assign a new one.
727
728
0
        bool sawLookAheadNode = false;
729
0
        for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) {
730
0
            RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos));
731
0
            if (node->fType != RBBINode::NodeType::lookAhead) {
732
0
                continue;
733
0
            }
734
0
            sawLookAheadNode = true;
735
0
            int32_t ruleNum = node->fVal;     // Set when rule was originally parsed.
736
0
            U_ASSERT(ruleNum < fLookAheadRuleMap->size());
737
0
            U_ASSERT(ruleNum > 0);
738
0
            int32_t laSlot = fLookAheadRuleMap->elementAti(ruleNum);
739
0
            if (laSlot != 0) {
740
0
                if (laSlotForState == 0) {
741
0
                    laSlotForState = laSlot;
742
0
                } else {
743
                    // TODO: figure out if this can fail, change to setting an error code if so.
744
0
                    U_ASSERT(laSlot == laSlotForState);
745
0
                }
746
0
            }
747
0
        }
748
0
        if (!sawLookAheadNode) {
749
0
            continue;
750
0
        }
751
752
0
        if (laSlotForState == 0) {
753
0
            laSlotForState = ++fLASlotsInUse;
754
0
        }
755
756
        // For each look ahead node covered by this state,
757
        // set the mapping from the node's rule number to the look ahead slot.
758
        // There can be multiple nodes/rule numbers going to the same la slot.
759
760
0
        for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) {
761
0
            RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos));
762
0
            if (node->fType != RBBINode::NodeType::lookAhead) {
763
0
                continue;
764
0
            }
765
0
            int32_t ruleNum = node->fVal;     // Set when rule was originally parsed.
766
0
            int32_t existingVal = fLookAheadRuleMap->elementAti(ruleNum);
767
0
            (void)existingVal;
768
0
            U_ASSERT(existingVal == 0 || existingVal == laSlotForState);
769
0
            fLookAheadRuleMap->setElementAt(laSlotForState, ruleNum);
770
0
        }
771
0
    }
772
773
0
}
774
775
//-----------------------------------------------------------------------------
776
//
777
//   flagAcceptingStates    Identify accepting states.
778
//                          First get a list of all of the end marker nodes.
779
//                          Then, for each state s,
780
//                              if s contains one of the end marker nodes in its list of tree positions then
781
//                                  s is an accepting state.
782
//
783
//-----------------------------------------------------------------------------
784
0
void     RBBITableBuilder::flagAcceptingStates() {
785
0
    if (U_FAILURE(*fStatus)) {
786
0
        return;
787
0
    }
788
0
    UVector     endMarkerNodes(*fStatus);
789
0
    RBBINode    *endMarker;
790
0
    int32_t     i;
791
0
    int32_t     n;
792
793
0
    if (U_FAILURE(*fStatus)) {
794
0
        return;
795
0
    }
796
797
0
    fTree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus);
798
0
    if (U_FAILURE(*fStatus)) {
799
0
        return;
800
0
    }
801
802
0
    for (i=0; i<endMarkerNodes.size(); i++) {
803
0
        endMarker = (RBBINode *)endMarkerNodes.elementAt(i);
804
0
        for (n=0; n<fDStates->size(); n++) {
805
0
            RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
806
0
            if (sd->fPositions->indexOf(endMarker) >= 0) {
807
                // Any non-zero value for fAccepting means this is an accepting node.
808
                // The value is what will be returned to the user as the break status.
809
                // If no other value was specified, force it to ACCEPTING_UNCONDITIONAL (1).
810
811
0
                if (sd->fAccepting==0) {
812
                    // State hasn't been marked as accepting yet.  Do it now.
813
0
                    sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
814
0
                    if (sd->fAccepting == 0) {
815
0
                        sd->fAccepting = ACCEPTING_UNCONDITIONAL;
816
0
                    }
817
0
                }
818
0
                if (sd->fAccepting==ACCEPTING_UNCONDITIONAL && endMarker->fVal != 0) {
819
                    // Both lookahead and non-lookahead accepting for this state.
820
                    // Favor the look-ahead, because a look-ahead match needs to
821
                    // immediately stop the run-time engine. First match, not longest.
822
0
                    sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
823
0
                }
824
                // implicit else:
825
                // if sd->fAccepting already had a value other than 0 or 1, leave it be.
826
0
            }
827
0
        }
828
0
    }
829
0
}
830
831
832
//-----------------------------------------------------------------------------
833
//
834
//    flagLookAheadStates   Very similar to flagAcceptingStates, above.
835
//
836
//-----------------------------------------------------------------------------
837
0
void     RBBITableBuilder::flagLookAheadStates() {
838
0
    if (U_FAILURE(*fStatus)) {
839
0
        return;
840
0
    }
841
0
    UVector     lookAheadNodes(*fStatus);
842
0
    RBBINode    *lookAheadNode;
843
0
    int32_t     i;
844
0
    int32_t     n;
845
846
0
    fTree->findNodes(&lookAheadNodes, RBBINode::lookAhead, *fStatus);
847
0
    if (U_FAILURE(*fStatus)) {
848
0
        return;
849
0
    }
850
0
    for (i=0; i<lookAheadNodes.size(); i++) {
851
0
        lookAheadNode = (RBBINode *)lookAheadNodes.elementAt(i);
852
0
        U_ASSERT(lookAheadNode->fType == RBBINode::NodeType::lookAhead);
853
854
0
        for (n=0; n<fDStates->size(); n++) {
855
0
            RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
856
0
            int32_t positionsIdx = sd->fPositions->indexOf(lookAheadNode);
857
0
            if (positionsIdx >= 0) {
858
0
                U_ASSERT(lookAheadNode == sd->fPositions->elementAt(positionsIdx));
859
0
                uint32_t lookaheadSlot = fLookAheadRuleMap->elementAti(lookAheadNode->fVal);
860
0
                U_ASSERT(sd->fLookAhead == 0 || sd->fLookAhead == lookaheadSlot);
861
                // if (sd->fLookAhead != 0 && sd->fLookAhead != lookaheadSlot) {
862
                //     printf("%s:%d Bingo. sd->fLookAhead:%d   lookaheadSlot:%d\n",
863
                //            __FILE__, __LINE__, sd->fLookAhead, lookaheadSlot);
864
                // }
865
0
                sd->fLookAhead = lookaheadSlot;
866
0
            }
867
0
        }
868
0
    }
869
0
}
870
871
872
873
874
//-----------------------------------------------------------------------------
875
//
876
//    flagTaggedStates
877
//
878
//-----------------------------------------------------------------------------
879
0
void     RBBITableBuilder::flagTaggedStates() {
880
0
    if (U_FAILURE(*fStatus)) {
881
0
        return;
882
0
    }
883
0
    UVector     tagNodes(*fStatus);
884
0
    RBBINode    *tagNode;
885
0
    int32_t     i;
886
0
    int32_t     n;
887
888
0
    if (U_FAILURE(*fStatus)) {
889
0
        return;
890
0
    }
891
0
    fTree->findNodes(&tagNodes, RBBINode::tag, *fStatus);
892
0
    if (U_FAILURE(*fStatus)) {
893
0
        return;
894
0
    }
895
0
    for (i=0; i<tagNodes.size(); i++) {                   // For each tag node t (all of 'em)
896
0
        tagNode = (RBBINode *)tagNodes.elementAt(i);
897
898
0
        for (n=0; n<fDStates->size(); n++) {              //    For each state  s (row in the state table)
899
0
            RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
900
0
            if (sd->fPositions->indexOf(tagNode) >= 0) {  //       if  s include the tag node t
901
0
                sortedAdd(&sd->fTagVals, tagNode->fVal);
902
0
            }
903
0
        }
904
0
    }
905
0
}
906
907
908
909
910
//-----------------------------------------------------------------------------
911
//
912
//  mergeRuleStatusVals
913
//
914
//      Update the global table of rule status {tag} values
915
//      The rule builder has a global vector of status values that are common
916
//      for all tables.  Merge the ones from this table into the global set.
917
//
918
//-----------------------------------------------------------------------------
919
0
void  RBBITableBuilder::mergeRuleStatusVals() {
920
    //
921
    //  The basic outline of what happens here is this...
922
    //
923
    //    for each state in this state table
924
    //       if the status tag list for this state is in the global statuses list
925
    //           record where and
926
    //           continue with the next state
927
    //       else
928
    //           add the tag list for this state to the global list.
929
    //
930
0
    int i;
931
0
    int n;
932
933
    // Pre-set a single tag of {0} into the table.
934
    //   We will need this as a default, for rule sets with no explicit tagging.
935
0
    if (fRB->fRuleStatusVals->size() == 0) {
936
0
        fRB->fRuleStatusVals->addElement(1, *fStatus);  // Num of statuses in group
937
0
        fRB->fRuleStatusVals->addElement((int32_t)0, *fStatus);  //   and our single status of zero
938
0
    }
939
940
    //    For each state
941
0
    for (n=0; n<fDStates->size(); n++) {
942
0
        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
943
0
        UVector *thisStatesTagValues = sd->fTagVals;
944
0
        if (thisStatesTagValues == NULL) {
945
            // No tag values are explicitly associated with this state.
946
            //   Set the default tag value.
947
0
            sd->fTagsIdx = 0;
948
0
            continue;
949
0
        }
950
951
        // There are tag(s) associated with this state.
952
        //   fTagsIdx will be the index into the global tag list for this state's tag values.
953
        //   Initial value of -1 flags that we haven't got it set yet.
954
0
        sd->fTagsIdx = -1;
955
0
        int32_t  thisTagGroupStart = 0;   // indexes into the global rule status vals list
956
0
        int32_t  nextTagGroupStart = 0;
957
958
        // Loop runs once per group of tags in the global list
959
0
        while (nextTagGroupStart < fRB->fRuleStatusVals->size()) {
960
0
            thisTagGroupStart = nextTagGroupStart;
961
0
            nextTagGroupStart += fRB->fRuleStatusVals->elementAti(thisTagGroupStart) + 1;
962
0
            if (thisStatesTagValues->size() != fRB->fRuleStatusVals->elementAti(thisTagGroupStart)) {
963
                // The number of tags for this state is different from
964
                //    the number of tags in this group from the global list.
965
                //    Continue with the next group from the global list.
966
0
                continue;
967
0
            }
968
            // The lengths match, go ahead and compare the actual tag values
969
            //    between this state and the group from the global list.
970
0
            for (i=0; i<thisStatesTagValues->size(); i++) {
971
0
                if (thisStatesTagValues->elementAti(i) !=
972
0
                    fRB->fRuleStatusVals->elementAti(thisTagGroupStart + 1 + i) ) {
973
                    // Mismatch.
974
0
                    break;
975
0
                }
976
0
            }
977
978
0
            if (i == thisStatesTagValues->size()) {
979
                // We found a set of tag values in the global list that match
980
                //   those for this state.  Use them.
981
0
                sd->fTagsIdx = thisTagGroupStart;
982
0
                break;
983
0
            }
984
0
        }
985
986
0
        if (sd->fTagsIdx == -1) {
987
            // No suitable entry in the global tag list already.  Add one
988
0
            sd->fTagsIdx = fRB->fRuleStatusVals->size();
989
0
            fRB->fRuleStatusVals->addElement(thisStatesTagValues->size(), *fStatus);
990
0
            for (i=0; i<thisStatesTagValues->size(); i++) {
991
0
                fRB->fRuleStatusVals->addElement(thisStatesTagValues->elementAti(i), *fStatus);
992
0
            }
993
0
        }
994
0
    }
995
0
}
996
997
998
999
1000
1001
1002
1003
//-----------------------------------------------------------------------------
1004
//
1005
//  sortedAdd  Add a value to a vector of sorted values (ints).
1006
//             Do not replicate entries; if the value is already there, do not
1007
//                add a second one.
1008
//             Lazily create the vector if it does not already exist.
1009
//
1010
//-----------------------------------------------------------------------------
1011
0
void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) {
1012
0
    int32_t i;
1013
1014
0
    if (*vector == NULL) {
1015
0
        *vector = new UVector(*fStatus);
1016
0
    }
1017
0
    if (*vector == NULL || U_FAILURE(*fStatus)) {
1018
0
        return;
1019
0
    }
1020
0
    UVector *vec = *vector;
1021
0
    int32_t  vSize = vec->size();
1022
0
    for (i=0; i<vSize; i++) {
1023
0
        int32_t valAtI = vec->elementAti(i);
1024
0
        if (valAtI == val) {
1025
            // The value is already in the vector.  Don't add it again.
1026
0
            return;
1027
0
        }
1028
0
        if (valAtI > val) {
1029
0
            break;
1030
0
        }
1031
0
    }
1032
0
    vec->insertElementAt(val, i, *fStatus);
1033
0
}
1034
1035
1036
1037
//-----------------------------------------------------------------------------
1038
//
1039
//  setAdd     Set operation on UVector
1040
//             dest = dest union source
1041
//             Elements may only appear once and must be sorted.
1042
//
1043
//-----------------------------------------------------------------------------
1044
0
void RBBITableBuilder::setAdd(UVector *dest, UVector *source) {
1045
0
    int32_t destOriginalSize = dest->size();
1046
0
    int32_t sourceSize       = source->size();
1047
0
    int32_t di           = 0;
1048
0
    MaybeStackArray<void *, 16> destArray, sourceArray;  // Handle small cases without malloc
1049
0
    void **destPtr, **sourcePtr;
1050
0
    void **destLim, **sourceLim;
1051
1052
0
    if (destOriginalSize > destArray.getCapacity()) {
1053
0
        if (destArray.resize(destOriginalSize) == NULL) {
1054
0
            return;
1055
0
        }
1056
0
    }
1057
0
    destPtr = destArray.getAlias();
1058
0
    destLim = destPtr + destOriginalSize;  // destArray.getArrayLimit()?
1059
1060
0
    if (sourceSize > sourceArray.getCapacity()) {
1061
0
        if (sourceArray.resize(sourceSize) == NULL) {
1062
0
            return;
1063
0
        }
1064
0
    }
1065
0
    sourcePtr = sourceArray.getAlias();
1066
0
    sourceLim = sourcePtr + sourceSize;  // sourceArray.getArrayLimit()?
1067
1068
    // Avoid multiple "get element" calls by getting the contents into arrays
1069
0
    (void) dest->toArray(destPtr);
1070
0
    (void) source->toArray(sourcePtr);
1071
1072
0
    dest->setSize(sourceSize+destOriginalSize, *fStatus);
1073
1074
0
    while (sourcePtr < sourceLim && destPtr < destLim) {
1075
0
        if (*destPtr == *sourcePtr) {
1076
0
            dest->setElementAt(*sourcePtr++, di++);
1077
0
            destPtr++;
1078
0
        }
1079
        // This check is required for machines with segmented memory, like i5/OS.
1080
        // Direct pointer comparison is not recommended.
1081
0
        else if (uprv_memcmp(destPtr, sourcePtr, sizeof(void *)) < 0) {
1082
0
            dest->setElementAt(*destPtr++, di++);
1083
0
        }
1084
0
        else { /* *sourcePtr < *destPtr */
1085
0
            dest->setElementAt(*sourcePtr++, di++);
1086
0
        }
1087
0
    }
1088
1089
    // At most one of these two cleanup loops will execute
1090
0
    while (destPtr < destLim) {
1091
0
        dest->setElementAt(*destPtr++, di++);
1092
0
    }
1093
0
    while (sourcePtr < sourceLim) {
1094
0
        dest->setElementAt(*sourcePtr++, di++);
1095
0
    }
1096
1097
0
    dest->setSize(di, *fStatus);
1098
0
}
1099
1100
1101
1102
//-----------------------------------------------------------------------------
1103
//
1104
//  setEqual    Set operation on UVector.
1105
//              Compare for equality.
1106
//              Elements must be sorted.
1107
//
1108
//-----------------------------------------------------------------------------
1109
0
UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) {
1110
0
    return a->equals(*b);
1111
0
}
1112
1113
1114
//-----------------------------------------------------------------------------
1115
//
1116
//  printPosSets   Debug function.  Dump Nullable, firstpos, lastpos and followpos
1117
//                 for each node in the tree.
1118
//
1119
//-----------------------------------------------------------------------------
1120
#ifdef RBBI_DEBUG
1121
void RBBITableBuilder::printPosSets(RBBINode *n) {
1122
    if (n==NULL) {
1123
        return;
1124
    }
1125
    printf("\n");
1126
    RBBINode::printNodeHeader();
1127
    RBBINode::printNode(n);
1128
    RBBIDebugPrintf("         Nullable:  %s\n", n->fNullable?"TRUE":"FALSE");
1129
1130
    RBBIDebugPrintf("         firstpos:  ");
1131
    printSet(n->fFirstPosSet);
1132
1133
    RBBIDebugPrintf("         lastpos:   ");
1134
    printSet(n->fLastPosSet);
1135
1136
    RBBIDebugPrintf("         followpos: ");
1137
    printSet(n->fFollowPos);
1138
1139
    printPosSets(n->fLeftChild);
1140
    printPosSets(n->fRightChild);
1141
}
1142
#endif
1143
1144
//
1145
//    findDuplCharClassFrom()
1146
//
1147
0
bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) {
1148
0
    int32_t numStates = fDStates->size();
1149
0
    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
1150
1151
0
    for (; categories->first < numCols-1; categories->first++) {
1152
        // Note: dictionary & non-dictionary columns cannot be merged.
1153
        //       The limitSecond value prevents considering mixed pairs.
1154
        //       Dictionary categories are >= DictCategoriesStart.
1155
        //       Non dict categories are   <  DictCategoriesStart.
1156
0
        int limitSecond = categories->first < fRB->fSetBuilder->getDictCategoriesStart() ?
1157
0
            fRB->fSetBuilder->getDictCategoriesStart() : numCols;
1158
0
        for (categories->second=categories->first+1; categories->second < limitSecond; categories->second++) {
1159
            // Initialized to different values to prevent returning true if numStates = 0 (implies no duplicates).
1160
0
            uint16_t table_base = 0;
1161
0
            uint16_t table_dupl = 1;
1162
0
            for (int32_t state=0; state<numStates; state++) {
1163
0
                RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
1164
0
                table_base = (uint16_t)sd->fDtran->elementAti(categories->first);
1165
0
                table_dupl = (uint16_t)sd->fDtran->elementAti(categories->second);
1166
0
                if (table_base != table_dupl) {
1167
0
                    break;
1168
0
                }
1169
0
            }
1170
0
            if (table_base == table_dupl) {
1171
0
                return true;
1172
0
            }
1173
0
        }
1174
0
    }
1175
0
    return false;
1176
0
}
1177
1178
1179
//
1180
//    removeColumn()
1181
//
1182
0
void RBBITableBuilder::removeColumn(int32_t column) {
1183
0
    int32_t numStates = fDStates->size();
1184
0
    for (int32_t state=0; state<numStates; state++) {
1185
0
        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
1186
0
        U_ASSERT(column < sd->fDtran->size());
1187
0
        sd->fDtran->removeElementAt(column);
1188
0
    }
1189
0
}
1190
1191
/*
1192
 * findDuplicateState
1193
 */
1194
0
bool RBBITableBuilder::findDuplicateState(IntPair *states) {
1195
0
    int32_t numStates = fDStates->size();
1196
0
    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
1197
1198
0
    for (; states->first<numStates-1; states->first++) {
1199
0
        RBBIStateDescriptor *firstSD = (RBBIStateDescriptor *)fDStates->elementAt(states->first);
1200
0
        for (states->second=states->first+1; states->second<numStates; states->second++) {
1201
0
            RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(states->second);
1202
0
            if (firstSD->fAccepting != duplSD->fAccepting ||
1203
0
                firstSD->fLookAhead != duplSD->fLookAhead ||
1204
0
                firstSD->fTagsIdx   != duplSD->fTagsIdx) {
1205
0
                continue;
1206
0
            }
1207
0
            bool rowsMatch = true;
1208
0
            for (int32_t col=0; col < numCols; ++col) {
1209
0
                int32_t firstVal = firstSD->fDtran->elementAti(col);
1210
0
                int32_t duplVal = duplSD->fDtran->elementAti(col);
1211
0
                if (!((firstVal == duplVal) ||
1212
0
                        ((firstVal == states->first || firstVal == states->second) &&
1213
0
                        (duplVal  == states->first || duplVal  == states->second)))) {
1214
0
                    rowsMatch = false;
1215
0
                    break;
1216
0
                }
1217
0
            }
1218
0
            if (rowsMatch) {
1219
0
                return true;
1220
0
            }
1221
0
        }
1222
0
    }
1223
0
    return false;
1224
0
}
1225
1226
1227
0
bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) {
1228
0
    int32_t numStates = fSafeTable->size();
1229
1230
0
    for (; states->first<numStates-1; states->first++) {
1231
0
        UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->first));
1232
0
        for (states->second=states->first+1; states->second<numStates; states->second++) {
1233
0
            UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->second));
1234
0
            bool rowsMatch = true;
1235
0
            int32_t numCols = firstRow->length();
1236
0
            for (int32_t col=0; col < numCols; ++col) {
1237
0
                int32_t firstVal = firstRow->charAt(col);
1238
0
                int32_t duplVal = duplRow->charAt(col);
1239
0
                if (!((firstVal == duplVal) ||
1240
0
                        ((firstVal == states->first || firstVal == states->second) &&
1241
0
                        (duplVal  == states->first || duplVal  == states->second)))) {
1242
0
                    rowsMatch = false;
1243
0
                    break;
1244
0
                }
1245
0
            }
1246
0
            if (rowsMatch) {
1247
0
                return true;
1248
0
            }
1249
0
        }
1250
0
    }
1251
0
    return false;
1252
0
}
1253
1254
1255
0
void RBBITableBuilder::removeState(IntPair duplStates) {
1256
0
    const int32_t keepState = duplStates.first;
1257
0
    const int32_t duplState = duplStates.second;
1258
0
    U_ASSERT(keepState < duplState);
1259
0
    U_ASSERT(duplState < fDStates->size());
1260
1261
0
    RBBIStateDescriptor *duplSD = (RBBIStateDescriptor *)fDStates->elementAt(duplState);
1262
0
    fDStates->removeElementAt(duplState);
1263
0
    delete duplSD;
1264
1265
0
    int32_t numStates = fDStates->size();
1266
0
    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
1267
0
    for (int32_t state=0; state<numStates; ++state) {
1268
0
        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
1269
0
        for (int32_t col=0; col<numCols; col++) {
1270
0
            int32_t existingVal = sd->fDtran->elementAti(col);
1271
0
            int32_t newVal = existingVal;
1272
0
            if (existingVal == duplState) {
1273
0
                newVal = keepState;
1274
0
            } else if (existingVal > duplState) {
1275
0
                newVal = existingVal - 1;
1276
0
            }
1277
0
            sd->fDtran->setElementAt(newVal, col);
1278
0
        }
1279
0
    }
1280
0
}
1281
1282
0
void RBBITableBuilder::removeSafeState(IntPair duplStates) {
1283
0
    const int32_t keepState = duplStates.first;
1284
0
    const int32_t duplState = duplStates.second;
1285
0
    U_ASSERT(keepState < duplState);
1286
0
    U_ASSERT(duplState < fSafeTable->size());
1287
1288
0
    fSafeTable->removeElementAt(duplState);   // Note that fSafeTable has a deleter function
1289
                                              // and will auto-delete the removed element.
1290
0
    int32_t numStates = fSafeTable->size();
1291
0
    for (int32_t state=0; state<numStates; ++state) {
1292
0
        UnicodeString *sd = (UnicodeString *)fSafeTable->elementAt(state);
1293
0
        int32_t numCols = sd->length();
1294
0
        for (int32_t col=0; col<numCols; col++) {
1295
0
            int32_t existingVal = sd->charAt(col);
1296
0
            int32_t newVal = existingVal;
1297
0
            if (existingVal == duplState) {
1298
0
                newVal = keepState;
1299
0
            } else if (existingVal > duplState) {
1300
0
                newVal = existingVal - 1;
1301
0
            }
1302
0
            sd->setCharAt(col, static_cast<char16_t>(newVal));
1303
0
        }
1304
0
    }
1305
0
}
1306
1307
1308
/*
1309
 * RemoveDuplicateStates
1310
 */
1311
0
int32_t RBBITableBuilder::removeDuplicateStates() {
1312
0
    IntPair dupls = {3, 0};
1313
0
    int32_t numStatesRemoved = 0;
1314
1315
0
    while (findDuplicateState(&dupls)) {
1316
        // printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
1317
0
        removeState(dupls);
1318
0
        ++numStatesRemoved;
1319
0
    }
1320
0
    return numStatesRemoved;
1321
0
}
1322
1323
1324
//-----------------------------------------------------------------------------
1325
//
1326
//   getTableSize()    Calculate the size of the runtime form of this
1327
//                     state transition table.
1328
//
1329
//-----------------------------------------------------------------------------
1330
0
int32_t  RBBITableBuilder::getTableSize() const {
1331
0
    int32_t    size = 0;
1332
0
    int32_t    numRows;
1333
0
    int32_t    numCols;
1334
0
    int32_t    rowSize;
1335
1336
0
    if (fTree == NULL) {
1337
0
        return 0;
1338
0
    }
1339
1340
0
    size    = offsetof(RBBIStateTable, fTableData);    // The header, with no rows to the table.
1341
1342
0
    numRows = fDStates->size();
1343
0
    numCols = fRB->fSetBuilder->getNumCharCategories();
1344
1345
0
    if (use8BitsForTable()) {
1346
0
        rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols;
1347
0
    } else {
1348
0
        rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols;
1349
0
    }
1350
0
    size   += numRows * rowSize;
1351
0
    return size;
1352
0
}
1353
1354
0
bool RBBITableBuilder::use8BitsForTable() const {
1355
0
    return fDStates->size() <= kMaxStateFor8BitsTable;
1356
0
}
1357
1358
//-----------------------------------------------------------------------------
1359
//
1360
//   exportTable()    export the state transition table in the format required
1361
//                    by the runtime engine.  getTableSize() bytes of memory
1362
//                    must be available at the output address "where".
1363
//
1364
//-----------------------------------------------------------------------------
1365
0
void RBBITableBuilder::exportTable(void *where) {
1366
0
    RBBIStateTable    *table = (RBBIStateTable *)where;
1367
0
    uint32_t           state;
1368
0
    int                col;
1369
1370
0
    if (U_FAILURE(*fStatus) || fTree == NULL) {
1371
0
        return;
1372
0
    }
1373
1374
0
    int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
1375
0
    if (catCount > 0x7fff ||
1376
0
        fDStates->size() > 0x7fff) {
1377
0
        *fStatus = U_BRK_INTERNAL_ERROR;
1378
0
        return;
1379
0
    }
1380
1381
0
    table->fNumStates = fDStates->size();
1382
0
    table->fDictCategoriesStart = fRB->fSetBuilder->getDictCategoriesStart();
1383
0
    table->fLookAheadResultsSize = fLASlotsInUse == ACCEPTING_UNCONDITIONAL ? 0 : fLASlotsInUse + 1;
1384
0
    table->fFlags     = 0;
1385
0
    if (use8BitsForTable()) {
1386
0
        table->fRowLen    = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;
1387
0
        table->fFlags  |= RBBI_8BITS_ROWS;
1388
0
    } else {
1389
0
        table->fRowLen    = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;
1390
0
    }
1391
0
    if (fRB->fLookAheadHardBreak) {
1392
0
        table->fFlags  |= RBBI_LOOKAHEAD_HARD_BREAK;
1393
0
    }
1394
0
    if (fRB->fSetBuilder->sawBOF()) {
1395
0
        table->fFlags  |= RBBI_BOF_REQUIRED;
1396
0
    }
1397
1398
0
    for (state=0; state<table->fNumStates; state++) {
1399
0
        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(state);
1400
0
        RBBIStateTableRow   *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);
1401
0
        if (use8BitsForTable()) {
1402
0
            U_ASSERT (sd->fAccepting <= 255);
1403
0
            U_ASSERT (sd->fLookAhead <= 255);
1404
0
            U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 255);
1405
0
            RBBIStateTableRow8 *r8 = (RBBIStateTableRow8*)row;
1406
0
            r8->fAccepting = sd->fAccepting;
1407
0
            r8->fLookAhead = sd->fLookAhead;
1408
0
            r8->fTagsIdx   = sd->fTagsIdx;
1409
0
            for (col=0; col<catCount; col++) {
1410
0
                U_ASSERT (sd->fDtran->elementAti(col) <= kMaxStateFor8BitsTable);
1411
0
                r8->fNextState[col] = sd->fDtran->elementAti(col);
1412
0
            }
1413
0
        } else {
1414
0
            U_ASSERT (sd->fAccepting <= 0xffff);
1415
0
            U_ASSERT (sd->fLookAhead <= 0xffff);
1416
0
            U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 0xffff);
1417
0
            row->r16.fAccepting = sd->fAccepting;
1418
0
            row->r16.fLookAhead = sd->fLookAhead;
1419
0
            row->r16.fTagsIdx   = sd->fTagsIdx;
1420
0
            for (col=0; col<catCount; col++) {
1421
0
                row->r16.fNextState[col] = sd->fDtran->elementAti(col);
1422
0
            }
1423
0
        }
1424
0
    }
1425
0
}
1426
1427
1428
/**
1429
 *   Synthesize a safe state table from the main state table.
1430
 */
1431
0
void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
1432
    // The safe table creation has three steps:
1433
1434
    // 1. Identifiy pairs of character classes that are "safe." Safe means that boundaries
1435
    // following the pair do not depend on context or state before the pair. To test
1436
    // whether a pair is safe, run it through the main forward state table, starting
1437
    // from each state. If the the final state is the same, no matter what the starting state,
1438
    // the pair is safe.
1439
    //
1440
    // 2. Build a state table that recognizes the safe pairs. It's similar to their
1441
    // forward table, with a column for each input character [class], and a row for
1442
    // each state. Row 1 is the start state, and row 0 is the stop state. Initially
1443
    // create an additional state for each input character category; being in
1444
    // one of these states means that the character has been seen, and is potentially
1445
    // the first of a pair. In each of these rows, the entry for the second character
1446
    // of a safe pair is set to the stop state (0), indicating that a match was found.
1447
    // All other table entries are set to the state corresponding the current input
1448
    // character, allowing that charcter to be the of a start following pair.
1449
    //
1450
    // Because the safe rules are to be run in reverse, moving backwards in the text,
1451
    // the first and second pair categories are swapped when building the table.
1452
    //
1453
    // 3. Compress the table. There are typically many rows (states) that are
1454
    // equivalent - that have zeroes (match completed) in the same columns -
1455
    // and can be folded together.
1456
1457
    // Each safe pair is stored as two UChars in the safePair string.
1458
0
    UnicodeString safePairs;
1459
1460
0
    int32_t numCharClasses = fRB->fSetBuilder->getNumCharCategories();
1461
0
    int32_t numStates = fDStates->size();
1462
1463
0
    for (int32_t c1=0; c1<numCharClasses; ++c1) {
1464
0
        for (int32_t c2=0; c2 < numCharClasses; ++c2) {
1465
0
            int32_t wantedEndState = -1;
1466
0
            int32_t endState = 0;
1467
0
            for (int32_t startState = 1; startState < numStates; ++startState) {
1468
0
                RBBIStateDescriptor *startStateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(startState));
1469
0
                int32_t s2 = startStateD->fDtran->elementAti(c1);
1470
0
                RBBIStateDescriptor *s2StateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(s2));
1471
0
                endState = s2StateD->fDtran->elementAti(c2);
1472
0
                if (wantedEndState < 0) {
1473
0
                    wantedEndState = endState;
1474
0
                } else {
1475
0
                    if (wantedEndState != endState) {
1476
0
                        break;
1477
0
                    }
1478
0
                }
1479
0
            }
1480
0
            if (wantedEndState == endState) {
1481
0
                safePairs.append((char16_t)c1);
1482
0
                safePairs.append((char16_t)c2);
1483
                // printf("(%d, %d) ", c1, c2);
1484
0
            }
1485
0
        }
1486
        // printf("\n");
1487
0
    }
1488
1489
    // Populate the initial safe table.
1490
    // The table as a whole is UVector<UnicodeString>
1491
    // Each row is represented by a UnicodeString, being used as a Vector<int16>.
1492
    // Row 0 is the stop state.
1493
    // Row 1 is the start sate.
1494
    // Row 2 and beyond are other states, initially one per char class, but
1495
    //   after initial construction, many of the states will be combined, compacting the table.
1496
    // The String holds the nextState data only. The four leading fields of a row, fAccepting,
1497
    // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.
1498
1499
0
    U_ASSERT(fSafeTable == nullptr);
1500
0
    fSafeTable = new UVector(uprv_deleteUObject, uhash_compareUnicodeString, numCharClasses + 2, status);
1501
0
    for (int32_t row=0; row<numCharClasses + 2; ++row) {
1502
0
        fSafeTable->addElement(new UnicodeString(numCharClasses, 0, numCharClasses+4), status);
1503
0
    }
1504
1505
    // From the start state, each input char class transitions to the state for that input.
1506
0
    UnicodeString &startState = *static_cast<UnicodeString *>(fSafeTable->elementAt(1));
1507
0
    for (int32_t charClass=0; charClass < numCharClasses; ++charClass) {
1508
        // Note: +2 for the start & stop state.
1509
0
        startState.setCharAt(charClass, static_cast<char16_t>(charClass+2));
1510
0
    }
1511
1512
    // Initially make every other state table row look like the start state row,
1513
0
    for (int32_t row=2; row<numCharClasses+2; ++row) {
1514
0
        UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(row));
1515
0
        rowState = startState;   // UnicodeString assignment, copies contents.
1516
0
    }
1517
1518
    // Run through the safe pairs, set the next state to zero when pair has been seen.
1519
    // Zero being the stop state, meaning we found a safe point.
1520
0
    for (int32_t pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) {
1521
0
        int32_t c1 = safePairs.charAt(pairIdx);
1522
0
        int32_t c2 = safePairs.charAt(pairIdx + 1);
1523
1524
0
        UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(c2 + 2));
1525
0
        rowState.setCharAt(c1, 0);
1526
0
    }
1527
1528
    // Remove duplicate or redundant rows from the table.
1529
0
    IntPair states = {1, 0};
1530
0
    while (findDuplicateSafeState(&states)) {
1531
        // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
1532
0
        removeSafeState(states);
1533
0
    }
1534
0
}
1535
1536
1537
//-----------------------------------------------------------------------------
1538
//
1539
//   getSafeTableSize()    Calculate the size of the runtime form of this
1540
//                         safe state table.
1541
//
1542
//-----------------------------------------------------------------------------
1543
0
int32_t  RBBITableBuilder::getSafeTableSize() const {
1544
0
    int32_t    size = 0;
1545
0
    int32_t    numRows;
1546
0
    int32_t    numCols;
1547
0
    int32_t    rowSize;
1548
1549
0
    if (fSafeTable == nullptr) {
1550
0
        return 0;
1551
0
    }
1552
1553
0
    size    = offsetof(RBBIStateTable, fTableData);    // The header, with no rows to the table.
1554
1555
0
    numRows = fSafeTable->size();
1556
0
    numCols = fRB->fSetBuilder->getNumCharCategories();
1557
1558
0
    if (use8BitsForSafeTable()) {
1559
0
        rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols;
1560
0
    } else {
1561
0
        rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols;
1562
0
    }
1563
0
    size   += numRows * rowSize;
1564
0
    return size;
1565
0
}
1566
1567
0
bool RBBITableBuilder::use8BitsForSafeTable() const {
1568
0
    return fSafeTable->size() <= kMaxStateFor8BitsTable;
1569
0
}
1570
1571
//-----------------------------------------------------------------------------
1572
//
1573
//   exportSafeTable()   export the state transition table in the format required
1574
//                       by the runtime engine.  getTableSize() bytes of memory
1575
//                       must be available at the output address "where".
1576
//
1577
//-----------------------------------------------------------------------------
1578
0
void RBBITableBuilder::exportSafeTable(void *where) {
1579
0
    RBBIStateTable    *table = (RBBIStateTable *)where;
1580
0
    uint32_t           state;
1581
0
    int                col;
1582
1583
0
    if (U_FAILURE(*fStatus) || fSafeTable == nullptr) {
1584
0
        return;
1585
0
    }
1586
1587
0
    int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
1588
0
    if (catCount > 0x7fff ||
1589
0
            fSafeTable->size() > 0x7fff) {
1590
0
        *fStatus = U_BRK_INTERNAL_ERROR;
1591
0
        return;
1592
0
    }
1593
1594
0
    table->fNumStates = fSafeTable->size();
1595
0
    table->fFlags     = 0;
1596
0
    if (use8BitsForSafeTable()) {
1597
0
        table->fRowLen    = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;
1598
0
        table->fFlags  |= RBBI_8BITS_ROWS;
1599
0
    } else {
1600
0
        table->fRowLen    = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;
1601
0
    }
1602
1603
0
    for (state=0; state<table->fNumStates; state++) {
1604
0
        UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(state);
1605
0
        RBBIStateTableRow   *row = (RBBIStateTableRow *)(table->fTableData + state*table->fRowLen);
1606
0
        if (use8BitsForSafeTable()) {
1607
0
            RBBIStateTableRow8 *r8 = (RBBIStateTableRow8*)row;
1608
0
            r8->fAccepting = 0;
1609
0
            r8->fLookAhead = 0;
1610
0
            r8->fTagsIdx    = 0;
1611
0
            for (col=0; col<catCount; col++) {
1612
0
                U_ASSERT(rowString->charAt(col) <= kMaxStateFor8BitsTable);
1613
0
                r8->fNextState[col] = static_cast<uint8_t>(rowString->charAt(col));
1614
0
            }
1615
0
        } else {
1616
0
            row->r16.fAccepting = 0;
1617
0
            row->r16.fLookAhead = 0;
1618
0
            row->r16.fTagsIdx    = 0;
1619
0
            for (col=0; col<catCount; col++) {
1620
0
                row->r16.fNextState[col] = rowString->charAt(col);
1621
0
            }
1622
0
        }
1623
0
    }
1624
0
}
1625
1626
1627
1628
1629
//-----------------------------------------------------------------------------
1630
//
1631
//   printSet    Debug function.   Print the contents of a UVector
1632
//
1633
//-----------------------------------------------------------------------------
1634
#ifdef RBBI_DEBUG
1635
void RBBITableBuilder::printSet(UVector *s) {
1636
    int32_t  i;
1637
    for (i=0; i<s->size(); i++) {
1638
        const RBBINode *v = static_cast<const RBBINode *>(s->elementAt(i));
1639
        RBBIDebugPrintf("%5d", v==NULL? -1 : v->fSerialNum);
1640
    }
1641
    RBBIDebugPrintf("\n");
1642
}
1643
#endif
1644
1645
1646
//-----------------------------------------------------------------------------
1647
//
1648
//   printStates    Debug Function.  Dump the fully constructed state transition table.
1649
//
1650
//-----------------------------------------------------------------------------
1651
#ifdef RBBI_DEBUG
1652
void RBBITableBuilder::printStates() {
1653
    int     c;    // input "character"
1654
    int     n;    // state number
1655
1656
    RBBIDebugPrintf("state |           i n p u t     s y m b o l s \n");
1657
    RBBIDebugPrintf("      | Acc  LA    Tag");
1658
    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
1659
        RBBIDebugPrintf(" %3d", c);
1660
    }
1661
    RBBIDebugPrintf("\n");
1662
    RBBIDebugPrintf("      |---------------");
1663
    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
1664
        RBBIDebugPrintf("----");
1665
    }
1666
    RBBIDebugPrintf("\n");
1667
1668
    for (n=0; n<fDStates->size(); n++) {
1669
        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
1670
        RBBIDebugPrintf("  %3d | " , n);
1671
        RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagsIdx);
1672
        for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
1673
            RBBIDebugPrintf(" %3d", sd->fDtran->elementAti(c));
1674
        }
1675
        RBBIDebugPrintf("\n");
1676
    }
1677
    RBBIDebugPrintf("\n\n");
1678
}
1679
#endif
1680
1681
1682
//-----------------------------------------------------------------------------
1683
//
1684
//   printSafeTable    Debug Function.  Dump the fully constructed safe table.
1685
//
1686
//-----------------------------------------------------------------------------
1687
#ifdef RBBI_DEBUG
1688
void RBBITableBuilder::printReverseTable() {
1689
    int     c;    // input "character"
1690
    int     n;    // state number
1691
1692
    RBBIDebugPrintf("    Safe Reverse Table \n");
1693
    if (fSafeTable == nullptr) {
1694
        RBBIDebugPrintf("   --- nullptr ---\n");
1695
        return;
1696
    }
1697
    RBBIDebugPrintf("state |           i n p u t     s y m b o l s \n");
1698
    RBBIDebugPrintf("      | Acc  LA    Tag");
1699
    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
1700
        RBBIDebugPrintf(" %2d", c);
1701
    }
1702
    RBBIDebugPrintf("\n");
1703
    RBBIDebugPrintf("      |---------------");
1704
    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
1705
        RBBIDebugPrintf("---");
1706
    }
1707
    RBBIDebugPrintf("\n");
1708
1709
    for (n=0; n<fSafeTable->size(); n++) {
1710
        UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(n);
1711
        RBBIDebugPrintf("  %3d | " , n);
1712
        RBBIDebugPrintf("%3d %3d %5d ", 0, 0, 0);  // Accepting, LookAhead, Tags
1713
        for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
1714
            RBBIDebugPrintf(" %2d", rowString->charAt(c));
1715
        }
1716
        RBBIDebugPrintf("\n");
1717
    }
1718
    RBBIDebugPrintf("\n\n");
1719
}
1720
#endif
1721
1722
1723
1724
//-----------------------------------------------------------------------------
1725
//
1726
//   printRuleStatusTable    Debug Function.  Dump the common rule status table
1727
//
1728
//-----------------------------------------------------------------------------
1729
#ifdef RBBI_DEBUG
1730
void RBBITableBuilder::printRuleStatusTable() {
1731
    int32_t  thisRecord = 0;
1732
    int32_t  nextRecord = 0;
1733
    int      i;
1734
    UVector  *tbl = fRB->fRuleStatusVals;
1735
1736
    RBBIDebugPrintf("index |  tags \n");
1737
    RBBIDebugPrintf("-------------------\n");
1738
1739
    while (nextRecord < tbl->size()) {
1740
        thisRecord = nextRecord;
1741
        nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1;
1742
        RBBIDebugPrintf("%4d   ", thisRecord);
1743
        for (i=thisRecord+1; i<nextRecord; i++) {
1744
            RBBIDebugPrintf("  %5d", tbl->elementAti(i));
1745
        }
1746
        RBBIDebugPrintf("\n");
1747
    }
1748
    RBBIDebugPrintf("\n\n");
1749
}
1750
#endif
1751
1752
1753
//-----------------------------------------------------------------------------
1754
//
1755
//   RBBIStateDescriptor     Methods.  This is a very struct-like class
1756
//                           Most access is directly to the fields.
1757
//
1758
//-----------------------------------------------------------------------------
1759
1760
0
RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatus) {
1761
0
    fMarked    = FALSE;
1762
0
    fAccepting = 0;
1763
0
    fLookAhead = 0;
1764
0
    fTagsIdx   = 0;
1765
0
    fTagVals   = NULL;
1766
0
    fPositions = NULL;
1767
0
    fDtran     = NULL;
1768
1769
0
    fDtran     = new UVector32(lastInputSymbol+1, *fStatus);
1770
0
    if (U_FAILURE(*fStatus)) {
1771
0
        return;
1772
0
    }
1773
0
    if (fDtran == NULL) {
1774
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
1775
0
        return;
1776
0
    }
1777
0
    fDtran->setSize(lastInputSymbol+1);    // fDtran needs to be pre-sized.
1778
                                           //   It is indexed by input symbols, and will
1779
                                           //   hold  the next state number for each
1780
                                           //   symbol.
1781
0
}
1782
1783
1784
0
RBBIStateDescriptor::~RBBIStateDescriptor() {
1785
0
    delete       fPositions;
1786
0
    delete       fDtran;
1787
0
    delete       fTagVals;
1788
0
    fPositions = NULL;
1789
0
    fDtran     = NULL;
1790
0
    fTagVals   = NULL;
1791
0
}
1792
1793
U_NAMESPACE_END
1794
1795
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */