Coverage Report

Created: 2025-06-13 06:34

/src/icu/icu4c/source/common/rbbitblb.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (c) 2002-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*/
9
//
10
//  rbbitblb.cpp
11
//
12
13
14
#include "unicode/utypes.h"
15
16
#if !UCONFIG_NO_BREAK_ITERATION
17
18
#include "unicode/unistr.h"
19
#include "rbbitblb.h"
20
#include "rbbirb.h"
21
#include "rbbiscan.h"
22
#include "rbbisetb.h"
23
#include "rbbidata.h"
24
#include "cstring.h"
25
#include "uassert.h"
26
#include "uvectr32.h"
27
#include "cmemory.h"
28
29
U_NAMESPACE_BEGIN
30
31
const int32_t kMaxStateFor8BitsTable = 255;
32
33
RBBITableBuilder::RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status) :
34
0
        fRB(rb),
35
0
        fTree(*rootNode),
36
0
        fStatus(&status),
37
0
        fDStates(nullptr),
38
0
        fSafeTable(nullptr) {
39
0
    if (U_FAILURE(status)) {
40
0
        return;
41
0
    }
42
    // fDStates is UVector<RBBIStateDescriptor *>
43
0
    fDStates = new UVector(status);
44
0
    if (U_SUCCESS(status) && fDStates == nullptr ) {
45
0
        status = U_MEMORY_ALLOCATION_ERROR;
46
0
    }
47
0
}
48
49
50
51
0
RBBITableBuilder::~RBBITableBuilder() {
52
0
    int i;
53
0
    for (i=0; i<fDStates->size(); i++) {
54
0
        delete static_cast<RBBIStateDescriptor*>(fDStates->elementAt(i));
55
0
    }
56
0
    delete fDStates;
57
0
    delete fSafeTable;
58
0
    delete fLookAheadRuleMap;
59
0
}
60
61
62
//-----------------------------------------------------------------------------
63
//
64
//   RBBITableBuilder::buildForwardTable  -  This is the main function for building
65
//                               the DFA state transition table from the RBBI rules parse tree.
66
//
67
//-----------------------------------------------------------------------------
68
0
void  RBBITableBuilder::buildForwardTable() {
69
70
0
    if (U_FAILURE(*fStatus)) {
71
0
        return;
72
0
    }
73
74
    // If there were no rules, just return.  This situation can easily arise
75
    //   for the reverse rules.
76
0
    if (fTree==nullptr) {
77
0
        return;
78
0
    }
79
80
    //
81
    // Walk through the tree, replacing any references to $variables with a copy of the
82
    //   parse tree for the substitution expression.
83
    //
84
0
    fTree = fTree->flattenVariables(*fStatus, 0);
85
0
    if (U_FAILURE(*fStatus)) {
86
0
        return;
87
0
    }
88
#ifdef RBBI_DEBUG
89
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "ftree")) {
90
        RBBIDebugPuts("\nParse tree after flattening variable references.");
91
        RBBINode::printTree(fTree, true);
92
    }
93
#endif
94
95
    //
96
    // If the rules contained any references to {bof} 
97
    //   add a {bof} <cat> <former root of tree> to the
98
    //   tree.  Means that all matches must start out with the 
99
    //   {bof} fake character.
100
    // 
101
0
    if (fRB->fSetBuilder->sawBOF()) {
102
0
        RBBINode *bofTop    = new RBBINode(RBBINode::opCat, *fStatus);
103
0
        if (bofTop == nullptr) {
104
0
            *fStatus = U_MEMORY_ALLOCATION_ERROR;
105
0
        }
106
0
        if (U_FAILURE(*fStatus)) {
107
0
            delete bofTop;
108
0
            return;
109
0
        }
110
0
        RBBINode *bofLeaf   = new RBBINode(RBBINode::leafChar, *fStatus);
111
        // Delete and exit if memory allocation failed.
112
0
        if (bofLeaf == nullptr) {
113
0
            *fStatus = U_MEMORY_ALLOCATION_ERROR;
114
0
        }
115
0
        if (U_FAILURE(*fStatus)) {
116
0
            delete bofLeaf;
117
0
            delete bofTop;
118
0
            return;
119
0
        }
120
0
        bofTop->fLeftChild  = bofLeaf;
121
0
        bofTop->fRightChild = fTree;
122
0
        bofLeaf->fParent    = bofTop;
123
0
        bofLeaf->fVal       = 2;      // Reserved value for {bof}.
124
0
        fTree               = bofTop;
125
0
    }
126
127
    //
128
    // Add a unique right-end marker to the expression.
129
    //   Appears as a cat-node, left child being the original tree,
130
    //   right child being the end marker.
131
    //
132
0
    RBBINode *cn = new RBBINode(RBBINode::opCat, *fStatus);
133
    // Exit if memory allocation failed.
134
0
    if (cn == nullptr) {
135
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
136
0
    }
137
0
    if (U_FAILURE(*fStatus)) {
138
0
        delete cn;
139
0
        return;
140
0
    }
141
0
    cn->fLeftChild = fTree;
142
0
    fTree->fParent = cn;
143
0
    RBBINode *endMarkerNode = cn->fRightChild = new RBBINode(RBBINode::endMark, *fStatus);
144
    // Delete and exit if memory allocation failed.
145
0
    if (cn->fRightChild == nullptr) {
146
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
147
0
    }
148
0
    if (U_FAILURE(*fStatus)) {
149
0
        delete cn;
150
0
        return;
151
0
    }
152
0
    cn->fRightChild->fParent = cn;
153
0
    fTree = cn;
154
155
    //
156
    //  Replace all references to UnicodeSets with the tree for the equivalent
157
    //      expression.
158
    //
159
0
    fTree->flattenSets(*fStatus, 0);
160
#ifdef RBBI_DEBUG
161
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "stree")) {
162
        RBBIDebugPuts("\nParse tree after flattening Unicode Set references.");
163
        RBBINode::printTree(fTree, true);
164
    }
165
#endif
166
167
168
    //
169
    // calculate the functions nullable, firstpos, lastpos and followpos on
170
    // nodes in the parse tree.
171
    //    See the algorithm description in Aho.
172
    //    Understanding how this works by looking at the code alone will be
173
    //       nearly impossible.
174
    //
175
0
    calcNullable(fTree);
176
0
    calcFirstPos(fTree);
177
0
    calcLastPos(fTree);
178
0
    calcFollowPos(fTree);
179
0
    if (fRB->fDebugEnv && uprv_strstr(fRB->fDebugEnv, "pos")) {
180
0
        RBBIDebugPuts("\n");
181
0
        printPosSets(fTree);
182
0
    }
183
184
    //
185
    //  For "chained" rules, modify the followPos sets
186
    //
187
0
    if (fRB->fChainRules) {
188
0
        calcChainedFollowPos(fTree, endMarkerNode);
189
0
    }
190
191
    //
192
    //  BOF (start of input) test fixup.
193
    //
194
0
    if (fRB->fSetBuilder->sawBOF()) {
195
0
        bofFixup();
196
0
    }
197
198
    //
199
    // Build the DFA state transition tables.
200
    //
201
0
    buildStateTable();
202
0
    mapLookAheadRules();
203
0
    flagAcceptingStates();
204
0
    flagLookAheadStates();
205
0
    flagTaggedStates();
206
207
    //
208
    // Update the global table of rule status {tag} values
209
    // The rule builder has a global vector of status values that are common
210
    //    for all tables.  Merge the ones from this table into the global set.
211
    //
212
0
    mergeRuleStatusVals();
213
0
}
214
215
216
217
//-----------------------------------------------------------------------------
218
//
219
//   calcNullable.    Impossible to explain succinctly.  See Aho, section 3.9
220
//
221
//-----------------------------------------------------------------------------
222
0
void RBBITableBuilder::calcNullable(RBBINode *n) {
223
0
    if (n == nullptr) {
224
0
        return;
225
0
    }
226
0
    if (n->fType == RBBINode::setRef ||
227
0
        n->fType == RBBINode::endMark ) {
228
        // These are non-empty leaf node types.
229
0
        n->fNullable = false;
230
0
        return;
231
0
    }
232
233
0
    if (n->fType == RBBINode::lookAhead || n->fType == RBBINode::tag) {
234
        // Lookahead marker node.  It's a leaf, so no recursion on children.
235
        // It's nullable because it does not match any literal text from the input stream.
236
0
        n->fNullable = true;
237
0
        return;
238
0
    }
239
240
241
    // The node is not a leaf.
242
    //  Calculate nullable on its children.
243
0
    calcNullable(n->fLeftChild);
244
0
    calcNullable(n->fRightChild);
245
246
    // Apply functions from table 3.40 in Aho
247
0
    if (n->fType == RBBINode::opOr) {
248
0
        n->fNullable = n->fLeftChild->fNullable || n->fRightChild->fNullable;
249
0
    }
250
0
    else if (n->fType == RBBINode::opCat) {
251
0
        n->fNullable = n->fLeftChild->fNullable && n->fRightChild->fNullable;
252
0
    }
253
0
    else if (n->fType == RBBINode::opStar || n->fType == RBBINode::opQuestion) {
254
0
        n->fNullable = true;
255
0
    }
256
0
    else {
257
0
        n->fNullable = false;
258
0
    }
259
0
}
260
261
262
263
264
//-----------------------------------------------------------------------------
265
//
266
//   calcFirstPos.    Impossible to explain succinctly.  See Aho, section 3.9
267
//
268
//-----------------------------------------------------------------------------
269
0
void RBBITableBuilder::calcFirstPos(RBBINode *n) {
270
0
    if (n == nullptr) {
271
0
        return;
272
0
    }
273
0
    if (n->fType == RBBINode::leafChar  ||
274
0
        n->fType == RBBINode::endMark   ||
275
0
        n->fType == RBBINode::lookAhead ||
276
0
        n->fType == RBBINode::tag) {
277
        // These are non-empty leaf node types.
278
        // Note: In order to maintain the sort invariant on the set,
279
        // this function should only be called on a node whose set is
280
        // empty to start with.
281
0
        n->fFirstPosSet->addElement(n, *fStatus);
282
0
        return;
283
0
    }
284
285
    // The node is not a leaf.
286
    //  Calculate firstPos on its children.
287
0
    calcFirstPos(n->fLeftChild);
288
0
    calcFirstPos(n->fRightChild);
289
290
    // Apply functions from table 3.40 in Aho
291
0
    if (n->fType == RBBINode::opOr) {
292
0
        setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
293
0
        setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);
294
0
    }
295
0
    else if (n->fType == RBBINode::opCat) {
296
0
        setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
297
0
        if (n->fLeftChild->fNullable) {
298
0
            setAdd(n->fFirstPosSet, n->fRightChild->fFirstPosSet);
299
0
        }
300
0
    }
301
0
    else if (n->fType == RBBINode::opStar ||
302
0
             n->fType == RBBINode::opQuestion ||
303
0
             n->fType == RBBINode::opPlus) {
304
0
        setAdd(n->fFirstPosSet, n->fLeftChild->fFirstPosSet);
305
0
    }
306
0
}
307
308
309
310
//-----------------------------------------------------------------------------
311
//
312
//   calcLastPos.    Impossible to explain succinctly.  See Aho, section 3.9
313
//
314
//-----------------------------------------------------------------------------
315
0
void RBBITableBuilder::calcLastPos(RBBINode *n) {
316
0
    if (n == nullptr) {
317
0
        return;
318
0
    }
319
0
    if (n->fType == RBBINode::leafChar  ||
320
0
        n->fType == RBBINode::endMark   ||
321
0
        n->fType == RBBINode::lookAhead ||
322
0
        n->fType == RBBINode::tag) {
323
        // These are non-empty leaf node types.
324
        // Note: In order to maintain the sort invariant on the set,
325
        // this function should only be called on a node whose set is
326
        // empty to start with.
327
0
        n->fLastPosSet->addElement(n, *fStatus);
328
0
        return;
329
0
    }
330
331
    // The node is not a leaf.
332
    //  Calculate lastPos on its children.
333
0
    calcLastPos(n->fLeftChild);
334
0
    calcLastPos(n->fRightChild);
335
336
    // Apply functions from table 3.40 in Aho
337
0
    if (n->fType == RBBINode::opOr) {
338
0
        setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
339
0
        setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);
340
0
    }
341
0
    else if (n->fType == RBBINode::opCat) {
342
0
        setAdd(n->fLastPosSet, n->fRightChild->fLastPosSet);
343
0
        if (n->fRightChild->fNullable) {
344
0
            setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
345
0
        }
346
0
    }
347
0
    else if (n->fType == RBBINode::opStar     ||
348
0
             n->fType == RBBINode::opQuestion ||
349
0
             n->fType == RBBINode::opPlus) {
350
0
        setAdd(n->fLastPosSet, n->fLeftChild->fLastPosSet);
351
0
    }
352
0
}
353
354
355
356
//-----------------------------------------------------------------------------
357
//
358
//   calcFollowPos.    Impossible to explain succinctly.  See Aho, section 3.9
359
//
360
//-----------------------------------------------------------------------------
361
0
void RBBITableBuilder::calcFollowPos(RBBINode *n) {
362
0
    if (n == nullptr ||
363
0
        n->fType == RBBINode::leafChar ||
364
0
        n->fType == RBBINode::endMark) {
365
0
        return;
366
0
    }
367
368
0
    calcFollowPos(n->fLeftChild);
369
0
    calcFollowPos(n->fRightChild);
370
371
    // Aho rule #1
372
0
    if (n->fType == RBBINode::opCat) {
373
0
        RBBINode *i;   // is 'i' in Aho's description
374
0
        uint32_t     ix;
375
376
0
        UVector *LastPosOfLeftChild = n->fLeftChild->fLastPosSet;
377
378
0
        for (ix = 0; ix < static_cast<uint32_t>(LastPosOfLeftChild->size()); ix++) {
379
0
            i = static_cast<RBBINode*>(LastPosOfLeftChild->elementAt(ix));
380
0
            setAdd(i->fFollowPos, n->fRightChild->fFirstPosSet);
381
0
        }
382
0
    }
383
384
    // Aho rule #2
385
0
    if (n->fType == RBBINode::opStar ||
386
0
        n->fType == RBBINode::opPlus) {
387
0
        RBBINode   *i;  // again, n and i are the names from Aho's description.
388
0
        uint32_t    ix;
389
390
0
        for (ix = 0; ix < static_cast<uint32_t>(n->fLastPosSet->size()); ix++) {
391
0
            i = static_cast<RBBINode*>(n->fLastPosSet->elementAt(ix));
392
0
            setAdd(i->fFollowPos, n->fFirstPosSet);
393
0
        }
394
0
    }
395
396
397
398
0
}
399
400
//-----------------------------------------------------------------------------
401
//
402
//    addRuleRootNodes    Recursively walk a parse tree, adding all nodes flagged
403
//                        as roots of a rule to a destination vector.
404
//
405
//-----------------------------------------------------------------------------
406
0
void RBBITableBuilder::addRuleRootNodes(UVector *dest, RBBINode *node) {
407
0
    if (node == nullptr || U_FAILURE(*fStatus)) {
408
0
        return;
409
0
    }
410
0
    U_ASSERT(!dest->hasDeleter());
411
0
    if (node->fRuleRoot) {
412
0
        dest->addElement(node, *fStatus);
413
        // Note: rules cannot nest. If we found a rule start node,
414
        //       no child node can also be a start node.
415
0
        return;
416
0
    }
417
0
    addRuleRootNodes(dest, node->fLeftChild);
418
0
    addRuleRootNodes(dest, node->fRightChild);
419
0
}
420
421
//-----------------------------------------------------------------------------
422
//
423
//   calcChainedFollowPos.    Modify the previously calculated followPos sets
424
//                            to implement rule chaining.  NOT described by Aho
425
//
426
//-----------------------------------------------------------------------------
427
0
void RBBITableBuilder::calcChainedFollowPos(RBBINode *tree, RBBINode *endMarkNode) {
428
429
0
    UVector         leafNodes(*fStatus);
430
0
    if (U_FAILURE(*fStatus)) {
431
0
        return;
432
0
    }
433
434
    // get a list all leaf nodes
435
0
    tree->findNodes(&leafNodes, RBBINode::leafChar, *fStatus);
436
0
    if (U_FAILURE(*fStatus)) {
437
0
        return;
438
0
    }
439
440
    // Collect all leaf nodes that can start matches for rules
441
    // with inbound chaining enabled, which is the union of the 
442
    // firstPosition sets from each of the rule root nodes.
443
    
444
0
    UVector ruleRootNodes(*fStatus);
445
0
    addRuleRootNodes(&ruleRootNodes, tree);
446
447
0
    UVector matchStartNodes(*fStatus);
448
0
    for (int j=0; j<ruleRootNodes.size(); ++j) {
449
0
        RBBINode *node = static_cast<RBBINode *>(ruleRootNodes.elementAt(j));
450
0
        if (node->fChainIn) {
451
0
            setAdd(&matchStartNodes, node->fFirstPosSet);
452
0
        }
453
0
    }
454
0
    if (U_FAILURE(*fStatus)) {
455
0
        return;
456
0
    }
457
458
0
    int32_t  endNodeIx;
459
0
    int32_t  startNodeIx;
460
461
0
    for (endNodeIx=0; endNodeIx<leafNodes.size(); endNodeIx++) {
462
0
        RBBINode* endNode = static_cast<RBBINode*>(leafNodes.elementAt(endNodeIx));
463
464
        // Identify leaf nodes that correspond to overall rule match positions.
465
        // These include the endMarkNode in their followPos sets.
466
        //
467
        // Note: do not consider other end marker nodes, those that are added to
468
        //       look-ahead rules. These can't chain; a match immediately stops
469
        //       further matching. This leaves exactly one end marker node, the one
470
        //       at the end of the complete tree.
471
472
0
        if (!endNode->fFollowPos->contains(endMarkNode)) {
473
0
            continue;
474
0
        }
475
476
        // We've got a node that can end a match.
477
478
        // Now iterate over the nodes that can start a match, looking for ones
479
        //   with the same char class as our ending node.
480
0
        RBBINode *startNode;
481
0
        for (startNodeIx = 0; startNodeIx<matchStartNodes.size(); startNodeIx++) {
482
0
            startNode = static_cast<RBBINode*>(matchStartNodes.elementAt(startNodeIx));
483
0
            if (startNode->fType != RBBINode::leafChar) {
484
0
                continue;
485
0
            }
486
487
0
            if (endNode->fVal == startNode->fVal) {
488
                // The end val (character class) of one possible match is the
489
                //   same as the start of another.
490
491
                // Add all nodes from the followPos of the start node to the
492
                //  followPos set of the end node, which will have the effect of
493
                //  letting matches transition from a match state at endNode
494
                //  to the second char of a match starting with startNode.
495
0
                setAdd(endNode->fFollowPos, startNode->fFollowPos);
496
0
            }
497
0
        }
498
0
    }
499
0
}
500
501
502
//-----------------------------------------------------------------------------
503
//
504
//   bofFixup.    Fixup for state tables that include {bof} beginning of input testing.
505
//                Do an swizzle similar to chaining, modifying the followPos set of
506
//                the bofNode to include the followPos nodes from other {bot} nodes
507
//                scattered through the tree.
508
//
509
//                This function has much in common with calcChainedFollowPos().
510
//
511
//-----------------------------------------------------------------------------
512
0
void RBBITableBuilder::bofFixup() {
513
514
0
    if (U_FAILURE(*fStatus)) {
515
0
        return;
516
0
    }
517
518
    //   The parse tree looks like this ...
519
    //         fTree root  --->       <cat>
520
    //                               /     \       .
521
    //                            <cat>   <#end node>
522
    //                           /     \  .
523
    //                     <bofNode>   rest
524
    //                               of tree
525
    //
526
    //    We will be adding things to the followPos set of the <bofNode>
527
    //
528
0
    RBBINode  *bofNode = fTree->fLeftChild->fLeftChild;
529
0
    U_ASSERT(bofNode->fType == RBBINode::leafChar);
530
0
    U_ASSERT(bofNode->fVal == 2);
531
532
    // Get all nodes that can be the start a match of the user-written rules
533
    //  (excluding the fake bofNode)
534
    //  We want the nodes that can start a match in the
535
    //     part labeled "rest of tree"
536
    // 
537
0
    UVector *matchStartNodes = fTree->fLeftChild->fRightChild->fFirstPosSet;
538
539
0
    RBBINode *startNode;
540
0
    int       startNodeIx;
541
0
    for (startNodeIx = 0; startNodeIx<matchStartNodes->size(); startNodeIx++) {
542
0
        startNode = static_cast<RBBINode*>(matchStartNodes->elementAt(startNodeIx));
543
0
        if (startNode->fType != RBBINode::leafChar) {
544
0
            continue;
545
0
        }
546
547
0
        if (startNode->fVal == bofNode->fVal) {
548
            //  We found a leaf node corresponding to a {bof} that was
549
            //    explicitly written into a rule.
550
            //  Add everything from the followPos set of this node to the
551
            //    followPos set of the fake bofNode at the start of the tree.
552
            //  
553
0
            setAdd(bofNode->fFollowPos, startNode->fFollowPos);
554
0
        }
555
0
    }
556
0
}
557
558
//-----------------------------------------------------------------------------
559
//
560
//   buildStateTable()    Determine the set of runtime DFA states and the
561
//                        transition tables for these states, by the algorithm
562
//                        of fig. 3.44 in Aho.
563
//
564
//                        Most of the comments are quotes of Aho's psuedo-code.
565
//
566
//-----------------------------------------------------------------------------
567
0
void RBBITableBuilder::buildStateTable() {
568
0
    if (U_FAILURE(*fStatus)) {
569
0
        return;
570
0
    }
571
0
    RBBIStateDescriptor *failState;
572
    // Set it to nullptr to avoid uninitialized warning
573
0
    RBBIStateDescriptor *initialState = nullptr;
574
    //
575
    // Add a dummy state 0 - the stop state.  Not from Aho.
576
0
    int      lastInputSymbol = fRB->fSetBuilder->getNumCharCategories() - 1;
577
0
    failState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
578
0
    if (failState == nullptr) {
579
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
580
0
        goto ExitBuildSTdeleteall;
581
0
    }
582
0
    failState->fPositions = new UVector(*fStatus);
583
0
    if (failState->fPositions == nullptr) {
584
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
585
0
    }
586
0
    if (failState->fPositions == nullptr || U_FAILURE(*fStatus)) {
587
0
        goto ExitBuildSTdeleteall;
588
0
    }
589
0
    fDStates->addElement(failState, *fStatus);
590
0
    if (U_FAILURE(*fStatus)) {
591
0
        goto ExitBuildSTdeleteall;
592
0
    }
593
594
    // initially, the only unmarked state in Dstates is firstpos(root),
595
    //       where toot is the root of the syntax tree for (r)#;
596
0
    initialState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
597
0
    if (initialState == nullptr) {
598
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
599
0
    }
600
0
    if (U_FAILURE(*fStatus)) {
601
0
        goto ExitBuildSTdeleteall;
602
0
    }
603
0
    initialState->fPositions = new UVector(*fStatus);
604
0
    if (initialState->fPositions == nullptr) {
605
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
606
0
    }
607
0
    if (U_FAILURE(*fStatus)) {
608
0
        goto ExitBuildSTdeleteall;
609
0
    }
610
0
    setAdd(initialState->fPositions, fTree->fFirstPosSet);
611
0
    fDStates->addElement(initialState, *fStatus);
612
0
    if (U_FAILURE(*fStatus)) {
613
0
        goto ExitBuildSTdeleteall;
614
0
    }
615
616
    // while there is an unmarked state T in Dstates do begin
617
0
    for (;;) {
618
0
        RBBIStateDescriptor *T = nullptr;
619
0
        int32_t              tx;
620
0
        for (tx=1; tx<fDStates->size(); tx++) {
621
0
            RBBIStateDescriptor *temp;
622
0
            temp = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(tx));
623
0
            if (temp->fMarked == false) {
624
0
                T = temp;
625
0
                break;
626
0
            }
627
0
        }
628
0
        if (T == nullptr) {
629
0
            break;
630
0
        }
631
632
        // mark T;
633
0
        T->fMarked = true;
634
635
        // for each input symbol a do begin
636
0
        int32_t  a;
637
0
        for (a = 1; a<=lastInputSymbol; a++) {
638
            // let U be the set of positions that are in followpos(p)
639
            //    for some position p in T
640
            //    such that the symbol at position p is a;
641
0
            UVector    *U = nullptr;
642
0
            RBBINode   *p;
643
0
            int32_t     px;
644
0
            for (px=0; px<T->fPositions->size(); px++) {
645
0
                p = static_cast<RBBINode*>(T->fPositions->elementAt(px));
646
0
                if ((p->fType == RBBINode::leafChar) &&  (p->fVal == a)) {
647
0
                    if (U == nullptr) {
648
0
                        U = new UVector(*fStatus);
649
0
                        if (U == nullptr) {
650
0
                          *fStatus = U_MEMORY_ALLOCATION_ERROR;
651
0
                          goto ExitBuildSTdeleteall;
652
0
                        }
653
0
                    }
654
0
                    setAdd(U, p->fFollowPos);
655
0
                }
656
0
            }
657
658
            // if U is not empty and not in DStates then
659
0
            int32_t  ux = 0;
660
0
            UBool    UinDstates = false;
661
0
            if (U != nullptr) {
662
0
                U_ASSERT(U->size() > 0);
663
0
                int  ix;
664
0
                for (ix=0; ix<fDStates->size(); ix++) {
665
0
                    RBBIStateDescriptor *temp2;
666
0
                    temp2 = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(ix));
667
0
                    if (setEquals(U, temp2->fPositions)) {
668
0
                        delete U;
669
0
                        U  = temp2->fPositions;
670
0
                        ux = ix;
671
0
                        UinDstates = true;
672
0
                        break;
673
0
                    }
674
0
                }
675
676
                // Add U as an unmarked state to Dstates
677
0
                if (!UinDstates)
678
0
                {
679
0
                    RBBIStateDescriptor *newState = new RBBIStateDescriptor(lastInputSymbol, fStatus);
680
0
                    if (newState == nullptr) {
681
0
                      *fStatus = U_MEMORY_ALLOCATION_ERROR;
682
0
                    }
683
0
                    if (U_FAILURE(*fStatus)) {
684
0
                        goto ExitBuildSTdeleteall;
685
0
                    }
686
0
                    newState->fPositions = U;
687
0
                    fDStates->addElement(newState, *fStatus);
688
0
                    if (U_FAILURE(*fStatus)) {
689
0
                        return;
690
0
                    }
691
0
                    ux = fDStates->size()-1;
692
0
                }
693
694
                // Dtran[T, a] := U;
695
0
                T->fDtran->setElementAt(ux, a);
696
0
            }
697
0
        }
698
0
    }
699
0
    return;
700
    // delete local pointers only if error occurred.
701
0
ExitBuildSTdeleteall:
702
0
    delete initialState;
703
0
    delete failState;
704
0
}
705
706
707
/**
708
 * mapLookAheadRules
709
 *
710
 */
711
0
void RBBITableBuilder::mapLookAheadRules() {
712
0
    fLookAheadRuleMap =  new UVector32(fRB->fScanner->numRules() + 1, *fStatus);
713
0
    if (fLookAheadRuleMap == nullptr) {
714
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
715
0
    }
716
0
    if (U_FAILURE(*fStatus)) {
717
0
        return;
718
0
    }
719
0
    fLookAheadRuleMap->setSize(fRB->fScanner->numRules() + 1);
720
721
0
    for (int32_t n=0; n<fDStates->size(); n++) {
722
0
        RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
723
0
        int32_t laSlotForState = 0;
724
725
        // Establish the look-ahead slot for this state, if the state covers
726
        // any look-ahead nodes - corresponding to the '/' in look-ahead rules.
727
728
        // If any of the look-ahead nodes already have a slot assigned, use it,
729
        // otherwise assign a new one.
730
731
0
        bool sawLookAheadNode = false;
732
0
        for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) {
733
0
            RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos));
734
0
            if (node->fType != RBBINode::NodeType::lookAhead) {
735
0
                continue;
736
0
            }
737
0
            sawLookAheadNode = true;
738
0
            int32_t ruleNum = node->fVal;     // Set when rule was originally parsed.
739
0
            U_ASSERT(ruleNum < fLookAheadRuleMap->size());
740
0
            U_ASSERT(ruleNum > 0);
741
0
            int32_t laSlot = fLookAheadRuleMap->elementAti(ruleNum);
742
0
            if (laSlot != 0) {
743
0
                if (laSlotForState == 0) {
744
0
                    laSlotForState = laSlot;
745
0
                } else {
746
                    // TODO: figure out if this can fail, change to setting an error code if so.
747
0
                    U_ASSERT(laSlot == laSlotForState);
748
0
                }
749
0
            }
750
0
        }
751
0
        if (!sawLookAheadNode) {
752
0
            continue;
753
0
        }
754
755
0
        if (laSlotForState == 0) {
756
0
            laSlotForState = ++fLASlotsInUse;
757
0
        }
758
759
        // For each look ahead node covered by this state,
760
        // set the mapping from the node's rule number to the look ahead slot.
761
        // There can be multiple nodes/rule numbers going to the same la slot.
762
763
0
        for (int32_t ipos=0; ipos<sd->fPositions->size(); ++ipos) {
764
0
            RBBINode *node = static_cast<RBBINode *>(sd->fPositions->elementAt(ipos));
765
0
            if (node->fType != RBBINode::NodeType::lookAhead) {
766
0
                continue;
767
0
            }
768
0
            int32_t ruleNum = node->fVal;     // Set when rule was originally parsed.
769
0
            int32_t existingVal = fLookAheadRuleMap->elementAti(ruleNum);
770
0
            (void)existingVal;
771
0
            U_ASSERT(existingVal == 0 || existingVal == laSlotForState);
772
0
            fLookAheadRuleMap->setElementAt(laSlotForState, ruleNum);
773
0
        }
774
0
    }
775
776
0
}
777
778
//-----------------------------------------------------------------------------
779
//
780
//   flagAcceptingStates    Identify accepting states.
781
//                          First get a list of all of the end marker nodes.
782
//                          Then, for each state s,
783
//                              if s contains one of the end marker nodes in its list of tree positions then
784
//                                  s is an accepting state.
785
//
786
//-----------------------------------------------------------------------------
787
0
void     RBBITableBuilder::flagAcceptingStates() {
788
0
    if (U_FAILURE(*fStatus)) {
789
0
        return;
790
0
    }
791
0
    UVector     endMarkerNodes(*fStatus);
792
0
    RBBINode    *endMarker;
793
0
    int32_t     i;
794
0
    int32_t     n;
795
796
0
    if (U_FAILURE(*fStatus)) {
797
0
        return;
798
0
    }
799
800
0
    fTree->findNodes(&endMarkerNodes, RBBINode::endMark, *fStatus);
801
0
    if (U_FAILURE(*fStatus)) {
802
0
        return;
803
0
    }
804
805
0
    for (i=0; i<endMarkerNodes.size(); i++) {
806
0
        endMarker = static_cast<RBBINode*>(endMarkerNodes.elementAt(i));
807
0
        for (n=0; n<fDStates->size(); n++) {
808
0
            RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
809
0
            if (sd->fPositions->indexOf(endMarker) >= 0) {
810
                // Any non-zero value for fAccepting means this is an accepting node.
811
                // The value is what will be returned to the user as the break status.
812
                // If no other value was specified, force it to ACCEPTING_UNCONDITIONAL (1).
813
814
0
                if (sd->fAccepting==0) {
815
                    // State hasn't been marked as accepting yet.  Do it now.
816
0
                    sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
817
0
                    if (sd->fAccepting == 0) {
818
0
                        sd->fAccepting = ACCEPTING_UNCONDITIONAL;
819
0
                    }
820
0
                }
821
0
                if (sd->fAccepting==ACCEPTING_UNCONDITIONAL && endMarker->fVal != 0) {
822
                    // Both lookahead and non-lookahead accepting for this state.
823
                    // Favor the look-ahead, because a look-ahead match needs to
824
                    // immediately stop the run-time engine. First match, not longest.
825
0
                    sd->fAccepting = fLookAheadRuleMap->elementAti(endMarker->fVal);
826
0
                }
827
                // implicit else:
828
                // if sd->fAccepting already had a value other than 0 or 1, leave it be.
829
0
            }
830
0
        }
831
0
    }
832
0
}
833
834
835
//-----------------------------------------------------------------------------
836
//
837
//    flagLookAheadStates   Very similar to flagAcceptingStates, above.
838
//
839
//-----------------------------------------------------------------------------
840
0
void     RBBITableBuilder::flagLookAheadStates() {
841
0
    if (U_FAILURE(*fStatus)) {
842
0
        return;
843
0
    }
844
0
    UVector     lookAheadNodes(*fStatus);
845
0
    RBBINode    *lookAheadNode;
846
0
    int32_t     i;
847
0
    int32_t     n;
848
849
0
    fTree->findNodes(&lookAheadNodes, RBBINode::lookAhead, *fStatus);
850
0
    if (U_FAILURE(*fStatus)) {
851
0
        return;
852
0
    }
853
0
    for (i=0; i<lookAheadNodes.size(); i++) {
854
0
        lookAheadNode = static_cast<RBBINode*>(lookAheadNodes.elementAt(i));
855
0
        U_ASSERT(lookAheadNode->fType == RBBINode::NodeType::lookAhead);
856
857
0
        for (n=0; n<fDStates->size(); n++) {
858
0
            RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
859
0
            int32_t positionsIdx = sd->fPositions->indexOf(lookAheadNode);
860
0
            if (positionsIdx >= 0) {
861
0
                U_ASSERT(lookAheadNode == sd->fPositions->elementAt(positionsIdx));
862
0
                uint32_t lookaheadSlot = fLookAheadRuleMap->elementAti(lookAheadNode->fVal);
863
0
                U_ASSERT(sd->fLookAhead == 0 || sd->fLookAhead == lookaheadSlot);
864
                // if (sd->fLookAhead != 0 && sd->fLookAhead != lookaheadSlot) {
865
                //     printf("%s:%d Bingo. sd->fLookAhead:%d   lookaheadSlot:%d\n",
866
                //            __FILE__, __LINE__, sd->fLookAhead, lookaheadSlot);
867
                // }
868
0
                sd->fLookAhead = lookaheadSlot;
869
0
            }
870
0
        }
871
0
    }
872
0
}
873
874
875
876
877
//-----------------------------------------------------------------------------
878
//
879
//    flagTaggedStates
880
//
881
//-----------------------------------------------------------------------------
882
0
void     RBBITableBuilder::flagTaggedStates() {
883
0
    if (U_FAILURE(*fStatus)) {
884
0
        return;
885
0
    }
886
0
    UVector     tagNodes(*fStatus);
887
0
    RBBINode    *tagNode;
888
0
    int32_t     i;
889
0
    int32_t     n;
890
891
0
    if (U_FAILURE(*fStatus)) {
892
0
        return;
893
0
    }
894
0
    fTree->findNodes(&tagNodes, RBBINode::tag, *fStatus);
895
0
    if (U_FAILURE(*fStatus)) {
896
0
        return;
897
0
    }
898
0
    for (i=0; i<tagNodes.size(); i++) {                   // For each tag node t (all of 'em)
899
0
        tagNode = static_cast<RBBINode*>(tagNodes.elementAt(i));
900
901
0
        for (n=0; n<fDStates->size(); n++) {              //    For each state  s (row in the state table)
902
0
            RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
903
0
            if (sd->fPositions->indexOf(tagNode) >= 0) {  //       if  s include the tag node t
904
0
                sortedAdd(&sd->fTagVals, tagNode->fVal);
905
0
            }
906
0
        }
907
0
    }
908
0
}
909
910
911
912
913
//-----------------------------------------------------------------------------
914
//
915
//  mergeRuleStatusVals
916
//
917
//      Update the global table of rule status {tag} values
918
//      The rule builder has a global vector of status values that are common
919
//      for all tables.  Merge the ones from this table into the global set.
920
//
921
//-----------------------------------------------------------------------------
922
0
void  RBBITableBuilder::mergeRuleStatusVals() {
923
    //
924
    //  The basic outline of what happens here is this...
925
    //
926
    //    for each state in this state table
927
    //       if the status tag list for this state is in the global statuses list
928
    //           record where and
929
    //           continue with the next state
930
    //       else
931
    //           add the tag list for this state to the global list.
932
    //
933
0
    int i;
934
0
    int n;
935
936
    // Pre-set a single tag of {0} into the table.
937
    //   We will need this as a default, for rule sets with no explicit tagging.
938
0
    if (fRB->fRuleStatusVals->size() == 0) {
939
0
        fRB->fRuleStatusVals->addElement(1, *fStatus);  // Num of statuses in group
940
0
        fRB->fRuleStatusVals->addElement(static_cast<int32_t>(0), *fStatus); // and our single status of zero
941
0
    }
942
943
    //    For each state
944
0
    for (n=0; n<fDStates->size(); n++) {
945
0
        RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(n));
946
0
        UVector *thisStatesTagValues = sd->fTagVals;
947
0
        if (thisStatesTagValues == nullptr) {
948
            // No tag values are explicitly associated with this state.
949
            //   Set the default tag value.
950
0
            sd->fTagsIdx = 0;
951
0
            continue;
952
0
        }
953
954
        // There are tag(s) associated with this state.
955
        //   fTagsIdx will be the index into the global tag list for this state's tag values.
956
        //   Initial value of -1 flags that we haven't got it set yet.
957
0
        sd->fTagsIdx = -1;
958
0
        int32_t  thisTagGroupStart = 0;   // indexes into the global rule status vals list
959
0
        int32_t  nextTagGroupStart = 0;
960
961
        // Loop runs once per group of tags in the global list
962
0
        while (nextTagGroupStart < fRB->fRuleStatusVals->size()) {
963
0
            thisTagGroupStart = nextTagGroupStart;
964
0
            nextTagGroupStart += fRB->fRuleStatusVals->elementAti(thisTagGroupStart) + 1;
965
0
            if (thisStatesTagValues->size() != fRB->fRuleStatusVals->elementAti(thisTagGroupStart)) {
966
                // The number of tags for this state is different from
967
                //    the number of tags in this group from the global list.
968
                //    Continue with the next group from the global list.
969
0
                continue;
970
0
            }
971
            // The lengths match, go ahead and compare the actual tag values
972
            //    between this state and the group from the global list.
973
0
            for (i=0; i<thisStatesTagValues->size(); i++) {
974
0
                if (thisStatesTagValues->elementAti(i) !=
975
0
                    fRB->fRuleStatusVals->elementAti(thisTagGroupStart + 1 + i) ) {
976
                    // Mismatch.
977
0
                    break;
978
0
                }
979
0
            }
980
981
0
            if (i == thisStatesTagValues->size()) {
982
                // We found a set of tag values in the global list that match
983
                //   those for this state.  Use them.
984
0
                sd->fTagsIdx = thisTagGroupStart;
985
0
                break;
986
0
            }
987
0
        }
988
989
0
        if (sd->fTagsIdx == -1) {
990
            // No suitable entry in the global tag list already.  Add one
991
0
            sd->fTagsIdx = fRB->fRuleStatusVals->size();
992
0
            fRB->fRuleStatusVals->addElement(thisStatesTagValues->size(), *fStatus);
993
0
            for (i=0; i<thisStatesTagValues->size(); i++) {
994
0
                fRB->fRuleStatusVals->addElement(thisStatesTagValues->elementAti(i), *fStatus);
995
0
            }
996
0
        }
997
0
    }
998
0
}
999
1000
1001
1002
1003
1004
1005
1006
//-----------------------------------------------------------------------------
1007
//
1008
//  sortedAdd  Add a value to a vector of sorted values (ints).
1009
//             Do not replicate entries; if the value is already there, do not
1010
//                add a second one.
1011
//             Lazily create the vector if it does not already exist.
1012
//
1013
//-----------------------------------------------------------------------------
1014
0
void RBBITableBuilder::sortedAdd(UVector **vector, int32_t val) {
1015
0
    int32_t i;
1016
1017
0
    if (*vector == nullptr) {
1018
0
        *vector = new UVector(*fStatus);
1019
0
    }
1020
0
    if (*vector == nullptr || U_FAILURE(*fStatus)) {
1021
0
        return;
1022
0
    }
1023
0
    UVector *vec = *vector;
1024
0
    int32_t  vSize = vec->size();
1025
0
    for (i=0; i<vSize; i++) {
1026
0
        int32_t valAtI = vec->elementAti(i);
1027
0
        if (valAtI == val) {
1028
            // The value is already in the vector.  Don't add it again.
1029
0
            return;
1030
0
        }
1031
0
        if (valAtI > val) {
1032
0
            break;
1033
0
        }
1034
0
    }
1035
0
    vec->insertElementAt(val, i, *fStatus);
1036
0
}
1037
1038
1039
1040
//-----------------------------------------------------------------------------
1041
//
1042
//  setAdd     Set operation on UVector
1043
//             dest = dest union source
1044
//             Elements may only appear once and must be sorted.
1045
//
1046
//-----------------------------------------------------------------------------
1047
0
void RBBITableBuilder::setAdd(UVector *dest, UVector *source) {
1048
0
    U_ASSERT(!dest->hasDeleter());
1049
0
    U_ASSERT(!source->hasDeleter());
1050
0
    int32_t destOriginalSize = dest->size();
1051
0
    int32_t sourceSize       = source->size();
1052
0
    int32_t di           = 0;
1053
0
    MaybeStackArray<void *, 16> destArray, sourceArray;  // Handle small cases without malloc
1054
0
    void **destPtr, **sourcePtr;
1055
0
    void **destLim, **sourceLim;
1056
1057
0
    if (destOriginalSize > destArray.getCapacity()) {
1058
0
        if (destArray.resize(destOriginalSize) == nullptr) {
1059
0
            return;
1060
0
        }
1061
0
    }
1062
0
    destPtr = destArray.getAlias();
1063
0
    destLim = destPtr + destOriginalSize;  // destArray.getArrayLimit()?
1064
1065
0
    if (sourceSize > sourceArray.getCapacity()) {
1066
0
        if (sourceArray.resize(sourceSize) == nullptr) {
1067
0
            return;
1068
0
        }
1069
0
    }
1070
0
    sourcePtr = sourceArray.getAlias();
1071
0
    sourceLim = sourcePtr + sourceSize;  // sourceArray.getArrayLimit()?
1072
1073
    // Avoid multiple "get element" calls by getting the contents into arrays
1074
0
    (void) dest->toArray(destPtr);
1075
0
    (void) source->toArray(sourcePtr);
1076
1077
0
    dest->setSize(sourceSize+destOriginalSize, *fStatus);
1078
0
    if (U_FAILURE(*fStatus)) {
1079
0
        return;
1080
0
    }
1081
1082
0
    while (sourcePtr < sourceLim && destPtr < destLim) {
1083
0
        if (*destPtr == *sourcePtr) {
1084
0
            dest->setElementAt(*sourcePtr++, di++);
1085
0
            destPtr++;
1086
0
        }
1087
        // This check is required for machines with segmented memory, like i5/OS.
1088
        // Direct pointer comparison is not recommended.
1089
0
        else if (uprv_memcmp(destPtr, sourcePtr, sizeof(void *)) < 0) {
1090
0
            dest->setElementAt(*destPtr++, di++);
1091
0
        }
1092
0
        else { /* *sourcePtr < *destPtr */
1093
0
            dest->setElementAt(*sourcePtr++, di++);
1094
0
        }
1095
0
    }
1096
1097
    // At most one of these two cleanup loops will execute
1098
0
    while (destPtr < destLim) {
1099
0
        dest->setElementAt(*destPtr++, di++);
1100
0
    }
1101
0
    while (sourcePtr < sourceLim) {
1102
0
        dest->setElementAt(*sourcePtr++, di++);
1103
0
    }
1104
1105
0
    dest->setSize(di, *fStatus);
1106
0
}
1107
1108
1109
1110
//-----------------------------------------------------------------------------
1111
//
1112
//  setEqual    Set operation on UVector.
1113
//              Compare for equality.
1114
//              Elements must be sorted.
1115
//
1116
//-----------------------------------------------------------------------------
1117
0
UBool RBBITableBuilder::setEquals(UVector *a, UVector *b) {
1118
0
    return a->equals(*b);
1119
0
}
1120
1121
1122
//-----------------------------------------------------------------------------
1123
//
1124
//  printPosSets   Debug function.  Dump Nullable, firstpos, lastpos and followpos
1125
//                 for each node in the tree.
1126
//
1127
//-----------------------------------------------------------------------------
1128
#ifdef RBBI_DEBUG
1129
void RBBITableBuilder::printPosSets(RBBINode *n) {
1130
    if (n==nullptr) {
1131
        return;
1132
    }
1133
    printf("\n");
1134
    RBBINode::printNodeHeader();
1135
    RBBINode::printNode(n);
1136
    RBBIDebugPrintf("         Nullable:  %s\n", n->fNullable?"true":"false");
1137
1138
    RBBIDebugPrintf("         firstpos:  ");
1139
    printSet(n->fFirstPosSet);
1140
1141
    RBBIDebugPrintf("         lastpos:   ");
1142
    printSet(n->fLastPosSet);
1143
1144
    RBBIDebugPrintf("         followpos: ");
1145
    printSet(n->fFollowPos);
1146
1147
    printPosSets(n->fLeftChild);
1148
    printPosSets(n->fRightChild);
1149
}
1150
#endif
1151
1152
//
1153
//    findDuplCharClassFrom()
1154
//
1155
0
bool RBBITableBuilder::findDuplCharClassFrom(IntPair *categories) {
1156
0
    int32_t numStates = fDStates->size();
1157
0
    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
1158
1159
0
    for (; categories->first < numCols-1; categories->first++) {
1160
        // Note: dictionary & non-dictionary columns cannot be merged.
1161
        //       The limitSecond value prevents considering mixed pairs.
1162
        //       Dictionary categories are >= DictCategoriesStart.
1163
        //       Non dict categories are   <  DictCategoriesStart.
1164
0
        int limitSecond = categories->first < fRB->fSetBuilder->getDictCategoriesStart() ?
1165
0
            fRB->fSetBuilder->getDictCategoriesStart() : numCols;
1166
0
        for (categories->second=categories->first+1; categories->second < limitSecond; categories->second++) {
1167
            // Initialized to different values to prevent returning true if numStates = 0 (implies no duplicates).
1168
0
            uint16_t table_base = 0;
1169
0
            uint16_t table_dupl = 1;
1170
0
            for (int32_t state=0; state<numStates; state++) {
1171
0
                RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(state));
1172
0
                table_base = static_cast<uint16_t>(sd->fDtran->elementAti(categories->first));
1173
0
                table_dupl = static_cast<uint16_t>(sd->fDtran->elementAti(categories->second));
1174
0
                if (table_base != table_dupl) {
1175
0
                    break;
1176
0
                }
1177
0
            }
1178
0
            if (table_base == table_dupl) {
1179
0
                return true;
1180
0
            }
1181
0
        }
1182
0
    }
1183
0
    return false;
1184
0
}
1185
1186
1187
//
1188
//    removeColumn()
1189
//
1190
0
void RBBITableBuilder::removeColumn(int32_t column) {
1191
0
    int32_t numStates = fDStates->size();
1192
0
    for (int32_t state=0; state<numStates; state++) {
1193
0
        RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(state));
1194
0
        U_ASSERT(column < sd->fDtran->size());
1195
0
        sd->fDtran->removeElementAt(column);
1196
0
    }
1197
0
}
1198
1199
/*
1200
 * findDuplicateState
1201
 */
1202
0
bool RBBITableBuilder::findDuplicateState(IntPair *states) {
1203
0
    int32_t numStates = fDStates->size();
1204
0
    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
1205
1206
0
    for (; states->first<numStates-1; states->first++) {
1207
0
        RBBIStateDescriptor* firstSD = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(states->first));
1208
0
        for (states->second=states->first+1; states->second<numStates; states->second++) {
1209
0
            RBBIStateDescriptor* duplSD = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(states->second));
1210
0
            if (firstSD->fAccepting != duplSD->fAccepting ||
1211
0
                firstSD->fLookAhead != duplSD->fLookAhead ||
1212
0
                firstSD->fTagsIdx   != duplSD->fTagsIdx) {
1213
0
                continue;
1214
0
            }
1215
0
            bool rowsMatch = true;
1216
0
            for (int32_t col=0; col < numCols; ++col) {
1217
0
                int32_t firstVal = firstSD->fDtran->elementAti(col);
1218
0
                int32_t duplVal = duplSD->fDtran->elementAti(col);
1219
0
                if (!((firstVal == duplVal) ||
1220
0
                        ((firstVal == states->first || firstVal == states->second) &&
1221
0
                        (duplVal  == states->first || duplVal  == states->second)))) {
1222
0
                    rowsMatch = false;
1223
0
                    break;
1224
0
                }
1225
0
            }
1226
0
            if (rowsMatch) {
1227
0
                return true;
1228
0
            }
1229
0
        }
1230
0
    }
1231
0
    return false;
1232
0
}
1233
1234
1235
0
bool RBBITableBuilder::findDuplicateSafeState(IntPair *states) {
1236
0
    int32_t numStates = fSafeTable->size();
1237
1238
0
    for (; states->first<numStates-1; states->first++) {
1239
0
        UnicodeString *firstRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->first));
1240
0
        for (states->second=states->first+1; states->second<numStates; states->second++) {
1241
0
            UnicodeString *duplRow = static_cast<UnicodeString *>(fSafeTable->elementAt(states->second));
1242
0
            bool rowsMatch = true;
1243
0
            int32_t numCols = firstRow->length();
1244
0
            for (int32_t col=0; col < numCols; ++col) {
1245
0
                int32_t firstVal = firstRow->charAt(col);
1246
0
                int32_t duplVal = duplRow->charAt(col);
1247
0
                if (!((firstVal == duplVal) ||
1248
0
                        ((firstVal == states->first || firstVal == states->second) &&
1249
0
                        (duplVal  == states->first || duplVal  == states->second)))) {
1250
0
                    rowsMatch = false;
1251
0
                    break;
1252
0
                }
1253
0
            }
1254
0
            if (rowsMatch) {
1255
0
                return true;
1256
0
            }
1257
0
        }
1258
0
    }
1259
0
    return false;
1260
0
}
1261
1262
1263
0
void RBBITableBuilder::removeState(IntPair duplStates) {
1264
0
    const int32_t keepState = duplStates.first;
1265
0
    const int32_t duplState = duplStates.second;
1266
0
    U_ASSERT(keepState < duplState);
1267
0
    U_ASSERT(duplState < fDStates->size());
1268
1269
0
    RBBIStateDescriptor* duplSD = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(duplState));
1270
0
    fDStates->removeElementAt(duplState);
1271
0
    delete duplSD;
1272
1273
0
    int32_t numStates = fDStates->size();
1274
0
    int32_t numCols = fRB->fSetBuilder->getNumCharCategories();
1275
0
    for (int32_t state=0; state<numStates; ++state) {
1276
0
        RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(state));
1277
0
        for (int32_t col=0; col<numCols; col++) {
1278
0
            int32_t existingVal = sd->fDtran->elementAti(col);
1279
0
            int32_t newVal = existingVal;
1280
0
            if (existingVal == duplState) {
1281
0
                newVal = keepState;
1282
0
            } else if (existingVal > duplState) {
1283
0
                newVal = existingVal - 1;
1284
0
            }
1285
0
            sd->fDtran->setElementAt(newVal, col);
1286
0
        }
1287
0
    }
1288
0
}
1289
1290
0
void RBBITableBuilder::removeSafeState(IntPair duplStates) {
1291
0
    const int32_t keepState = duplStates.first;
1292
0
    const int32_t duplState = duplStates.second;
1293
0
    U_ASSERT(keepState < duplState);
1294
0
    U_ASSERT(duplState < fSafeTable->size());
1295
1296
0
    fSafeTable->removeElementAt(duplState);   // Note that fSafeTable has a deleter function
1297
                                              // and will auto-delete the removed element.
1298
0
    int32_t numStates = fSafeTable->size();
1299
0
    for (int32_t state=0; state<numStates; ++state) {
1300
0
        UnicodeString* sd = static_cast<UnicodeString*>(fSafeTable->elementAt(state));
1301
0
        int32_t numCols = sd->length();
1302
0
        for (int32_t col=0; col<numCols; col++) {
1303
0
            int32_t existingVal = sd->charAt(col);
1304
0
            int32_t newVal = existingVal;
1305
0
            if (existingVal == duplState) {
1306
0
                newVal = keepState;
1307
0
            } else if (existingVal > duplState) {
1308
0
                newVal = existingVal - 1;
1309
0
            }
1310
0
            sd->setCharAt(col, static_cast<char16_t>(newVal));
1311
0
        }
1312
0
    }
1313
0
}
1314
1315
1316
/*
1317
 * RemoveDuplicateStates
1318
 */
1319
0
int32_t RBBITableBuilder::removeDuplicateStates() {
1320
0
    IntPair dupls = {3, 0};
1321
0
    int32_t numStatesRemoved = 0;
1322
1323
0
    while (findDuplicateState(&dupls)) {
1324
        // printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
1325
0
        removeState(dupls);
1326
0
        ++numStatesRemoved;
1327
0
    }
1328
0
    return numStatesRemoved;
1329
0
}
1330
1331
1332
//-----------------------------------------------------------------------------
1333
//
1334
//   getTableSize()    Calculate the size of the runtime form of this
1335
//                     state transition table.
1336
//
1337
//-----------------------------------------------------------------------------
1338
0
int32_t  RBBITableBuilder::getTableSize() const {
1339
0
    int32_t    size = 0;
1340
0
    int32_t    numRows;
1341
0
    int32_t    numCols;
1342
0
    int32_t    rowSize;
1343
1344
0
    if (fTree == nullptr) {
1345
0
        return 0;
1346
0
    }
1347
1348
0
    size    = offsetof(RBBIStateTable, fTableData);    // The header, with no rows to the table.
1349
1350
0
    numRows = fDStates->size();
1351
0
    numCols = fRB->fSetBuilder->getNumCharCategories();
1352
1353
0
    if (use8BitsForTable()) {
1354
0
        rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols;
1355
0
    } else {
1356
0
        rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols;
1357
0
    }
1358
0
    size   += numRows * rowSize;
1359
0
    return size;
1360
0
}
1361
1362
0
bool RBBITableBuilder::use8BitsForTable() const {
1363
0
    return fDStates->size() <= kMaxStateFor8BitsTable;
1364
0
}
1365
1366
//-----------------------------------------------------------------------------
1367
//
1368
//   exportTable()    export the state transition table in the format required
1369
//                    by the runtime engine.  getTableSize() bytes of memory
1370
//                    must be available at the output address "where".
1371
//
1372
//-----------------------------------------------------------------------------
1373
0
void RBBITableBuilder::exportTable(void *where) {
1374
0
    RBBIStateTable* table = static_cast<RBBIStateTable*>(where);
1375
0
    uint32_t           state;
1376
0
    int                col;
1377
1378
0
    if (U_FAILURE(*fStatus) || fTree == nullptr) {
1379
0
        return;
1380
0
    }
1381
1382
0
    int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
1383
0
    if (catCount > 0x7fff ||
1384
0
        fDStates->size() > 0x7fff) {
1385
0
        *fStatus = U_BRK_INTERNAL_ERROR;
1386
0
        return;
1387
0
    }
1388
1389
0
    table->fNumStates = fDStates->size();
1390
0
    table->fDictCategoriesStart = fRB->fSetBuilder->getDictCategoriesStart();
1391
0
    table->fLookAheadResultsSize = fLASlotsInUse == ACCEPTING_UNCONDITIONAL ? 0 : fLASlotsInUse + 1;
1392
0
    table->fFlags     = 0;
1393
0
    if (use8BitsForTable()) {
1394
0
        table->fRowLen    = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;
1395
0
        table->fFlags  |= RBBI_8BITS_ROWS;
1396
0
    } else {
1397
0
        table->fRowLen    = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;
1398
0
    }
1399
0
    if (fRB->fLookAheadHardBreak) {
1400
0
        table->fFlags  |= RBBI_LOOKAHEAD_HARD_BREAK;
1401
0
    }
1402
0
    if (fRB->fSetBuilder->sawBOF()) {
1403
0
        table->fFlags  |= RBBI_BOF_REQUIRED;
1404
0
    }
1405
1406
0
    for (state=0; state<table->fNumStates; state++) {
1407
0
        RBBIStateDescriptor* sd = static_cast<RBBIStateDescriptor*>(fDStates->elementAt(state));
1408
0
        RBBIStateTableRow* row = reinterpret_cast<RBBIStateTableRow*>(table->fTableData + state * table->fRowLen);
1409
0
        if (use8BitsForTable()) {
1410
0
            U_ASSERT (sd->fAccepting <= 255);
1411
0
            U_ASSERT (sd->fLookAhead <= 255);
1412
0
            U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 255);
1413
0
            RBBIStateTableRow8* r8 = reinterpret_cast<RBBIStateTableRow8*>(row);
1414
0
            r8->fAccepting = sd->fAccepting;
1415
0
            r8->fLookAhead = sd->fLookAhead;
1416
0
            r8->fTagsIdx   = sd->fTagsIdx;
1417
0
            for (col=0; col<catCount; col++) {
1418
0
                U_ASSERT (sd->fDtran->elementAti(col) <= kMaxStateFor8BitsTable);
1419
0
                r8->fNextState[col] = sd->fDtran->elementAti(col);
1420
0
            }
1421
0
        } else {
1422
0
            U_ASSERT (sd->fAccepting <= 0xffff);
1423
0
            U_ASSERT (sd->fLookAhead <= 0xffff);
1424
0
            U_ASSERT (0 <= sd->fTagsIdx && sd->fTagsIdx <= 0xffff);
1425
0
            row->r16.fAccepting = sd->fAccepting;
1426
0
            row->r16.fLookAhead = sd->fLookAhead;
1427
0
            row->r16.fTagsIdx   = sd->fTagsIdx;
1428
0
            for (col=0; col<catCount; col++) {
1429
0
                row->r16.fNextState[col] = sd->fDtran->elementAti(col);
1430
0
            }
1431
0
        }
1432
0
    }
1433
0
}
1434
1435
1436
/**
1437
 *   Synthesize a safe state table from the main state table.
1438
 */
1439
0
void RBBITableBuilder::buildSafeReverseTable(UErrorCode &status) {
1440
    // The safe table creation has three steps:
1441
1442
    // 1. Identify pairs of character classes that are "safe." Safe means that boundaries
1443
    // following the pair do not depend on context or state before the pair. To test
1444
    // whether a pair is safe, run it through the main forward state table, starting
1445
    // from each state. If the the final state is the same, no matter what the starting state,
1446
    // the pair is safe.
1447
    //
1448
    // 2. Build a state table that recognizes the safe pairs. It's similar to their
1449
    // forward table, with a column for each input character [class], and a row for
1450
    // each state. Row 1 is the start state, and row 0 is the stop state. Initially
1451
    // create an additional state for each input character category; being in
1452
    // one of these states means that the character has been seen, and is potentially
1453
    // the first of a pair. In each of these rows, the entry for the second character
1454
    // of a safe pair is set to the stop state (0), indicating that a match was found.
1455
    // All other table entries are set to the state corresponding the current input
1456
    // character, allowing that character to be the of a start following pair.
1457
    //
1458
    // Because the safe rules are to be run in reverse, moving backwards in the text,
1459
    // the first and second pair categories are swapped when building the table.
1460
    //
1461
    // 3. Compress the table. There are typically many rows (states) that are
1462
    // equivalent - that have zeroes (match completed) in the same columns -
1463
    // and can be folded together.
1464
1465
    // Each safe pair is stored as two UChars in the safePair string.
1466
0
    UnicodeString safePairs;
1467
1468
0
    int32_t numCharClasses = fRB->fSetBuilder->getNumCharCategories();
1469
0
    int32_t numStates = fDStates->size();
1470
1471
0
    for (int32_t c1=0; c1<numCharClasses; ++c1) {
1472
0
        for (int32_t c2=0; c2 < numCharClasses; ++c2) {
1473
0
            int32_t wantedEndState = -1;
1474
0
            int32_t endState = 0;
1475
0
            for (int32_t startState = 1; startState < numStates; ++startState) {
1476
0
                RBBIStateDescriptor *startStateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(startState));
1477
0
                int32_t s2 = startStateD->fDtran->elementAti(c1);
1478
0
                RBBIStateDescriptor *s2StateD = static_cast<RBBIStateDescriptor *>(fDStates->elementAt(s2));
1479
0
                endState = s2StateD->fDtran->elementAti(c2);
1480
0
                if (wantedEndState < 0) {
1481
0
                    wantedEndState = endState;
1482
0
                } else {
1483
0
                    if (wantedEndState != endState) {
1484
0
                        break;
1485
0
                    }
1486
0
                }
1487
0
            }
1488
0
            if (wantedEndState == endState) {
1489
0
                safePairs.append(static_cast<char16_t>(c1));
1490
0
                safePairs.append(static_cast<char16_t>(c2));
1491
                // printf("(%d, %d) ", c1, c2);
1492
0
            }
1493
0
        }
1494
        // printf("\n");
1495
0
    }
1496
1497
    // Populate the initial safe table.
1498
    // The table as a whole is UVector<UnicodeString>
1499
    // Each row is represented by a UnicodeString, being used as a Vector<int16>.
1500
    // Row 0 is the stop state.
1501
    // Row 1 is the start state.
1502
    // Row 2 and beyond are other states, initially one per char class, but
1503
    //   after initial construction, many of the states will be combined, compacting the table.
1504
    // The String holds the nextState data only. The four leading fields of a row, fAccepting,
1505
    // fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.
1506
1507
0
    U_ASSERT(fSafeTable == nullptr);
1508
0
    LocalPointer<UVector> lpSafeTable(
1509
0
        new UVector(uprv_deleteUObject, uhash_compareUnicodeString, numCharClasses + 2, status), status);
1510
0
    if (U_FAILURE(status)) {
1511
0
        return;
1512
0
    }
1513
0
    fSafeTable = lpSafeTable.orphan();
1514
0
    for (int32_t row=0; row<numCharClasses + 2; ++row) {
1515
0
        LocalPointer<UnicodeString> lpString(new UnicodeString(numCharClasses, 0, numCharClasses+4), status);
1516
0
        fSafeTable->adoptElement(lpString.orphan(), status);
1517
0
    }
1518
0
    if (U_FAILURE(status)) {
1519
0
        return;
1520
0
    }
1521
1522
    // From the start state, each input char class transitions to the state for that input.
1523
0
    UnicodeString &startState = *static_cast<UnicodeString *>(fSafeTable->elementAt(1));
1524
0
    for (int32_t charClass=0; charClass < numCharClasses; ++charClass) {
1525
        // Note: +2 for the start & stop state.
1526
0
        startState.setCharAt(charClass, static_cast<char16_t>(charClass+2));
1527
0
    }
1528
1529
    // Initially make every other state table row look like the start state row,
1530
0
    for (int32_t row=2; row<numCharClasses+2; ++row) {
1531
0
        UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(row));
1532
0
        rowState = startState;   // UnicodeString assignment, copies contents.
1533
0
    }
1534
1535
    // Run through the safe pairs, set the next state to zero when pair has been seen.
1536
    // Zero being the stop state, meaning we found a safe point.
1537
0
    for (int32_t pairIdx=0; pairIdx<safePairs.length(); pairIdx+=2) {
1538
0
        int32_t c1 = safePairs.charAt(pairIdx);
1539
0
        int32_t c2 = safePairs.charAt(pairIdx + 1);
1540
1541
0
        UnicodeString &rowState = *static_cast<UnicodeString *>(fSafeTable->elementAt(c2 + 2));
1542
0
        rowState.setCharAt(c1, 0);
1543
0
    }
1544
1545
    // Remove duplicate or redundant rows from the table.
1546
0
    IntPair states = {1, 0};
1547
0
    while (findDuplicateSafeState(&states)) {
1548
        // printf("Removing duplicate safe states (%d, %d)\n", states.first, states.second);
1549
0
        removeSafeState(states);
1550
0
    }
1551
0
}
1552
1553
1554
//-----------------------------------------------------------------------------
1555
//
1556
//   getSafeTableSize()    Calculate the size of the runtime form of this
1557
//                         safe state table.
1558
//
1559
//-----------------------------------------------------------------------------
1560
0
int32_t  RBBITableBuilder::getSafeTableSize() const {
1561
0
    int32_t    size = 0;
1562
0
    int32_t    numRows;
1563
0
    int32_t    numCols;
1564
0
    int32_t    rowSize;
1565
1566
0
    if (fSafeTable == nullptr) {
1567
0
        return 0;
1568
0
    }
1569
1570
0
    size    = offsetof(RBBIStateTable, fTableData);    // The header, with no rows to the table.
1571
1572
0
    numRows = fSafeTable->size();
1573
0
    numCols = fRB->fSetBuilder->getNumCharCategories();
1574
1575
0
    if (use8BitsForSafeTable()) {
1576
0
        rowSize = offsetof(RBBIStateTableRow8, fNextState) + sizeof(int8_t)*numCols;
1577
0
    } else {
1578
0
        rowSize = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t)*numCols;
1579
0
    }
1580
0
    size   += numRows * rowSize;
1581
0
    return size;
1582
0
}
1583
1584
0
bool RBBITableBuilder::use8BitsForSafeTable() const {
1585
0
    return fSafeTable->size() <= kMaxStateFor8BitsTable;
1586
0
}
1587
1588
//-----------------------------------------------------------------------------
1589
//
1590
//   exportSafeTable()   export the state transition table in the format required
1591
//                       by the runtime engine.  getTableSize() bytes of memory
1592
//                       must be available at the output address "where".
1593
//
1594
//-----------------------------------------------------------------------------
1595
0
void RBBITableBuilder::exportSafeTable(void *where) {
1596
0
    RBBIStateTable* table = static_cast<RBBIStateTable*>(where);
1597
0
    uint32_t           state;
1598
0
    int                col;
1599
1600
0
    if (U_FAILURE(*fStatus) || fSafeTable == nullptr) {
1601
0
        return;
1602
0
    }
1603
1604
0
    int32_t catCount = fRB->fSetBuilder->getNumCharCategories();
1605
0
    if (catCount > 0x7fff ||
1606
0
            fSafeTable->size() > 0x7fff) {
1607
0
        *fStatus = U_BRK_INTERNAL_ERROR;
1608
0
        return;
1609
0
    }
1610
1611
0
    table->fNumStates = fSafeTable->size();
1612
0
    table->fFlags     = 0;
1613
0
    if (use8BitsForSafeTable()) {
1614
0
        table->fRowLen    = offsetof(RBBIStateTableRow8, fNextState) + sizeof(uint8_t) * catCount;
1615
0
        table->fFlags  |= RBBI_8BITS_ROWS;
1616
0
    } else {
1617
0
        table->fRowLen    = offsetof(RBBIStateTableRow16, fNextState) + sizeof(int16_t) * catCount;
1618
0
    }
1619
1620
0
    for (state=0; state<table->fNumStates; state++) {
1621
0
        UnicodeString* rowString = static_cast<UnicodeString*>(fSafeTable->elementAt(state));
1622
0
        RBBIStateTableRow* row = reinterpret_cast<RBBIStateTableRow*>(table->fTableData + state * table->fRowLen);
1623
0
        if (use8BitsForSafeTable()) {
1624
0
            RBBIStateTableRow8* r8 = reinterpret_cast<RBBIStateTableRow8*>(row);
1625
0
            r8->fAccepting = 0;
1626
0
            r8->fLookAhead = 0;
1627
0
            r8->fTagsIdx    = 0;
1628
0
            for (col=0; col<catCount; col++) {
1629
0
                U_ASSERT(rowString->charAt(col) <= kMaxStateFor8BitsTable);
1630
0
                r8->fNextState[col] = static_cast<uint8_t>(rowString->charAt(col));
1631
0
            }
1632
0
        } else {
1633
0
            row->r16.fAccepting = 0;
1634
0
            row->r16.fLookAhead = 0;
1635
0
            row->r16.fTagsIdx    = 0;
1636
0
            for (col=0; col<catCount; col++) {
1637
0
                row->r16.fNextState[col] = rowString->charAt(col);
1638
0
            }
1639
0
        }
1640
0
    }
1641
0
}
1642
1643
1644
1645
1646
//-----------------------------------------------------------------------------
1647
//
1648
//   printSet    Debug function.   Print the contents of a UVector
1649
//
1650
//-----------------------------------------------------------------------------
1651
#ifdef RBBI_DEBUG
1652
void RBBITableBuilder::printSet(UVector *s) {
1653
    int32_t  i;
1654
    for (i=0; i<s->size(); i++) {
1655
        const RBBINode *v = static_cast<const RBBINode *>(s->elementAt(i));
1656
        RBBIDebugPrintf("%5d", v==nullptr? -1 : v->fSerialNum);
1657
    }
1658
    RBBIDebugPrintf("\n");
1659
}
1660
#endif
1661
1662
1663
//-----------------------------------------------------------------------------
1664
//
1665
//   printStates    Debug Function.  Dump the fully constructed state transition table.
1666
//
1667
//-----------------------------------------------------------------------------
1668
#ifdef RBBI_DEBUG
1669
void RBBITableBuilder::printStates() {
1670
    int     c;    // input "character"
1671
    int     n;    // state number
1672
1673
    RBBIDebugPrintf("state |           i n p u t     s y m b o l s \n");
1674
    RBBIDebugPrintf("      | Acc  LA    Tag");
1675
    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
1676
        RBBIDebugPrintf(" %3d", c);
1677
    }
1678
    RBBIDebugPrintf("\n");
1679
    RBBIDebugPrintf("      |---------------");
1680
    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
1681
        RBBIDebugPrintf("----");
1682
    }
1683
    RBBIDebugPrintf("\n");
1684
1685
    for (n=0; n<fDStates->size(); n++) {
1686
        RBBIStateDescriptor *sd = (RBBIStateDescriptor *)fDStates->elementAt(n);
1687
        RBBIDebugPrintf("  %3d | " , n);
1688
        RBBIDebugPrintf("%3d %3d %5d ", sd->fAccepting, sd->fLookAhead, sd->fTagsIdx);
1689
        for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
1690
            RBBIDebugPrintf(" %3d", sd->fDtran->elementAti(c));
1691
        }
1692
        RBBIDebugPrintf("\n");
1693
    }
1694
    RBBIDebugPrintf("\n\n");
1695
}
1696
#endif
1697
1698
1699
//-----------------------------------------------------------------------------
1700
//
1701
//   printSafeTable    Debug Function.  Dump the fully constructed safe table.
1702
//
1703
//-----------------------------------------------------------------------------
1704
#ifdef RBBI_DEBUG
1705
void RBBITableBuilder::printReverseTable() {
1706
    int     c;    // input "character"
1707
    int     n;    // state number
1708
1709
    RBBIDebugPrintf("    Safe Reverse Table \n");
1710
    if (fSafeTable == nullptr) {
1711
        RBBIDebugPrintf("   --- nullptr ---\n");
1712
        return;
1713
    }
1714
    RBBIDebugPrintf("state |           i n p u t     s y m b o l s \n");
1715
    RBBIDebugPrintf("      | Acc  LA    Tag");
1716
    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
1717
        RBBIDebugPrintf(" %2d", c);
1718
    }
1719
    RBBIDebugPrintf("\n");
1720
    RBBIDebugPrintf("      |---------------");
1721
    for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
1722
        RBBIDebugPrintf("---");
1723
    }
1724
    RBBIDebugPrintf("\n");
1725
1726
    for (n=0; n<fSafeTable->size(); n++) {
1727
        UnicodeString *rowString = (UnicodeString *)fSafeTable->elementAt(n);
1728
        RBBIDebugPrintf("  %3d | " , n);
1729
        RBBIDebugPrintf("%3d %3d %5d ", 0, 0, 0);  // Accepting, LookAhead, Tags
1730
        for (c=0; c<fRB->fSetBuilder->getNumCharCategories(); c++) {
1731
            RBBIDebugPrintf(" %2d", rowString->charAt(c));
1732
        }
1733
        RBBIDebugPrintf("\n");
1734
    }
1735
    RBBIDebugPrintf("\n\n");
1736
}
1737
#endif
1738
1739
1740
1741
//-----------------------------------------------------------------------------
1742
//
1743
//   printRuleStatusTable    Debug Function.  Dump the common rule status table
1744
//
1745
//-----------------------------------------------------------------------------
1746
#ifdef RBBI_DEBUG
1747
void RBBITableBuilder::printRuleStatusTable() {
1748
    int32_t  thisRecord = 0;
1749
    int32_t  nextRecord = 0;
1750
    int      i;
1751
    UVector  *tbl = fRB->fRuleStatusVals;
1752
1753
    RBBIDebugPrintf("index |  tags \n");
1754
    RBBIDebugPrintf("-------------------\n");
1755
1756
    while (nextRecord < tbl->size()) {
1757
        thisRecord = nextRecord;
1758
        nextRecord = thisRecord + tbl->elementAti(thisRecord) + 1;
1759
        RBBIDebugPrintf("%4d   ", thisRecord);
1760
        for (i=thisRecord+1; i<nextRecord; i++) {
1761
            RBBIDebugPrintf("  %5d", tbl->elementAti(i));
1762
        }
1763
        RBBIDebugPrintf("\n");
1764
    }
1765
    RBBIDebugPrintf("\n\n");
1766
}
1767
#endif
1768
1769
1770
//-----------------------------------------------------------------------------
1771
//
1772
//   RBBIStateDescriptor     Methods.  This is a very struct-like class
1773
//                           Most access is directly to the fields.
1774
//
1775
//-----------------------------------------------------------------------------
1776
1777
0
RBBIStateDescriptor::RBBIStateDescriptor(int lastInputSymbol, UErrorCode *fStatus) {
1778
0
    fMarked    = false;
1779
0
    fAccepting = 0;
1780
0
    fLookAhead = 0;
1781
0
    fTagsIdx   = 0;
1782
0
    fTagVals   = nullptr;
1783
0
    fPositions = nullptr;
1784
0
    fDtran     = nullptr;
1785
1786
0
    fDtran     = new UVector32(lastInputSymbol+1, *fStatus);
1787
0
    if (U_FAILURE(*fStatus)) {
1788
0
        return;
1789
0
    }
1790
0
    if (fDtran == nullptr) {
1791
0
        *fStatus = U_MEMORY_ALLOCATION_ERROR;
1792
0
        return;
1793
0
    }
1794
0
    fDtran->setSize(lastInputSymbol+1);    // fDtran needs to be pre-sized.
1795
                                           //   It is indexed by input symbols, and will
1796
                                           //   hold  the next state number for each
1797
                                           //   symbol.
1798
0
}
1799
1800
1801
0
RBBIStateDescriptor::~RBBIStateDescriptor() {
1802
0
    delete       fPositions;
1803
0
    delete       fDtran;
1804
0
    delete       fTagVals;
1805
0
    fPositions = nullptr;
1806
0
    fDtran     = nullptr;
1807
0
    fTagVals   = nullptr;
1808
0
}
1809
1810
U_NAMESPACE_END
1811
1812
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */