Coverage Report

Created: 2026-05-06 06:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/rbbinode.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
***************************************************************************
5
*   Copyright (C) 2002-2016 International Business Machines Corporation   *
6
*   and others. All rights reserved.                                      *
7
***************************************************************************
8
*/
9
10
//
11
//  File:  rbbinode.cpp
12
//
13
//         Implementation of class RBBINode, which represents a node in the
14
//         tree generated when parsing the Rules Based Break Iterator rules.
15
//
16
//         This "Class" is actually closer to a struct.
17
//         Code using it is expected to directly access fields much of the time.
18
//
19
20
#include "unicode/utypes.h"
21
22
#if !UCONFIG_NO_BREAK_ITERATION
23
24
#include "unicode/unistr.h"
25
#include "unicode/uniset.h"
26
#include "unicode/uchar.h"
27
#include "unicode/parsepos.h"
28
29
#include "cstr.h"
30
#include "uvector.h"
31
32
#include "rbbirb.h"
33
#include "rbbinode.h"
34
35
#include "uassert.h"
36
37
38
U_NAMESPACE_BEGIN
39
40
#ifdef RBBI_DEBUG
41
static int  gLastSerial = 0;
42
#endif
43
44
45
//-------------------------------------------------------------------------
46
//
47
//    Constructor.   Just set the fields to reasonable default values.
48
//
49
//-------------------------------------------------------------------------
50
6.53M
RBBINode::RBBINode(NodeType t, UErrorCode& status) : UMemory() {
51
6.53M
    if (U_FAILURE(status)) {
52
0
        return;
53
0
    }
54
#ifdef RBBI_DEBUG
55
    fSerialNum    = ++gLastSerial;
56
#endif
57
6.53M
    fType         = t;
58
6.53M
    fParent       = nullptr;
59
6.53M
    fLeftChild    = nullptr;
60
6.53M
    fRightChild   = nullptr;
61
6.53M
    fInputSet     = nullptr;
62
6.53M
    fFirstPos     = 0;
63
6.53M
    fLastPos      = 0;
64
6.53M
    fNullable     = false;
65
6.53M
    fLookAheadEnd = false;
66
6.53M
    fRuleRoot     = false;
67
6.53M
    fChainIn      = false;
68
6.53M
    fVal          = 0;
69
6.53M
    fPrecedence   = precZero;
70
71
6.53M
    fFirstPosSet  = new UVector(status);
72
6.53M
    fLastPosSet   = new UVector(status);
73
6.53M
    fFollowPos    = new UVector(status);
74
6.53M
    if (U_SUCCESS(status) &&
75
6.53M
        (fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) {
76
0
        status =  U_MEMORY_ALLOCATION_ERROR;
77
0
    }
78
6.53M
    if      (t==opCat)    {fPrecedence = precOpCat;}
79
3.57M
    else if (t==opOr)     {fPrecedence = precOpOr;}
80
3.42M
    else if (t==opStart)  {fPrecedence = precStart;}
81
3.36M
    else if (t==opLParen) {fPrecedence = precLParen;}
82
83
6.53M
}
84
85
86
1.09M
RBBINode::RBBINode(const RBBINode &other, UErrorCode& status) : UMemory(other) {
87
1.09M
    if (U_FAILURE(status)) {
88
0
        return;
89
0
    }
90
#ifdef RBBI_DEBUG
91
    fSerialNum   = ++gLastSerial;
92
#endif
93
1.09M
    fType        = other.fType;
94
1.09M
    fParent      = nullptr;
95
1.09M
    fLeftChild   = nullptr;
96
1.09M
    fRightChild  = nullptr;
97
1.09M
    fInputSet    = other.fInputSet;
98
1.09M
    fPrecedence  = other.fPrecedence;
99
1.09M
    fText        = other.fText;
100
1.09M
    fFirstPos    = other.fFirstPos;
101
1.09M
    fLastPos     = other.fLastPos;
102
1.09M
    fNullable    = other.fNullable;
103
1.09M
    fVal         = other.fVal;
104
1.09M
    fRuleRoot    = false;
105
1.09M
    fChainIn     = other.fChainIn;
106
1.09M
    fFirstPosSet = new UVector(status);   // TODO - get a real status from somewhere
107
1.09M
    fLastPosSet  = new UVector(status);
108
1.09M
    fFollowPos   = new UVector(status);
109
1.09M
    if (U_SUCCESS(status) &&
110
1.09M
        (fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) {
111
0
        status =  U_MEMORY_ALLOCATION_ERROR;
112
0
    }
113
1.09M
}
114
115
116
//-------------------------------------------------------------------------
117
//
118
//    Destructor.   Deletes both this node AND any child nodes,
119
//                  except in the case of variable reference nodes.  For
120
//                  these, the l. child points back to the definition, which
121
//                  is common for all references to the variable, meaning
122
//                  it can't be deleted here.
123
//
124
//-------------------------------------------------------------------------
125
7.63M
RBBINode::~RBBINode() {
126
    // printf("deleting node %8x   serial %4d\n", this, this->fSerialNum);
127
7.63M
    delete fInputSet;
128
7.63M
    fInputSet = nullptr;
129
130
7.63M
    switch (this->fType) {
131
1.31k
    case varRef:
132
3.02M
    case setRef:
133
        // for these node types, multiple instances point to the same "children"
134
        // Storage ownership of children handled elsewhere.  Don't delete here.
135
3.02M
        break;
136
137
4.60M
    default:
138
        // Avoid using a recursive implementation because of stack overflow problems.
139
        // See bug ICU-22584.
140
        // delete        fLeftChild;
141
4.60M
        NRDeleteNode(fLeftChild);
142
4.60M
        fLeftChild =   nullptr;
143
        // delete        fRightChild;
144
4.60M
        NRDeleteNode(fRightChild);
145
4.60M
        fRightChild = nullptr;
146
7.63M
    }
147
148
7.63M
    delete fFirstPosSet;
149
7.63M
    delete fLastPosSet;
150
7.63M
    delete fFollowPos;
151
7.63M
}
152
153
/**
154
 * Non-recursive delete of a node + its children. Used from the node destructor
155
 * instead of the more obvious recursive implementation to avoid problems with
156
 * stack overflow with some perverse test rule data (from fuzzing).
157
 */
158
9.21M
void RBBINode::NRDeleteNode(RBBINode *node) {
159
9.21M
    if (node == nullptr) {
160
9.10M
        return;
161
9.10M
    }
162
163
105k
    RBBINode *stopNode = node->fParent;
164
105k
    RBBINode *nextNode = node;
165
14.2M
    while (nextNode != stopNode && nextNode != nullptr) {
166
14.1M
        RBBINode *currentNode = nextNode;
167
168
14.1M
        if ((currentNode->fLeftChild == nullptr && currentNode->fRightChild == nullptr) ||
169
9.84M
                currentNode->fType == varRef ||      // varRef and setRef nodes do not
170
9.84M
                currentNode->fType == setRef) {      // own their children nodes.
171
            // CurrentNode is effectively a leaf node; it's safe to go ahead and delete it.
172
7.14M
            nextNode = currentNode->fParent;
173
7.14M
            if (nextNode) {
174
7.14M
                if (nextNode->fLeftChild == currentNode) {
175
3.62M
                    nextNode->fLeftChild = nullptr;
176
3.62M
                } else if (nextNode->fRightChild == currentNode) {
177
3.52M
                    nextNode->fRightChild = nullptr;
178
3.52M
                }
179
7.14M
            }
180
7.14M
            delete currentNode;
181
7.14M
        } else if (currentNode->fLeftChild) {
182
3.52M
            nextNode = currentNode->fLeftChild;
183
3.52M
            if (nextNode->fParent == nullptr) {
184
772
                nextNode->fParent = currentNode;
185
                // fParent isn't always set; do it now if not.
186
772
            }
187
3.52M
            U_ASSERT(nextNode->fParent == currentNode);
188
3.52M
        } else if (currentNode->fRightChild) {
189
3.51M
            nextNode = currentNode->fRightChild;
190
3.51M
            if (nextNode->fParent == nullptr) {
191
820
                nextNode->fParent = currentNode;
192
                // fParent isn't always set; do it now if not.
193
820
            }
194
3.51M
            U_ASSERT(nextNode->fParent == currentNode);
195
3.51M
        }
196
14.1M
    }
197
105k
}
198
199
//-------------------------------------------------------------------------
200
//
201
//    cloneTree     Make a copy of the subtree rooted at this node.
202
//                  Discard any variable references encountered along the way,
203
//                  and replace with copies of the variable's definitions.
204
//                  Used to replicate the expression underneath variable
205
//                  references in preparation for generating the DFA tables.
206
//
207
//-------------------------------------------------------------------------
208
constexpr int kRecursiveDepthLimit = 3500;
209
1.11M
RBBINode *RBBINode::cloneTree(UErrorCode &status, int depth) {
210
1.11M
    if (U_FAILURE(status)) {
211
0
        return nullptr;
212
0
    }
213
    // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
214
    // to avoid stack overflow crash.
215
1.11M
    if (depth > kRecursiveDepthLimit) {
216
22
        status = U_INPUT_TOO_LONG_ERROR;
217
22
        return nullptr;
218
22
    }
219
1.11M
    RBBINode    *n;
220
221
1.11M
    if (fType == RBBINode::varRef) {
222
        // If the current node is a variable reference, skip over it
223
        //   and clone the definition of the variable instead.
224
315
        n = fLeftChild->cloneTree(status, depth+1);
225
315
        if (U_FAILURE(status)) {
226
22
            return nullptr;
227
22
        }
228
1.11M
    } else if (fType == RBBINode::uset) {
229
19.0k
        n = this;
230
1.09M
    } else {
231
1.09M
        n = new RBBINode(*this, status);
232
1.09M
        if (U_FAILURE(status)) {
233
0
            delete n;
234
0
            return nullptr;
235
0
        }
236
        // Check for null pointer.
237
1.09M
        if (n == nullptr) {
238
0
            status =  U_MEMORY_ALLOCATION_ERROR;
239
0
            return nullptr;
240
0
        }
241
1.09M
        if (fLeftChild != nullptr) {
242
488k
            n->fLeftChild          = fLeftChild->cloneTree(status, depth+1);
243
488k
            if (U_FAILURE(status)) {
244
57.5k
                delete n;
245
57.5k
                return nullptr;
246
57.5k
            }
247
431k
            n->fLeftChild->fParent = n;
248
431k
        }
249
1.04M
        if (fRightChild != nullptr) {
250
412k
            n->fRightChild          = fRightChild->cloneTree(status, depth+1);
251
412k
            if (U_FAILURE(status)) {
252
79
                delete n;
253
79
                return nullptr;
254
79
            }
255
412k
            n->fRightChild->fParent = n;
256
412k
        }
257
1.04M
    }
258
1.05M
    return n;
259
1.11M
}
260
261
262
263
//-------------------------------------------------------------------------
264
//
265
//   flattenVariables   Walk a parse tree, replacing any variable
266
//                      references with a copy of the variable's definition.
267
//                      Aside from variables, the tree is not changed.
268
//
269
//                      Return the root of the tree.  If the root was not a variable
270
//                      reference, it remains unchanged - the root we started with
271
//                      is the root we return.  If, however, the root was a variable
272
//                      reference, the root of the newly cloned replacement tree will
273
//                      be returned, and the original tree deleted.
274
//
275
//                      This function works by recursively walking the tree
276
//                      without doing anything until a variable reference is
277
//                      found, then calling cloneTree() at that point.  Any
278
//                      nested references are handled by cloneTree(), not here.
279
//
280
//-------------------------------------------------------------------------
281
1.76M
RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
282
1.76M
    if (U_FAILURE(status)) {
283
0
        return this;
284
0
    }
285
    // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
286
    // to avoid stack overflow crash.
287
1.76M
    if (depth > kRecursiveDepthLimit) {
288
20
        status = U_INPUT_TOO_LONG_ERROR;
289
20
        return this;
290
20
    }
291
1.76M
    if (fType == varRef) {
292
315
        RBBINode *retNode  = fLeftChild->cloneTree(status, depth+1);
293
315
        if (U_FAILURE(status)) {
294
22
            return this;
295
22
        }
296
293
        retNode->fRuleRoot = this->fRuleRoot;
297
293
        retNode->fChainIn  = this->fChainIn;
298
293
        delete this;   // TODO: undefined behavior. Fix.
299
293
        return retNode;
300
315
    }
301
302
1.76M
    if (fLeftChild != nullptr) {
303
1.13M
        fLeftChild = fLeftChild->flattenVariables(status, depth+1);
304
1.13M
        if (fLeftChild == nullptr) {
305
0
            status = U_MEMORY_ALLOCATION_ERROR;
306
0
        }
307
1.13M
        if (U_FAILURE(status)) {
308
89.2k
            return this;
309
89.2k
        }
310
1.04M
        fLeftChild->fParent  = this;
311
1.04M
    }
312
1.67M
    if (fRightChild != nullptr) {
313
627k
        fRightChild = fRightChild->flattenVariables(status, depth+1);
314
627k
        if (fRightChild == nullptr) {
315
0
            status = U_MEMORY_ALLOCATION_ERROR;
316
0
        }
317
627k
        if (U_FAILURE(status)) {
318
82
            return this;
319
82
        }
320
627k
        fRightChild->fParent = this;
321
627k
    }
322
1.67M
    return this;
323
1.67M
}
324
325
326
//-------------------------------------------------------------------------
327
//
328
//  flattenSets    Walk the parse tree, replacing any nodes of type setRef
329
//                 with a copy of the expression tree for the set.  A set's
330
//                 equivalent expression tree is precomputed and saved as
331
//                 the left child of the uset node.
332
//
333
//-------------------------------------------------------------------------
334
272k
void RBBINode::flattenSets(UErrorCode &status, int depth) {
335
272k
    if (U_FAILURE(status)) {
336
0
        return;
337
0
    }
338
    // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
339
    // to avoid stack overflow crash.
340
272k
    if (depth > kRecursiveDepthLimit) {
341
0
        status = U_INPUT_TOO_LONG_ERROR;
342
0
        return;
343
0
    }
344
272k
    U_ASSERT(fType != setRef);
345
346
272k
    if (fLeftChild != nullptr) {
347
243k
        if (fLeftChild->fType==setRef) {
348
36.5k
            RBBINode *setRefNode = fLeftChild;
349
36.5k
            RBBINode *usetNode   = setRefNode->fLeftChild;
350
36.5k
            RBBINode *replTree   = usetNode->fLeftChild;
351
36.5k
            fLeftChild           = replTree->cloneTree(status, depth+1);
352
36.5k
            if (U_FAILURE(status)) {
353
0
                delete setRefNode;
354
0
                return;
355
0
            }
356
36.5k
            fLeftChild->fParent  = this;
357
36.5k
            delete setRefNode;
358
207k
        } else {
359
207k
            fLeftChild->flattenSets(status, depth+1);
360
207k
        }
361
243k
    }
362
363
272k
    if (fRightChild != nullptr) {
364
241k
        if (fRightChild->fType==setRef) {
365
178k
            RBBINode *setRefNode = fRightChild;
366
178k
            RBBINode *usetNode   = setRefNode->fLeftChild;
367
178k
            RBBINode *replTree   = usetNode->fLeftChild;
368
178k
            fRightChild           = replTree->cloneTree(status, depth+1);
369
178k
            if (U_FAILURE(status)) {
370
0
                delete setRefNode;
371
0
                return;
372
0
            }
373
178k
            fRightChild->fParent  = this;
374
178k
            delete setRefNode;
375
178k
        } else {
376
62.4k
            fRightChild->flattenSets(status, depth+1);
377
62.4k
        }
378
241k
    }
379
272k
}
380
381
382
383
//-------------------------------------------------------------------------
384
//
385
//   findNodes()     Locate all the nodes of the specified type, starting
386
//                   at the specified root.
387
//
388
//-------------------------------------------------------------------------
389
4.13M
void   RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
390
    /* test for buffer overflows */
391
4.13M
    if (U_FAILURE(status)) {
392
0
        return;
393
0
    }
394
4.13M
    U_ASSERT(!dest->hasDeleter());
395
4.13M
    if (fType == kind) {
396
181k
        dest->addElement(this, status);
397
181k
    }
398
4.13M
    if (fLeftChild != nullptr) {
399
2.06M
        fLeftChild->findNodes(dest, kind, status);
400
2.06M
    }
401
4.13M
    if (fRightChild != nullptr) {
402
2.05M
        fRightChild->findNodes(dest, kind, status);
403
2.05M
    }
404
4.13M
}
405
406
407
//-------------------------------------------------------------------------
408
//
409
//    print.         Print out a single node, for debugging.
410
//
411
//-------------------------------------------------------------------------
412
#ifdef RBBI_DEBUG
413
414
static int32_t serial(const RBBINode *node) {
415
    return (node == nullptr? -1 : node->fSerialNum);
416
}
417
418
419
void RBBINode::printNode(const RBBINode *node) {
420
    static const char * const nodeTypeNames[] = {
421
                "setRef",
422
                "uset",
423
                "varRef",
424
                "leafChar",
425
                "lookAhead",
426
                "tag",
427
                "endMark",
428
                "opStart",
429
                "opCat",
430
                "opOr",
431
                "opStar",
432
                "opPlus",
433
                "opQuestion",
434
                "opBreak",
435
                "opReverse",
436
                "opLParen"
437
    };
438
439
    if (node==nullptr) {
440
        RBBIDebugPrintf("%10p", (void *)node);
441
    } else {
442
        RBBIDebugPrintf("%10p %5d %12s %c%c  %5d       %5d     %5d       %6d     %d ",
443
            (void *)node, node->fSerialNum, nodeTypeNames[node->fType],
444
            node->fRuleRoot?'R':' ', node->fChainIn?'C':' ',
445
            serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
446
            node->fFirstPos, node->fVal);
447
        if (node->fType == varRef) {
448
            RBBI_DEBUG_printUnicodeString(node->fText);
449
        }
450
    }
451
    RBBIDebugPrintf("\n");
452
}
453
#endif
454
455
456
#ifdef RBBI_DEBUG
457
U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
458
    RBBIDebugPrintf("%*s", minWidth, CStr(s)());
459
}
460
#endif
461
462
463
//-------------------------------------------------------------------------
464
//
465
//    print.         Print out the tree of nodes rooted at "this"
466
//
467
//-------------------------------------------------------------------------
468
#ifdef RBBI_DEBUG
469
void RBBINode::printNodeHeader() {
470
    RBBIDebugPrintf(" Address   serial        type     LeftChild  RightChild   Parent   position value\n");
471
}
472
    
473
void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
474
    if (printHeading) {
475
        printNodeHeader();
476
    }
477
    printNode(node);
478
    if (node != nullptr) {
479
        // Only dump the definition under a variable reference if asked to.
480
        // Unconditionally dump children of all other node types.
481
        if (node->fType != varRef) {
482
            if (node->fLeftChild != nullptr) {
483
                printTree(node->fLeftChild, false);
484
            }
485
            
486
            if (node->fRightChild != nullptr) {
487
                printTree(node->fRightChild, false);
488
            }
489
        }
490
    }
491
}
492
#endif
493
494
495
496
U_NAMESPACE_END
497
498
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */