Coverage Report

Created: 2025-10-24 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/rbbinode.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
***************************************************************************
5
*   Copyright (C) 2002-2016 International Business Machines Corporation   *
6
*   and others. All rights reserved.                                      *
7
***************************************************************************
8
*/
9
10
//
11
//  File:  rbbinode.cpp
12
//
13
//         Implementation of class RBBINode, which represents a node in the
14
//         tree generated when parsing the Rules Based Break Iterator rules.
15
//
16
//         This "Class" is actually closer to a struct.
17
//         Code using it is expected to directly access fields much of the time.
18
//
19
20
#include "unicode/utypes.h"
21
22
#if !UCONFIG_NO_BREAK_ITERATION
23
24
#include "unicode/unistr.h"
25
#include "unicode/uniset.h"
26
#include "unicode/uchar.h"
27
#include "unicode/parsepos.h"
28
29
#include "cstr.h"
30
#include "uvector.h"
31
32
#include "rbbirb.h"
33
#include "rbbinode.h"
34
35
#include "uassert.h"
36
37
38
U_NAMESPACE_BEGIN
39
40
#ifdef RBBI_DEBUG
41
static int  gLastSerial = 0;
42
#endif
43
44
45
//-------------------------------------------------------------------------
46
//
47
//    Constructor.   Just set the fields to reasonable default values.
48
//
49
//-------------------------------------------------------------------------
50
11.5M
RBBINode::RBBINode(NodeType t, UErrorCode& status) : UMemory() {
51
11.5M
    if (U_FAILURE(status)) {
52
0
        return;
53
0
    }
54
#ifdef RBBI_DEBUG
55
    fSerialNum    = ++gLastSerial;
56
#endif
57
11.5M
    fType         = t;
58
11.5M
    fParent       = nullptr;
59
11.5M
    fLeftChild    = nullptr;
60
11.5M
    fRightChild   = nullptr;
61
11.5M
    fInputSet     = nullptr;
62
11.5M
    fFirstPos     = 0;
63
11.5M
    fLastPos      = 0;
64
11.5M
    fNullable     = false;
65
11.5M
    fLookAheadEnd = false;
66
11.5M
    fRuleRoot     = false;
67
11.5M
    fChainIn      = false;
68
11.5M
    fVal          = 0;
69
11.5M
    fPrecedence   = precZero;
70
71
11.5M
    fFirstPosSet  = new UVector(status);
72
11.5M
    fLastPosSet   = new UVector(status);
73
11.5M
    fFollowPos    = new UVector(status);
74
11.5M
    if (U_SUCCESS(status) &&
75
11.5M
        (fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) {
76
0
        status =  U_MEMORY_ALLOCATION_ERROR;
77
0
    }
78
11.5M
    if      (t==opCat)    {fPrecedence = precOpCat;}
79
6.08M
    else if (t==opOr)     {fPrecedence = precOpOr;}
80
5.94M
    else if (t==opStart)  {fPrecedence = precStart;}
81
5.90M
    else if (t==opLParen) {fPrecedence = precLParen;}
82
83
11.5M
}
84
85
86
907k
RBBINode::RBBINode(const RBBINode &other, UErrorCode& status) : UMemory(other) {
87
907k
    if (U_FAILURE(status)) {
88
0
        return;
89
0
    }
90
#ifdef RBBI_DEBUG
91
    fSerialNum   = ++gLastSerial;
92
#endif
93
907k
    fType        = other.fType;
94
907k
    fParent      = nullptr;
95
907k
    fLeftChild   = nullptr;
96
907k
    fRightChild  = nullptr;
97
907k
    fInputSet    = other.fInputSet;
98
907k
    fPrecedence  = other.fPrecedence;
99
907k
    fText        = other.fText;
100
907k
    fFirstPos    = other.fFirstPos;
101
907k
    fLastPos     = other.fLastPos;
102
907k
    fNullable    = other.fNullable;
103
907k
    fVal         = other.fVal;
104
907k
    fRuleRoot    = false;
105
907k
    fChainIn     = other.fChainIn;
106
907k
    fFirstPosSet = new UVector(status);   // TODO - get a real status from somewhere
107
907k
    fLastPosSet  = new UVector(status);
108
907k
    fFollowPos   = new UVector(status);
109
907k
    if (U_SUCCESS(status) &&
110
907k
        (fFirstPosSet == nullptr || fLastPosSet == nullptr || fFollowPos == nullptr)) {
111
0
        status =  U_MEMORY_ALLOCATION_ERROR;
112
0
    }
113
907k
}
114
115
116
//-------------------------------------------------------------------------
117
//
118
//    Destructor.   Deletes both this node AND any child nodes,
119
//                  except in the case of variable reference nodes.  For
120
//                  these, the l. child points back to the definition, which
121
//                  is common for all references to the variable, meaning
122
//                  it can't be deleted here.
123
//
124
//-------------------------------------------------------------------------
125
12.4M
RBBINode::~RBBINode() {
126
    // printf("deleting node %8x   serial %4d\n", this, this->fSerialNum);
127
12.4M
    delete fInputSet;
128
12.4M
    fInputSet = nullptr;
129
130
12.4M
    switch (this->fType) {
131
2.25k
    case varRef:
132
5.54M
    case setRef:
133
        // for these node types, multiple instances point to the same "children"
134
        // Storage ownership of children handled elsewhere.  Don't delete here.
135
5.54M
        break;
136
137
6.93M
    default:
138
        // Avoid using a recursive implementation because of stack overflow problems.
139
        // See bug ICU-22584.
140
        // delete        fLeftChild;
141
6.93M
        NRDeleteNode(fLeftChild);
142
6.93M
        fLeftChild =   nullptr;
143
        // delete        fRightChild;
144
6.93M
        NRDeleteNode(fRightChild);
145
6.93M
        fRightChild = nullptr;
146
12.4M
    }
147
148
12.4M
    delete fFirstPosSet;
149
12.4M
    delete fLastPosSet;
150
12.4M
    delete fFollowPos;
151
12.4M
}
152
153
/**
154
 * Non-recursive delete of a node + its children. Used from the node destructor
155
 * instead of the more obvious recursive implementation to avoid problems with
156
 * stack overflow with some perverse test rule data (from fuzzing).
157
 */
158
13.8M
void RBBINode::NRDeleteNode(RBBINode *node) {
159
13.8M
    if (node == nullptr) {
160
13.7M
        return;
161
13.7M
    }
162
163
111k
    RBBINode *stopNode = node->fParent;
164
111k
    RBBINode *nextNode = node;
165
24.0M
    while (nextNode != stopNode && nextNode != nullptr) {
166
23.9M
        RBBINode *currentNode = nextNode;
167
168
23.9M
        if ((currentNode->fLeftChild == nullptr && currentNode->fRightChild == nullptr) ||
169
17.2M
                currentNode->fType == varRef ||      // varRef and setRef nodes do not
170
17.2M
                currentNode->fType == setRef) {      // own their children nodes.
171
            // CurrentNode is effectively a leaf node; it's safe to go ahead and delete it.
172
12.0M
            nextNode = currentNode->fParent;
173
12.0M
            if (nextNode) {
174
12.0M
                if (nextNode->fLeftChild == currentNode) {
175
6.05M
                    nextNode->fLeftChild = nullptr;
176
6.05M
                } else if (nextNode->fRightChild == currentNode) {
177
5.95M
                    nextNode->fRightChild = nullptr;
178
5.95M
                }
179
12.0M
            }
180
12.0M
            delete currentNode;
181
12.0M
        } else if (currentNode->fLeftChild) {
182
5.94M
            nextNode = currentNode->fLeftChild;
183
5.94M
            if (nextNode->fParent == nullptr) {
184
480
                nextNode->fParent = currentNode;
185
                // fParent isn't always set; do it now if not.
186
480
            }
187
5.94M
            U_ASSERT(nextNode->fParent == currentNode);
188
5.94M
        } else if (currentNode->fRightChild) {
189
5.94M
            nextNode = currentNode->fRightChild;
190
5.94M
            if (nextNode->fParent == nullptr) {
191
537
                nextNode->fParent = currentNode;
192
                // fParent isn't always set; do it now if not.
193
537
            }
194
5.94M
            U_ASSERT(nextNode->fParent == currentNode);
195
5.94M
        }
196
23.9M
    }
197
111k
}
198
199
//-------------------------------------------------------------------------
200
//
201
//    cloneTree     Make a copy of the subtree rooted at this node.
202
//                  Discard any variable references encountered along the way,
203
//                  and replace with copies of the variable's definitions.
204
//                  Used to replicate the expression underneath variable
205
//                  references in preparation for generating the DFA tables.
206
//
207
//-------------------------------------------------------------------------
208
constexpr int kRecursiveDepthLimit = 3500;
209
924k
RBBINode *RBBINode::cloneTree(UErrorCode &status, int depth) {
210
924k
    if (U_FAILURE(status)) {
211
0
        return nullptr;
212
0
    }
213
    // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
214
    // to avoid stack overflow crash.
215
924k
    if (depth > kRecursiveDepthLimit) {
216
22
        status = U_INPUT_TOO_LONG_ERROR;
217
22
        return nullptr;
218
22
    }
219
924k
    RBBINode    *n;
220
221
924k
    if (fType == RBBINode::varRef) {
222
        // If the current node is a variable reference, skip over it
223
        //   and clone the definition of the variable instead.
224
344
        n = fLeftChild->cloneTree(status, depth+1);
225
344
        if (U_FAILURE(status)) {
226
22
            return nullptr;
227
22
        }
228
924k
    } else if (fType == RBBINode::uset) {
229
17.0k
        n = this;
230
907k
    } else {
231
907k
        n = new RBBINode(*this, status);
232
907k
        if (U_FAILURE(status)) {
233
0
            delete n;
234
0
            return nullptr;
235
0
        }
236
        // Check for null pointer.
237
907k
        if (n == nullptr) {
238
0
            status =  U_MEMORY_ALLOCATION_ERROR;
239
0
            return nullptr;
240
0
        }
241
907k
        if (fLeftChild != nullptr) {
242
393k
            n->fLeftChild          = fLeftChild->cloneTree(status, depth+1);
243
393k
            if (U_FAILURE(status)) {
244
49.5k
                delete n;
245
49.5k
                return nullptr;
246
49.5k
            }
247
343k
            n->fLeftChild->fParent = n;
248
343k
        }
249
857k
        if (fRightChild != nullptr) {
250
326k
            n->fRightChild          = fRightChild->cloneTree(status, depth+1);
251
326k
            if (U_FAILURE(status)) {
252
75
                delete n;
253
75
                return nullptr;
254
75
            }
255
326k
            n->fRightChild->fParent = n;
256
326k
        }
257
857k
    }
258
875k
    return n;
259
924k
}
260
261
262
263
//-------------------------------------------------------------------------
264
//
265
//   flattenVariables   Walk a parse tree, replacing any variable
266
//                      references with a copy of the variable's definition.
267
//                      Aside from variables, the tree is not changed.
268
//
269
//                      Return the root of the tree.  If the root was not a variable
270
//                      reference, it remains unchanged - the root we started with
271
//                      is the root we return.  If, however, the root was a variable
272
//                      reference, the root of the newly cloned replacement tree will
273
//                      be returned, and the original tree deleted.
274
//
275
//                      This function works by recursively walking the tree
276
//                      without doing anything until a variable reference is
277
//                      found, then calling cloneTree() at that point.  Any
278
//                      nested references are handled by cloneTree(), not here.
279
//
280
//-------------------------------------------------------------------------
281
1.59M
RBBINode *RBBINode::flattenVariables(UErrorCode& status, int depth) {
282
1.59M
    if (U_FAILURE(status)) {
283
0
        return this;
284
0
    }
285
    // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
286
    // to avoid stack overflow crash.
287
1.59M
    if (depth > kRecursiveDepthLimit) {
288
24
        status = U_INPUT_TOO_LONG_ERROR;
289
24
        return this;
290
24
    }
291
1.59M
    if (fType == varRef) {
292
344
        RBBINode *retNode  = fLeftChild->cloneTree(status, depth+1);
293
344
        if (U_FAILURE(status)) {
294
22
            return this;
295
22
        }
296
322
        retNode->fRuleRoot = this->fRuleRoot;
297
322
        retNode->fChainIn  = this->fChainIn;
298
322
        delete this;   // TODO: undefined behavior. Fix.
299
322
        return retNode;
300
344
    }
301
302
1.59M
    if (fLeftChild != nullptr) {
303
1.05M
        fLeftChild = fLeftChild->flattenVariables(status, depth+1);
304
1.05M
        if (fLeftChild == nullptr) {
305
0
            status = U_MEMORY_ALLOCATION_ERROR;
306
0
        }
307
1.05M
        if (U_FAILURE(status)) {
308
111k
            return this;
309
111k
        }
310
941k
        fLeftChild->fParent  = this;
311
941k
    }
312
1.48M
    if (fRightChild != nullptr) {
313
542k
        fRightChild = fRightChild->flattenVariables(status, depth+1);
314
542k
        if (fRightChild == nullptr) {
315
0
            status = U_MEMORY_ALLOCATION_ERROR;
316
0
        }
317
542k
        if (U_FAILURE(status)) {
318
96
            return this;
319
96
        }
320
542k
        fRightChild->fParent = this;
321
542k
    }
322
1.48M
    return this;
323
1.48M
}
324
325
326
//-------------------------------------------------------------------------
327
//
328
//  flattenSets    Walk the parse tree, replacing any nodes of type setRef
329
//                 with a copy of the expression tree for the set.  A set's
330
//                 equivalent expression tree is precomputed and saved as
331
//                 the left child of the uset node.
332
//
333
//-------------------------------------------------------------------------
334
246k
void RBBINode::flattenSets(UErrorCode &status, int depth) {
335
246k
    if (U_FAILURE(status)) {
336
0
        return;
337
0
    }
338
    // If the depth of the stack is too deep, we return U_INPUT_TOO_LONG_ERROR
339
    // to avoid stack overflow crash.
340
246k
    if (depth > kRecursiveDepthLimit) {
341
0
        status = U_INPUT_TOO_LONG_ERROR;
342
0
        return;
343
0
    }
344
246k
    U_ASSERT(fType != setRef);
345
346
246k
    if (fLeftChild != nullptr) {
347
225k
        if (fLeftChild->fType==setRef) {
348
22.2k
            RBBINode *setRefNode = fLeftChild;
349
22.2k
            RBBINode *usetNode   = setRefNode->fLeftChild;
350
22.2k
            RBBINode *replTree   = usetNode->fLeftChild;
351
22.2k
            fLeftChild           = replTree->cloneTree(status, depth+1);
352
22.2k
            if (U_FAILURE(status)) {
353
0
                delete setRefNode;
354
0
                return;
355
0
            }
356
22.2k
            fLeftChild->fParent  = this;
357
22.2k
            delete setRefNode;
358
203k
        } else {
359
203k
            fLeftChild->flattenSets(status, depth+1);
360
203k
        }
361
225k
    }
362
363
246k
    if (fRightChild != nullptr) {
364
222k
        if (fRightChild->fType==setRef) {
365
181k
            RBBINode *setRefNode = fRightChild;
366
181k
            RBBINode *usetNode   = setRefNode->fLeftChild;
367
181k
            RBBINode *replTree   = usetNode->fLeftChild;
368
181k
            fRightChild           = replTree->cloneTree(status, depth+1);
369
181k
            if (U_FAILURE(status)) {
370
0
                delete setRefNode;
371
0
                return;
372
0
            }
373
181k
            fRightChild->fParent  = this;
374
181k
            delete setRefNode;
375
181k
        } else {
376
40.9k
            fRightChild->flattenSets(status, depth+1);
377
40.9k
        }
378
222k
    }
379
246k
}
380
381
382
383
//-------------------------------------------------------------------------
384
//
385
//   findNodes()     Locate all the nodes of the specified type, starting
386
//                   at the specified root.
387
//
388
//-------------------------------------------------------------------------
389
3.47M
void   RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
390
    /* test for buffer overflows */
391
3.47M
    if (U_FAILURE(status)) {
392
0
        return;
393
0
    }
394
3.47M
    U_ASSERT(!dest->hasDeleter());
395
3.47M
    if (fType == kind) {
396
147k
        dest->addElement(this, status);
397
147k
    }
398
3.47M
    if (fLeftChild != nullptr) {
399
1.73M
        fLeftChild->findNodes(dest, kind, status);
400
1.73M
    }
401
3.47M
    if (fRightChild != nullptr) {
402
1.72M
        fRightChild->findNodes(dest, kind, status);
403
1.72M
    }
404
3.47M
}
405
406
407
//-------------------------------------------------------------------------
408
//
409
//    print.         Print out a single node, for debugging.
410
//
411
//-------------------------------------------------------------------------
412
#ifdef RBBI_DEBUG
413
414
static int32_t serial(const RBBINode *node) {
415
    return (node == nullptr? -1 : node->fSerialNum);
416
}
417
418
419
void RBBINode::printNode(const RBBINode *node) {
420
    static const char * const nodeTypeNames[] = {
421
                "setRef",
422
                "uset",
423
                "varRef",
424
                "leafChar",
425
                "lookAhead",
426
                "tag",
427
                "endMark",
428
                "opStart",
429
                "opCat",
430
                "opOr",
431
                "opStar",
432
                "opPlus",
433
                "opQuestion",
434
                "opBreak",
435
                "opReverse",
436
                "opLParen"
437
    };
438
439
    if (node==nullptr) {
440
        RBBIDebugPrintf("%10p", (void *)node);
441
    } else {
442
        RBBIDebugPrintf("%10p %5d %12s %c%c  %5d       %5d     %5d       %6d     %d ",
443
            (void *)node, node->fSerialNum, nodeTypeNames[node->fType],
444
            node->fRuleRoot?'R':' ', node->fChainIn?'C':' ',
445
            serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
446
            node->fFirstPos, node->fVal);
447
        if (node->fType == varRef) {
448
            RBBI_DEBUG_printUnicodeString(node->fText);
449
        }
450
    }
451
    RBBIDebugPrintf("\n");
452
}
453
#endif
454
455
456
#ifdef RBBI_DEBUG
457
U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
458
    RBBIDebugPrintf("%*s", minWidth, CStr(s)());
459
}
460
#endif
461
462
463
//-------------------------------------------------------------------------
464
//
465
//    print.         Print out the tree of nodes rooted at "this"
466
//
467
//-------------------------------------------------------------------------
468
#ifdef RBBI_DEBUG
469
void RBBINode::printNodeHeader() {
470
    RBBIDebugPrintf(" Address   serial        type     LeftChild  RightChild   Parent   position value\n");
471
}
472
    
473
void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
474
    if (printHeading) {
475
        printNodeHeader();
476
    }
477
    printNode(node);
478
    if (node != nullptr) {
479
        // Only dump the definition under a variable reference if asked to.
480
        // Unconditionally dump children of all other node types.
481
        if (node->fType != varRef) {
482
            if (node->fLeftChild != nullptr) {
483
                printTree(node->fLeftChild, false);
484
            }
485
            
486
            if (node->fRightChild != nullptr) {
487
                printTree(node->fRightChild, false);
488
            }
489
        }
490
    }
491
}
492
#endif
493
494
495
496
U_NAMESPACE_END
497
498
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */