Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/rbbinode.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
***************************************************************************
5
*   Copyright (C) 2002-2016 International Business Machines Corporation   *
6
*   and others. All rights reserved.                                      *
7
***************************************************************************
8
*/
9
10
//
11
//  File:  rbbinode.cpp
12
//
13
//         Implementation of class RBBINode, which represents a node in the
14
//         tree generated when parsing the Rules Based Break Iterator rules.
15
//
16
//         This "Class" is actually closer to a struct.
17
//         Code using it is expected to directly access fields much of the time.
18
//
19
20
#include "unicode/utypes.h"
21
22
#if !UCONFIG_NO_BREAK_ITERATION
23
24
#include "unicode/unistr.h"
25
#include "unicode/uniset.h"
26
#include "unicode/uchar.h"
27
#include "unicode/parsepos.h"
28
29
#include "cstr.h"
30
#include "uvector.h"
31
32
#include "rbbirb.h"
33
#include "rbbinode.h"
34
35
#include "uassert.h"
36
37
38
U_NAMESPACE_BEGIN
39
40
#ifdef RBBI_DEBUG
41
static int  gLastSerial = 0;
42
#endif
43
44
45
//-------------------------------------------------------------------------
46
//
47
//    Constructor.   Just set the fields to reasonable default values.
48
//
49
//-------------------------------------------------------------------------
50
0
RBBINode::RBBINode(NodeType t) : UMemory() {
51
#ifdef RBBI_DEBUG
52
    fSerialNum    = ++gLastSerial;
53
#endif
54
0
    fType         = t;
55
0
    fParent       = NULL;
56
0
    fLeftChild    = NULL;
57
0
    fRightChild   = NULL;
58
0
    fInputSet     = NULL;
59
0
    fFirstPos     = 0;
60
0
    fLastPos      = 0;
61
0
    fNullable     = FALSE;
62
0
    fLookAheadEnd = FALSE;
63
0
    fRuleRoot     = FALSE;
64
0
    fChainIn      = FALSE;
65
0
    fVal          = 0;
66
0
    fPrecedence   = precZero;
67
68
0
    UErrorCode     status = U_ZERO_ERROR;
69
0
    fFirstPosSet  = new UVector(status);  // TODO - get a real status from somewhere
70
0
    fLastPosSet   = new UVector(status);
71
0
    fFollowPos    = new UVector(status);
72
0
    if      (t==opCat)    {fPrecedence = precOpCat;}
73
0
    else if (t==opOr)     {fPrecedence = precOpOr;}
74
0
    else if (t==opStart)  {fPrecedence = precStart;}
75
0
    else if (t==opLParen) {fPrecedence = precLParen;}
76
77
0
}
78
79
80
0
RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
81
#ifdef RBBI_DEBUG
82
    fSerialNum   = ++gLastSerial;
83
#endif
84
0
    fType        = other.fType;
85
0
    fParent      = NULL;
86
0
    fLeftChild   = NULL;
87
0
    fRightChild  = NULL;
88
0
    fInputSet    = other.fInputSet;
89
0
    fPrecedence  = other.fPrecedence;
90
0
    fText        = other.fText;
91
0
    fFirstPos    = other.fFirstPos;
92
0
    fLastPos     = other.fLastPos;
93
0
    fNullable    = other.fNullable;
94
0
    fVal         = other.fVal;
95
0
    fRuleRoot    = FALSE;
96
0
    fChainIn     = other.fChainIn;
97
0
    UErrorCode     status = U_ZERO_ERROR;
98
0
    fFirstPosSet = new UVector(status);   // TODO - get a real status from somewhere
99
0
    fLastPosSet  = new UVector(status);
100
0
    fFollowPos   = new UVector(status);
101
0
}
102
103
104
//-------------------------------------------------------------------------
105
//
106
//    Destructor.   Deletes both this node AND any child nodes,
107
//                  except in the case of variable reference nodes.  For
108
//                  these, the l. child points back to the definition, which
109
//                  is common for all references to the variable, meaning
110
//                  it can't be deleted here.
111
//
112
//-------------------------------------------------------------------------
113
0
RBBINode::~RBBINode() {
114
    // printf("deleting node %8x   serial %4d\n", this, this->fSerialNum);
115
0
    delete fInputSet;
116
0
    fInputSet = NULL;
117
118
0
    switch (this->fType) {
119
0
    case varRef:
120
0
    case setRef:
121
        // for these node types, multiple instances point to the same "children"
122
        // Storage ownership of children handled elsewhere.  Don't delete here.
123
0
        break;
124
125
0
    default:
126
0
        delete        fLeftChild;
127
0
        fLeftChild =   NULL;
128
0
        delete        fRightChild;
129
0
        fRightChild = NULL;
130
0
    }
131
132
133
0
    delete fFirstPosSet;
134
0
    delete fLastPosSet;
135
0
    delete fFollowPos;
136
137
0
}
138
139
140
//-------------------------------------------------------------------------
141
//
142
//    cloneTree     Make a copy of the subtree rooted at this node.
143
//                  Discard any variable references encountered along the way,
144
//                  and replace with copies of the variable's definitions.
145
//                  Used to replicate the expression underneath variable
146
//                  references in preparation for generating the DFA tables.
147
//
148
//-------------------------------------------------------------------------
149
0
RBBINode *RBBINode::cloneTree() {
150
0
    RBBINode    *n;
151
152
0
    if (fType == RBBINode::varRef) {
153
        // If the current node is a variable reference, skip over it
154
        //   and clone the definition of the variable instead.
155
0
        n = fLeftChild->cloneTree();
156
0
    } else if (fType == RBBINode::uset) {
157
0
        n = this;
158
0
    } else {
159
0
        n = new RBBINode(*this);
160
        // Check for null pointer.
161
0
        if (n != NULL) {
162
0
            if (fLeftChild != NULL) {
163
0
                n->fLeftChild          = fLeftChild->cloneTree();
164
0
                n->fLeftChild->fParent = n;
165
0
            }
166
0
            if (fRightChild != NULL) {
167
0
                n->fRightChild          = fRightChild->cloneTree();
168
0
                n->fRightChild->fParent = n;
169
0
            }
170
0
        }
171
0
    }
172
0
    return n;
173
0
}
174
175
176
177
//-------------------------------------------------------------------------
178
//
179
//   flattenVariables   Walk a parse tree, replacing any variable
180
//                      references with a copy of the variable's definition.
181
//                      Aside from variables, the tree is not changed.
182
//
183
//                      Return the root of the tree.  If the root was not a variable
184
//                      reference, it remains unchanged - the root we started with
185
//                      is the root we return.  If, however, the root was a variable
186
//                      reference, the root of the newly cloned replacement tree will
187
//                      be returned, and the original tree deleted.
188
//
189
//                      This function works by recursively walking the tree
190
//                      without doing anything until a variable reference is
191
//                      found, then calling cloneTree() at that point.  Any
192
//                      nested references are handled by cloneTree(), not here.
193
//
194
//-------------------------------------------------------------------------
195
0
RBBINode *RBBINode::flattenVariables() {
196
0
    if (fType == varRef) {
197
0
        RBBINode *retNode  = fLeftChild->cloneTree();
198
0
        if (retNode != NULL) {
199
0
            retNode->fRuleRoot = this->fRuleRoot;
200
0
            retNode->fChainIn  = this->fChainIn;
201
0
        }
202
0
        delete this;   // TODO: undefined behavior. Fix.
203
0
        return retNode;
204
0
    }
205
206
0
    if (fLeftChild != NULL) {
207
0
        fLeftChild = fLeftChild->flattenVariables();
208
0
        fLeftChild->fParent  = this;
209
0
    }
210
0
    if (fRightChild != NULL) {
211
0
        fRightChild = fRightChild->flattenVariables();
212
0
        fRightChild->fParent = this;
213
0
    }
214
0
    return this;
215
0
}
216
217
218
//-------------------------------------------------------------------------
219
//
220
//  flattenSets    Walk the parse tree, replacing any nodes of type setRef
221
//                 with a copy of the expression tree for the set.  A set's
222
//                 equivalent expression tree is precomputed and saved as
223
//                 the left child of the uset node.
224
//
225
//-------------------------------------------------------------------------
226
0
void RBBINode::flattenSets() {
227
0
    U_ASSERT(fType != setRef);
228
229
0
    if (fLeftChild != NULL) {
230
0
        if (fLeftChild->fType==setRef) {
231
0
            RBBINode *setRefNode = fLeftChild;
232
0
            RBBINode *usetNode   = setRefNode->fLeftChild;
233
0
            RBBINode *replTree   = usetNode->fLeftChild;
234
0
            fLeftChild           = replTree->cloneTree();
235
0
            fLeftChild->fParent  = this;
236
0
            delete setRefNode;
237
0
        } else {
238
0
            fLeftChild->flattenSets();
239
0
        }
240
0
    }
241
242
0
    if (fRightChild != NULL) {
243
0
        if (fRightChild->fType==setRef) {
244
0
            RBBINode *setRefNode = fRightChild;
245
0
            RBBINode *usetNode   = setRefNode->fLeftChild;
246
0
            RBBINode *replTree   = usetNode->fLeftChild;
247
0
            fRightChild           = replTree->cloneTree();
248
0
            fRightChild->fParent  = this;
249
0
            delete setRefNode;
250
0
        } else {
251
0
            fRightChild->flattenSets();
252
0
        }
253
0
    }
254
0
}
255
256
257
258
//-------------------------------------------------------------------------
259
//
260
//   findNodes()     Locate all the nodes of the specified type, starting
261
//                   at the specified root.
262
//
263
//-------------------------------------------------------------------------
264
0
void   RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
265
    /* test for buffer overflows */
266
0
    if (U_FAILURE(status)) {
267
0
        return;
268
0
    }
269
0
    if (fType == kind) {
270
0
        dest->addElementX(this, status);
271
0
    }
272
0
    if (fLeftChild != NULL) {
273
0
        fLeftChild->findNodes(dest, kind, status);
274
0
    }
275
0
    if (fRightChild != NULL) {
276
0
        fRightChild->findNodes(dest, kind, status);
277
0
    }
278
0
}
279
280
281
//-------------------------------------------------------------------------
282
//
283
//    print.         Print out a single node, for debugging.
284
//
285
//-------------------------------------------------------------------------
286
#ifdef RBBI_DEBUG
287
288
static int32_t serial(const RBBINode *node) {
289
    return (node == NULL? -1 : node->fSerialNum);
290
}
291
292
293
void RBBINode::printNode(const RBBINode *node) {
294
    static const char * const nodeTypeNames[] = {
295
                "setRef",
296
                "uset",
297
                "varRef",
298
                "leafChar",
299
                "lookAhead",
300
                "tag",
301
                "endMark",
302
                "opStart",
303
                "opCat",
304
                "opOr",
305
                "opStar",
306
                "opPlus",
307
                "opQuestion",
308
                "opBreak",
309
                "opReverse",
310
                "opLParen"
311
    };
312
313
    if (node==NULL) {
314
        RBBIDebugPrintf("%10p", (void *)node);
315
    } else {
316
        RBBIDebugPrintf("%10p %5d %12s %c%c  %5d       %5d     %5d       %6d     %d ",
317
            (void *)node, node->fSerialNum, nodeTypeNames[node->fType],
318
            node->fRuleRoot?'R':' ', node->fChainIn?'C':' ',
319
            serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
320
            node->fFirstPos, node->fVal);
321
        if (node->fType == varRef) {
322
            RBBI_DEBUG_printUnicodeString(node->fText);
323
        }
324
    }
325
    RBBIDebugPrintf("\n");
326
}
327
#endif
328
329
330
#ifdef RBBI_DEBUG
331
U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
332
    RBBIDebugPrintf("%*s", minWidth, CStr(s)());
333
}
334
#endif
335
336
337
//-------------------------------------------------------------------------
338
//
339
//    print.         Print out the tree of nodes rooted at "this"
340
//
341
//-------------------------------------------------------------------------
342
#ifdef RBBI_DEBUG
343
void RBBINode::printNodeHeader() {
344
    RBBIDebugPrintf(" Address   serial        type     LeftChild  RightChild   Parent   position value\n");
345
}
346
    
347
void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
348
    if (printHeading) {
349
        printNodeHeader();
350
    }
351
    printNode(node);
352
    if (node != NULL) {
353
        // Only dump the definition under a variable reference if asked to.
354
        // Unconditionally dump children of all other node types.
355
        if (node->fType != varRef) {
356
            if (node->fLeftChild != NULL) {
357
                printTree(node->fLeftChild, FALSE);
358
            }
359
            
360
            if (node->fRightChild != NULL) {
361
                printTree(node->fRightChild, FALSE);
362
            }
363
        }
364
    }
365
}
366
#endif
367
368
369
370
U_NAMESPACE_END
371
372
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */