Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/ucharstrie.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*   Copyright (C) 2010-2011, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
*   file name:  ucharstrie.h
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2010nov14
14
*   created by: Markus W. Scherer
15
*/
16
17
#include "unicode/utypes.h"
18
#include "unicode/appendable.h"
19
#include "unicode/ucharstrie.h"
20
#include "unicode/uobject.h"
21
#include "unicode/utf16.h"
22
#include "cmemory.h"
23
#include "uassert.h"
24
25
U_NAMESPACE_BEGIN
26
27
0
UCharsTrie::~UCharsTrie() {
28
0
    uprv_free(ownedArray_);
29
0
}
30
31
UStringTrieResult
32
0
UCharsTrie::current() const {
33
0
    const UChar *pos=pos_;
34
0
    if(pos==NULL) {
35
0
        return USTRINGTRIE_NO_MATCH;
36
0
    } else {
37
0
        int32_t node;
38
0
        return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
39
0
                valueResult(node) : USTRINGTRIE_NO_VALUE;
40
0
    }
41
0
}
42
43
UStringTrieResult
44
0
UCharsTrie::firstForCodePoint(UChar32 cp) {
45
0
    return cp<=0xffff ?
46
0
        first(cp) :
47
0
        (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ?
48
0
            next(U16_TRAIL(cp)) :
49
0
            USTRINGTRIE_NO_MATCH);
50
0
}
51
52
UStringTrieResult
53
0
UCharsTrie::nextForCodePoint(UChar32 cp) {
54
0
    return cp<=0xffff ?
55
0
        next(cp) :
56
0
        (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ?
57
0
            next(U16_TRAIL(cp)) :
58
0
            USTRINGTRIE_NO_MATCH);
59
0
}
60
61
UStringTrieResult
62
0
UCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
63
    // Branch according to the current unit.
64
0
    if(length==0) {
65
0
        length=*pos++;
66
0
    }
67
0
    ++length;
68
    // The length of the branch is the number of units to select from.
69
    // The data structure encodes a binary search.
70
0
    while(length>kMaxBranchLinearSubNodeLength) {
71
0
        if(uchar<*pos++) {
72
0
            length>>=1;
73
0
            pos=jumpByDelta(pos);
74
0
        } else {
75
0
            length=length-(length>>1);
76
0
            pos=skipDelta(pos);
77
0
        }
78
0
    }
79
    // Drop down to linear search for the last few units.
80
    // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
81
    // and divides length by 2.
82
0
    do {
83
0
        if(uchar==*pos++) {
84
0
            UStringTrieResult result;
85
0
            int32_t node=*pos;
86
0
            if(node&kValueIsFinal) {
87
                // Leave the final value for getValue() to read.
88
0
                result=USTRINGTRIE_FINAL_VALUE;
89
0
            } else {
90
                // Use the non-final value as the jump delta.
91
0
                ++pos;
92
                // int32_t delta=readValue(pos, node);
93
0
                int32_t delta;
94
0
                if(node<kMinTwoUnitValueLead) {
95
0
                    delta=node;
96
0
                } else if(node<kThreeUnitValueLead) {
97
0
                    delta=((node-kMinTwoUnitValueLead)<<16)|*pos++;
98
0
                } else {
99
0
                    delta=(pos[0]<<16)|pos[1];
100
0
                    pos+=2;
101
0
                }
102
                // end readValue()
103
0
                pos+=delta;
104
0
                node=*pos;
105
0
                result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
106
0
            }
107
0
            pos_=pos;
108
0
            return result;
109
0
        }
110
0
        --length;
111
0
        pos=skipValue(pos);
112
0
    } while(length>1);
113
0
    if(uchar==*pos++) {
114
0
        pos_=pos;
115
0
        int32_t node=*pos;
116
0
        return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
117
0
    } else {
118
0
        stop();
119
0
        return USTRINGTRIE_NO_MATCH;
120
0
    }
121
0
}
122
123
UStringTrieResult
124
0
UCharsTrie::nextImpl(const UChar *pos, int32_t uchar) {
125
0
    int32_t node=*pos++;
126
0
    for(;;) {
127
0
        if(node<kMinLinearMatch) {
128
0
            return branchNext(pos, node, uchar);
129
0
        } else if(node<kMinValueLead) {
130
            // Match the first of length+1 units.
131
0
            int32_t length=node-kMinLinearMatch;  // Actual match length minus 1.
132
0
            if(uchar==*pos++) {
133
0
                remainingMatchLength_=--length;
134
0
                pos_=pos;
135
0
                return (length<0 && (node=*pos)>=kMinValueLead) ?
136
0
                        valueResult(node) : USTRINGTRIE_NO_VALUE;
137
0
            } else {
138
                // No match.
139
0
                break;
140
0
            }
141
0
        } else if(node&kValueIsFinal) {
142
            // No further matching units.
143
0
            break;
144
0
        } else {
145
            // Skip intermediate value.
146
0
            pos=skipNodeValue(pos, node);
147
0
            node&=kNodeTypeMask;
148
0
        }
149
0
    }
150
0
    stop();
151
0
    return USTRINGTRIE_NO_MATCH;
152
0
}
153
154
UStringTrieResult
155
0
UCharsTrie::next(int32_t uchar) {
156
0
    const UChar *pos=pos_;
157
0
    if(pos==NULL) {
158
0
        return USTRINGTRIE_NO_MATCH;
159
0
    }
160
0
    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
161
0
    if(length>=0) {
162
        // Remaining part of a linear-match node.
163
0
        if(uchar==*pos++) {
164
0
            remainingMatchLength_=--length;
165
0
            pos_=pos;
166
0
            int32_t node;
167
0
            return (length<0 && (node=*pos)>=kMinValueLead) ?
168
0
                    valueResult(node) : USTRINGTRIE_NO_VALUE;
169
0
        } else {
170
0
            stop();
171
0
            return USTRINGTRIE_NO_MATCH;
172
0
        }
173
0
    }
174
0
    return nextImpl(pos, uchar);
175
0
}
176
177
UStringTrieResult
178
0
UCharsTrie::next(ConstChar16Ptr ptr, int32_t sLength) {
179
0
    const UChar *s=ptr;
180
0
    if(sLength<0 ? *s==0 : sLength==0) {
181
        // Empty input.
182
0
        return current();
183
0
    }
184
0
    const UChar *pos=pos_;
185
0
    if(pos==NULL) {
186
0
        return USTRINGTRIE_NO_MATCH;
187
0
    }
188
0
    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
189
0
    for(;;) {
190
        // Fetch the next input unit, if there is one.
191
        // Continue a linear-match node without rechecking sLength<0.
192
0
        int32_t uchar;
193
0
        if(sLength<0) {
194
0
            for(;;) {
195
0
                if((uchar=*s++)==0) {
196
0
                    remainingMatchLength_=length;
197
0
                    pos_=pos;
198
0
                    int32_t node;
199
0
                    return (length<0 && (node=*pos)>=kMinValueLead) ?
200
0
                            valueResult(node) : USTRINGTRIE_NO_VALUE;
201
0
                }
202
0
                if(length<0) {
203
0
                    remainingMatchLength_=length;
204
0
                    break;
205
0
                }
206
0
                if(uchar!=*pos) {
207
0
                    stop();
208
0
                    return USTRINGTRIE_NO_MATCH;
209
0
                }
210
0
                ++pos;
211
0
                --length;
212
0
            }
213
0
        } else {
214
0
            for(;;) {
215
0
                if(sLength==0) {
216
0
                    remainingMatchLength_=length;
217
0
                    pos_=pos;
218
0
                    int32_t node;
219
0
                    return (length<0 && (node=*pos)>=kMinValueLead) ?
220
0
                            valueResult(node) : USTRINGTRIE_NO_VALUE;
221
0
                }
222
0
                uchar=*s++;
223
0
                --sLength;
224
0
                if(length<0) {
225
0
                    remainingMatchLength_=length;
226
0
                    break;
227
0
                }
228
0
                if(uchar!=*pos) {
229
0
                    stop();
230
0
                    return USTRINGTRIE_NO_MATCH;
231
0
                }
232
0
                ++pos;
233
0
                --length;
234
0
            }
235
0
        }
236
0
        int32_t node=*pos++;
237
0
        for(;;) {
238
0
            if(node<kMinLinearMatch) {
239
0
                UStringTrieResult result=branchNext(pos, node, uchar);
240
0
                if(result==USTRINGTRIE_NO_MATCH) {
241
0
                    return USTRINGTRIE_NO_MATCH;
242
0
                }
243
                // Fetch the next input unit, if there is one.
244
0
                if(sLength<0) {
245
0
                    if((uchar=*s++)==0) {
246
0
                        return result;
247
0
                    }
248
0
                } else {
249
0
                    if(sLength==0) {
250
0
                        return result;
251
0
                    }
252
0
                    uchar=*s++;
253
0
                    --sLength;
254
0
                }
255
0
                if(result==USTRINGTRIE_FINAL_VALUE) {
256
                    // No further matching units.
257
0
                    stop();
258
0
                    return USTRINGTRIE_NO_MATCH;
259
0
                }
260
0
                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
261
0
                node=*pos++;
262
0
            } else if(node<kMinValueLead) {
263
                // Match length+1 units.
264
0
                length=node-kMinLinearMatch;  // Actual match length minus 1.
265
0
                if(uchar!=*pos) {
266
0
                    stop();
267
0
                    return USTRINGTRIE_NO_MATCH;
268
0
                }
269
0
                ++pos;
270
0
                --length;
271
0
                break;
272
0
            } else if(node&kValueIsFinal) {
273
                // No further matching units.
274
0
                stop();
275
0
                return USTRINGTRIE_NO_MATCH;
276
0
            } else {
277
                // Skip intermediate value.
278
0
                pos=skipNodeValue(pos, node);
279
0
                node&=kNodeTypeMask;
280
0
            }
281
0
        }
282
0
    }
283
0
}
284
285
const UChar *
286
UCharsTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length,
287
0
                                      UBool haveUniqueValue, int32_t &uniqueValue) {
288
0
    while(length>kMaxBranchLinearSubNodeLength) {
289
0
        ++pos;  // ignore the comparison unit
290
0
        if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
291
0
            return NULL;
292
0
        }
293
0
        length=length-(length>>1);
294
0
        pos=skipDelta(pos);
295
0
    }
296
0
    do {
297
0
        ++pos;  // ignore a comparison unit
298
        // handle its value
299
0
        int32_t node=*pos++;
300
0
        UBool isFinal=(UBool)(node>>15);
301
0
        node&=0x7fff;
302
0
        int32_t value=readValue(pos, node);
303
0
        pos=skipValue(pos, node);
304
0
        if(isFinal) {
305
0
            if(haveUniqueValue) {
306
0
                if(value!=uniqueValue) {
307
0
                    return NULL;
308
0
                }
309
0
            } else {
310
0
                uniqueValue=value;
311
0
                haveUniqueValue=TRUE;
312
0
            }
313
0
        } else {
314
0
            if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
315
0
                return NULL;
316
0
            }
317
0
            haveUniqueValue=TRUE;
318
0
        }
319
0
    } while(--length>1);
320
0
    return pos+1;  // ignore the last comparison unit
321
0
}
322
323
UBool
324
0
UCharsTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
325
0
    int32_t node=*pos++;
326
0
    for(;;) {
327
0
        if(node<kMinLinearMatch) {
328
0
            if(node==0) {
329
0
                node=*pos++;
330
0
            }
331
0
            pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
332
0
            if(pos==NULL) {
333
0
                return FALSE;
334
0
            }
335
0
            haveUniqueValue=TRUE;
336
0
            node=*pos++;
337
0
        } else if(node<kMinValueLead) {
338
            // linear-match node
339
0
            pos+=node-kMinLinearMatch+1;  // Ignore the match units.
340
0
            node=*pos++;
341
0
        } else {
342
0
            UBool isFinal=(UBool)(node>>15);
343
0
            int32_t value;
344
0
            if(isFinal) {
345
0
                value=readValue(pos, node&0x7fff);
346
0
            } else {
347
0
                value=readNodeValue(pos, node);
348
0
            }
349
0
            if(haveUniqueValue) {
350
0
                if(value!=uniqueValue) {
351
0
                    return FALSE;
352
0
                }
353
0
            } else {
354
0
                uniqueValue=value;
355
0
                haveUniqueValue=TRUE;
356
0
            }
357
0
            if(isFinal) {
358
0
                return TRUE;
359
0
            }
360
0
            pos=skipNodeValue(pos, node);
361
0
            node&=kNodeTypeMask;
362
0
        }
363
0
    }
364
0
}
365
366
int32_t
367
0
UCharsTrie::getNextUChars(Appendable &out) const {
368
0
    const UChar *pos=pos_;
369
0
    if(pos==NULL) {
370
0
        return 0;
371
0
    }
372
0
    if(remainingMatchLength_>=0) {
373
0
        out.appendCodeUnit(*pos);  // Next unit of a pending linear-match node.
374
0
        return 1;
375
0
    }
376
0
    int32_t node=*pos++;
377
0
    if(node>=kMinValueLead) {
378
0
        if(node&kValueIsFinal) {
379
0
            return 0;
380
0
        } else {
381
0
            pos=skipNodeValue(pos, node);
382
0
            node&=kNodeTypeMask;
383
0
        }
384
0
    }
385
0
    if(node<kMinLinearMatch) {
386
0
        if(node==0) {
387
0
            node=*pos++;
388
0
        }
389
0
        out.reserveAppendCapacity(++node);
390
0
        getNextBranchUChars(pos, node, out);
391
0
        return node;
392
0
    } else {
393
        // First unit of the linear-match node.
394
0
        out.appendCodeUnit(*pos);
395
0
        return 1;
396
0
    }
397
0
}
398
399
void
400
0
UCharsTrie::getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out) {
401
0
    while(length>kMaxBranchLinearSubNodeLength) {
402
0
        ++pos;  // ignore the comparison unit
403
0
        getNextBranchUChars(jumpByDelta(pos), length>>1, out);
404
0
        length=length-(length>>1);
405
0
        pos=skipDelta(pos);
406
0
    }
407
0
    do {
408
0
        out.appendCodeUnit(*pos++);
409
0
        pos=skipValue(pos);
410
0
    } while(--length>1);
411
0
    out.appendCodeUnit(*pos);
412
0
}
413
414
U_NAMESPACE_END