Coverage Report

Created: 2026-05-06 06:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/ucharstrie.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*   Copyright (C) 2010-2011, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
*   file name:  ucharstrie.h
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2010nov14
14
*   created by: Markus W. Scherer
15
*/
16
17
#include "unicode/utypes.h"
18
#include "unicode/appendable.h"
19
#include "unicode/ucharstrie.h"
20
#include "unicode/uobject.h"
21
#include "unicode/utf16.h"
22
#include "cmemory.h"
23
#include "uassert.h"
24
25
U_NAMESPACE_BEGIN
26
27
273M
UCharsTrie::~UCharsTrie() {
28
273M
    uprv_free(ownedArray_);
29
273M
}
30
31
UStringTrieResult
32
4.61k
UCharsTrie::current() const {
33
4.61k
    const char16_t *pos=pos_;
34
4.61k
    if(pos==nullptr) {
35
222
        return USTRINGTRIE_NO_MATCH;
36
4.39k
    } else {
37
4.39k
        int32_t node;
38
4.39k
        return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
39
4.36k
                valueResult(node) : USTRINGTRIE_NO_VALUE;
40
4.39k
    }
41
4.61k
}
42
43
UStringTrieResult
44
1.87M
UCharsTrie::firstForCodePoint(UChar32 cp) {
45
1.87M
    return cp<=0xffff ?
46
1.74M
        first(cp) :
47
1.87M
        (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ?
48
119k
            next(U16_TRAIL(cp)) :
49
128k
            USTRINGTRIE_NO_MATCH);
50
1.87M
}
51
52
UStringTrieResult
53
22.6M
UCharsTrie::nextForCodePoint(UChar32 cp) {
54
22.6M
    return cp<=0xffff ?
55
22.4M
        next(cp) :
56
22.6M
        (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ?
57
97.0k
            next(U16_TRAIL(cp)) :
58
139k
            USTRINGTRIE_NO_MATCH);
59
22.6M
}
60
61
UStringTrieResult
62
589M
UCharsTrie::branchNext(const char16_t *pos, int32_t length, int32_t uchar) {
63
    // Branch according to the current unit.
64
589M
    if(length==0) {
65
328M
        length=*pos++;
66
328M
    }
67
589M
    ++length;
68
    // The length of the branch is the number of units to select from.
69
    // The data structure encodes a binary search.
70
4.56G
    while(length>kMaxBranchLinearSubNodeLength) {
71
3.97G
        if(uchar<*pos++) {
72
2.57G
            length>>=1;
73
2.57G
            pos=jumpByDelta(pos);
74
2.57G
        } else {
75
1.40G
            length=length-(length>>1);
76
1.40G
            pos=skipDelta(pos);
77
1.40G
        }
78
3.97G
    }
79
    // Drop down to linear search for the last few units.
80
    // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
81
    // and divides length by 2.
82
1.32G
    do {
83
1.32G
        if(uchar==*pos++) {
84
354M
            UStringTrieResult result;
85
354M
            int32_t node=*pos;
86
354M
            if(node&kValueIsFinal) {
87
                // Leave the final value for getValue() to read.
88
37.8M
                result=USTRINGTRIE_FINAL_VALUE;
89
316M
            } else {
90
                // Use the non-final value as the jump delta.
91
316M
                ++pos;
92
                // int32_t delta=readValue(pos, node);
93
316M
                int32_t delta;
94
316M
                if(node<kMinTwoUnitValueLead) {
95
316M
                    delta=node;
96
316M
                } else if(node<kThreeUnitValueLead) {
97
17.9k
                    delta=((node-kMinTwoUnitValueLead)<<16)|*pos++;
98
17.9k
                } else {
99
0
                    delta=(pos[0]<<16)|pos[1];
100
0
                    pos+=2;
101
0
                }
102
                // end readValue()
103
316M
                pos+=delta;
104
316M
                node=*pos;
105
316M
                result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
106
316M
            }
107
354M
            pos_=pos;
108
354M
            return result;
109
354M
        }
110
966M
        --length;
111
966M
        pos=skipValue(pos);
112
966M
    } while(length>1);
113
235M
    if(uchar==*pos++) {
114
83.9M
        pos_=pos;
115
83.9M
        int32_t node=*pos;
116
83.9M
        return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
117
151M
    } else {
118
151M
        stop();
119
151M
        return USTRINGTRIE_NO_MATCH;
120
151M
    }
121
235M
}
122
123
UStringTrieResult
124
678M
UCharsTrie::nextImpl(const char16_t *pos, int32_t uchar) {
125
678M
    int32_t node=*pos++;
126
824M
    for(;;) {
127
824M
        if(node<kMinLinearMatch) {
128
589M
            return branchNext(pos, node, uchar);
129
589M
        } else if(node<kMinValueLead) {
130
            // Match the first of length+1 units.
131
88.9M
            int32_t length=node-kMinLinearMatch;  // Actual match length minus 1.
132
88.9M
            if(uchar==*pos++) {
133
44.5M
                remainingMatchLength_=--length;
134
44.5M
                pos_=pos;
135
44.5M
                return (length<0 && (node=*pos)>=kMinValueLead) ?
136
24.7M
                        valueResult(node) : USTRINGTRIE_NO_VALUE;
137
44.5M
            } else {
138
                // No match.
139
44.4M
                break;
140
44.4M
            }
141
145M
        } else if(node&kValueIsFinal) {
142
            // No further matching units.
143
2
            break;
144
145M
        } else {
145
            // Skip intermediate value.
146
145M
            pos=skipNodeValue(pos, node);
147
145M
            node&=kNodeTypeMask;
148
145M
        }
149
824M
    }
150
44.4M
    stop();
151
44.4M
    return USTRINGTRIE_NO_MATCH;
152
678M
}
153
154
UStringTrieResult
155
467M
UCharsTrie::next(int32_t uchar) {
156
467M
    const char16_t *pos=pos_;
157
467M
    if(pos==nullptr) {
158
9.06M
        return USTRINGTRIE_NO_MATCH;
159
9.06M
    }
160
458M
    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
161
458M
    if(length>=0) {
162
        // Remaining part of a linear-match node.
163
52.4M
        if(uchar==*pos++) {
164
52.1M
            remainingMatchLength_=--length;
165
52.1M
            pos_=pos;
166
52.1M
            int32_t node;
167
52.1M
            return (length<0 && (node=*pos)>=kMinValueLead) ?
168
29.0M
                    valueResult(node) : USTRINGTRIE_NO_VALUE;
169
52.1M
        } else {
170
306k
            stop();
171
306k
            return USTRINGTRIE_NO_MATCH;
172
306k
        }
173
52.4M
    }
174
406M
    return nextImpl(pos, uchar);
175
458M
}
176
177
UStringTrieResult
178
568
UCharsTrie::next(ConstChar16Ptr ptr, int32_t sLength) {
179
568
    const char16_t *s=ptr;
180
568
    if(sLength<0 ? *s==0 : sLength==0) {
181
        // Empty input.
182
0
        return current();
183
0
    }
184
568
    const char16_t *pos=pos_;
185
568
    if(pos==nullptr) {
186
0
        return USTRINGTRIE_NO_MATCH;
187
0
    }
188
568
    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
189
682
    for(;;) {
190
        // Fetch the next input unit, if there is one.
191
        // Continue a linear-match node without rechecking sLength<0.
192
682
        int32_t uchar;
193
682
        if(sLength<0) {
194
0
            for(;;) {
195
0
                if((uchar=*s++)==0) {
196
0
                    remainingMatchLength_=length;
197
0
                    pos_=pos;
198
0
                    int32_t node;
199
0
                    return (length<0 && (node=*pos)>=kMinValueLead) ?
200
0
                            valueResult(node) : USTRINGTRIE_NO_VALUE;
201
0
                }
202
0
                if(length<0) {
203
0
                    remainingMatchLength_=length;
204
0
                    break;
205
0
                }
206
0
                if(uchar!=*pos) {
207
0
                    stop();
208
0
                    return USTRINGTRIE_NO_MATCH;
209
0
                }
210
0
                ++pos;
211
0
                --length;
212
0
            }
213
682
        } else {
214
727
            for(;;) {
215
727
                if(sLength==0) {
216
12
                    remainingMatchLength_=length;
217
12
                    pos_=pos;
218
12
                    int32_t node;
219
12
                    return (length<0 && (node=*pos)>=kMinValueLead) ?
220
10
                            valueResult(node) : USTRINGTRIE_NO_VALUE;
221
12
                }
222
715
                uchar=*s++;
223
715
                --sLength;
224
715
                if(length<0) {
225
640
                    remainingMatchLength_=length;
226
640
                    break;
227
640
                }
228
75
                if(uchar!=*pos) {
229
30
                    stop();
230
30
                    return USTRINGTRIE_NO_MATCH;
231
30
                }
232
45
                ++pos;
233
45
                --length;
234
45
            }
235
682
        }
236
640
        int32_t node=*pos++;
237
931
        for(;;) {
238
931
            if(node<kMinLinearMatch) {
239
638
                UStringTrieResult result=branchNext(pos, node, uchar);
240
638
                if(result==USTRINGTRIE_NO_MATCH) {
241
325
                    return USTRINGTRIE_NO_MATCH;
242
325
                }
243
                // Fetch the next input unit, if there is one.
244
313
                if(sLength<0) {
245
0
                    if((uchar=*s++)==0) {
246
0
                        return result;
247
0
                    }
248
313
                } else {
249
313
                    if(sLength==0) {
250
9
                        return result;
251
9
                    }
252
304
                    uchar=*s++;
253
304
                    --sLength;
254
304
                }
255
304
                if(result==USTRINGTRIE_FINAL_VALUE) {
256
                    // No further matching units.
257
14
                    stop();
258
14
                    return USTRINGTRIE_NO_MATCH;
259
14
                }
260
290
                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
261
290
                node=*pos++;
262
293
            } else if(node<kMinValueLead) {
263
                // Match length+1 units.
264
282
                length=node-kMinLinearMatch;  // Actual match length minus 1.
265
282
                if(uchar!=*pos) {
266
168
                    stop();
267
168
                    return USTRINGTRIE_NO_MATCH;
268
168
                }
269
114
                ++pos;
270
114
                --length;
271
114
                break;
272
282
            } else if(node&kValueIsFinal) {
273
                // No further matching units.
274
10
                stop();
275
10
                return USTRINGTRIE_NO_MATCH;
276
10
            } else {
277
                // Skip intermediate value.
278
1
                pos=skipNodeValue(pos, node);
279
1
                node&=kNodeTypeMask;
280
1
            }
281
931
        }
282
640
    }
283
568
}
284
285
const char16_t *
286
UCharsTrie::findUniqueValueFromBranch(const char16_t *pos, int32_t length,
287
0
                                      UBool haveUniqueValue, int32_t &uniqueValue) {
288
0
    while(length>kMaxBranchLinearSubNodeLength) {
289
0
        ++pos;  // ignore the comparison unit
290
0
        if(nullptr==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
291
0
            return nullptr;
292
0
        }
293
0
        length=length-(length>>1);
294
0
        pos=skipDelta(pos);
295
0
    }
296
0
    do {
297
0
        ++pos;  // ignore a comparison unit
298
        // handle its value
299
0
        int32_t node=*pos++;
300
0
        UBool isFinal = static_cast<UBool>(node >> 15);
301
0
        node&=0x7fff;
302
0
        int32_t value=readValue(pos, node);
303
0
        pos=skipValue(pos, node);
304
0
        if(isFinal) {
305
0
            if(haveUniqueValue) {
306
0
                if(value!=uniqueValue) {
307
0
                    return nullptr;
308
0
                }
309
0
            } else {
310
0
                uniqueValue=value;
311
0
                haveUniqueValue=true;
312
0
            }
313
0
        } else {
314
0
            if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
315
0
                return nullptr;
316
0
            }
317
0
            haveUniqueValue=true;
318
0
        }
319
0
    } while(--length>1);
320
0
    return pos+1;  // ignore the last comparison unit
321
0
}
322
323
UBool
324
0
UCharsTrie::findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
325
0
    int32_t node=*pos++;
326
0
    for(;;) {
327
0
        if(node<kMinLinearMatch) {
328
0
            if(node==0) {
329
0
                node=*pos++;
330
0
            }
331
0
            pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
332
0
            if(pos==nullptr) {
333
0
                return false;
334
0
            }
335
0
            haveUniqueValue=true;
336
0
            node=*pos++;
337
0
        } else if(node<kMinValueLead) {
338
            // linear-match node
339
0
            pos+=node-kMinLinearMatch+1;  // Ignore the match units.
340
0
            node=*pos++;
341
0
        } else {
342
0
            UBool isFinal = static_cast<UBool>(node >> 15);
343
0
            int32_t value;
344
0
            if(isFinal) {
345
0
                value=readValue(pos, node&0x7fff);
346
0
            } else {
347
0
                value=readNodeValue(pos, node);
348
0
            }
349
0
            if(haveUniqueValue) {
350
0
                if(value!=uniqueValue) {
351
0
                    return false;
352
0
                }
353
0
            } else {
354
0
                uniqueValue=value;
355
0
                haveUniqueValue=true;
356
0
            }
357
0
            if(isFinal) {
358
0
                return true;
359
0
            }
360
0
            pos=skipNodeValue(pos, node);
361
0
            node&=kNodeTypeMask;
362
0
        }
363
0
    }
364
0
}
365
366
int32_t
367
0
UCharsTrie::getNextUChars(Appendable &out) const {
368
0
    const char16_t *pos=pos_;
369
0
    if(pos==nullptr) {
370
0
        return 0;
371
0
    }
372
0
    if(remainingMatchLength_>=0) {
373
0
        out.appendCodeUnit(*pos);  // Next unit of a pending linear-match node.
374
0
        return 1;
375
0
    }
376
0
    int32_t node=*pos++;
377
0
    if(node>=kMinValueLead) {
378
0
        if(node&kValueIsFinal) {
379
0
            return 0;
380
0
        } else {
381
0
            pos=skipNodeValue(pos, node);
382
0
            node&=kNodeTypeMask;
383
0
        }
384
0
    }
385
0
    if(node<kMinLinearMatch) {
386
0
        if(node==0) {
387
0
            node=*pos++;
388
0
        }
389
0
        out.reserveAppendCapacity(++node);
390
0
        getNextBranchUChars(pos, node, out);
391
0
        return node;
392
0
    } else {
393
        // First unit of the linear-match node.
394
0
        out.appendCodeUnit(*pos);
395
0
        return 1;
396
0
    }
397
0
}
398
399
void
400
0
UCharsTrie::getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out) {
401
0
    while(length>kMaxBranchLinearSubNodeLength) {
402
0
        ++pos;  // ignore the comparison unit
403
0
        getNextBranchUChars(jumpByDelta(pos), length>>1, out);
404
0
        length=length-(length>>1);
405
0
        pos=skipDelta(pos);
406
0
    }
407
0
    do {
408
0
        out.appendCodeUnit(*pos++);
409
0
        pos=skipValue(pos);
410
0
    } while(--length>1);
411
0
    out.appendCodeUnit(*pos);
412
0
}
413
414
U_NAMESPACE_END