Coverage Report

Created: 2025-12-07 06:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/ucharstrie.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*   Copyright (C) 2010-2011, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
*   file name:  ucharstrie.h
9
*   encoding:   UTF-8
10
*   tab size:   8 (not used)
11
*   indentation:4
12
*
13
*   created on: 2010nov14
14
*   created by: Markus W. Scherer
15
*/
16
17
#include "unicode/utypes.h"
18
#include "unicode/appendable.h"
19
#include "unicode/ucharstrie.h"
20
#include "unicode/uobject.h"
21
#include "unicode/utf16.h"
22
#include "cmemory.h"
23
#include "uassert.h"
24
25
U_NAMESPACE_BEGIN
26
27
281M
UCharsTrie::~UCharsTrie() {
28
281M
    uprv_free(ownedArray_);
29
281M
}
30
31
UStringTrieResult
32
4.36k
UCharsTrie::current() const {
33
4.36k
    const char16_t *pos=pos_;
34
4.36k
    if(pos==nullptr) {
35
214
        return USTRINGTRIE_NO_MATCH;
36
4.15k
    } else {
37
4.15k
        int32_t node;
38
4.15k
        return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
39
4.12k
                valueResult(node) : USTRINGTRIE_NO_VALUE;
40
4.15k
    }
41
4.36k
}
42
43
UStringTrieResult
44
2.38M
UCharsTrie::firstForCodePoint(UChar32 cp) {
45
2.38M
    return cp<=0xffff ?
46
2.25M
        first(cp) :
47
2.38M
        (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ?
48
124k
            next(U16_TRAIL(cp)) :
49
133k
            USTRINGTRIE_NO_MATCH);
50
2.38M
}
51
52
UStringTrieResult
53
30.0M
UCharsTrie::nextForCodePoint(UChar32 cp) {
54
30.0M
    return cp<=0xffff ?
55
29.8M
        next(cp) :
56
30.0M
        (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ?
57
147k
            next(U16_TRAIL(cp)) :
58
199k
            USTRINGTRIE_NO_MATCH);
59
30.0M
}
60
61
UStringTrieResult
62
570M
UCharsTrie::branchNext(const char16_t *pos, int32_t length, int32_t uchar) {
63
    // Branch according to the current unit.
64
570M
    if(length==0) {
65
329M
        length=*pos++;
66
329M
    }
67
570M
    ++length;
68
    // The length of the branch is the number of units to select from.
69
    // The data structure encodes a binary search.
70
4.56G
    while(length>kMaxBranchLinearSubNodeLength) {
71
3.99G
        if(uchar<*pos++) {
72
2.50G
            length>>=1;
73
2.50G
            pos=jumpByDelta(pos);
74
2.50G
        } else {
75
1.48G
            length=length-(length>>1);
76
1.48G
            pos=skipDelta(pos);
77
1.48G
        }
78
3.99G
    }
79
    // Drop down to linear search for the last few units.
80
    // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
81
    // and divides length by 2.
82
1.29G
    do {
83
1.29G
        if(uchar==*pos++) {
84
320M
            UStringTrieResult result;
85
320M
            int32_t node=*pos;
86
320M
            if(node&kValueIsFinal) {
87
                // Leave the final value for getValue() to read.
88
34.0M
                result=USTRINGTRIE_FINAL_VALUE;
89
286M
            } else {
90
                // Use the non-final value as the jump delta.
91
286M
                ++pos;
92
                // int32_t delta=readValue(pos, node);
93
286M
                int32_t delta;
94
286M
                if(node<kMinTwoUnitValueLead) {
95
286M
                    delta=node;
96
286M
                } else if(node<kThreeUnitValueLead) {
97
15.8k
                    delta=((node-kMinTwoUnitValueLead)<<16)|*pos++;
98
15.8k
                } else {
99
0
                    delta=(pos[0]<<16)|pos[1];
100
0
                    pos+=2;
101
0
                }
102
                // end readValue()
103
286M
                pos+=delta;
104
286M
                node=*pos;
105
286M
                result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
106
286M
            }
107
320M
            pos_=pos;
108
320M
            return result;
109
320M
        }
110
978M
        --length;
111
978M
        pos=skipValue(pos);
112
978M
    } while(length>1);
113
249M
    if(uchar==*pos++) {
114
91.5M
        pos_=pos;
115
91.5M
        int32_t node=*pos;
116
91.5M
        return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
117
158M
    } else {
118
158M
        stop();
119
158M
        return USTRINGTRIE_NO_MATCH;
120
158M
    }
121
249M
}
122
123
UStringTrieResult
124
648M
UCharsTrie::nextImpl(const char16_t *pos, int32_t uchar) {
125
648M
    int32_t node=*pos++;
126
780M
    for(;;) {
127
780M
        if(node<kMinLinearMatch) {
128
570M
            return branchNext(pos, node, uchar);
129
570M
        } else if(node<kMinValueLead) {
130
            // Match the first of length+1 units.
131
77.8M
            int32_t length=node-kMinLinearMatch;  // Actual match length minus 1.
132
77.8M
            if(uchar==*pos++) {
133
39.8M
                remainingMatchLength_=--length;
134
39.8M
                pos_=pos;
135
39.8M
                return (length<0 && (node=*pos)>=kMinValueLead) ?
136
22.8M
                        valueResult(node) : USTRINGTRIE_NO_VALUE;
137
39.8M
            } else {
138
                // No match.
139
37.9M
                break;
140
37.9M
            }
141
132M
        } else if(node&kValueIsFinal) {
142
            // No further matching units.
143
2
            break;
144
132M
        } else {
145
            // Skip intermediate value.
146
132M
            pos=skipNodeValue(pos, node);
147
132M
            node&=kNodeTypeMask;
148
132M
        }
149
780M
    }
150
37.9M
    stop();
151
37.9M
    return USTRINGTRIE_NO_MATCH;
152
648M
}
153
154
UStringTrieResult
155
429M
UCharsTrie::next(int32_t uchar) {
156
429M
    const char16_t *pos=pos_;
157
429M
    if(pos==nullptr) {
158
11.6M
        return USTRINGTRIE_NO_MATCH;
159
11.6M
    }
160
418M
    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
161
418M
    if(length>=0) {
162
        // Remaining part of a linear-match node.
163
50.3M
        if(uchar==*pos++) {
164
49.9M
            remainingMatchLength_=--length;
165
49.9M
            pos_=pos;
166
49.9M
            int32_t node;
167
49.9M
            return (length<0 && (node=*pos)>=kMinValueLead) ?
168
29.5M
                    valueResult(node) : USTRINGTRIE_NO_VALUE;
169
49.9M
        } else {
170
411k
            stop();
171
411k
            return USTRINGTRIE_NO_MATCH;
172
411k
        }
173
50.3M
    }
174
368M
    return nextImpl(pos, uchar);
175
418M
}
176
177
UStringTrieResult
178
459
UCharsTrie::next(ConstChar16Ptr ptr, int32_t sLength) {
179
459
    const char16_t *s=ptr;
180
459
    if(sLength<0 ? *s==0 : sLength==0) {
181
        // Empty input.
182
0
        return current();
183
0
    }
184
459
    const char16_t *pos=pos_;
185
459
    if(pos==nullptr) {
186
0
        return USTRINGTRIE_NO_MATCH;
187
0
    }
188
459
    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
189
544
    for(;;) {
190
        // Fetch the next input unit, if there is one.
191
        // Continue a linear-match node without rechecking sLength<0.
192
544
        int32_t uchar;
193
544
        if(sLength<0) {
194
0
            for(;;) {
195
0
                if((uchar=*s++)==0) {
196
0
                    remainingMatchLength_=length;
197
0
                    pos_=pos;
198
0
                    int32_t node;
199
0
                    return (length<0 && (node=*pos)>=kMinValueLead) ?
200
0
                            valueResult(node) : USTRINGTRIE_NO_VALUE;
201
0
                }
202
0
                if(length<0) {
203
0
                    remainingMatchLength_=length;
204
0
                    break;
205
0
                }
206
0
                if(uchar!=*pos) {
207
0
                    stop();
208
0
                    return USTRINGTRIE_NO_MATCH;
209
0
                }
210
0
                ++pos;
211
0
                --length;
212
0
            }
213
544
        } else {
214
565
            for(;;) {
215
565
                if(sLength==0) {
216
11
                    remainingMatchLength_=length;
217
11
                    pos_=pos;
218
11
                    int32_t node;
219
11
                    return (length<0 && (node=*pos)>=kMinValueLead) ?
220
9
                            valueResult(node) : USTRINGTRIE_NO_VALUE;
221
11
                }
222
554
                uchar=*s++;
223
554
                --sLength;
224
554
                if(length<0) {
225
508
                    remainingMatchLength_=length;
226
508
                    break;
227
508
                }
228
46
                if(uchar!=*pos) {
229
25
                    stop();
230
25
                    return USTRINGTRIE_NO_MATCH;
231
25
                }
232
21
                ++pos;
233
21
                --length;
234
21
            }
235
544
        }
236
508
        int32_t node=*pos++;
237
731
        for(;;) {
238
731
            if(node<kMinLinearMatch) {
239
500
                UStringTrieResult result=branchNext(pos, node, uchar);
240
500
                if(result==USTRINGTRIE_NO_MATCH) {
241
262
                    return USTRINGTRIE_NO_MATCH;
242
262
                }
243
                // Fetch the next input unit, if there is one.
244
238
                if(sLength<0) {
245
0
                    if((uchar=*s++)==0) {
246
0
                        return result;
247
0
                    }
248
238
                } else {
249
238
                    if(sLength==0) {
250
9
                        return result;
251
9
                    }
252
229
                    uchar=*s++;
253
229
                    --sLength;
254
229
                }
255
229
                if(result==USTRINGTRIE_FINAL_VALUE) {
256
                    // No further matching units.
257
7
                    stop();
258
7
                    return USTRINGTRIE_NO_MATCH;
259
7
                }
260
222
                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
261
222
                node=*pos++;
262
231
            } else if(node<kMinValueLead) {
263
                // Match length+1 units.
264
226
                length=node-kMinLinearMatch;  // Actual match length minus 1.
265
226
                if(uchar!=*pos) {
266
141
                    stop();
267
141
                    return USTRINGTRIE_NO_MATCH;
268
141
                }
269
85
                ++pos;
270
85
                --length;
271
85
                break;
272
226
            } else if(node&kValueIsFinal) {
273
                // No further matching units.
274
4
                stop();
275
4
                return USTRINGTRIE_NO_MATCH;
276
4
            } else {
277
                // Skip intermediate value.
278
1
                pos=skipNodeValue(pos, node);
279
1
                node&=kNodeTypeMask;
280
1
            }
281
731
        }
282
508
    }
283
459
}
284
285
const char16_t *
286
UCharsTrie::findUniqueValueFromBranch(const char16_t *pos, int32_t length,
287
0
                                      UBool haveUniqueValue, int32_t &uniqueValue) {
288
0
    while(length>kMaxBranchLinearSubNodeLength) {
289
0
        ++pos;  // ignore the comparison unit
290
0
        if(nullptr==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
291
0
            return nullptr;
292
0
        }
293
0
        length=length-(length>>1);
294
0
        pos=skipDelta(pos);
295
0
    }
296
0
    do {
297
0
        ++pos;  // ignore a comparison unit
298
        // handle its value
299
0
        int32_t node=*pos++;
300
0
        UBool isFinal = static_cast<UBool>(node >> 15);
301
0
        node&=0x7fff;
302
0
        int32_t value=readValue(pos, node);
303
0
        pos=skipValue(pos, node);
304
0
        if(isFinal) {
305
0
            if(haveUniqueValue) {
306
0
                if(value!=uniqueValue) {
307
0
                    return nullptr;
308
0
                }
309
0
            } else {
310
0
                uniqueValue=value;
311
0
                haveUniqueValue=true;
312
0
            }
313
0
        } else {
314
0
            if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
315
0
                return nullptr;
316
0
            }
317
0
            haveUniqueValue=true;
318
0
        }
319
0
    } while(--length>1);
320
0
    return pos+1;  // ignore the last comparison unit
321
0
}
322
323
UBool
324
0
UCharsTrie::findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
325
0
    int32_t node=*pos++;
326
0
    for(;;) {
327
0
        if(node<kMinLinearMatch) {
328
0
            if(node==0) {
329
0
                node=*pos++;
330
0
            }
331
0
            pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
332
0
            if(pos==nullptr) {
333
0
                return false;
334
0
            }
335
0
            haveUniqueValue=true;
336
0
            node=*pos++;
337
0
        } else if(node<kMinValueLead) {
338
            // linear-match node
339
0
            pos+=node-kMinLinearMatch+1;  // Ignore the match units.
340
0
            node=*pos++;
341
0
        } else {
342
0
            UBool isFinal = static_cast<UBool>(node >> 15);
343
0
            int32_t value;
344
0
            if(isFinal) {
345
0
                value=readValue(pos, node&0x7fff);
346
0
            } else {
347
0
                value=readNodeValue(pos, node);
348
0
            }
349
0
            if(haveUniqueValue) {
350
0
                if(value!=uniqueValue) {
351
0
                    return false;
352
0
                }
353
0
            } else {
354
0
                uniqueValue=value;
355
0
                haveUniqueValue=true;
356
0
            }
357
0
            if(isFinal) {
358
0
                return true;
359
0
            }
360
0
            pos=skipNodeValue(pos, node);
361
0
            node&=kNodeTypeMask;
362
0
        }
363
0
    }
364
0
}
365
366
int32_t
367
0
UCharsTrie::getNextUChars(Appendable &out) const {
368
0
    const char16_t *pos=pos_;
369
0
    if(pos==nullptr) {
370
0
        return 0;
371
0
    }
372
0
    if(remainingMatchLength_>=0) {
373
0
        out.appendCodeUnit(*pos);  // Next unit of a pending linear-match node.
374
0
        return 1;
375
0
    }
376
0
    int32_t node=*pos++;
377
0
    if(node>=kMinValueLead) {
378
0
        if(node&kValueIsFinal) {
379
0
            return 0;
380
0
        } else {
381
0
            pos=skipNodeValue(pos, node);
382
0
            node&=kNodeTypeMask;
383
0
        }
384
0
    }
385
0
    if(node<kMinLinearMatch) {
386
0
        if(node==0) {
387
0
            node=*pos++;
388
0
        }
389
0
        out.reserveAppendCapacity(++node);
390
0
        getNextBranchUChars(pos, node, out);
391
0
        return node;
392
0
    } else {
393
        // First unit of the linear-match node.
394
0
        out.appendCodeUnit(*pos);
395
0
        return 1;
396
0
    }
397
0
}
398
399
void
400
0
UCharsTrie::getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out) {
401
0
    while(length>kMaxBranchLinearSubNodeLength) {
402
0
        ++pos;  // ignore the comparison unit
403
0
        getNextBranchUChars(jumpByDelta(pos), length>>1, out);
404
0
        length=length-(length>>1);
405
0
        pos=skipDelta(pos);
406
0
    }
407
0
    do {
408
0
        out.appendCodeUnit(*pos++);
409
0
        pos=skipValue(pos);
410
0
    } while(--length>1);
411
0
    out.appendCodeUnit(*pos);
412
0
}
413
414
U_NAMESPACE_END