Coverage Report

Created: 2025-11-07 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/i18n/collationrootelements.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2013-2014, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* collationrootelements.cpp
9
*
10
* created on: 2013mar05
11
* created by: Markus W. Scherer
12
*/
13
14
#include "unicode/utypes.h"
15
16
#if !UCONFIG_NO_COLLATION
17
18
#include "collation.h"
19
#include "collationrootelements.h"
20
#include "uassert.h"
21
22
U_NAMESPACE_BEGIN
23
24
int64_t
25
0
CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const {
26
0
    if(p == 0) { return 0; }
27
0
    U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]);
28
0
    int32_t index = findP(p);
29
0
    uint32_t q = elements[index];
30
0
    uint32_t secTer;
31
0
    if(p == (q & 0xffffff00)) {
32
        // p == elements[index] is a root primary. Find the CE before it.
33
        // We must not be in a primary range.
34
0
        U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
35
0
        secTer = elements[index - 1];
36
0
        if((secTer & SEC_TER_DELTA_FLAG) == 0) {
37
            // Primary CE just before p.
38
0
            p = secTer & 0xffffff00;
39
0
            secTer = Collation::COMMON_SEC_AND_TER_CE;
40
0
        } else {
41
            // secTer = last secondary & tertiary for the previous primary
42
0
            index -= 2;
43
0
            for(;;) {
44
0
                p = elements[index];
45
0
                if((p & SEC_TER_DELTA_FLAG) == 0) {
46
0
                    p &= 0xffffff00;
47
0
                    break;
48
0
                }
49
0
                --index;
50
0
            }
51
0
        }
52
0
    } else {
53
        // p > elements[index] which is the previous primary.
54
        // Find the last secondary & tertiary weights for it.
55
0
        p = q & 0xffffff00;
56
0
        secTer = Collation::COMMON_SEC_AND_TER_CE;
57
0
        for(;;) {
58
0
            q = elements[++index];
59
0
            if((q & SEC_TER_DELTA_FLAG) == 0) {
60
                // We must not be in a primary range.
61
0
                U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
62
0
                break;
63
0
            }
64
0
            secTer = q;
65
0
        }
66
0
    }
67
0
    return (static_cast<int64_t>(p) << 32) | (secTer & ~SEC_TER_DELTA_FLAG);
68
0
}
69
70
int64_t
71
6
CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const {
72
6
    if(p == 0) { return 0; }
73
6
    int32_t index = findP(p);
74
6
    if(p != (elements[index] & 0xffffff00)) {
75
6
        for(;;) {
76
6
            p = elements[++index];
77
6
            if((p & SEC_TER_DELTA_FLAG) == 0) {
78
                // First primary after p. We must not be in a primary range.
79
6
                U_ASSERT((p & PRIMARY_STEP_MASK) == 0);
80
6
                break;
81
6
            }
82
6
        }
83
6
    }
84
    // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0.
85
6
    return (static_cast<int64_t>(p) << 32) | Collation::COMMON_SEC_AND_TER_CE;
86
6
}
87
88
uint32_t
89
4.92k
CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const {
90
4.92k
    int32_t index = findPrimary(p);
91
4.92k
    int32_t step;
92
4.92k
    uint32_t q = elements[index];
93
4.92k
    if(p == (q & 0xffffff00)) {
94
        // Found p itself. Return the previous primary.
95
        // See if p is at the end of a previous range.
96
2.54k
        step = static_cast<int32_t>(q) & PRIMARY_STEP_MASK;
97
2.54k
        if(step == 0) {
98
            // p is not at the end of a range. Look for the previous primary.
99
4.19k
            do {
100
4.19k
                p = elements[--index];
101
4.19k
            } while((p & SEC_TER_DELTA_FLAG) != 0);
102
2.09k
            return p & 0xffffff00;
103
2.09k
        }
104
2.54k
    } else {
105
        // p is in a range, and not at the start.
106
2.38k
        uint32_t nextElement = elements[index + 1];
107
2.38k
        U_ASSERT(isEndOfPrimaryRange(nextElement));
108
2.38k
        step = static_cast<int32_t>(nextElement) & PRIMARY_STEP_MASK;
109
2.38k
    }
110
    // Return the previous range primary.
111
2.83k
    if((p & 0xffff) == 0) {
112
2.47k
        return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step);
113
2.47k
    } else {
114
359
        return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step);
115
359
    }
116
2.83k
}
117
118
uint32_t
119
800
CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const {
120
800
    int32_t index;
121
800
    uint32_t previousSec, sec;
122
800
    if(p == 0) {
123
0
        index = static_cast<int32_t>(elements[IX_FIRST_SECONDARY_INDEX]);
124
        // Gap at the beginning of the secondary CE range.
125
0
        previousSec = 0;
126
0
        sec = elements[index] >> 16;
127
800
    } else {
128
800
        index = findPrimary(p) + 1;
129
800
        previousSec = Collation::BEFORE_WEIGHT16;
130
800
        sec = getFirstSecTerForPrimary(index) >> 16;
131
800
    }
132
800
    U_ASSERT(s >= sec);
133
800
    while(s > sec) {
134
0
        previousSec = sec;
135
0
        U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
136
0
        sec = elements[index++] >> 16;
137
0
    }
138
800
    U_ASSERT(sec == s);
139
800
    return previousSec;
140
800
}
141
142
uint32_t
143
330
CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const {
144
330
    U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0);
145
330
    int32_t index;
146
330
    uint32_t previousTer, secTer;
147
330
    if(p == 0) {
148
12
        if(s == 0) {
149
0
            index = static_cast<int32_t>(elements[IX_FIRST_TERTIARY_INDEX]);
150
            // Gap at the beginning of the tertiary CE range.
151
0
            previousTer = 0;
152
12
        } else {
153
12
            index = static_cast<int32_t>(elements[IX_FIRST_SECONDARY_INDEX]);
154
12
            previousTer = Collation::BEFORE_WEIGHT16;
155
12
        }
156
12
        secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
157
318
    } else {
158
318
        index = findPrimary(p) + 1;
159
318
        previousTer = Collation::BEFORE_WEIGHT16;
160
318
        secTer = getFirstSecTerForPrimary(index);
161
318
    }
162
330
    uint32_t st = (s << 16) | t;
163
930
    while(st > secTer) {
164
600
        if((secTer >> 16) == s) { previousTer = secTer; }
165
600
        U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0);
166
600
        secTer = elements[index++] & ~SEC_TER_DELTA_FLAG;
167
600
    }
168
330
    U_ASSERT(secTer == st);
169
330
    return previousTer & 0xffff;
170
330
}
171
172
uint32_t
173
7.38k
CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const {
174
7.38k
    U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1]));
175
7.38k
    uint32_t q = elements[++index];
176
7.38k
    int32_t step;
177
7.38k
    if ((q & SEC_TER_DELTA_FLAG) == 0 && (step = static_cast<int32_t>(q) & PRIMARY_STEP_MASK) != 0) {
178
        // Return the next primary in this range.
179
3.80k
        if((p & 0xffff) == 0) {
180
2.22k
            return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step);
181
2.22k
        } else {
182
1.58k
            return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step);
183
1.58k
        }
184
3.80k
    } else {
185
        // Return the next primary in the list.
186
20.3k
        while((q & SEC_TER_DELTA_FLAG) != 0) {
187
16.8k
            q = elements[++index];
188
16.8k
        }
189
3.57k
        U_ASSERT((q & PRIMARY_STEP_MASK) == 0);
190
3.57k
        return q;
191
3.57k
    }
192
7.38k
}
193
194
uint32_t
195
3.97k
CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const {
196
3.97k
    uint32_t secTer;
197
3.97k
    uint32_t secLimit;
198
3.97k
    if(index == 0) {
199
        // primary = 0
200
947
        U_ASSERT(s != 0);
201
947
        index = static_cast<int32_t>(elements[IX_FIRST_SECONDARY_INDEX]);
202
947
        secTer = elements[index];
203
        // Gap at the end of the secondary CE range.
204
947
        secLimit = 0x10000;
205
3.02k
    } else {
206
3.02k
        U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
207
3.02k
        secTer = getFirstSecTerForPrimary(index + 1);
208
        // If this is an explicit sec/ter unit, then it will be read once more.
209
        // Gap for secondaries of primary CEs.
210
3.02k
        secLimit = getSecondaryBoundary();
211
3.02k
    }
212
133k
    for(;;) {
213
133k
        uint32_t sec = secTer >> 16;
214
133k
        if(sec > s) { return sec; }
215
131k
        secTer = elements[++index];
216
131k
        if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; }
217
131k
    }
218
3.97k
}
219
220
uint32_t
221
2.02k
CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const {
222
2.02k
    uint32_t secTer;
223
2.02k
    uint32_t terLimit;
224
2.02k
    if(index == 0) {
225
        // primary = 0
226
335
        if(s == 0) {
227
68
            U_ASSERT(t != 0);
228
68
            index = static_cast<int32_t>(elements[IX_FIRST_TERTIARY_INDEX]);
229
            // Gap at the end of the tertiary CE range.
230
68
            terLimit = 0x4000;
231
267
        } else {
232
267
            index = static_cast<int32_t>(elements[IX_FIRST_SECONDARY_INDEX]);
233
            // Gap for tertiaries of primary/secondary CEs.
234
267
            terLimit = getTertiaryBoundary();
235
267
        }
236
335
        secTer = elements[index] & ~SEC_TER_DELTA_FLAG;
237
1.69k
    } else {
238
1.69k
        U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]);
239
1.69k
        secTer = getFirstSecTerForPrimary(index + 1);
240
        // If this is an explicit sec/ter unit, then it will be read once more.
241
1.69k
        terLimit = getTertiaryBoundary();
242
1.69k
    }
243
2.02k
    uint32_t st = (s << 16) | t;
244
18.4k
    for(;;) {
245
18.4k
        if(secTer > st) {
246
961
            U_ASSERT((secTer >> 16) == s);
247
961
            return secTer & 0xffff;
248
961
        }
249
17.5k
        secTer = elements[++index];
250
        // No tertiary greater than t for this primary+secondary.
251
17.5k
        if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; }
252
16.4k
        secTer &= ~SEC_TER_DELTA_FLAG;
253
16.4k
    }
254
2.02k
}
255
256
uint32_t
257
5.83k
CollationRootElements::getFirstSecTerForPrimary(int32_t index) const {
258
5.83k
    uint32_t secTer = elements[index];
259
5.83k
    if((secTer & SEC_TER_DELTA_FLAG) == 0) {
260
        // No sec/ter delta.
261
2.80k
        return Collation::COMMON_SEC_AND_TER_CE;
262
2.80k
    }
263
3.03k
    secTer &= ~SEC_TER_DELTA_FLAG;
264
3.03k
    if(secTer > Collation::COMMON_SEC_AND_TER_CE) {
265
        // Implied sec/ter.
266
2.70k
        return Collation::COMMON_SEC_AND_TER_CE;
267
2.70k
    }
268
    // Explicit sec/ter below common/common.
269
326
    return secTer;
270
3.03k
}
271
272
int32_t
273
19.2k
CollationRootElements::findPrimary(uint32_t p) const {
274
    // Requirement: p must occur as a root primary.
275
19.2k
    U_ASSERT((p & 0xff) == 0);  // at most a 3-byte primary
276
19.2k
    int32_t index = findP(p);
277
    // If p is in a range, then we just assume that p is an actual primary in this range.
278
    // (Too cumbersome/expensive to check.)
279
    // Otherwise, it must be an exact match.
280
19.2k
    U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00));
281
19.2k
    return index;
282
19.2k
}
283
284
int32_t
285
19.2k
CollationRootElements::findP(uint32_t p) const {
286
    // p need not occur as a root primary.
287
    // For example, it might be a reordering group boundary.
288
19.2k
    U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE);
289
    // modified binary search
290
19.2k
    int32_t start = static_cast<int32_t>(elements[IX_FIRST_PRIMARY_INDEX]);
291
19.2k
    U_ASSERT(p >= elements[start]);
292
19.2k
    int32_t limit = length - 1;
293
19.2k
    U_ASSERT(elements[limit] >= PRIMARY_SENTINEL);
294
19.2k
    U_ASSERT(p < elements[limit]);
295
243k
    while((start + 1) < limit) {
296
        // Invariant: elements[start] and elements[limit] are primaries,
297
        // and elements[start]<=p<=elements[limit].
298
233k
        int32_t i = (start + limit) / 2;
299
233k
        uint32_t q = elements[i];
300
233k
        if((q & SEC_TER_DELTA_FLAG) != 0) {
301
            // Find the next primary.
302
94.0k
            int32_t j = i + 1;
303
326k
            for(;;) {
304
326k
                if(j == limit) { break; }
305
313k
                q = elements[j];
306
313k
                if((q & SEC_TER_DELTA_FLAG) == 0) {
307
80.4k
                    i = j;
308
80.4k
                    break;
309
80.4k
                }
310
232k
                ++j;
311
232k
            }
312
94.0k
            if((q & SEC_TER_DELTA_FLAG) != 0) {
313
                // Find the preceding primary.
314
13.6k
                j = i - 1;
315
47.2k
                for(;;) {
316
47.2k
                    if(j == start) { break; }
317
38.3k
                    q = elements[j];
318
38.3k
                    if((q & SEC_TER_DELTA_FLAG) == 0) {
319
4.71k
                        i = j;
320
4.71k
                        break;
321
4.71k
                    }
322
33.6k
                    --j;
323
33.6k
                }
324
13.6k
                if((q & SEC_TER_DELTA_FLAG) != 0) {
325
                    // No primary between start and limit.
326
8.91k
                    break;
327
8.91k
                }
328
13.6k
            }
329
94.0k
        }
330
224k
        if(p < (q & 0xffffff00)) {  // Reset the "step" bits of a range end primary.
331
109k
            limit = i;
332
115k
        } else {
333
115k
            start = i;
334
115k
        }
335
224k
    }
336
19.2k
    return start;
337
19.2k
}
338
339
U_NAMESPACE_END
340
341
#endif  // !UCONFIG_NO_COLLATION