Coverage Report

Created: 2026-02-05 06:34

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/i18n/collationcompare.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 1996-2015, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* collationcompare.cpp
9
*
10
* created on: 2012feb14 with new and old collation code
11
* created by: Markus W. Scherer
12
*/
13
14
#include "unicode/utypes.h"
15
16
#if !UCONFIG_NO_COLLATION
17
18
#include "unicode/ucol.h"
19
#include "cmemory.h"
20
#include "collation.h"
21
#include "collationcompare.h"
22
#include "collationiterator.h"
23
#include "collationsettings.h"
24
#include "uassert.h"
25
26
U_NAMESPACE_BEGIN
27
28
// Compares the collation elements (CEs) produced by two collation iterators,
// one level at a time, and returns at the first level that shows a difference.
//
// Pass 1 pulls CEs with nextCE() and compares primary weights. When the
// ALTERNATE_MASK option bits are set, a CE whose primary is below
// settings.variableTop + 1 and above MERGE_SEPARATOR_PRIMARY is stored back with
// only its primary weight, and the primary ignorables that follow it are stored
// back as 0. Later passes re-read the same CEs by index with getCE() and compare
// secondary (forward or backward), case, tertiary and quaternary weights,
// depending on the strength and option bits in settings.options.
//
// left, right: iterators supplying the CEs of the two strings being compared.
// settings:    supplies options, variableTop and primary-weight reordering.
// errorCode:   in/out; UCOL_EQUAL is returned if it is a failure on entry or
//              after the primary-level loop.
// Returns UCOL_LESS, UCOL_EQUAL or UCOL_GREATER.
UCollationResult
29
CollationCompare::compareUpToQuaternary(CollationIterator &left, CollationIterator &right,
30
                                        const CollationSettings &settings,
31
6.92k
                                        UErrorCode &errorCode) {
32
6.92k
    if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
33
34
6.92k
    int32_t options = settings.options;
35
6.92k
    uint32_t variableTop;
36
6.92k
    if((options & CollationSettings::ALTERNATE_MASK) == 0) {
37
6.58k
        variableTop = 0;
38
6.58k
    } else {
39
        // +1 so that we can use "<" and primary ignorables test out early.
40
334
        variableTop = settings.variableTop + 1;
41
334
    }
42
6.92k
    // Set as soon as either side yields a variable CE; checked again before the
    // quaternary pass to decide whether that pass can be skipped.
    UBool anyVariable = false;
43
44
    // Fetch CEs, compare primaries, store secondary & tertiary weights.
45
408k
    for(;;) {
46
        // We fetch CEs until we get a non-ignorable primary or reach the end.
47
408k
        uint32_t leftPrimary;
48
447k
        do {
49
447k
            int64_t ce = left.nextCE(errorCode);
50
447k
            leftPrimary = static_cast<uint32_t>(ce >> 32);
51
447k
            if(leftPrimary < variableTop && leftPrimary > Collation::MERGE_SEPARATOR_PRIMARY) {
52
                // Variable CE, shift it to quaternary level.
53
                // Ignore all following primary ignorables, and shift further variable CEs.
54
745
                anyVariable = true;
55
1.90k
                do {
56
                    // Store only the primary of the variable CE.
57
1.90k
                    left.setCurrentCE(ce & INT64_C(0xffffffff00000000));
58
2.80k
                    for(;;) {
59
2.80k
                        ce = left.nextCE(errorCode);
60
2.80k
                        leftPrimary = static_cast<uint32_t>(ce >> 32);
61
2.80k
                        if(leftPrimary == 0) {
62
903
                            left.setCurrentCE(0);
63
1.90k
                        } else {
64
1.90k
                            break;
65
1.90k
                        }
66
2.80k
                    }
67
1.90k
                } while(leftPrimary < variableTop &&
68
1.27k
                        leftPrimary > Collation::MERGE_SEPARATOR_PRIMARY);
69
745
            }
70
447k
        } while(leftPrimary == 0);
71
72
408k
        uint32_t rightPrimary;
73
448k
        do {
74
448k
            int64_t ce = right.nextCE(errorCode);
75
448k
            rightPrimary = static_cast<uint32_t>(ce >> 32);
76
448k
            if(rightPrimary < variableTop && rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY) {
77
                // Variable CE, shift it to quaternary level.
78
                // Ignore all following primary ignorables, and shift further variable CEs.
79
741
                anyVariable = true;
80
2.03k
                do {
81
                    // Store only the primary of the variable CE.
82
2.03k
                    right.setCurrentCE(ce & INT64_C(0xffffffff00000000));
83
3.01k
                    for(;;) {
84
3.01k
                        ce = right.nextCE(errorCode);
85
3.01k
                        rightPrimary = static_cast<uint32_t>(ce >> 32);
86
3.01k
                        if(rightPrimary == 0) {
87
980
                            right.setCurrentCE(0);
88
2.03k
                        } else {
89
2.03k
                            break;
90
2.03k
                        }
91
3.01k
                    }
92
2.03k
                } while(rightPrimary < variableTop &&
93
1.40k
                        rightPrimary > Collation::MERGE_SEPARATOR_PRIMARY);
94
741
            }
95
448k
        } while(rightPrimary == 0);
96
97
408k
        if(leftPrimary != rightPrimary) {
98
            // Return the primary difference, with script reordering.
99
5.77k
            if(settings.hasReordering()) {
100
2.57k
                leftPrimary = settings.reorder(leftPrimary);
101
2.57k
                rightPrimary = settings.reorder(rightPrimary);
102
2.57k
            }
103
5.77k
            return (leftPrimary < rightPrimary) ? UCOL_LESS : UCOL_GREATER;
104
5.77k
        }
105
402k
        if(leftPrimary == Collation::NO_CE_PRIMARY) { break; }
106
402k
    }
107
1.14k
    // errorCode is not inspected inside the loop above; a failure reported by
    // any nextCE() call is turned into UCOL_EQUAL here.
    if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
108
109
    // Compare the buffered secondary & tertiary weights.
110
    // We might skip the secondary level but continue with the case level
111
    // which is turned on separately.
112
1.14k
    if(CollationSettings::getStrength(options) >= UCOL_SECONDARY) {
113
1.10k
        if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
114
1.02k
            int32_t leftIndex = 0;
115
1.02k
            int32_t rightIndex = 0;
116
35.1k
            for(;;) {
117
35.1k
                uint32_t leftSecondary;
118
53.5k
                do {
119
53.5k
                    leftSecondary = static_cast<uint32_t>(left.getCE(leftIndex++)) >> 16;
120
53.5k
                } while(leftSecondary == 0);
121
122
35.1k
                uint32_t rightSecondary;
123
53.5k
                do {
124
53.5k
                    rightSecondary = static_cast<uint32_t>(right.getCE(rightIndex++)) >> 16;
125
53.5k
                } while(rightSecondary == 0);
126
127
35.1k
                if(leftSecondary != rightSecondary) {
128
93
                    return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER;
129
93
                }
130
35.0k
                if(leftSecondary == Collation::NO_CE_WEIGHT16) { break; }
131
35.0k
            }
132
1.02k
        } else {
133
            // The backwards secondary level compares secondary weights backwards
134
            // within segments separated by the merge separator (U+FFFE, weight 02).
135
82
            int32_t leftStart = 0;
136
82
            int32_t rightStart = 0;
137
441
            for(;;) {
138
                // Find the merge separator or the NO_CE terminator.
139
441
                uint32_t p;
140
441
                int32_t leftLimit = leftStart;
141
9.40k
                while ((p = static_cast<uint32_t>(left.getCE(leftLimit) >> 32)) >
142
9.40k
                            Collation::MERGE_SEPARATOR_PRIMARY ||
143
8.96k
                        p == 0) {
144
8.96k
                    ++leftLimit;
145
8.96k
                }
146
441
                int32_t rightLimit = rightStart;
147
9.40k
                while ((p = static_cast<uint32_t>(right.getCE(rightLimit) >> 32)) >
148
9.40k
                            Collation::MERGE_SEPARATOR_PRIMARY ||
149
8.96k
                        p == 0) {
150
8.96k
                    ++rightLimit;
151
8.96k
                }
152
153
                // Compare the segments.
154
441
                int32_t leftIndex = leftLimit;
155
441
                int32_t rightIndex = rightLimit;
156
1.19k
                for(;;) {
157
1.19k
                    int32_t leftSecondary = 0;
158
2.24k
                    while(leftSecondary == 0 && leftIndex > leftStart) {
159
1.05k
                        leftSecondary = static_cast<uint32_t>(left.getCE(--leftIndex)) >> 16;
160
1.05k
                    }
161
162
1.19k
                    int32_t rightSecondary = 0;
163
2.22k
                    while(rightSecondary == 0 && rightIndex > rightStart) {
164
1.03k
                        rightSecondary = static_cast<uint32_t>(right.getCE(--rightIndex)) >> 16;
165
1.03k
                    }
166
167
1.19k
                    if(leftSecondary != rightSecondary) {
168
43
                        return (leftSecondary < rightSecondary) ? UCOL_LESS : UCOL_GREATER;
169
43
                    }
170
1.15k
                    if(leftSecondary == 0) { break; }
171
1.15k
                }
172
173
                // Did we reach the end of either string?
174
                // Both strings have the same number of merge separators,
175
                // or else there would have been a primary-level difference.
176
398
                U_ASSERT(left.getCE(leftLimit) == right.getCE(rightLimit));
177
398
                if(p == Collation::NO_CE_PRIMARY) { break; }
178
                // Skip both merge separators and continue.
179
359
                leftStart = leftLimit + 1;
180
359
                rightStart = rightLimit + 1;
181
359
            }
182
82
        }
183
1.10k
    }
184
185
1.00k
    // Case level: compares the bits selected by mask 0xc000 in the lower 32 bits
    // of each CE, skipping CEs according to the strength as described below.
    if((options & CollationSettings::CASE_LEVEL) != 0) {
186
0
        int32_t strength = CollationSettings::getStrength(options);
187
0
        int32_t leftIndex = 0;
188
0
        int32_t rightIndex = 0;
189
0
        for(;;) {
190
0
            uint32_t leftCase, leftLower32, rightCase;
191
0
            if(strength == UCOL_PRIMARY) {
192
                // Primary+caseLevel: Ignore case level weights of primary ignorables.
193
                // Otherwise we would get a-umlaut > a
194
                // which is not desirable for accent-insensitive sorting.
195
                // Check for (lower 32 bits) == 0 as well because variable CEs are stored
196
                // with only primary weights.
197
0
                int64_t ce;
198
0
                do {
199
0
                    ce = left.getCE(leftIndex++);
200
0
                    leftCase = static_cast<uint32_t>(ce);
201
0
                } while (static_cast<uint32_t>(ce >> 32) == 0 || leftCase == 0);
202
0
                leftLower32 = leftCase;
203
0
                leftCase &= 0xc000;
204
205
0
                do {
206
0
                    ce = right.getCE(rightIndex++);
207
0
                    rightCase = static_cast<uint32_t>(ce);
208
0
                } while (static_cast<uint32_t>(ce >> 32) == 0 || rightCase == 0);
209
0
                rightCase &= 0xc000;
210
0
            } else {
211
                // Secondary+caseLevel: By analogy with the above,
212
                // ignore case level weights of secondary ignorables.
213
                //
214
                // Note: A tertiary CE has uppercase case bits (0.0.ut)
215
                // to keep tertiary+caseFirst well-formed.
216
                //
217
                // Tertiary+caseLevel: Also ignore case level weights of secondary ignorables.
218
                // Otherwise a tertiary CE's uppercase would be no greater than
219
                // a primary/secondary CE's uppercase.
220
                // (See UCA well-formedness condition 2.)
221
                // We could construct a special case weight higher than uppercase,
222
                // but it's simpler to always ignore case weights of secondary ignorables,
223
                // turning 0.0.ut into 0.0.0.t.
224
                // (See LDML Collation, Case Parameters.)
225
0
                do {
226
0
                    leftCase = static_cast<uint32_t>(left.getCE(leftIndex++));
227
0
                } while(leftCase <= 0xffff);
228
0
                leftLower32 = leftCase;
229
0
                leftCase &= 0xc000;
230
231
0
                do {
232
0
                    rightCase = static_cast<uint32_t>(right.getCE(rightIndex++));
233
0
                } while(rightCase <= 0xffff);
234
0
                rightCase &= 0xc000;
235
0
            }
236
237
            // No need to handle NO_CE and MERGE_SEPARATOR specially:
238
            // There is one case weight for each previous-level weight,
239
            // so level length differences were handled there.
240
0
            if(leftCase != rightCase) {
241
0
                if((options & CollationSettings::UPPER_FIRST) == 0) {
242
0
                    return (leftCase < rightCase) ? UCOL_LESS : UCOL_GREATER;
243
0
                } else {
244
0
                    return (leftCase < rightCase) ? UCOL_GREATER : UCOL_LESS;
245
0
                }
246
0
            }
247
0
            if((leftLower32 >> 16) == Collation::NO_CE_WEIGHT16) { break; }
248
0
        }
249
0
    }
250
1.00k
    // At secondary strength or below, the tertiary and quaternary passes are skipped.
    if(CollationSettings::getStrength(options) <= UCOL_SECONDARY) { return UCOL_EQUAL; }
251
252
947
    uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options);
253
254
947
    int32_t leftIndex = 0;
255
947
    int32_t rightIndex = 0;
256
947
    // Accumulates (bitwise OR) the lower 32 bits of every CE read in the tertiary
    // pass; its 0xc0 bits are tested before the quaternary pass.
    uint32_t anyQuaternaries = 0;
257
31.3k
    for(;;) {
258
31.3k
        uint32_t leftLower32, leftTertiary;
259
49.7k
        do {
260
49.7k
            leftLower32 = static_cast<uint32_t>(left.getCE(leftIndex++));
261
49.7k
            anyQuaternaries |= leftLower32;
262
49.7k
            U_ASSERT((leftLower32 & Collation::ONLY_TERTIARY_MASK) != 0 ||
263
49.7k
                     (leftLower32 & 0xc0c0) == 0);
264
49.7k
            leftTertiary = leftLower32 & tertiaryMask;
265
49.7k
        } while(leftTertiary == 0);
266
267
31.3k
        uint32_t rightLower32, rightTertiary;
268
49.8k
        do {
269
49.8k
            rightLower32 = static_cast<uint32_t>(right.getCE(rightIndex++));
270
49.8k
            anyQuaternaries |= rightLower32;
271
49.8k
            U_ASSERT((rightLower32 & Collation::ONLY_TERTIARY_MASK) != 0 ||
272
49.8k
                     (rightLower32 & 0xc0c0) == 0);
273
49.8k
            rightTertiary = rightLower32 & tertiaryMask;
274
49.8k
        } while(rightTertiary == 0);
275
276
31.3k
        if(leftTertiary != rightTertiary) {
277
41
            if(CollationSettings::sortsTertiaryUpperCaseFirst(options)) {
278
                // Pass through NO_CE and keep real tertiary weights larger than that.
279
                // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
280
                // to keep tertiary CEs well-formed.
281
                // Their case+tertiary weights must be greater than those of
282
                // primary and secondary CEs.
283
7
                if(leftTertiary > Collation::NO_CE_WEIGHT16) {
284
7
                    if(leftLower32 > 0xffff) {
285
7
                        leftTertiary ^= 0xc000;
286
7
                    } else {
287
0
                        leftTertiary += 0x4000;
288
0
                    }
289
7
                }
290
7
                if(rightTertiary > Collation::NO_CE_WEIGHT16) {
291
7
                    if(rightLower32 > 0xffff) {
292
7
                        rightTertiary ^= 0xc000;
293
7
                    } else {
294
0
                        rightTertiary += 0x4000;
295
0
                    }
296
7
                }
297
7
            }
298
41
            return (leftTertiary < rightTertiary) ? UCOL_LESS : UCOL_GREATER;
299
41
        }
300
31.2k
        if(leftTertiary == Collation::NO_CE_WEIGHT16) { break; }
301
31.2k
    }
302
906
    if(CollationSettings::getStrength(options) <= UCOL_TERTIARY) { return UCOL_EQUAL; }
303
304
879
    if(!anyVariable && (anyQuaternaries & 0xc0) == 0) {
305
        // If there are no "variable" CEs and no non-zero quaternary weights,
306
        // then there are no quaternary differences.
307
767
        return UCOL_EQUAL;
308
767
    }
309
310
112
    leftIndex = 0;
311
112
    rightIndex = 0;
312
1.75k
    for(;;) {
313
1.75k
        uint32_t leftQuaternary;
314
2.73k
        do {
315
2.73k
            int64_t ce = left.getCE(leftIndex++);
316
2.73k
            leftQuaternary = static_cast<uint32_t>(ce) & 0xffff;
317
2.73k
            if(leftQuaternary <= Collation::NO_CE_WEIGHT16) {
318
                // Variable primary or completely ignorable or NO_CE.
319
1.45k
                leftQuaternary = static_cast<uint32_t>(ce >> 32);
320
1.45k
            } else {
321
                // Regular CE, not tertiary ignorable.
322
                // Preserve the quaternary weight in bits 7..6.
323
1.27k
                leftQuaternary |= 0xffffff3f;
324
1.27k
            }
325
2.73k
        } while(leftQuaternary == 0);
326
327
1.75k
        uint32_t rightQuaternary;
328
2.71k
        do {
329
2.71k
            int64_t ce = right.getCE(rightIndex++);
330
2.71k
            rightQuaternary = static_cast<uint32_t>(ce) & 0xffff;
331
2.71k
            if(rightQuaternary <= Collation::NO_CE_WEIGHT16) {
332
                // Variable primary or completely ignorable or NO_CE.
333
1.44k
                rightQuaternary = static_cast<uint32_t>(ce >> 32);
334
1.44k
            } else {
335
                // Regular CE, not tertiary ignorable.
336
                // Preserve the quaternary weight in bits 7..6.
337
1.27k
                rightQuaternary |= 0xffffff3f;
338
1.27k
            }
339
2.71k
        } while(rightQuaternary == 0);
340
341
1.75k
        if(leftQuaternary != rightQuaternary) {
342
            // Return the difference, with script reordering.
343
80
            if(settings.hasReordering()) {
344
80
                leftQuaternary = settings.reorder(leftQuaternary);
345
80
                rightQuaternary = settings.reorder(rightQuaternary);
346
80
            }
347
80
            return (leftQuaternary < rightQuaternary) ? UCOL_LESS : UCOL_GREATER;
348
80
        }
349
1.67k
        if(leftQuaternary == Collation::NO_CE_PRIMARY) { break; }
350
1.67k
    }
351
32
    return UCOL_EQUAL;
352
112
}
353
354
U_NAMESPACE_END
355
356
#endif  // !UCONFIG_NO_COLLATION