Coverage Report

Created: 2026-01-25 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/i18n/collationsettings.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2013-2015, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* collationsettings.cpp
9
*
10
* created on: 2013feb07
11
* created by: Markus W. Scherer
12
*/
13
14
#include "unicode/utypes.h"
15
16
#if !UCONFIG_NO_COLLATION
17
18
#include "unicode/ucol.h"
19
#include "cmemory.h"
20
#include "collation.h"
21
#include "collationdata.h"
22
#include "collationsettings.h"
23
#include "sharedobject.h"
24
#include "uassert.h"
25
#include "umutex.h"
26
#include "uvectr32.h"
27
28
U_NAMESPACE_BEGIN
29
30
/**
 * Copy constructor.
 * Takes over options, variableTop, minHighNoReorder and fastLatinOptions from other,
 * starts out with empty reordering fields and then lets copyReorderingFrom() fill them.
 * fastLatinPrimaries is copied only when fastLatinOptions is not negative.
 */
CollationSettings::CollationSettings(const CollationSettings &other)
        : SharedObject(other),
          options(other.options),
          variableTop(other.variableTop),
          reorderTable(nullptr),
          minHighNoReorder(other.minHighNoReorder),
          reorderRanges(nullptr),
          reorderRangesLength(0),
          reorderCodes(nullptr),
          reorderCodesLength(0),
          reorderCodesCapacity(0),
          fastLatinOptions(other.fastLatinOptions) {
    // The status is local to the constructor; a failure in copyReorderingFrom()
    // is not reported to the caller.
    UErrorCode status = U_ZERO_ERROR;
    copyReorderingFrom(other, status);
    if(fastLatinOptions < 0) {
        // Nothing to copy: the primaries array is skipped for negative options.
        return;
    }
    uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
}
44
45
14.1k
/**
 * Destructor. Frees reorderCodes only when this object allocated it,
 * which is signalled by a non-zero reorderCodesCapacity.
 */
CollationSettings::~CollationSettings() {
    // Capacity 0: reorderCodes is either unset or an alias that we must not free.
    if(reorderCodesCapacity == 0) { return; }
    uprv_free(const_cast<int32_t *>(reorderCodes));
}
50
51
bool
52
0
CollationSettings::operator==(const CollationSettings &other) const {
53
0
    if(options != other.options) { return false; }
54
0
    if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return false; }
55
0
    if(reorderCodesLength != other.reorderCodesLength) { return false; }
56
0
    for(int32_t i = 0; i < reorderCodesLength; ++i) {
57
0
        if(reorderCodes[i] != other.reorderCodes[i]) { return false; }
58
0
    }
59
0
    return true;
60
0
}
61
62
int32_t
63
0
CollationSettings::hashCode() const {
64
0
    int32_t h = options << 8;
65
0
    if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
66
0
    h ^= reorderCodesLength;
67
0
    for(int32_t i = 0; i < reorderCodesLength; ++i) {
68
0
        h ^= (reorderCodes[i] << i);
69
0
    }
70
0
    return h;
71
0
}
72
73
void
74
11.0k
CollationSettings::resetReordering() {
75
    // When we turn off reordering, we want to set a nullptr permutation
76
    // rather than a no-op permutation.
77
    // Keep the memory via reorderCodes and its capacity.
78
11.0k
    reorderTable = nullptr;
79
11.0k
    minHighNoReorder = 0;
80
11.0k
    reorderRangesLength = 0;
81
11.0k
    reorderCodesLength = 0;
82
11.0k
}
83
84
void
85
CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
86
                                   const uint32_t *ranges, int32_t rangesLength,
87
79
                                   const uint8_t *table, UErrorCode &errorCode) {
88
79
    if(U_FAILURE(errorCode)) { return; }
89
79
    if(table != nullptr &&
90
79
            (rangesLength == 0 ?
91
43
                    !reorderTableHasSplitBytes(table) :
92
79
                    rangesLength >= 2 &&
93
                    // The first offset must be 0. The last offset must not be 0.
94
79
                    (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
95
        // We need to release the memory before setting the alias pointer.
96
79
        if(reorderCodesCapacity != 0) {
97
0
            uprv_free(const_cast<int32_t *>(reorderCodes));
98
0
            reorderCodesCapacity = 0;
99
0
        }
100
79
        reorderTable = table;
101
79
        reorderCodes = codes;
102
79
        reorderCodesLength = length;
103
        // Drop ranges before the first split byte. They are reordered by the table.
104
        // This then speeds up reordering of the remaining ranges.
105
79
        int32_t firstSplitByteRangeIndex = 0;
106
204
        while(firstSplitByteRangeIndex < rangesLength &&
107
161
                (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
108
            // The second byte of the primary limit is 0.
109
125
            ++firstSplitByteRangeIndex;
110
125
        }
111
79
        if(firstSplitByteRangeIndex == rangesLength) {
112
43
            U_ASSERT(!reorderTableHasSplitBytes(table));
113
43
            minHighNoReorder = 0;
114
43
            reorderRanges = nullptr;
115
43
            reorderRangesLength = 0;
116
43
        } else {
117
36
            U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
118
36
            minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
119
36
            reorderRanges = ranges + firstSplitByteRangeIndex;
120
36
            reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
121
36
        }
122
79
        return;
123
79
    }
124
    // Regenerate missing data.
125
0
    setReordering(data, codes, length, errorCode);
126
0
}
127
128
void
129
CollationSettings::setReordering(const CollationData &data,
130
                                 const int32_t *codes, int32_t codesLength,
131
3.46k
                                 UErrorCode &errorCode) {
132
3.46k
    if(U_FAILURE(errorCode)) { return; }
133
3.46k
    if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
134
1
        resetReordering();
135
1
        return;
136
1
    }
137
3.46k
    UVector32 rangesList(errorCode);
138
3.46k
    data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
139
3.46k
    if(U_FAILURE(errorCode)) { return; }
140
3.40k
    int32_t rangesLength = rangesList.size();
141
3.40k
    if(rangesLength == 0) {
142
33
        resetReordering();
143
33
        return;
144
33
    }
145
3.37k
    const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
146
    // ranges[] contains at least two (limit, offset) pairs.
147
    // The first offset must be 0. The last offset must not be 0.
148
    // Separators (at the low end) and trailing weights (at the high end)
149
    // are never reordered.
150
3.37k
    U_ASSERT(rangesLength >= 2);
151
3.37k
    U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
152
3.37k
    minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
153
154
    // Write the lead byte permutation table.
155
    // Set a 0 for each lead byte that has a range boundary in the middle.
156
3.37k
    uint8_t table[256];
157
3.37k
    int32_t b = 0;
158
3.37k
    int32_t firstSplitByteRangeIndex = -1;
159
13.2k
    for(int32_t i = 0; i < rangesLength; ++i) {
160
9.87k
        uint32_t pair = ranges[i];
161
9.87k
        int32_t limit1 = static_cast<int32_t>(pair >> 24);
162
388k
        while(b < limit1) {
163
378k
            table[b] = static_cast<uint8_t>(b + pair);
164
378k
            ++b;
165
378k
        }
166
        // Check the second byte of the limit.
167
9.87k
        if((pair & 0xff0000) != 0) {
168
1.46k
            table[limit1] = 0;
169
1.46k
            b = limit1 + 1;
170
1.46k
            if(firstSplitByteRangeIndex < 0) {
171
1.25k
                firstSplitByteRangeIndex = i;
172
1.25k
            }
173
1.46k
        }
174
9.87k
    }
175
487k
    while(b <= 0xff) {
176
484k
        table[b] = static_cast<uint8_t>(b);
177
484k
        ++b;
178
484k
    }
179
3.37k
    if(firstSplitByteRangeIndex < 0) {
180
        // The lead byte permutation table alone suffices for reordering.
181
2.12k
        rangesLength = 0;
182
2.12k
    } else {
183
        // Remove the ranges below the first split byte.
184
1.25k
        ranges += firstSplitByteRangeIndex;
185
1.25k
        rangesLength -= firstSplitByteRangeIndex;
186
1.25k
    }
187
3.37k
    setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
188
3.37k
}
189
190
void
191
CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
192
                                    const uint32_t *ranges, int32_t rangesLength,
193
3.37k
                                    const uint8_t *table, UErrorCode &errorCode) {
194
3.37k
    if(U_FAILURE(errorCode)) { return; }
195
3.37k
    int32_t *ownedCodes;
196
3.37k
    int32_t totalLength = codesLength + rangesLength;
197
3.37k
    U_ASSERT(totalLength > 0);
198
3.37k
    if (totalLength <= 0) {
199
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
200
0
        return;
201
0
    }
202
3.37k
    if(totalLength <= reorderCodesCapacity) {
203
2.40k
        ownedCodes = const_cast<int32_t *>(reorderCodes);
204
2.40k
    } else {
205
        // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
206
967
        int32_t capacity = (totalLength + 3) & ~3;  // round up to a multiple of 4 ints
207
967
        ownedCodes = static_cast<int32_t*>(uprv_malloc(capacity * 4 + 256));
208
967
        if(ownedCodes == nullptr) {
209
0
            resetReordering();
210
0
            errorCode = U_MEMORY_ALLOCATION_ERROR;
211
0
            return;
212
0
        }
213
967
        if(reorderCodesCapacity != 0) {
214
81
            uprv_free(const_cast<int32_t *>(reorderCodes));
215
81
        }
216
967
        reorderCodes = ownedCodes;
217
967
        reorderCodesCapacity = capacity;
218
967
    }
219
3.37k
    uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
220
3.37k
    uprv_memcpy(ownedCodes, codes, codesLength * 4);
221
3.37k
    uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
222
3.37k
    reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
223
3.37k
    reorderCodesLength = codesLength;
224
3.37k
    reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
225
3.37k
    reorderRangesLength = rangesLength;
226
3.37k
}
227
228
void
229
14.3k
CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
230
14.3k
    if(U_FAILURE(errorCode)) { return; }
231
14.3k
    if(!other.hasReordering()) {
232
11.0k
        resetReordering();
233
11.0k
        return;
234
11.0k
    }
235
3.27k
    minHighNoReorder = other.minHighNoReorder;
236
3.27k
    if(other.reorderCodesCapacity == 0) {
237
        // The reorder arrays are aliased to memory-mapped data.
238
3.27k
        reorderTable = other.reorderTable;
239
3.27k
        reorderRanges = other.reorderRanges;
240
3.27k
        reorderRangesLength = other.reorderRangesLength;
241
3.27k
        reorderCodes = other.reorderCodes;
242
3.27k
        reorderCodesLength = other.reorderCodesLength;
243
3.27k
    } else {
244
0
        setReorderArrays(other.reorderCodes, other.reorderCodesLength,
245
0
                         other.reorderRanges, other.reorderRangesLength,
246
0
                         other.reorderTable, errorCode);
247
0
    }
248
3.27k
}
249
250
/**
 * Checks whether any entry of the table other than table[0] is 0.
 * setReordering() writes a 0 for each lead byte that has a range boundary
 * in its middle, so such an entry marks a split byte.
 *
 * @param table 256-entry lead byte permutation table; table[0] is asserted to be 0
 * @return true if an entry at index 1..255 is 0
 */
UBool
CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
    U_ASSERT(table[0] == 0);
    const uint8_t *const limit = table + 256;
    for(const uint8_t *entry = table + 1; entry != limit; ++entry) {
        if(*entry == 0) {
            return true;
        }
    }
    return false;
}
260
261
uint32_t
262
4.63k
CollationSettings::reorderEx(uint32_t p) const {
263
4.63k
    if(p >= minHighNoReorder) { return p; }
264
    // Round up p so that its lower 16 bits are >= any offset bits.
265
    // Then compare q directly with (limit, offset) pairs.
266
4.32k
    uint32_t q = p | 0xffff;
267
4.32k
    uint32_t r;
268
4.32k
    const uint32_t *ranges = reorderRanges;
269
8.20k
    while(q >= (r = *ranges)) { ++ranges; }
270
4.32k
    return p + (r << 24);
271
4.63k
}
272
273
void
274
6.73k
CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
275
6.73k
    if(U_FAILURE(errorCode)) { return; }
276
6.73k
    int32_t noStrength = options & ~STRENGTH_MASK;
277
6.73k
    switch(value) {
278
1.43k
    case UCOL_PRIMARY:
279
2.70k
    case UCOL_SECONDARY:
280
2.71k
    case UCOL_TERTIARY:
281
4.03k
    case UCOL_QUATERNARY:
282
6.72k
    case UCOL_IDENTICAL:
283
6.72k
        options = noStrength | (value << STRENGTH_SHIFT);
284
6.72k
        break;
285
0
    case UCOL_DEFAULT:
286
0
        options = noStrength | (defaultOptions & STRENGTH_MASK);
287
0
        break;
288
12
    default:
289
12
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
290
12
        break;
291
6.73k
    }
292
6.73k
}
293
294
void
295
CollationSettings::setFlag(int32_t bit, UColAttributeValue value,
296
2.06k
                           int32_t defaultOptions, UErrorCode &errorCode) {
297
2.06k
    if(U_FAILURE(errorCode)) { return; }
298
2.06k
    switch(value) {
299
2.06k
    case UCOL_ON:
300
2.06k
        options |= bit;
301
2.06k
        break;
302
0
    case UCOL_OFF:
303
0
        options &= ~bit;
304
0
        break;
305
0
    case UCOL_DEFAULT:
306
0
        options = (options & ~bit) | (defaultOptions & bit);
307
0
        break;
308
1
    default:
309
1
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
310
1
        break;
311
2.06k
    }
312
2.06k
}
313
314
void
315
CollationSettings::setCaseFirst(UColAttributeValue value,
316
130
                                int32_t defaultOptions, UErrorCode &errorCode) {
317
130
    if(U_FAILURE(errorCode)) { return; }
318
130
    int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
319
130
    switch(value) {
320
0
    case UCOL_OFF:
321
0
        options = noCaseFirst;
322
0
        break;
323
1
    case UCOL_LOWER_FIRST:
324
1
        options = noCaseFirst | CASE_FIRST;
325
1
        break;
326
118
    case UCOL_UPPER_FIRST:
327
118
        options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK;
328
118
        break;
329
0
    case UCOL_DEFAULT:
330
0
        options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
331
0
        break;
332
11
    default:
333
11
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
334
11
        break;
335
130
    }
336
130
}
337
338
void
339
CollationSettings::setAlternateHandling(UColAttributeValue value,
340
499
                                        int32_t defaultOptions, UErrorCode &errorCode) {
341
499
    if(U_FAILURE(errorCode)) { return; }
342
499
    int32_t noAlternate = options & ~ALTERNATE_MASK;
343
499
    switch(value) {
344
0
    case UCOL_NON_IGNORABLE:
345
0
        options = noAlternate;
346
0
        break;
347
486
    case UCOL_SHIFTED:
348
486
        options = noAlternate | SHIFTED;
349
486
        break;
350
0
    case UCOL_DEFAULT:
351
0
        options = noAlternate | (defaultOptions & ALTERNATE_MASK);
352
0
        break;
353
13
    default:
354
13
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
355
13
        break;
356
499
    }
357
499
}
358
359
void
360
14
CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
361
14
    if(U_FAILURE(errorCode)) { return; }
362
14
    int32_t noMax = options & ~MAX_VARIABLE_MASK;
363
14
    switch(value) {
364
11
    case MAX_VAR_SPACE:
365
11
    case MAX_VAR_PUNCT:
366
11
    case MAX_VAR_SYMBOL:
367
14
    case MAX_VAR_CURRENCY:
368
14
        options = noMax | (value << MAX_VARIABLE_SHIFT);
369
14
        break;
370
0
    case UCOL_DEFAULT:
371
0
        options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
372
0
        break;
373
0
    default:
374
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
375
0
        break;
376
14
    }
377
14
}
378
379
U_NAMESPACE_END
380
381
#endif  // !UCONFIG_NO_COLLATION