/src/icu/icu4c/source/i18n/collationsettings.cpp
Line | Count | Source |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************************* |
5 | | * Copyright (C) 2013-2015, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ******************************************************************************* |
8 | | * collationsettings.cpp |
9 | | * |
10 | | * created on: 2013feb07 |
11 | | * created by: Markus W. Scherer |
12 | | */ |
13 | | |
14 | | #include "unicode/utypes.h" |
15 | | |
16 | | #if !UCONFIG_NO_COLLATION |
17 | | |
18 | | #include "unicode/ucol.h" |
19 | | #include "cmemory.h" |
20 | | #include "collation.h" |
21 | | #include "collationdata.h" |
22 | | #include "collationsettings.h" |
23 | | #include "sharedobject.h" |
24 | | #include "uassert.h" |
25 | | #include "umutex.h" |
26 | | #include "uvectr32.h" |
27 | | |
28 | | U_NAMESPACE_BEGIN |
29 | | |
30 | | CollationSettings::CollationSettings(const CollationSettings &other) |
31 | 14.3k | : SharedObject(other), |
32 | 14.3k | options(other.options), variableTop(other.variableTop), |
33 | 14.3k | reorderTable(nullptr), |
34 | 14.3k | minHighNoReorder(other.minHighNoReorder), |
35 | 14.3k | reorderRanges(nullptr), reorderRangesLength(0), |
36 | 14.3k | reorderCodes(nullptr), reorderCodesLength(0), reorderCodesCapacity(0), |
37 | 14.3k | fastLatinOptions(other.fastLatinOptions) { |
38 | 14.3k | UErrorCode errorCode = U_ZERO_ERROR; |
39 | 14.3k | copyReorderingFrom(other, errorCode); |
40 | 14.3k | if(fastLatinOptions >= 0) { |
41 | 14.2k | uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries)); |
42 | 14.2k | } |
43 | 14.3k | } |
44 | | |
45 | 14.1k | CollationSettings::~CollationSettings() { |
46 | 14.1k | if(reorderCodesCapacity != 0) { |
47 | 886 | uprv_free(const_cast<int32_t *>(reorderCodes)); |
48 | 886 | } |
49 | 14.1k | } |
50 | | |
51 | | bool |
52 | 0 | CollationSettings::operator==(const CollationSettings &other) const { |
53 | 0 | if(options != other.options) { return false; } |
54 | 0 | if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return false; } |
55 | 0 | if(reorderCodesLength != other.reorderCodesLength) { return false; } |
56 | 0 | for(int32_t i = 0; i < reorderCodesLength; ++i) { |
57 | 0 | if(reorderCodes[i] != other.reorderCodes[i]) { return false; } |
58 | 0 | } |
59 | 0 | return true; |
60 | 0 | } |
61 | | |
62 | | int32_t |
63 | 0 | CollationSettings::hashCode() const { |
64 | 0 | int32_t h = options << 8; |
65 | 0 | if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; } |
66 | 0 | h ^= reorderCodesLength; |
67 | 0 | for(int32_t i = 0; i < reorderCodesLength; ++i) { |
68 | 0 | h ^= (reorderCodes[i] << i); |
69 | 0 | } |
70 | 0 | return h; |
71 | 0 | } |
72 | | |
73 | | void |
74 | 11.0k | CollationSettings::resetReordering() { |
75 | | // When we turn off reordering, we want to set a nullptr permutation |
76 | | // rather than a no-op permutation. |
77 | | // Keep the memory via reorderCodes and its capacity. |
78 | 11.0k | reorderTable = nullptr; |
79 | 11.0k | minHighNoReorder = 0; |
80 | 11.0k | reorderRangesLength = 0; |
81 | 11.0k | reorderCodesLength = 0; |
82 | 11.0k | } |
83 | | |
84 | | void |
85 | | CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length, |
86 | | const uint32_t *ranges, int32_t rangesLength, |
87 | 79 | const uint8_t *table, UErrorCode &errorCode) { |
88 | 79 | if(U_FAILURE(errorCode)) { return; } |
89 | 79 | if(table != nullptr && |
90 | 79 | (rangesLength == 0 ? |
91 | 43 | !reorderTableHasSplitBytes(table) : |
92 | 79 | rangesLength >= 2 && |
93 | | // The first offset must be 0. The last offset must not be 0. |
94 | 79 | (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) { |
95 | | // We need to release the memory before setting the alias pointer. |
96 | 79 | if(reorderCodesCapacity != 0) { |
97 | 0 | uprv_free(const_cast<int32_t *>(reorderCodes)); |
98 | 0 | reorderCodesCapacity = 0; |
99 | 0 | } |
100 | 79 | reorderTable = table; |
101 | 79 | reorderCodes = codes; |
102 | 79 | reorderCodesLength = length; |
103 | | // Drop ranges before the first split byte. They are reordered by the table. |
104 | | // This then speeds up reordering of the remaining ranges. |
105 | 79 | int32_t firstSplitByteRangeIndex = 0; |
106 | 204 | while(firstSplitByteRangeIndex < rangesLength && |
107 | 161 | (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) { |
108 | | // The second byte of the primary limit is 0. |
109 | 125 | ++firstSplitByteRangeIndex; |
110 | 125 | } |
111 | 79 | if(firstSplitByteRangeIndex == rangesLength) { |
112 | 43 | U_ASSERT(!reorderTableHasSplitBytes(table)); |
113 | 43 | minHighNoReorder = 0; |
114 | 43 | reorderRanges = nullptr; |
115 | 43 | reorderRangesLength = 0; |
116 | 43 | } else { |
117 | 36 | U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0); |
118 | 36 | minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; |
119 | 36 | reorderRanges = ranges + firstSplitByteRangeIndex; |
120 | 36 | reorderRangesLength = rangesLength - firstSplitByteRangeIndex; |
121 | 36 | } |
122 | 79 | return; |
123 | 79 | } |
124 | | // Regenerate missing data. |
125 | 0 | setReordering(data, codes, length, errorCode); |
126 | 0 | } |
127 | | |
128 | | void |
129 | | CollationSettings::setReordering(const CollationData &data, |
130 | | const int32_t *codes, int32_t codesLength, |
131 | 3.46k | UErrorCode &errorCode) { |
132 | 3.46k | if(U_FAILURE(errorCode)) { return; } |
133 | 3.46k | if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) { |
134 | 1 | resetReordering(); |
135 | 1 | return; |
136 | 1 | } |
137 | 3.46k | UVector32 rangesList(errorCode); |
138 | 3.46k | data.makeReorderRanges(codes, codesLength, rangesList, errorCode); |
139 | 3.46k | if(U_FAILURE(errorCode)) { return; } |
140 | 3.40k | int32_t rangesLength = rangesList.size(); |
141 | 3.40k | if(rangesLength == 0) { |
142 | 33 | resetReordering(); |
143 | 33 | return; |
144 | 33 | } |
145 | 3.37k | const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer()); |
146 | | // ranges[] contains at least two (limit, offset) pairs. |
147 | | // The first offset must be 0. The last offset must not be 0. |
148 | | // Separators (at the low end) and trailing weights (at the high end) |
149 | | // are never reordered. |
150 | 3.37k | U_ASSERT(rangesLength >= 2); |
151 | 3.37k | U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0); |
152 | 3.37k | minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; |
153 | | |
154 | | // Write the lead byte permutation table. |
155 | | // Set a 0 for each lead byte that has a range boundary in the middle. |
156 | 3.37k | uint8_t table[256]; |
157 | 3.37k | int32_t b = 0; |
158 | 3.37k | int32_t firstSplitByteRangeIndex = -1; |
159 | 13.2k | for(int32_t i = 0; i < rangesLength; ++i) { |
160 | 9.87k | uint32_t pair = ranges[i]; |
161 | 9.87k | int32_t limit1 = static_cast<int32_t>(pair >> 24); |
162 | 388k | while(b < limit1) { |
163 | 378k | table[b] = static_cast<uint8_t>(b + pair); |
164 | 378k | ++b; |
165 | 378k | } |
166 | | // Check the second byte of the limit. |
167 | 9.87k | if((pair & 0xff0000) != 0) { |
168 | 1.46k | table[limit1] = 0; |
169 | 1.46k | b = limit1 + 1; |
170 | 1.46k | if(firstSplitByteRangeIndex < 0) { |
171 | 1.25k | firstSplitByteRangeIndex = i; |
172 | 1.25k | } |
173 | 1.46k | } |
174 | 9.87k | } |
175 | 487k | while(b <= 0xff) { |
176 | 484k | table[b] = static_cast<uint8_t>(b); |
177 | 484k | ++b; |
178 | 484k | } |
179 | 3.37k | if(firstSplitByteRangeIndex < 0) { |
180 | | // The lead byte permutation table alone suffices for reordering. |
181 | 2.12k | rangesLength = 0; |
182 | 2.12k | } else { |
183 | | // Remove the ranges below the first split byte. |
184 | 1.25k | ranges += firstSplitByteRangeIndex; |
185 | 1.25k | rangesLength -= firstSplitByteRangeIndex; |
186 | 1.25k | } |
187 | 3.37k | setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode); |
188 | 3.37k | } |
189 | | |
190 | | void |
191 | | CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength, |
192 | | const uint32_t *ranges, int32_t rangesLength, |
193 | 3.37k | const uint8_t *table, UErrorCode &errorCode) { |
194 | 3.37k | if(U_FAILURE(errorCode)) { return; } |
195 | 3.37k | int32_t *ownedCodes; |
196 | 3.37k | int32_t totalLength = codesLength + rangesLength; |
197 | 3.37k | U_ASSERT(totalLength > 0); |
198 | 3.37k | if (totalLength <= 0) { |
199 | 0 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
200 | 0 | return; |
201 | 0 | } |
202 | 3.37k | if(totalLength <= reorderCodesCapacity) { |
203 | 2.40k | ownedCodes = const_cast<int32_t *>(reorderCodes); |
204 | 2.40k | } else { |
205 | | // Allocate one memory block for the codes, the ranges, and the 16-aligned table. |
206 | 967 | int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints |
207 | 967 | ownedCodes = static_cast<int32_t*>(uprv_malloc(capacity * 4 + 256)); |
208 | 967 | if(ownedCodes == nullptr) { |
209 | 0 | resetReordering(); |
210 | 0 | errorCode = U_MEMORY_ALLOCATION_ERROR; |
211 | 0 | return; |
212 | 0 | } |
213 | 967 | if(reorderCodesCapacity != 0) { |
214 | 81 | uprv_free(const_cast<int32_t *>(reorderCodes)); |
215 | 81 | } |
216 | 967 | reorderCodes = ownedCodes; |
217 | 967 | reorderCodesCapacity = capacity; |
218 | 967 | } |
219 | 3.37k | uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256); |
220 | 3.37k | uprv_memcpy(ownedCodes, codes, codesLength * 4); |
221 | 3.37k | uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4); |
222 | 3.37k | reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity); |
223 | 3.37k | reorderCodesLength = codesLength; |
224 | 3.37k | reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength; |
225 | 3.37k | reorderRangesLength = rangesLength; |
226 | 3.37k | } |
227 | | |
228 | | void |
229 | 14.3k | CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) { |
230 | 14.3k | if(U_FAILURE(errorCode)) { return; } |
231 | 14.3k | if(!other.hasReordering()) { |
232 | 11.0k | resetReordering(); |
233 | 11.0k | return; |
234 | 11.0k | } |
235 | 3.27k | minHighNoReorder = other.minHighNoReorder; |
236 | 3.27k | if(other.reorderCodesCapacity == 0) { |
237 | | // The reorder arrays are aliased to memory-mapped data. |
238 | 3.27k | reorderTable = other.reorderTable; |
239 | 3.27k | reorderRanges = other.reorderRanges; |
240 | 3.27k | reorderRangesLength = other.reorderRangesLength; |
241 | 3.27k | reorderCodes = other.reorderCodes; |
242 | 3.27k | reorderCodesLength = other.reorderCodesLength; |
243 | 3.27k | } else { |
244 | 0 | setReorderArrays(other.reorderCodes, other.reorderCodesLength, |
245 | 0 | other.reorderRanges, other.reorderRangesLength, |
246 | 0 | other.reorderTable, errorCode); |
247 | 0 | } |
248 | 3.27k | } |
249 | | |
250 | | UBool |
251 | 43 | CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) { |
252 | 43 | U_ASSERT(table[0] == 0); |
253 | 11.0k | for(int32_t i = 1; i < 256; ++i) { |
254 | 10.9k | if(table[i] == 0) { |
255 | 0 | return true; |
256 | 0 | } |
257 | 10.9k | } |
258 | 43 | return false; |
259 | 43 | } |
260 | | |
261 | | uint32_t |
262 | 4.63k | CollationSettings::reorderEx(uint32_t p) const { |
263 | 4.63k | if(p >= minHighNoReorder) { return p; } |
264 | | // Round up p so that its lower 16 bits are >= any offset bits. |
265 | | // Then compare q directly with (limit, offset) pairs. |
266 | 4.32k | uint32_t q = p | 0xffff; |
267 | 4.32k | uint32_t r; |
268 | 4.32k | const uint32_t *ranges = reorderRanges; |
269 | 8.20k | while(q >= (r = *ranges)) { ++ranges; } |
270 | 4.32k | return p + (r << 24); |
271 | 4.63k | } |
272 | | |
273 | | void |
274 | 6.73k | CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { |
275 | 6.73k | if(U_FAILURE(errorCode)) { return; } |
276 | 6.73k | int32_t noStrength = options & ~STRENGTH_MASK; |
277 | 6.73k | switch(value) { |
278 | 1.43k | case UCOL_PRIMARY: |
279 | 2.70k | case UCOL_SECONDARY: |
280 | 2.71k | case UCOL_TERTIARY: |
281 | 4.03k | case UCOL_QUATERNARY: |
282 | 6.72k | case UCOL_IDENTICAL: |
283 | 6.72k | options = noStrength | (value << STRENGTH_SHIFT); |
284 | 6.72k | break; |
285 | 0 | case UCOL_DEFAULT: |
286 | 0 | options = noStrength | (defaultOptions & STRENGTH_MASK); |
287 | 0 | break; |
288 | 12 | default: |
289 | 12 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
290 | 12 | break; |
291 | 6.73k | } |
292 | 6.73k | } |
293 | | |
294 | | void |
295 | | CollationSettings::setFlag(int32_t bit, UColAttributeValue value, |
296 | 2.06k | int32_t defaultOptions, UErrorCode &errorCode) { |
297 | 2.06k | if(U_FAILURE(errorCode)) { return; } |
298 | 2.06k | switch(value) { |
299 | 2.06k | case UCOL_ON: |
300 | 2.06k | options |= bit; |
301 | 2.06k | break; |
302 | 0 | case UCOL_OFF: |
303 | 0 | options &= ~bit; |
304 | 0 | break; |
305 | 0 | case UCOL_DEFAULT: |
306 | 0 | options = (options & ~bit) | (defaultOptions & bit); |
307 | 0 | break; |
308 | 1 | default: |
309 | 1 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
310 | 1 | break; |
311 | 2.06k | } |
312 | 2.06k | } |
313 | | |
314 | | void |
315 | | CollationSettings::setCaseFirst(UColAttributeValue value, |
316 | 130 | int32_t defaultOptions, UErrorCode &errorCode) { |
317 | 130 | if(U_FAILURE(errorCode)) { return; } |
318 | 130 | int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK; |
319 | 130 | switch(value) { |
320 | 0 | case UCOL_OFF: |
321 | 0 | options = noCaseFirst; |
322 | 0 | break; |
323 | 1 | case UCOL_LOWER_FIRST: |
324 | 1 | options = noCaseFirst | CASE_FIRST; |
325 | 1 | break; |
326 | 118 | case UCOL_UPPER_FIRST: |
327 | 118 | options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK; |
328 | 118 | break; |
329 | 0 | case UCOL_DEFAULT: |
330 | 0 | options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK); |
331 | 0 | break; |
332 | 11 | default: |
333 | 11 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
334 | 11 | break; |
335 | 130 | } |
336 | 130 | } |
337 | | |
338 | | void |
339 | | CollationSettings::setAlternateHandling(UColAttributeValue value, |
340 | 499 | int32_t defaultOptions, UErrorCode &errorCode) { |
341 | 499 | if(U_FAILURE(errorCode)) { return; } |
342 | 499 | int32_t noAlternate = options & ~ALTERNATE_MASK; |
343 | 499 | switch(value) { |
344 | 0 | case UCOL_NON_IGNORABLE: |
345 | 0 | options = noAlternate; |
346 | 0 | break; |
347 | 486 | case UCOL_SHIFTED: |
348 | 486 | options = noAlternate | SHIFTED; |
349 | 486 | break; |
350 | 0 | case UCOL_DEFAULT: |
351 | 0 | options = noAlternate | (defaultOptions & ALTERNATE_MASK); |
352 | 0 | break; |
353 | 13 | default: |
354 | 13 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
355 | 13 | break; |
356 | 499 | } |
357 | 499 | } |
358 | | |
359 | | void |
360 | 14 | CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { |
361 | 14 | if(U_FAILURE(errorCode)) { return; } |
362 | 14 | int32_t noMax = options & ~MAX_VARIABLE_MASK; |
363 | 14 | switch(value) { |
364 | 11 | case MAX_VAR_SPACE: |
365 | 11 | case MAX_VAR_PUNCT: |
366 | 11 | case MAX_VAR_SYMBOL: |
367 | 14 | case MAX_VAR_CURRENCY: |
368 | 14 | options = noMax | (value << MAX_VARIABLE_SHIFT); |
369 | 14 | break; |
370 | 0 | case UCOL_DEFAULT: |
371 | 0 | options = noMax | (defaultOptions & MAX_VARIABLE_MASK); |
372 | 0 | break; |
373 | 0 | default: |
374 | 0 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
375 | 0 | break; |
376 | 14 | } |
377 | 14 | } |
378 | | |
379 | | U_NAMESPACE_END |
380 | | |
381 | | #endif // !UCONFIG_NO_COLLATION |