/src/icu/source/i18n/collationrootelements.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ******************************************************************************* |
5 | | * Copyright (C) 2013-2014, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ******************************************************************************* |
8 | | * collationrootelements.cpp |
9 | | * |
10 | | * created on: 2013mar05 |
11 | | * created by: Markus W. Scherer |
12 | | */ |
13 | | |
14 | | #include "unicode/utypes.h" |
15 | | |
16 | | #if !UCONFIG_NO_COLLATION |
17 | | |
18 | | #include "collation.h" |
19 | | #include "collationrootelements.h" |
20 | | #include "uassert.h" |
21 | | |
22 | | U_NAMESPACE_BEGIN |
23 | | |
24 | | int64_t |
25 | 0 | CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const { |
26 | 0 | if(p == 0) { return 0; } |
27 | 0 | U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]); |
28 | 0 | int32_t index = findP(p); |
29 | 0 | uint32_t q = elements[index]; |
30 | 0 | uint32_t secTer; |
31 | 0 | if(p == (q & 0xffffff00)) { |
32 | | // p == elements[index] is a root primary. Find the CE before it. |
33 | | // We must not be in a primary range. |
34 | 0 | U_ASSERT((q & PRIMARY_STEP_MASK) == 0); |
35 | 0 | secTer = elements[index - 1]; |
36 | 0 | if((secTer & SEC_TER_DELTA_FLAG) == 0) { |
37 | | // Primary CE just before p. |
38 | 0 | p = secTer & 0xffffff00; |
39 | 0 | secTer = Collation::COMMON_SEC_AND_TER_CE; |
40 | 0 | } else { |
41 | | // secTer = last secondary & tertiary for the previous primary |
42 | 0 | index -= 2; |
43 | 0 | for(;;) { |
44 | 0 | p = elements[index]; |
45 | 0 | if((p & SEC_TER_DELTA_FLAG) == 0) { |
46 | 0 | p &= 0xffffff00; |
47 | 0 | break; |
48 | 0 | } |
49 | 0 | --index; |
50 | 0 | } |
51 | 0 | } |
52 | 0 | } else { |
53 | | // p > elements[index] which is the previous primary. |
54 | | // Find the last secondary & tertiary weights for it. |
55 | 0 | p = q & 0xffffff00; |
56 | 0 | secTer = Collation::COMMON_SEC_AND_TER_CE; |
57 | 0 | for(;;) { |
58 | 0 | q = elements[++index]; |
59 | 0 | if((q & SEC_TER_DELTA_FLAG) == 0) { |
60 | | // We must not be in a primary range. |
61 | 0 | U_ASSERT((q & PRIMARY_STEP_MASK) == 0); |
62 | 0 | break; |
63 | 0 | } |
64 | 0 | secTer = q; |
65 | 0 | } |
66 | 0 | } |
67 | 0 | return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG); |
68 | 0 | } |
69 | | |
70 | | int64_t |
71 | 0 | CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const { |
72 | 0 | if(p == 0) { return 0; } |
73 | 0 | int32_t index = findP(p); |
74 | 0 | if(p != (elements[index] & 0xffffff00)) { |
75 | 0 | for(;;) { |
76 | 0 | p = elements[++index]; |
77 | 0 | if((p & SEC_TER_DELTA_FLAG) == 0) { |
78 | | // First primary after p. We must not be in a primary range. |
79 | 0 | U_ASSERT((p & PRIMARY_STEP_MASK) == 0); |
80 | 0 | break; |
81 | 0 | } |
82 | 0 | } |
83 | 0 | } |
84 | | // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0. |
85 | 0 | return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE; |
86 | 0 | } |
87 | | |
88 | | uint32_t |
89 | 0 | CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const { |
90 | 0 | int32_t index = findPrimary(p); |
91 | 0 | int32_t step; |
92 | 0 | uint32_t q = elements[index]; |
93 | 0 | if(p == (q & 0xffffff00)) { |
94 | | // Found p itself. Return the previous primary. |
95 | | // See if p is at the end of a previous range. |
96 | 0 | step = (int32_t)q & PRIMARY_STEP_MASK; |
97 | 0 | if(step == 0) { |
98 | | // p is not at the end of a range. Look for the previous primary. |
99 | 0 | do { |
100 | 0 | p = elements[--index]; |
101 | 0 | } while((p & SEC_TER_DELTA_FLAG) != 0); |
102 | 0 | return p & 0xffffff00; |
103 | 0 | } |
104 | 0 | } else { |
105 | | // p is in a range, and not at the start. |
106 | 0 | uint32_t nextElement = elements[index + 1]; |
107 | 0 | U_ASSERT(isEndOfPrimaryRange(nextElement)); |
108 | 0 | step = (int32_t)nextElement & PRIMARY_STEP_MASK; |
109 | 0 | } |
110 | | // Return the previous range primary. |
111 | 0 | if((p & 0xffff) == 0) { |
112 | 0 | return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step); |
113 | 0 | } else { |
114 | 0 | return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step); |
115 | 0 | } |
116 | 0 | } |
117 | | |
118 | | uint32_t |
119 | 0 | CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const { |
120 | 0 | int32_t index; |
121 | 0 | uint32_t previousSec, sec; |
122 | 0 | if(p == 0) { |
123 | 0 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; |
124 | | // Gap at the beginning of the secondary CE range. |
125 | 0 | previousSec = 0; |
126 | 0 | sec = elements[index] >> 16; |
127 | 0 | } else { |
128 | 0 | index = findPrimary(p) + 1; |
129 | 0 | previousSec = Collation::BEFORE_WEIGHT16; |
130 | 0 | sec = getFirstSecTerForPrimary(index) >> 16; |
131 | 0 | } |
132 | 0 | U_ASSERT(s >= sec); |
133 | 0 | while(s > sec) { |
134 | 0 | previousSec = sec; |
135 | 0 | U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); |
136 | 0 | sec = elements[index++] >> 16; |
137 | 0 | } |
138 | 0 | U_ASSERT(sec == s); |
139 | 0 | return previousSec; |
140 | 0 | } |
141 | | |
142 | | uint32_t |
143 | 0 | CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const { |
144 | 0 | U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0); |
145 | 0 | int32_t index; |
146 | 0 | uint32_t previousTer, secTer; |
147 | 0 | if(p == 0) { |
148 | 0 | if(s == 0) { |
149 | 0 | index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; |
150 | | // Gap at the beginning of the tertiary CE range. |
151 | 0 | previousTer = 0; |
152 | 0 | } else { |
153 | 0 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; |
154 | 0 | previousTer = Collation::BEFORE_WEIGHT16; |
155 | 0 | } |
156 | 0 | secTer = elements[index] & ~SEC_TER_DELTA_FLAG; |
157 | 0 | } else { |
158 | 0 | index = findPrimary(p) + 1; |
159 | 0 | previousTer = Collation::BEFORE_WEIGHT16; |
160 | 0 | secTer = getFirstSecTerForPrimary(index); |
161 | 0 | } |
162 | 0 | uint32_t st = (s << 16) | t; |
163 | 0 | while(st > secTer) { |
164 | 0 | if((secTer >> 16) == s) { previousTer = secTer; } |
165 | 0 | U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); |
166 | 0 | secTer = elements[index++] & ~SEC_TER_DELTA_FLAG; |
167 | 0 | } |
168 | 0 | U_ASSERT(secTer == st); |
169 | 0 | return previousTer & 0xffff; |
170 | 0 | } |
171 | | |
172 | | uint32_t |
173 | 0 | CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const { |
174 | 0 | U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1])); |
175 | 0 | uint32_t q = elements[++index]; |
176 | 0 | int32_t step; |
177 | 0 | if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) { |
178 | | // Return the next primary in this range. |
179 | 0 | if((p & 0xffff) == 0) { |
180 | 0 | return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step); |
181 | 0 | } else { |
182 | 0 | return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step); |
183 | 0 | } |
184 | 0 | } else { |
185 | | // Return the next primary in the list. |
186 | 0 | while((q & SEC_TER_DELTA_FLAG) != 0) { |
187 | 0 | q = elements[++index]; |
188 | 0 | } |
189 | 0 | U_ASSERT((q & PRIMARY_STEP_MASK) == 0); |
190 | 0 | return q; |
191 | 0 | } |
192 | 0 | } |
193 | | |
194 | | uint32_t |
195 | 0 | CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const { |
196 | 0 | uint32_t secTer; |
197 | 0 | uint32_t secLimit; |
198 | 0 | if(index == 0) { |
199 | | // primary = 0 |
200 | 0 | U_ASSERT(s != 0); |
201 | 0 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; |
202 | 0 | secTer = elements[index]; |
203 | | // Gap at the end of the secondary CE range. |
204 | 0 | secLimit = 0x10000; |
205 | 0 | } else { |
206 | 0 | U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); |
207 | 0 | secTer = getFirstSecTerForPrimary(index + 1); |
208 | | // If this is an explicit sec/ter unit, then it will be read once more. |
209 | | // Gap for secondaries of primary CEs. |
210 | 0 | secLimit = getSecondaryBoundary(); |
211 | 0 | } |
212 | 0 | for(;;) { |
213 | 0 | uint32_t sec = secTer >> 16; |
214 | 0 | if(sec > s) { return sec; } |
215 | 0 | secTer = elements[++index]; |
216 | 0 | if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; } |
217 | 0 | } |
218 | 0 | } |
219 | | |
220 | | uint32_t |
221 | 0 | CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const { |
222 | 0 | uint32_t secTer; |
223 | 0 | uint32_t terLimit; |
224 | 0 | if(index == 0) { |
225 | | // primary = 0 |
226 | 0 | if(s == 0) { |
227 | 0 | U_ASSERT(t != 0); |
228 | 0 | index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; |
229 | | // Gap at the end of the tertiary CE range. |
230 | 0 | terLimit = 0x4000; |
231 | 0 | } else { |
232 | 0 | index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; |
233 | | // Gap for tertiaries of primary/secondary CEs. |
234 | 0 | terLimit = getTertiaryBoundary(); |
235 | 0 | } |
236 | 0 | secTer = elements[index] & ~SEC_TER_DELTA_FLAG; |
237 | 0 | } else { |
238 | 0 | U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); |
239 | 0 | secTer = getFirstSecTerForPrimary(index + 1); |
240 | | // If this is an explicit sec/ter unit, then it will be read once more. |
241 | 0 | terLimit = getTertiaryBoundary(); |
242 | 0 | } |
243 | 0 | uint32_t st = (s << 16) | t; |
244 | 0 | for(;;) { |
245 | 0 | if(secTer > st) { |
246 | 0 | U_ASSERT((secTer >> 16) == s); |
247 | 0 | return secTer & 0xffff; |
248 | 0 | } |
249 | 0 | secTer = elements[++index]; |
250 | | // No tertiary greater than t for this primary+secondary. |
251 | 0 | if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; } |
252 | 0 | secTer &= ~SEC_TER_DELTA_FLAG; |
253 | 0 | } |
254 | 0 | } |
255 | | |
256 | | uint32_t |
257 | 0 | CollationRootElements::getFirstSecTerForPrimary(int32_t index) const { |
258 | 0 | uint32_t secTer = elements[index]; |
259 | 0 | if((secTer & SEC_TER_DELTA_FLAG) == 0) { |
260 | | // No sec/ter delta. |
261 | 0 | return Collation::COMMON_SEC_AND_TER_CE; |
262 | 0 | } |
263 | 0 | secTer &= ~SEC_TER_DELTA_FLAG; |
264 | 0 | if(secTer > Collation::COMMON_SEC_AND_TER_CE) { |
265 | | // Implied sec/ter. |
266 | 0 | return Collation::COMMON_SEC_AND_TER_CE; |
267 | 0 | } |
268 | | // Explicit sec/ter below common/common. |
269 | 0 | return secTer; |
270 | 0 | } |
271 | | |
272 | | int32_t |
273 | 0 | CollationRootElements::findPrimary(uint32_t p) const { |
274 | | // Requirement: p must occur as a root primary. |
275 | 0 | U_ASSERT((p & 0xff) == 0); // at most a 3-byte primary |
276 | 0 | int32_t index = findP(p); |
277 | | // If p is in a range, then we just assume that p is an actual primary in this range. |
278 | | // (Too cumbersome/expensive to check.) |
279 | | // Otherwise, it must be an exact match. |
280 | 0 | U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00)); |
281 | 0 | return index; |
282 | 0 | } |
283 | | |
284 | | int32_t |
285 | 0 | CollationRootElements::findP(uint32_t p) const { |
286 | | // p need not occur as a root primary. |
287 | | // For example, it might be a reordering group boundary. |
288 | 0 | U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE); |
289 | | // modified binary search |
290 | 0 | int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX]; |
291 | 0 | U_ASSERT(p >= elements[start]); |
292 | 0 | int32_t limit = length - 1; |
293 | 0 | U_ASSERT(elements[limit] >= PRIMARY_SENTINEL); |
294 | 0 | U_ASSERT(p < elements[limit]); |
295 | 0 | while((start + 1) < limit) { |
296 | | // Invariant: elements[start] and elements[limit] are primaries, |
297 | | // and elements[start]<=p<=elements[limit]. |
298 | 0 | int32_t i = (start + limit) / 2; |
299 | 0 | uint32_t q = elements[i]; |
300 | 0 | if((q & SEC_TER_DELTA_FLAG) != 0) { |
301 | | // Find the next primary. |
302 | 0 | int32_t j = i + 1; |
303 | 0 | for(;;) { |
304 | 0 | if(j == limit) { break; } |
305 | 0 | q = elements[j]; |
306 | 0 | if((q & SEC_TER_DELTA_FLAG) == 0) { |
307 | 0 | i = j; |
308 | 0 | break; |
309 | 0 | } |
310 | 0 | ++j; |
311 | 0 | } |
312 | 0 | if((q & SEC_TER_DELTA_FLAG) != 0) { |
313 | | // Find the preceding primary. |
314 | 0 | j = i - 1; |
315 | 0 | for(;;) { |
316 | 0 | if(j == start) { break; } |
317 | 0 | q = elements[j]; |
318 | 0 | if((q & SEC_TER_DELTA_FLAG) == 0) { |
319 | 0 | i = j; |
320 | 0 | break; |
321 | 0 | } |
322 | 0 | --j; |
323 | 0 | } |
324 | 0 | if((q & SEC_TER_DELTA_FLAG) != 0) { |
325 | | // No primary between start and limit. |
326 | 0 | break; |
327 | 0 | } |
328 | 0 | } |
329 | 0 | } |
330 | 0 | if(p < (q & 0xffffff00)) { // Reset the "step" bits of a range end primary. |
331 | 0 | limit = i; |
332 | 0 | } else { |
333 | 0 | start = i; |
334 | 0 | } |
335 | 0 | } |
336 | 0 | return start; |
337 | 0 | } |
338 | | |
339 | | U_NAMESPACE_END |
340 | | |
341 | | #endif // !UCONFIG_NO_COLLATION |