/src/mozilla-central/intl/icu/source/i18n/ucoleitr.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ****************************************************************************** |
5 | | * Copyright (C) 2001-2016, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ****************************************************************************** |
8 | | * |
9 | | * File ucoleitr.cpp |
10 | | * |
11 | | * Modification History: |
12 | | * |
13 | | * Date Name Description |
14 | | * 02/15/2001 synwee Modified all methods to process its own function |
15 | | * instead of calling the equivalent c++ api (coleitr.h) |
16 | | * 2012-2014 markus Rewritten in C++ again. |
17 | | ******************************************************************************/ |
18 | | |
19 | | #include "unicode/utypes.h" |
20 | | |
21 | | #if !UCONFIG_NO_COLLATION |
22 | | |
23 | | #include "unicode/coleitr.h" |
24 | | #include "unicode/tblcoll.h" |
25 | | #include "unicode/ucoleitr.h" |
26 | | #include "unicode/ustring.h" |
27 | | #include "unicode/sortkey.h" |
28 | | #include "unicode/uobject.h" |
29 | | #include "cmemory.h" |
30 | | #include "usrchimp.h" |
31 | | |
32 | | U_NAMESPACE_USE |
33 | | |
// Not referenced by any code visible in this file.
#define BUFFER_LENGTH 100

// Number of elements in the inline array embedded in RCEBuffer.
#define DEFAULT_BUFFER_SIZE 16

// Number of additional elements reserved each time a full buffer is regrown.
#define BUFFER_GROW 8

// Copies `count` elements from `src` to `dst`; the element size is taken from `src`.
#define ARRAY_COPY(dst, src, count) \
    uprv_memcpy((void *) (dst), (void *) (src), (size_t)(count) * sizeof((src)[0]))

// Allocates uninitialized storage for `count` objects of `type` with uprv_malloc().
#define NEW_ARRAY(type, count) \
    ((type *) uprv_malloc((size_t)(count) * sizeof(type)))

// Releases storage obtained from NEW_ARRAY.
#define DELETE_ARRAY(array) uprv_free((void *) (array))
44 | | |
// One raw (unprocessed) collation element together with the pair of iterator
// offsets that were recorded around the call that produced it.
struct RCEI {
    uint32_t ce;    // raw 32-bit collation element
    int32_t  low;   // lower offset recorded for this element
    int32_t  high;  // upper offset recorded for this element
};
51 | | |
52 | | U_NAMESPACE_BEGIN |
53 | | |
54 | | struct RCEBuffer |
55 | | { |
56 | | RCEI defaultBuffer[DEFAULT_BUFFER_SIZE]; |
57 | | RCEI *buffer; |
58 | | int32_t bufferIndex; |
59 | | int32_t bufferSize; |
60 | | |
61 | | RCEBuffer(); |
62 | | ~RCEBuffer(); |
63 | | |
64 | | UBool isEmpty() const; |
65 | | void put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode); |
66 | | const RCEI *get(); |
67 | | }; |
68 | | |
69 | | RCEBuffer::RCEBuffer() |
70 | 0 | { |
71 | 0 | buffer = defaultBuffer; |
72 | 0 | bufferIndex = 0; |
73 | 0 | bufferSize = UPRV_LENGTHOF(defaultBuffer); |
74 | 0 | } |
75 | | |
76 | | RCEBuffer::~RCEBuffer() |
77 | 0 | { |
78 | 0 | if (buffer != defaultBuffer) { |
79 | 0 | DELETE_ARRAY(buffer); |
80 | 0 | } |
81 | 0 | } |
82 | | |
83 | | UBool RCEBuffer::isEmpty() const |
84 | 0 | { |
85 | 0 | return bufferIndex <= 0; |
86 | 0 | } |
87 | | |
88 | | void RCEBuffer::put(uint32_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode) |
89 | 0 | { |
90 | 0 | if (U_FAILURE(errorCode)) { |
91 | 0 | return; |
92 | 0 | } |
93 | 0 | if (bufferIndex >= bufferSize) { |
94 | 0 | RCEI *newBuffer = NEW_ARRAY(RCEI, bufferSize + BUFFER_GROW); |
95 | 0 | if (newBuffer == NULL) { |
96 | 0 | errorCode = U_MEMORY_ALLOCATION_ERROR; |
97 | 0 | return; |
98 | 0 | } |
99 | 0 | |
100 | 0 | ARRAY_COPY(newBuffer, buffer, bufferSize); |
101 | 0 |
|
102 | 0 | if (buffer != defaultBuffer) { |
103 | 0 | DELETE_ARRAY(buffer); |
104 | 0 | } |
105 | 0 |
|
106 | 0 | buffer = newBuffer; |
107 | 0 | bufferSize += BUFFER_GROW; |
108 | 0 | } |
109 | 0 |
|
110 | 0 | buffer[bufferIndex].ce = ce; |
111 | 0 | buffer[bufferIndex].low = ixLow; |
112 | 0 | buffer[bufferIndex].high = ixHigh; |
113 | 0 |
|
114 | 0 | bufferIndex += 1; |
115 | 0 | } |
116 | | |
117 | | const RCEI *RCEBuffer::get() |
118 | 0 | { |
119 | 0 | if (bufferIndex > 0) { |
120 | 0 | return &buffer[--bufferIndex]; |
121 | 0 | } |
122 | 0 | |
123 | 0 | return NULL; |
124 | 0 | } |
125 | | |
126 | | PCEBuffer::PCEBuffer() |
127 | 0 | { |
128 | 0 | buffer = defaultBuffer; |
129 | 0 | bufferIndex = 0; |
130 | 0 | bufferSize = UPRV_LENGTHOF(defaultBuffer); |
131 | 0 | } |
132 | | |
133 | | PCEBuffer::~PCEBuffer() |
134 | 0 | { |
135 | 0 | if (buffer != defaultBuffer) { |
136 | 0 | DELETE_ARRAY(buffer); |
137 | 0 | } |
138 | 0 | } |
139 | | |
140 | | void PCEBuffer::reset() |
141 | 0 | { |
142 | 0 | bufferIndex = 0; |
143 | 0 | } |
144 | | |
145 | | UBool PCEBuffer::isEmpty() const |
146 | 0 | { |
147 | 0 | return bufferIndex <= 0; |
148 | 0 | } |
149 | | |
150 | | void PCEBuffer::put(uint64_t ce, int32_t ixLow, int32_t ixHigh, UErrorCode &errorCode) |
151 | 0 | { |
152 | 0 | if (U_FAILURE(errorCode)) { |
153 | 0 | return; |
154 | 0 | } |
155 | 0 | if (bufferIndex >= bufferSize) { |
156 | 0 | PCEI *newBuffer = NEW_ARRAY(PCEI, bufferSize + BUFFER_GROW); |
157 | 0 | if (newBuffer == NULL) { |
158 | 0 | errorCode = U_MEMORY_ALLOCATION_ERROR; |
159 | 0 | return; |
160 | 0 | } |
161 | 0 | |
162 | 0 | ARRAY_COPY(newBuffer, buffer, bufferSize); |
163 | 0 |
|
164 | 0 | if (buffer != defaultBuffer) { |
165 | 0 | DELETE_ARRAY(buffer); |
166 | 0 | } |
167 | 0 |
|
168 | 0 | buffer = newBuffer; |
169 | 0 | bufferSize += BUFFER_GROW; |
170 | 0 | } |
171 | 0 |
|
172 | 0 | buffer[bufferIndex].ce = ce; |
173 | 0 | buffer[bufferIndex].low = ixLow; |
174 | 0 | buffer[bufferIndex].high = ixHigh; |
175 | 0 |
|
176 | 0 | bufferIndex += 1; |
177 | 0 | } |
178 | | |
179 | | const PCEI *PCEBuffer::get() |
180 | 0 | { |
181 | 0 | if (bufferIndex > 0) { |
182 | 0 | return &buffer[--bufferIndex]; |
183 | 0 | } |
184 | 0 | |
185 | 0 | return NULL; |
186 | 0 | } |
187 | | |
188 | 0 | UCollationPCE::UCollationPCE(UCollationElements *elems) { init(elems); } |
189 | | |
190 | 0 | UCollationPCE::UCollationPCE(CollationElementIterator *iter) { init(iter); } |
191 | | |
192 | 0 | void UCollationPCE::init(UCollationElements *elems) { |
193 | 0 | init(CollationElementIterator::fromUCollationElements(elems)); |
194 | 0 | } |
195 | | |
196 | | void UCollationPCE::init(CollationElementIterator *iter) |
197 | 0 | { |
198 | 0 | cei = iter; |
199 | 0 | init(*iter->rbc_); |
200 | 0 | } |
201 | | |
202 | | void UCollationPCE::init(const Collator &coll) |
203 | 0 | { |
204 | 0 | UErrorCode status = U_ZERO_ERROR; |
205 | 0 |
|
206 | 0 | strength = coll.getAttribute(UCOL_STRENGTH, status); |
207 | 0 | toShift = coll.getAttribute(UCOL_ALTERNATE_HANDLING, status) == UCOL_SHIFTED; |
208 | 0 | isShifted = FALSE; |
209 | 0 | variableTop = coll.getVariableTop(status); |
210 | 0 | } |
211 | | |
212 | | UCollationPCE::~UCollationPCE() |
213 | 0 | { |
214 | 0 | // nothing to do |
215 | 0 | } |
216 | | |
217 | | uint64_t UCollationPCE::processCE(uint32_t ce) |
218 | 0 | { |
219 | 0 | uint64_t primary = 0, secondary = 0, tertiary = 0, quaternary = 0; |
220 | 0 |
|
221 | 0 | // This is clean, but somewhat slow... |
222 | 0 | // We could apply the mask to ce and then |
223 | 0 | // just get all three orders... |
224 | 0 | switch(strength) { |
225 | 0 | default: |
226 | 0 | tertiary = ucol_tertiaryOrder(ce); |
227 | 0 | U_FALLTHROUGH; |
228 | 0 |
|
229 | 0 | case UCOL_SECONDARY: |
230 | 0 | secondary = ucol_secondaryOrder(ce); |
231 | 0 | U_FALLTHROUGH; |
232 | 0 |
|
233 | 0 | case UCOL_PRIMARY: |
234 | 0 | primary = ucol_primaryOrder(ce); |
235 | 0 | } |
236 | 0 |
|
237 | 0 | // **** This should probably handle continuations too. **** |
238 | 0 | // **** That means that we need 24 bits for the primary **** |
239 | 0 | // **** instead of the 16 that we're currently using. **** |
240 | 0 | // **** So we can lay out the 64 bits as: 24.12.12.16. **** |
241 | 0 | // **** Another complication with continuations is that **** |
242 | 0 | // **** the *second* CE is marked as a continuation, so **** |
243 | 0 | // **** we always have to peek ahead to know how long **** |
244 | 0 | // **** the primary is... **** |
245 | 0 | if ((toShift && variableTop > ce && primary != 0) |
246 | 0 | || (isShifted && primary == 0)) { |
247 | 0 |
|
248 | 0 | if (primary == 0) { |
249 | 0 | return UCOL_IGNORABLE; |
250 | 0 | } |
251 | 0 |
|
252 | 0 | if (strength >= UCOL_QUATERNARY) { |
253 | 0 | quaternary = primary; |
254 | 0 | } |
255 | 0 |
|
256 | 0 | primary = secondary = tertiary = 0; |
257 | 0 | isShifted = TRUE; |
258 | 0 | } else { |
259 | 0 | if (strength >= UCOL_QUATERNARY) { |
260 | 0 | quaternary = 0xFFFF; |
261 | 0 | } |
262 | 0 |
|
263 | 0 | isShifted = FALSE; |
264 | 0 | } |
265 | 0 |
|
266 | 0 | return primary << 48 | secondary << 32 | tertiary << 16 | quaternary; |
267 | 0 | } |
268 | | |
269 | | U_NAMESPACE_END |
270 | | |
271 | | /* public methods ---------------------------------------------------- */ |
272 | | |
273 | | U_CAPI UCollationElements* U_EXPORT2 |
274 | | ucol_openElements(const UCollator *coll, |
275 | | const UChar *text, |
276 | | int32_t textLength, |
277 | | UErrorCode *status) |
278 | 0 | { |
279 | 0 | if (U_FAILURE(*status)) { |
280 | 0 | return NULL; |
281 | 0 | } |
282 | 0 | if (coll == NULL || (text == NULL && textLength != 0)) { |
283 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
284 | 0 | return NULL; |
285 | 0 | } |
286 | 0 | const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); |
287 | 0 | if (rbc == NULL) { |
288 | 0 | *status = U_UNSUPPORTED_ERROR; // coll is a Collator but not a RuleBasedCollator |
289 | 0 | return NULL; |
290 | 0 | } |
291 | 0 | |
292 | 0 | UnicodeString s((UBool)(textLength < 0), text, textLength); |
293 | 0 | CollationElementIterator *cei = rbc->createCollationElementIterator(s); |
294 | 0 | if (cei == NULL) { |
295 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
296 | 0 | return NULL; |
297 | 0 | } |
298 | 0 | |
299 | 0 | return cei->toUCollationElements(); |
300 | 0 | } |
301 | | |
302 | | |
303 | | U_CAPI void U_EXPORT2 |
304 | | ucol_closeElements(UCollationElements *elems) |
305 | 0 | { |
306 | 0 | delete CollationElementIterator::fromUCollationElements(elems); |
307 | 0 | } |
308 | | |
309 | | U_CAPI void U_EXPORT2 |
310 | | ucol_reset(UCollationElements *elems) |
311 | 0 | { |
312 | 0 | CollationElementIterator::fromUCollationElements(elems)->reset(); |
313 | 0 | } |
314 | | |
315 | | U_CAPI int32_t U_EXPORT2 |
316 | | ucol_next(UCollationElements *elems, |
317 | | UErrorCode *status) |
318 | 0 | { |
319 | 0 | if (U_FAILURE(*status)) { |
320 | 0 | return UCOL_NULLORDER; |
321 | 0 | } |
322 | 0 |
|
323 | 0 | return CollationElementIterator::fromUCollationElements(elems)->next(*status); |
324 | 0 | } |
325 | | |
326 | | U_NAMESPACE_BEGIN |
327 | | |
328 | | int64_t |
329 | | UCollationPCE::nextProcessed( |
330 | | int32_t *ixLow, |
331 | | int32_t *ixHigh, |
332 | | UErrorCode *status) |
333 | 0 | { |
334 | 0 | int64_t result = UCOL_IGNORABLE; |
335 | 0 | uint32_t low = 0, high = 0; |
336 | 0 |
|
337 | 0 | if (U_FAILURE(*status)) { |
338 | 0 | return UCOL_PROCESSED_NULLORDER; |
339 | 0 | } |
340 | 0 |
|
341 | 0 | pceBuffer.reset(); |
342 | 0 |
|
343 | 0 | do { |
344 | 0 | low = cei->getOffset(); |
345 | 0 | int32_t ce = cei->next(*status); |
346 | 0 | high = cei->getOffset(); |
347 | 0 |
|
348 | 0 | if (ce == UCOL_NULLORDER) { |
349 | 0 | result = UCOL_PROCESSED_NULLORDER; |
350 | 0 | break; |
351 | 0 | } |
352 | 0 |
|
353 | 0 | result = processCE((uint32_t)ce); |
354 | 0 | } while (result == UCOL_IGNORABLE); |
355 | 0 |
|
356 | 0 | if (ixLow != NULL) { |
357 | 0 | *ixLow = low; |
358 | 0 | } |
359 | 0 |
|
360 | 0 | if (ixHigh != NULL) { |
361 | 0 | *ixHigh = high; |
362 | 0 | } |
363 | 0 |
|
364 | 0 | return result; |
365 | 0 | } |
366 | | |
367 | | U_NAMESPACE_END |
368 | | |
369 | | U_CAPI int32_t U_EXPORT2 |
370 | | ucol_previous(UCollationElements *elems, |
371 | | UErrorCode *status) |
372 | 0 | { |
373 | 0 | if(U_FAILURE(*status)) { |
374 | 0 | return UCOL_NULLORDER; |
375 | 0 | } |
376 | 0 | return CollationElementIterator::fromUCollationElements(elems)->previous(*status); |
377 | 0 | } |
378 | | |
379 | | U_NAMESPACE_BEGIN |
380 | | |
381 | | int64_t |
382 | | UCollationPCE::previousProcessed( |
383 | | int32_t *ixLow, |
384 | | int32_t *ixHigh, |
385 | | UErrorCode *status) |
386 | 0 | { |
387 | 0 | int64_t result = UCOL_IGNORABLE; |
388 | 0 | int32_t low = 0, high = 0; |
389 | 0 |
|
390 | 0 | if (U_FAILURE(*status)) { |
391 | 0 | return UCOL_PROCESSED_NULLORDER; |
392 | 0 | } |
393 | 0 |
|
394 | 0 | // pceBuffer.reset(); |
395 | 0 |
|
396 | 0 | while (pceBuffer.isEmpty()) { |
397 | 0 | // buffer raw CEs up to non-ignorable primary |
398 | 0 | RCEBuffer rceb; |
399 | 0 | int32_t ce; |
400 | 0 | |
401 | 0 | // **** do we need to reset rceb, or will it always be empty at this point **** |
402 | 0 | do { |
403 | 0 | high = cei->getOffset(); |
404 | 0 | ce = cei->previous(*status); |
405 | 0 | low = cei->getOffset(); |
406 | 0 |
|
407 | 0 | if (ce == UCOL_NULLORDER) { |
408 | 0 | if (!rceb.isEmpty()) { |
409 | 0 | break; |
410 | 0 | } |
411 | 0 | |
412 | 0 | goto finish; |
413 | 0 | } |
414 | 0 | |
415 | 0 | rceb.put((uint32_t)ce, low, high, *status); |
416 | 0 | } while (U_SUCCESS(*status) && ((ce & UCOL_PRIMARYORDERMASK) == 0 || isContinuation(ce))); |
417 | 0 |
|
418 | 0 | // process the raw CEs |
419 | 0 | while (U_SUCCESS(*status) && !rceb.isEmpty()) { |
420 | 0 | const RCEI *rcei = rceb.get(); |
421 | 0 |
|
422 | 0 | result = processCE(rcei->ce); |
423 | 0 |
|
424 | 0 | if (result != UCOL_IGNORABLE) { |
425 | 0 | pceBuffer.put(result, rcei->low, rcei->high, *status); |
426 | 0 | } |
427 | 0 | } |
428 | 0 | if (U_FAILURE(*status)) { |
429 | 0 | return UCOL_PROCESSED_NULLORDER; |
430 | 0 | } |
431 | 0 | } |
432 | 0 |
|
433 | 0 | finish: |
434 | 0 | if (pceBuffer.isEmpty()) { |
435 | 0 | // **** Is -1 the right value for ixLow, ixHigh? **** |
436 | 0 | if (ixLow != NULL) { |
437 | 0 | *ixLow = -1; |
438 | 0 | } |
439 | 0 | |
440 | 0 | if (ixHigh != NULL) { |
441 | 0 | *ixHigh = -1 |
442 | 0 | ; |
443 | 0 | } |
444 | 0 | return UCOL_PROCESSED_NULLORDER; |
445 | 0 | } |
446 | 0 |
|
447 | 0 | const PCEI *pcei = pceBuffer.get(); |
448 | 0 |
|
449 | 0 | if (ixLow != NULL) { |
450 | 0 | *ixLow = pcei->low; |
451 | 0 | } |
452 | 0 |
|
453 | 0 | if (ixHigh != NULL) { |
454 | 0 | *ixHigh = pcei->high; |
455 | 0 | } |
456 | 0 |
|
457 | 0 | return pcei->ce; |
458 | 0 | } |
459 | | |
460 | | U_NAMESPACE_END |
461 | | |
462 | | U_CAPI int32_t U_EXPORT2 |
463 | | ucol_getMaxExpansion(const UCollationElements *elems, |
464 | | int32_t order) |
465 | 0 | { |
466 | 0 | return CollationElementIterator::fromUCollationElements(elems)->getMaxExpansion(order); |
467 | 0 |
|
468 | 0 | // TODO: The old code masked the order according to strength and then did a binary search. |
469 | 0 | // However this was probably at least partially broken because of the following comment. |
470 | 0 | // Still, it might have found a match when this version may not. |
471 | 0 |
|
472 | 0 | // FIXME: with a masked search, there might be more than one hit, |
473 | 0 | // so we need to look forward and backward from the match to find all |
474 | 0 | // of the hits... |
475 | 0 | } |
476 | | |
477 | | U_CAPI void U_EXPORT2 |
478 | | ucol_setText( UCollationElements *elems, |
479 | | const UChar *text, |
480 | | int32_t textLength, |
481 | | UErrorCode *status) |
482 | 0 | { |
483 | 0 | if (U_FAILURE(*status)) { |
484 | 0 | return; |
485 | 0 | } |
486 | 0 | |
487 | 0 | if ((text == NULL && textLength != 0)) { |
488 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
489 | 0 | return; |
490 | 0 | } |
491 | 0 | UnicodeString s((UBool)(textLength < 0), text, textLength); |
492 | 0 | return CollationElementIterator::fromUCollationElements(elems)->setText(s, *status); |
493 | 0 | } |
494 | | |
495 | | U_CAPI int32_t U_EXPORT2 |
496 | | ucol_getOffset(const UCollationElements *elems) |
497 | 0 | { |
498 | 0 | return CollationElementIterator::fromUCollationElements(elems)->getOffset(); |
499 | 0 | } |
500 | | |
501 | | U_CAPI void U_EXPORT2 |
502 | | ucol_setOffset(UCollationElements *elems, |
503 | | int32_t offset, |
504 | | UErrorCode *status) |
505 | 0 | { |
506 | 0 | if (U_FAILURE(*status)) { |
507 | 0 | return; |
508 | 0 | } |
509 | 0 | |
510 | 0 | CollationElementIterator::fromUCollationElements(elems)->setOffset(offset, *status); |
511 | 0 | } |
512 | | |
513 | | U_CAPI int32_t U_EXPORT2 |
514 | | ucol_primaryOrder (int32_t order) |
515 | 0 | { |
516 | 0 | return (order >> 16) & 0xffff; |
517 | 0 | } |
518 | | |
519 | | U_CAPI int32_t U_EXPORT2 |
520 | | ucol_secondaryOrder (int32_t order) |
521 | 0 | { |
522 | 0 | return (order >> 8) & 0xff; |
523 | 0 | } |
524 | | |
525 | | U_CAPI int32_t U_EXPORT2 |
526 | | ucol_tertiaryOrder (int32_t order) |
527 | 0 | { |
528 | 0 | return order & 0xff; |
529 | 0 | } |
530 | | |
531 | | #endif /* #if !UCONFIG_NO_COLLATION */ |