/src/icu/source/i18n/rulebasedcollator.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | *******************************************************************************  | 
5  |  | * Copyright (C) 1996-2015, International Business Machines  | 
6  |  | * Corporation and others.  All Rights Reserved.  | 
7  |  | *******************************************************************************  | 
8  |  | * rulebasedcollator.cpp  | 
9  |  | *  | 
10  |  | * (replaced the former tblcoll.cpp)  | 
11  |  | *  | 
12  |  | * created on: 2012feb14 with new and old collation code  | 
13  |  | * created by: Markus W. Scherer  | 
14  |  | */  | 
15  |  |  | 
16  |  | #include "unicode/utypes.h"  | 
17  |  |  | 
18  |  | #if !UCONFIG_NO_COLLATION  | 
19  |  |  | 
20  |  | #include "unicode/coll.h"  | 
21  |  | #include "unicode/coleitr.h"  | 
22  |  | #include "unicode/localpointer.h"  | 
23  |  | #include "unicode/locid.h"  | 
24  |  | #include "unicode/sortkey.h"  | 
25  |  | #include "unicode/tblcoll.h"  | 
26  |  | #include "unicode/ucol.h"  | 
27  |  | #include "unicode/uiter.h"  | 
28  |  | #include "unicode/uloc.h"  | 
29  |  | #include "unicode/uniset.h"  | 
30  |  | #include "unicode/unistr.h"  | 
31  |  | #include "unicode/usetiter.h"  | 
32  |  | #include "unicode/utf8.h"  | 
33  |  | #include "unicode/uversion.h"  | 
34  |  | #include "bocsu.h"  | 
35  |  | #include "charstr.h"  | 
36  |  | #include "cmemory.h"  | 
37  |  | #include "collation.h"  | 
38  |  | #include "collationcompare.h"  | 
39  |  | #include "collationdata.h"  | 
40  |  | #include "collationdatareader.h"  | 
41  |  | #include "collationfastlatin.h"  | 
42  |  | #include "collationiterator.h"  | 
43  |  | #include "collationkeys.h"  | 
44  |  | #include "collationroot.h"  | 
45  |  | #include "collationsets.h"  | 
46  |  | #include "collationsettings.h"  | 
47  |  | #include "collationtailoring.h"  | 
48  |  | #include "cstring.h"  | 
49  |  | #include "uassert.h"  | 
50  |  | #include "ucol_imp.h"  | 
51  |  | #include "uhash.h"  | 
52  |  | #include "uitercollationiterator.h"  | 
53  |  | #include "ustr_imp.h"  | 
54  |  | #include "utf16collationiterator.h"  | 
55  |  | #include "utf8collationiterator.h"  | 
56  |  | #include "uvectr64.h"  | 
57  |  |  | 
58  |  | U_NAMESPACE_BEGIN  | 
59  |  |  | 
60  |  | namespace { | 
61  |  |  | 
62  |  | class FixedSortKeyByteSink : public SortKeyByteSink { | 
63  |  | public:  | 
64  |  |     FixedSortKeyByteSink(char *dest, int32_t destCapacity)  | 
65  | 0  |             : SortKeyByteSink(dest, destCapacity) {} | 
66  |  |     virtual ~FixedSortKeyByteSink();  | 
67  |  |  | 
68  |  | private:  | 
69  |  |     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);  | 
70  |  |     virtual UBool Resize(int32_t appendCapacity, int32_t length);  | 
71  |  | };  | 
72  |  |  | 
73  |  | FixedSortKeyByteSink::~FixedSortKeyByteSink() {} | 
74  |  |  | 
75  |  | void  | 
76  | 0  | FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) { | 
77  |  |     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_  | 
78  |  |     // Fill the buffer completely.  | 
79  | 0  |     int32_t available = capacity_ - length;  | 
80  | 0  |     if (available > 0) { | 
81  | 0  |         uprv_memcpy(buffer_ + length, bytes, available);  | 
82  | 0  |     }  | 
83  | 0  | }  | 
84  |  |  | 
85  |  | UBool  | 
86  | 0  | FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) { | 
87  | 0  |     return FALSE;  | 
88  | 0  | }  | 
89  |  |  | 
90  |  | }  // namespace  | 
91  |  |  | 
92  |  | // Not in an anonymous namespace, so that it can be a friend of CollationKey.  | 
93  |  | class CollationKeyByteSink : public SortKeyByteSink { | 
94  |  | public:  | 
95  |  |     CollationKeyByteSink(CollationKey &key)  | 
96  | 0  |             : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),  | 
97  | 0  |               key_(key) {} | 
98  |  |     virtual ~CollationKeyByteSink();  | 
99  |  |  | 
100  |  | private:  | 
101  |  |     virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);  | 
102  |  |     virtual UBool Resize(int32_t appendCapacity, int32_t length);  | 
103  |  |  | 
104  |  |     CollationKey &key_;  | 
105  |  | };  | 
106  |  |  | 
107  | 0  | CollationKeyByteSink::~CollationKeyByteSink() {} | 
108  |  |  | 
109  |  | void  | 
110  | 0  | CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) { | 
111  |  |     // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_  | 
112  | 0  |     if (Resize(n, length)) { | 
113  | 0  |         uprv_memcpy(buffer_ + length, bytes, n);  | 
114  | 0  |     }  | 
115  | 0  | }  | 
116  |  |  | 
117  |  | UBool  | 
118  | 0  | CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) { | 
119  | 0  |     if (buffer_ == NULL) { | 
120  | 0  |         return FALSE;  // allocation failed before already  | 
121  | 0  |     }  | 
122  | 0  |     int32_t newCapacity = 2 * capacity_;  | 
123  | 0  |     int32_t altCapacity = length + 2 * appendCapacity;  | 
124  | 0  |     if (newCapacity < altCapacity) { | 
125  | 0  |         newCapacity = altCapacity;  | 
126  | 0  |     }  | 
127  | 0  |     if (newCapacity < 200) { | 
128  | 0  |         newCapacity = 200;  | 
129  | 0  |     }  | 
130  | 0  |     uint8_t *newBuffer = key_.reallocate(newCapacity, length);  | 
131  | 0  |     if (newBuffer == NULL) { | 
132  | 0  |         SetNotOk();  | 
133  | 0  |         return FALSE;  | 
134  | 0  |     }  | 
135  | 0  |     buffer_ = reinterpret_cast<char *>(newBuffer);  | 
136  | 0  |     capacity_ = newCapacity;  | 
137  | 0  |     return TRUE;  | 
138  | 0  | }  | 
139  |  |  | 
140  |  | RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)  | 
141  | 0  |         : Collator(other),  | 
142  | 0  |           data(other.data),  | 
143  | 0  |           settings(other.settings),  | 
144  | 0  |           tailoring(other.tailoring),  | 
145  | 0  |           cacheEntry(other.cacheEntry),  | 
146  | 0  |           validLocale(other.validLocale),  | 
147  | 0  |           explicitlySetAttributes(other.explicitlySetAttributes),  | 
148  | 0  |           actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) { | 
149  | 0  |     settings->addRef();  | 
150  | 0  |     cacheEntry->addRef();  | 
151  | 0  | }  | 
152  |  |  | 
153  |  | RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,  | 
154  |  |                                      const RuleBasedCollator *base, UErrorCode &errorCode)  | 
155  |  |         : data(NULL),  | 
156  |  |           settings(NULL),  | 
157  |  |           tailoring(NULL),  | 
158  |  |           cacheEntry(NULL),  | 
159  | 0  |           validLocale(""), | 
160  | 0  |           explicitlySetAttributes(0),  | 
161  | 0  |           actualLocaleIsSameAsValid(FALSE) { | 
162  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
163  | 0  |     if(bin == NULL || length == 0 || base == NULL) { | 
164  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
165  | 0  |         return;  | 
166  | 0  |     }  | 
167  | 0  |     const CollationTailoring *root = CollationRoot::getRoot(errorCode);  | 
168  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
169  | 0  |     if(base->tailoring != root) { | 
170  | 0  |         errorCode = U_UNSUPPORTED_ERROR;  | 
171  | 0  |         return;  | 
172  | 0  |     }  | 
173  | 0  |     LocalPointer<CollationTailoring> t(new CollationTailoring(base->tailoring->settings));  | 
174  | 0  |     if(t.isNull() || t->isBogus()) { | 
175  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
176  | 0  |         return;  | 
177  | 0  |     }  | 
178  | 0  |     CollationDataReader::read(base->tailoring, bin, length, *t, errorCode);  | 
179  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
180  | 0  |     t->actualLocale.setToBogus();  | 
181  | 0  |     adoptTailoring(t.orphan(), errorCode);  | 
182  | 0  | }  | 
183  |  |  | 
184  |  | RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)  | 
185  | 0  |         : data(entry->tailoring->data),  | 
186  | 0  |           settings(entry->tailoring->settings),  | 
187  | 0  |           tailoring(entry->tailoring),  | 
188  | 0  |           cacheEntry(entry),  | 
189  | 0  |           validLocale(entry->validLocale),  | 
190  | 0  |           explicitlySetAttributes(0),  | 
191  | 0  |           actualLocaleIsSameAsValid(FALSE) { | 
192  | 0  |     settings->addRef();  | 
193  | 0  |     cacheEntry->addRef();  | 
194  | 0  | }  | 
195  |  |  | 
196  | 0  | RuleBasedCollator::~RuleBasedCollator() { | 
197  | 0  |     SharedObject::clearPtr(settings);  | 
198  | 0  |     SharedObject::clearPtr(cacheEntry);  | 
199  | 0  | }  | 
200  |  |  | 
201  |  | void  | 
202  | 0  | RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) { | 
203  | 0  |     if(U_FAILURE(errorCode)) { | 
204  | 0  |         t->deleteIfZeroRefCount();  | 
205  | 0  |         return;  | 
206  | 0  |     }  | 
207  | 0  |     U_ASSERT(settings == NULL && data == NULL && tailoring == NULL && cacheEntry == NULL);  | 
208  | 0  |     cacheEntry = new CollationCacheEntry(t->actualLocale, t);  | 
209  | 0  |     if(cacheEntry == NULL) { | 
210  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
211  | 0  |         t->deleteIfZeroRefCount();  | 
212  | 0  |         return;  | 
213  | 0  |     }  | 
214  | 0  |     data = t->data;  | 
215  | 0  |     settings = t->settings;  | 
216  | 0  |     settings->addRef();  | 
217  | 0  |     tailoring = t;  | 
218  | 0  |     cacheEntry->addRef();  | 
219  | 0  |     validLocale = t->actualLocale;  | 
220  | 0  |     actualLocaleIsSameAsValid = FALSE;  | 
221  | 0  | }  | 
222  |  |  | 
223  |  | RuleBasedCollator *  | 
224  | 0  | RuleBasedCollator::clone() const { | 
225  | 0  |     return new RuleBasedCollator(*this);  | 
226  | 0  | }  | 
227  |  |  | 
228  | 0  | RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) { | 
229  | 0  |     if(this == &other) { return *this; } | 
230  | 0  |     SharedObject::copyPtr(other.settings, settings);  | 
231  | 0  |     tailoring = other.tailoring;  | 
232  | 0  |     SharedObject::copyPtr(other.cacheEntry, cacheEntry);  | 
233  | 0  |     data = tailoring->data;  | 
234  | 0  |     validLocale = other.validLocale;  | 
235  | 0  |     explicitlySetAttributes = other.explicitlySetAttributes;  | 
236  | 0  |     actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;  | 
237  | 0  |     return *this;  | 
238  | 0  | }  | 
239  |  |  | 
240  |  | UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)  | 
241  |  |  | 
242  |  | bool  | 
243  | 0  | RuleBasedCollator::operator==(const Collator& other) const { | 
244  | 0  |     if(this == &other) { return TRUE; } | 
245  | 0  |     if(!Collator::operator==(other)) { return FALSE; } | 
246  | 0  |     const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);  | 
247  | 0  |     if(*settings != *o.settings) { return FALSE; } | 
248  | 0  |     if(data == o.data) { return TRUE; } | 
249  | 0  |     UBool thisIsRoot = data->base == NULL;  | 
250  | 0  |     UBool otherIsRoot = o.data->base == NULL;  | 
251  | 0  |     U_ASSERT(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==  | 
252  | 0  |     if(thisIsRoot != otherIsRoot) { return FALSE; } | 
253  | 0  |     if((thisIsRoot || !tailoring->rules.isEmpty()) &&  | 
254  | 0  |             (otherIsRoot || !o.tailoring->rules.isEmpty())) { | 
255  |  |         // Shortcut: If both collators have valid rule strings, then compare those.  | 
256  | 0  |         if(tailoring->rules == o.tailoring->rules) { return TRUE; } | 
257  | 0  |     }  | 
258  |  |     // Different rule strings can result in the same or equivalent tailoring.  | 
259  |  |     // The rule strings are optional in ICU resource bundles, although included by default.  | 
260  |  |     // cloneBinary() drops the rule string.  | 
261  | 0  |     UErrorCode errorCode = U_ZERO_ERROR;  | 
262  | 0  |     LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));  | 
263  | 0  |     LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));  | 
264  | 0  |     if(U_FAILURE(errorCode)) { return FALSE; } | 
265  | 0  |     if(*thisTailored != *otherTailored) { return FALSE; } | 
266  |  |     // For completeness, we should compare all of the mappings;  | 
267  |  |     // or we should create a list of strings, sort it with one collator,  | 
268  |  |     // and check if both collators compare adjacent strings the same  | 
269  |  |     // (order & strength, down to quaternary); or similar.  | 
270  |  |     // Testing equality of collators seems unusual.  | 
271  | 0  |     return TRUE;  | 
272  | 0  | }  | 
273  |  |  | 
274  |  | int32_t  | 
275  | 0  | RuleBasedCollator::hashCode() const { | 
276  | 0  |     int32_t h = settings->hashCode();  | 
277  | 0  |     if(data->base == NULL) { return h; }  // root collator | 
278  |  |     // Do not rely on the rule string, see comments in operator==().  | 
279  | 0  |     UErrorCode errorCode = U_ZERO_ERROR;  | 
280  | 0  |     LocalPointer<UnicodeSet> set(getTailoredSet(errorCode));  | 
281  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
282  | 0  |     UnicodeSetIterator iter(*set);  | 
283  | 0  |     while(iter.next() && !iter.isString()) { | 
284  | 0  |         h ^= data->getCE32(iter.getCodepoint());  | 
285  | 0  |     }  | 
286  | 0  |     return h;  | 
287  | 0  | }  | 
288  |  |  | 
289  |  | void  | 
290  |  | RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,  | 
291  | 0  |                               const Locale &actual) { | 
292  | 0  |     if(actual == tailoring->actualLocale) { | 
293  | 0  |         actualLocaleIsSameAsValid = FALSE;  | 
294  | 0  |     } else { | 
295  | 0  |         U_ASSERT(actual == valid);  | 
296  | 0  |         actualLocaleIsSameAsValid = TRUE;  | 
297  | 0  |     }  | 
298  |  |     // Do not modify tailoring.actualLocale:  | 
299  |  |     // We cannot be sure that that would be thread-safe.  | 
300  | 0  |     validLocale = valid;  | 
301  | 0  |     (void)requested;  // Ignore, see also ticket #10477.  | 
302  | 0  | }  | 
303  |  |  | 
304  |  | Locale  | 
305  | 0  | RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const { | 
306  | 0  |     if(U_FAILURE(errorCode)) { | 
307  | 0  |         return Locale::getRoot();  | 
308  | 0  |     }  | 
309  | 0  |     switch(type) { | 
310  | 0  |     case ULOC_ACTUAL_LOCALE:  | 
311  | 0  |         return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;  | 
312  | 0  |     case ULOC_VALID_LOCALE:  | 
313  | 0  |         return validLocale;  | 
314  | 0  |     case ULOC_REQUESTED_LOCALE:  | 
315  | 0  |     default:  | 
316  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
317  | 0  |         return Locale::getRoot();  | 
318  | 0  |     }  | 
319  | 0  | }  | 
320  |  |  | 
321  |  | const char *  | 
322  | 0  | RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const { | 
323  | 0  |     if(U_FAILURE(errorCode)) { | 
324  | 0  |         return NULL;  | 
325  | 0  |     }  | 
326  | 0  |     const Locale *result;  | 
327  | 0  |     switch(type) { | 
328  | 0  |     case ULOC_ACTUAL_LOCALE:  | 
329  | 0  |         result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;  | 
330  | 0  |         break;  | 
331  | 0  |     case ULOC_VALID_LOCALE:  | 
332  | 0  |         result = &validLocale;  | 
333  | 0  |         break;  | 
334  | 0  |     case ULOC_REQUESTED_LOCALE:  | 
335  | 0  |     default:  | 
336  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
337  | 0  |         return NULL;  | 
338  | 0  |     }  | 
339  | 0  |     if(result->isBogus()) { return NULL; } | 
340  | 0  |     const char *id = result->getName();  | 
341  | 0  |     return id[0] == 0 ? "root" : id;  | 
342  | 0  | }  | 
343  |  |  | 
344  |  | const UnicodeString&  | 
345  | 0  | RuleBasedCollator::getRules() const { | 
346  | 0  |     return tailoring->rules;  | 
347  | 0  | }  | 
348  |  |  | 
349  |  | void  | 
350  | 0  | RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const { | 
351  | 0  |     if(delta == UCOL_TAILORING_ONLY) { | 
352  | 0  |         buffer = tailoring->rules;  | 
353  | 0  |         return;  | 
354  | 0  |     }  | 
355  |  |     // UCOL_FULL_RULES  | 
356  | 0  |     buffer.remove();  | 
357  | 0  |     CollationLoader::appendRootRules(buffer);  | 
358  | 0  |     buffer.append(tailoring->rules).getTerminatedBuffer();  | 
359  | 0  | }  | 
360  |  |  | 
361  |  | void  | 
362  | 0  | RuleBasedCollator::getVersion(UVersionInfo version) const { | 
363  | 0  |     uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);  | 
364  | 0  |     version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);  | 
365  | 0  | }  | 
366  |  |  | 
367  |  | UnicodeSet *  | 
368  | 0  | RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const { | 
369  | 0  |     if(U_FAILURE(errorCode)) { return NULL; } | 
370  | 0  |     UnicodeSet *tailored = new UnicodeSet();  | 
371  | 0  |     if(tailored == NULL) { | 
372  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
373  | 0  |         return NULL;  | 
374  | 0  |     }  | 
375  | 0  |     if(data->base != NULL) { | 
376  | 0  |         TailoredSet(tailored).forData(data, errorCode);  | 
377  | 0  |         if(U_FAILURE(errorCode)) { | 
378  | 0  |             delete tailored;  | 
379  | 0  |             return NULL;  | 
380  | 0  |         }  | 
381  | 0  |     }  | 
382  | 0  |     return tailored;  | 
383  | 0  | }  | 
384  |  |  | 
385  |  | void  | 
386  |  | RuleBasedCollator::internalGetContractionsAndExpansions(  | 
387  |  |         UnicodeSet *contractions, UnicodeSet *expansions,  | 
388  | 0  |         UBool addPrefixes, UErrorCode &errorCode) const { | 
389  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
390  | 0  |     if(contractions != NULL) { | 
391  | 0  |         contractions->clear();  | 
392  | 0  |     }  | 
393  | 0  |     if(expansions != NULL) { | 
394  | 0  |         expansions->clear();  | 
395  | 0  |     }  | 
396  | 0  |     ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forData(data, errorCode);  | 
397  | 0  | }  | 
398  |  |  | 
399  |  | void  | 
400  | 0  | RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const { | 
401  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
402  | 0  |     ContractionsAndExpansions(&set, NULL, NULL, FALSE).forCodePoint(data, c, errorCode);  | 
403  | 0  | }  | 
404  |  |  | 
405  |  | const CollationSettings &  | 
406  | 0  | RuleBasedCollator::getDefaultSettings() const { | 
407  | 0  |     return *tailoring->settings;  | 
408  | 0  | }  | 
409  |  |  | 
410  |  | UColAttributeValue  | 
411  | 0  | RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const { | 
412  | 0  |     if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; } | 
413  | 0  |     int32_t option;  | 
414  | 0  |     switch(attr) { | 
415  | 0  |     case UCOL_FRENCH_COLLATION:  | 
416  | 0  |         option = CollationSettings::BACKWARD_SECONDARY;  | 
417  | 0  |         break;  | 
418  | 0  |     case UCOL_ALTERNATE_HANDLING:  | 
419  | 0  |         return settings->getAlternateHandling();  | 
420  | 0  |     case UCOL_CASE_FIRST:  | 
421  | 0  |         return settings->getCaseFirst();  | 
422  | 0  |     case UCOL_CASE_LEVEL:  | 
423  | 0  |         option = CollationSettings::CASE_LEVEL;  | 
424  | 0  |         break;  | 
425  | 0  |     case UCOL_NORMALIZATION_MODE:  | 
426  | 0  |         option = CollationSettings::CHECK_FCD;  | 
427  | 0  |         break;  | 
428  | 0  |     case UCOL_STRENGTH:  | 
429  | 0  |         return (UColAttributeValue)settings->getStrength();  | 
430  | 0  |     case UCOL_HIRAGANA_QUATERNARY_MODE:  | 
431  |  |         // Deprecated attribute, unsettable.  | 
432  | 0  |         return UCOL_OFF;  | 
433  | 0  |     case UCOL_NUMERIC_COLLATION:  | 
434  | 0  |         option = CollationSettings::NUMERIC;  | 
435  | 0  |         break;  | 
436  | 0  |     default:  | 
437  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
438  | 0  |         return UCOL_DEFAULT;  | 
439  | 0  |     }  | 
440  | 0  |     return ((settings->options & option) == 0) ? UCOL_OFF : UCOL_ON;  | 
441  | 0  | }  | 
442  |  |  | 
443  |  | void  | 
444  |  | RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,  | 
445  | 0  |                                 UErrorCode &errorCode) { | 
446  | 0  |     UColAttributeValue oldValue = getAttribute(attr, errorCode);  | 
447  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
448  | 0  |     if(value == oldValue) { | 
449  | 0  |         setAttributeExplicitly(attr);  | 
450  | 0  |         return;  | 
451  | 0  |     }  | 
452  | 0  |     const CollationSettings &defaultSettings = getDefaultSettings();  | 
453  | 0  |     if(settings == &defaultSettings) { | 
454  | 0  |         if(value == UCOL_DEFAULT) { | 
455  | 0  |             setAttributeDefault(attr);  | 
456  | 0  |             return;  | 
457  | 0  |         }  | 
458  | 0  |     }  | 
459  | 0  |     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);  | 
460  | 0  |     if(ownedSettings == NULL) { | 
461  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
462  | 0  |         return;  | 
463  | 0  |     }  | 
464  |  |  | 
465  | 0  |     switch(attr) { | 
466  | 0  |     case UCOL_FRENCH_COLLATION:  | 
467  | 0  |         ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,  | 
468  | 0  |                                defaultSettings.options, errorCode);  | 
469  | 0  |         break;  | 
470  | 0  |     case UCOL_ALTERNATE_HANDLING:  | 
471  | 0  |         ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);  | 
472  | 0  |         break;  | 
473  | 0  |     case UCOL_CASE_FIRST:  | 
474  | 0  |         ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);  | 
475  | 0  |         break;  | 
476  | 0  |     case UCOL_CASE_LEVEL:  | 
477  | 0  |         ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,  | 
478  | 0  |                                defaultSettings.options, errorCode);  | 
479  | 0  |         break;  | 
480  | 0  |     case UCOL_NORMALIZATION_MODE:  | 
481  | 0  |         ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,  | 
482  | 0  |                                defaultSettings.options, errorCode);  | 
483  | 0  |         break;  | 
484  | 0  |     case UCOL_STRENGTH:  | 
485  | 0  |         ownedSettings->setStrength(value, defaultSettings.options, errorCode);  | 
486  | 0  |         break;  | 
487  | 0  |     case UCOL_HIRAGANA_QUATERNARY_MODE:  | 
488  |  |         // Deprecated attribute. Check for valid values but do not change anything.  | 
489  | 0  |         if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) { | 
490  | 0  |             errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
491  | 0  |         }  | 
492  | 0  |         break;  | 
493  | 0  |     case UCOL_NUMERIC_COLLATION:  | 
494  | 0  |         ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);  | 
495  | 0  |         break;  | 
496  | 0  |     default:  | 
497  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
498  | 0  |         break;  | 
499  | 0  |     }  | 
500  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
501  | 0  |     setFastLatinOptions(*ownedSettings);  | 
502  | 0  |     if(value == UCOL_DEFAULT) { | 
503  | 0  |         setAttributeDefault(attr);  | 
504  | 0  |     } else { | 
505  | 0  |         setAttributeExplicitly(attr);  | 
506  | 0  |     }  | 
507  | 0  | }  | 
508  |  |  | 
509  |  | Collator &  | 
510  | 0  | RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) { | 
511  | 0  |     if(U_FAILURE(errorCode)) { return *this; } | 
512  |  |     // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.  | 
513  | 0  |     int32_t value;  | 
514  | 0  |     if(group == UCOL_REORDER_CODE_DEFAULT) { | 
515  | 0  |         value = UCOL_DEFAULT;  | 
516  | 0  |     } else if(UCOL_REORDER_CODE_FIRST <= group && group <= UCOL_REORDER_CODE_CURRENCY) { | 
517  | 0  |         value = group - UCOL_REORDER_CODE_FIRST;  | 
518  | 0  |     } else { | 
519  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
520  | 0  |         return *this;  | 
521  | 0  |     }  | 
522  | 0  |     CollationSettings::MaxVariable oldValue = settings->getMaxVariable();  | 
523  | 0  |     if(value == oldValue) { | 
524  | 0  |         setAttributeExplicitly(ATTR_VARIABLE_TOP);  | 
525  | 0  |         return *this;  | 
526  | 0  |     }  | 
527  | 0  |     const CollationSettings &defaultSettings = getDefaultSettings();  | 
528  | 0  |     if(settings == &defaultSettings) { | 
529  | 0  |         if(value == UCOL_DEFAULT) { | 
530  | 0  |             setAttributeDefault(ATTR_VARIABLE_TOP);  | 
531  | 0  |             return *this;  | 
532  | 0  |         }  | 
533  | 0  |     }  | 
534  | 0  |     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);  | 
535  | 0  |     if(ownedSettings == NULL) { | 
536  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
537  | 0  |         return *this;  | 
538  | 0  |     }  | 
539  |  |  | 
540  | 0  |     if(group == UCOL_REORDER_CODE_DEFAULT) { | 
541  | 0  |         group = (UColReorderCode)(UCOL_REORDER_CODE_FIRST + defaultSettings.getMaxVariable());  | 
542  | 0  |     }  | 
543  | 0  |     uint32_t varTop = data->getLastPrimaryForGroup(group);  | 
544  | 0  |     U_ASSERT(varTop != 0);  | 
545  | 0  |     ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);  | 
546  | 0  |     if(U_FAILURE(errorCode)) { return *this; } | 
547  | 0  |     ownedSettings->variableTop = varTop;  | 
548  | 0  |     setFastLatinOptions(*ownedSettings);  | 
549  | 0  |     if(value == UCOL_DEFAULT) { | 
550  | 0  |         setAttributeDefault(ATTR_VARIABLE_TOP);  | 
551  | 0  |     } else { | 
552  | 0  |         setAttributeExplicitly(ATTR_VARIABLE_TOP);  | 
553  | 0  |     }  | 
554  | 0  |     return *this;  | 
555  | 0  | }  | 
556  |  |  | 
557  |  | UColReorderCode  | 
558  | 0  | RuleBasedCollator::getMaxVariable() const { | 
559  | 0  |     return (UColReorderCode)(UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());  | 
560  | 0  | }  | 
561  |  |  | 
562  |  | uint32_t  | 
563  | 0  | RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const { | 
564  | 0  |     return settings->variableTop;  | 
565  | 0  | }  | 
566  |  |  | 
567  |  | uint32_t  | 
568  | 0  | RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &errorCode) { | 
569  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
570  | 0  |     if(varTop == NULL && len !=0) { | 
571  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
572  | 0  |         return 0;  | 
573  | 0  |     }  | 
574  | 0  |     if(len < 0) { len = u_strlen(varTop); } | 
575  | 0  |     if(len == 0) { | 
576  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
577  | 0  |         return 0;  | 
578  | 0  |     }  | 
579  | 0  |     UBool numeric = settings->isNumeric();  | 
580  | 0  |     int64_t ce1, ce2;  | 
581  | 0  |     if(settings->dontCheckFCD()) { | 
582  | 0  |         UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);  | 
583  | 0  |         ce1 = ci.nextCE(errorCode);  | 
584  | 0  |         ce2 = ci.nextCE(errorCode);  | 
585  | 0  |     } else { | 
586  | 0  |         FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);  | 
587  | 0  |         ce1 = ci.nextCE(errorCode);  | 
588  | 0  |         ce2 = ci.nextCE(errorCode);  | 
589  | 0  |     }  | 
590  | 0  |     if(ce1 == Collation::NO_CE || ce2 != Collation::NO_CE) { | 
591  | 0  |         errorCode = U_CE_NOT_FOUND_ERROR;  | 
592  | 0  |         return 0;  | 
593  | 0  |     }  | 
594  | 0  |     setVariableTop((uint32_t)(ce1 >> 32), errorCode);  | 
595  | 0  |     return settings->variableTop;  | 
596  | 0  | }  | 
597  |  |  | 
598  |  | uint32_t  | 
599  | 0  | RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) { | 
600  | 0  |     return setVariableTop(varTop.getBuffer(), varTop.length(), errorCode);  | 
601  | 0  | }  | 
602  |  |  | 
603  |  | void  | 
604  | 0  | RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) { | 
605  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
606  | 0  |     if(varTop != settings->variableTop) { | 
607  |  |         // Pin the variable top to the end of the reordering group which contains it.  | 
608  |  |         // Only a few special groups are supported.  | 
609  | 0  |         int32_t group = data->getGroupForPrimary(varTop);  | 
610  | 0  |         if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) { | 
611  | 0  |             errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
612  | 0  |             return;  | 
613  | 0  |         }  | 
614  | 0  |         uint32_t v = data->getLastPrimaryForGroup(group);  | 
615  | 0  |         U_ASSERT(v != 0 && v >= varTop);  | 
616  | 0  |         varTop = v;  | 
617  | 0  |         if(varTop != settings->variableTop) { | 
618  | 0  |             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);  | 
619  | 0  |             if(ownedSettings == NULL) { | 
620  | 0  |                 errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
621  | 0  |                 return;  | 
622  | 0  |             }  | 
623  | 0  |             ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,  | 
624  | 0  |                                           getDefaultSettings().options, errorCode);  | 
625  | 0  |             if(U_FAILURE(errorCode)) { return; } | 
626  | 0  |             ownedSettings->variableTop = varTop;  | 
627  | 0  |             setFastLatinOptions(*ownedSettings);  | 
628  | 0  |         }  | 
629  | 0  |     }  | 
630  | 0  |     if(varTop == getDefaultSettings().variableTop) { | 
631  | 0  |         setAttributeDefault(ATTR_VARIABLE_TOP);  | 
632  | 0  |     } else { | 
633  | 0  |         setAttributeExplicitly(ATTR_VARIABLE_TOP);  | 
634  | 0  |     }  | 
635  | 0  | }  | 
636  |  |  | 
637  |  | int32_t  | 
638  |  | RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,  | 
639  | 0  |                                    UErrorCode &errorCode) const { | 
640  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
641  | 0  |     if(capacity < 0 || (dest == NULL && capacity > 0)) { | 
642  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
643  | 0  |         return 0;  | 
644  | 0  |     }  | 
645  | 0  |     int32_t length = settings->reorderCodesLength;  | 
646  | 0  |     if(length == 0) { return 0; } | 
647  | 0  |     if(length > capacity) { | 
648  | 0  |         errorCode = U_BUFFER_OVERFLOW_ERROR;  | 
649  | 0  |         return length;  | 
650  | 0  |     }  | 
651  | 0  |     uprv_memcpy(dest, settings->reorderCodes, length * 4);  | 
652  | 0  |     return length;  | 
653  | 0  | }  | 
654  |  |  | 
655  |  | void  | 
656  |  | RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,  | 
657  | 0  |                                    UErrorCode &errorCode) { | 
658  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
659  | 0  |     if(length < 0 || (reorderCodes == NULL && length > 0)) { | 
660  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
661  | 0  |         return;  | 
662  | 0  |     }  | 
663  | 0  |     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) { | 
664  | 0  |         length = 0;  | 
665  | 0  |     }  | 
666  | 0  |     if(length == settings->reorderCodesLength &&  | 
667  | 0  |             uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) { | 
668  | 0  |         return;  | 
669  | 0  |     }  | 
670  | 0  |     const CollationSettings &defaultSettings = getDefaultSettings();  | 
671  | 0  |     if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) { | 
672  | 0  |         if(settings != &defaultSettings) { | 
673  | 0  |             CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);  | 
674  | 0  |             if(ownedSettings == NULL) { | 
675  | 0  |                 errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
676  | 0  |                 return;  | 
677  | 0  |             }  | 
678  | 0  |             ownedSettings->copyReorderingFrom(defaultSettings, errorCode);  | 
679  | 0  |             setFastLatinOptions(*ownedSettings);  | 
680  | 0  |         }  | 
681  | 0  |         return;  | 
682  | 0  |     }  | 
683  | 0  |     CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);  | 
684  | 0  |     if(ownedSettings == NULL) { | 
685  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
686  | 0  |         return;  | 
687  | 0  |     }  | 
688  | 0  |     ownedSettings->setReordering(*data, reorderCodes, length, errorCode);  | 
689  | 0  |     setFastLatinOptions(*ownedSettings);  | 
690  | 0  | }  | 
691  |  |  | 
692  |  | void  | 
693  | 0  | RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const { | 
694  | 0  |     ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(  | 
695  | 0  |             data, ownedSettings,  | 
696  | 0  |             ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));  | 
697  | 0  | }  | 
698  |  |  | 
699  |  | UCollationResult  | 
700  |  | RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,  | 
701  | 0  |                            UErrorCode &errorCode) const { | 
702  | 0  |     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; } | 
703  | 0  |     return doCompare(left.getBuffer(), left.length(),  | 
704  | 0  |                      right.getBuffer(), right.length(), errorCode);  | 
705  | 0  | }  | 
706  |  |  | 
707  |  | UCollationResult  | 
708  |  | RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,  | 
709  | 0  |                            int32_t length, UErrorCode &errorCode) const { | 
710  | 0  |     if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; } | 
711  | 0  |     if(length < 0) { | 
712  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
713  | 0  |         return UCOL_EQUAL;  | 
714  | 0  |     }  | 
715  | 0  |     int32_t leftLength = left.length();  | 
716  | 0  |     int32_t rightLength = right.length();  | 
717  | 0  |     if(leftLength > length) { leftLength = length; } | 
718  | 0  |     if(rightLength > length) { rightLength = length; } | 
719  | 0  |     return doCompare(left.getBuffer(), leftLength,  | 
720  | 0  |                      right.getBuffer(), rightLength, errorCode);  | 
721  | 0  | }  | 
722  |  |  | 
723  |  | UCollationResult  | 
724  |  | RuleBasedCollator::compare(const UChar *left, int32_t leftLength,  | 
725  |  |                            const UChar *right, int32_t rightLength,  | 
726  | 0  |                            UErrorCode &errorCode) const { | 
727  | 0  |     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; } | 
728  | 0  |     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) { | 
729  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
730  | 0  |         return UCOL_EQUAL;  | 
731  | 0  |     }  | 
732  |  |     // Make sure both or neither strings have a known length.  | 
733  |  |     // We do not optimize for mixed length/termination.  | 
734  | 0  |     if(leftLength >= 0) { | 
735  | 0  |         if(rightLength < 0) { rightLength = u_strlen(right); } | 
736  | 0  |     } else { | 
737  | 0  |         if(rightLength >= 0) { leftLength = u_strlen(left); } | 
738  | 0  |     }  | 
739  | 0  |     return doCompare(left, leftLength, right, rightLength, errorCode);  | 
740  | 0  | }  | 
741  |  |  | 
742  |  | UCollationResult  | 
743  |  | RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,  | 
744  | 0  |                                UErrorCode &errorCode) const { | 
745  | 0  |     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; } | 
746  | 0  |     const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());  | 
747  | 0  |     const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());  | 
748  | 0  |     if((leftBytes == NULL && !left.empty()) || (rightBytes == NULL && !right.empty())) { | 
749  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
750  | 0  |         return UCOL_EQUAL;  | 
751  | 0  |     }  | 
752  | 0  |     return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);  | 
753  | 0  | }  | 
754  |  |  | 
755  |  | UCollationResult  | 
756  |  | RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,  | 
757  |  |                                        const char *right, int32_t rightLength,  | 
758  | 0  |                                        UErrorCode &errorCode) const { | 
759  | 0  |     if(U_FAILURE(errorCode)) { return UCOL_EQUAL; } | 
760  | 0  |     if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) { | 
761  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
762  | 0  |         return UCOL_EQUAL;  | 
763  | 0  |     }  | 
764  |  |     // Make sure both or neither strings have a known length.  | 
765  |  |     // We do not optimize for mixed length/termination.  | 
766  | 0  |     if(leftLength >= 0) { | 
767  | 0  |         if(rightLength < 0) { rightLength = static_cast<int32_t>(uprv_strlen(right)); } | 
768  | 0  |     } else { | 
769  | 0  |         if(rightLength >= 0) { leftLength = static_cast<int32_t>(uprv_strlen(left)); } | 
770  | 0  |     }  | 
771  | 0  |     return doCompare(reinterpret_cast<const uint8_t *>(left), leftLength,  | 
772  | 0  |                      reinterpret_cast<const uint8_t *>(right), rightLength, errorCode);  | 
773  | 0  | }  | 
774  |  |  | 
775  |  | namespace { | 
776  |  |  | 
777  |  | /**  | 
778  |  |  * Abstract iterator for identical-level string comparisons.  | 
779  |  |  * Returns FCD code points and handles temporary switching to NFD.  | 
780  |  |  */  | 
781  |  | class NFDIterator : public UObject { | 
782  |  | public:  | 
783  | 0  |     NFDIterator() : index(-1), length(0) {} | 
784  | 0  |     virtual ~NFDIterator() {} | 
785  |  |     /**  | 
786  |  |      * Returns the next code point from the internal normalization buffer,  | 
787  |  |      * or else the next text code point.  | 
788  |  |      * Returns -1 at the end of the text.  | 
789  |  |      */  | 
790  | 0  |     UChar32 nextCodePoint() { | 
791  | 0  |         if(index >= 0) { | 
792  | 0  |             if(index == length) { | 
793  | 0  |                 index = -1;  | 
794  | 0  |             } else { | 
795  | 0  |                 UChar32 c;  | 
796  | 0  |                 U16_NEXT_UNSAFE(decomp, index, c);  | 
797  | 0  |                 return c;  | 
798  | 0  |             }  | 
799  | 0  |         }  | 
800  | 0  |         return nextRawCodePoint();  | 
801  | 0  |     }  | 
802  |  |     /**  | 
803  |  |      * @param nfcImpl  | 
804  |  |      * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()  | 
805  |  |      * @return the first code point in c's decomposition,  | 
806  |  |      *         or c itself if it was decomposed already or if it does not decompose  | 
807  |  |      */  | 
808  | 0  |     UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) { | 
809  | 0  |         if(index >= 0) { return c; } | 
810  | 0  |         decomp = nfcImpl.getDecomposition(c, buffer, length);  | 
811  | 0  |         if(decomp == NULL) { return c; } | 
812  | 0  |         index = 0;  | 
813  | 0  |         U16_NEXT_UNSAFE(decomp, index, c);  | 
814  | 0  |         return c;  | 
815  | 0  |     }  | 
816  |  | protected:  | 
817  |  |     /**  | 
818  |  |      * Returns the next text code point in FCD order.  | 
819  |  |      * Returns -1 at the end of the text.  | 
820  |  |      */  | 
821  |  |     virtual UChar32 nextRawCodePoint() = 0;  | 
822  |  | private:  | 
823  |  |     const UChar *decomp;  | 
824  |  |     UChar buffer[4];  | 
825  |  |     int32_t index;  | 
826  |  |     int32_t length;  | 
827  |  | };  | 
828  |  |  | 
829  |  | class UTF16NFDIterator : public NFDIterator { | 
830  |  | public:  | 
831  | 0  |     UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {} | 
832  |  | protected:  | 
833  | 0  |     virtual UChar32 nextRawCodePoint() { | 
834  | 0  |         if(s == limit) { return U_SENTINEL; } | 
835  | 0  |         UChar32 c = *s++;  | 
836  | 0  |         if(limit == NULL && c == 0) { | 
837  | 0  |             s = NULL;  | 
838  | 0  |             return U_SENTINEL;  | 
839  | 0  |         }  | 
840  | 0  |         UChar trail;  | 
841  | 0  |         if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) { | 
842  | 0  |             ++s;  | 
843  | 0  |             c = U16_GET_SUPPLEMENTARY(c, trail);  | 
844  | 0  |         }  | 
845  | 0  |         return c;  | 
846  | 0  |     }  | 
847  |  |  | 
848  |  |     const UChar *s;  | 
849  |  |     const UChar *limit;  | 
850  |  | };  | 
851  |  |  | 
852  |  | class FCDUTF16NFDIterator : public UTF16NFDIterator { | 
853  |  | public:  | 
854  |  |     FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const UChar *textLimit)  | 
855  | 0  |             : UTF16NFDIterator(NULL, NULL) { | 
856  | 0  |         UErrorCode errorCode = U_ZERO_ERROR;  | 
857  | 0  |         const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCode);  | 
858  | 0  |         if(U_FAILURE(errorCode)) { return; } | 
859  | 0  |         if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) { | 
860  | 0  |             s = text;  | 
861  | 0  |             limit = spanLimit;  | 
862  | 0  |         } else { | 
863  | 0  |             str.setTo(text, (int32_t)(spanLimit - text));  | 
864  | 0  |             { | 
865  | 0  |                 ReorderingBuffer r_buffer(nfcImpl, str);  | 
866  | 0  |                 if(r_buffer.init(str.length(), errorCode)) { | 
867  | 0  |                     nfcImpl.makeFCD(spanLimit, textLimit, &r_buffer, errorCode);  | 
868  | 0  |                 }  | 
869  | 0  |             }  | 
870  | 0  |             if(U_SUCCESS(errorCode)) { | 
871  | 0  |                 s = str.getBuffer();  | 
872  | 0  |                 limit = s + str.length();  | 
873  | 0  |             }  | 
874  | 0  |         }  | 
875  | 0  |     }  | 
876  |  | private:  | 
877  |  |     UnicodeString str;  | 
878  |  | };  | 
879  |  |  | 
880  |  | class UTF8NFDIterator : public NFDIterator { | 
881  |  | public:  | 
882  |  |     UTF8NFDIterator(const uint8_t *text, int32_t textLength)  | 
883  | 0  |         : s(text), pos(0), length(textLength) {} | 
884  |  | protected:  | 
885  | 0  |     virtual UChar32 nextRawCodePoint() { | 
886  | 0  |         if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; } | 
887  | 0  |         UChar32 c;  | 
888  | 0  |         U8_NEXT_OR_FFFD(s, pos, length, c);  | 
889  | 0  |         return c;  | 
890  | 0  |     }  | 
891  |  |  | 
892  |  |     const uint8_t *s;  | 
893  |  |     int32_t pos;  | 
894  |  |     int32_t length;  | 
895  |  | };  | 
896  |  |  | 
897  |  | class FCDUTF8NFDIterator : public NFDIterator { | 
898  |  | public:  | 
899  |  |     FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)  | 
900  | 0  |             : u8ci(data, FALSE, text, 0, textLength) {} | 
901  |  | protected:  | 
902  | 0  |     virtual UChar32 nextRawCodePoint() { | 
903  | 0  |         UErrorCode errorCode = U_ZERO_ERROR;  | 
904  | 0  |         return u8ci.nextCodePoint(errorCode);  | 
905  | 0  |     }  | 
906  |  | private:  | 
907  |  |     FCDUTF8CollationIterator u8ci;  | 
908  |  | };  | 
909  |  |  | 
910  |  | class UIterNFDIterator : public NFDIterator { | 
911  |  | public:  | 
912  | 0  |     UIterNFDIterator(UCharIterator &it) : iter(it) {} | 
913  |  | protected:  | 
914  | 0  |     virtual UChar32 nextRawCodePoint() { | 
915  | 0  |         return uiter_next32(&iter);  | 
916  | 0  |     }  | 
917  |  | private:  | 
918  |  |     UCharIterator &iter;  | 
919  |  | };  | 
920  |  |  | 
921  |  | class FCDUIterNFDIterator : public NFDIterator { | 
922  |  | public:  | 
923  |  |     FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)  | 
924  | 0  |             : uici(data, FALSE, it, startIndex) {} | 
925  |  | protected:  | 
926  | 0  |     virtual UChar32 nextRawCodePoint() { | 
927  | 0  |         UErrorCode errorCode = U_ZERO_ERROR;  | 
928  | 0  |         return uici.nextCodePoint(errorCode);  | 
929  | 0  |     }  | 
930  |  | private:  | 
931  |  |     FCDUIterCollationIterator uici;  | 
932  |  | };  | 
933  |  |  | 
934  |  | UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,  | 
935  | 0  |                                 NFDIterator &left, NFDIterator &right) { | 
936  | 0  |     for(;;) { | 
937  |  |         // Fetch the next FCD code point from each string.  | 
938  | 0  |         UChar32 leftCp = left.nextCodePoint();  | 
939  | 0  |         UChar32 rightCp = right.nextCodePoint();  | 
940  | 0  |         if(leftCp == rightCp) { | 
941  | 0  |             if(leftCp < 0) { break; } | 
942  | 0  |             continue;  | 
943  | 0  |         }  | 
944  |  |         // If they are different, then decompose each and compare again.  | 
945  | 0  |         if(leftCp < 0) { | 
946  | 0  |             leftCp = -2;  // end of string  | 
947  | 0  |         } else if(leftCp == 0xfffe) { | 
948  | 0  |             leftCp = -1;  // U+FFFE: merge separator  | 
949  | 0  |         } else { | 
950  | 0  |             leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);  | 
951  | 0  |         }  | 
952  | 0  |         if(rightCp < 0) { | 
953  | 0  |             rightCp = -2;  // end of string  | 
954  | 0  |         } else if(rightCp == 0xfffe) { | 
955  | 0  |             rightCp = -1;  // U+FFFE: merge separator  | 
956  | 0  |         } else { | 
957  | 0  |             rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);  | 
958  | 0  |         }  | 
959  | 0  |         if(leftCp < rightCp) { return UCOL_LESS; } | 
960  | 0  |         if(leftCp > rightCp) { return UCOL_GREATER; } | 
961  | 0  |     }  | 
962  | 0  |     return UCOL_EQUAL;  | 
963  | 0  | }  | 
964  |  |  | 
965  |  | }  // namespace  | 
966  |  |  | 
967  |  | UCollationResult  | 
968  |  | RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength,  | 
969  |  |                              const UChar *right, int32_t rightLength,  | 
970  | 0  |                              UErrorCode &errorCode) const { | 
971  |  |     // U_FAILURE(errorCode) checked by caller.  | 
972  | 0  |     if(left == right && leftLength == rightLength) { | 
973  | 0  |         return UCOL_EQUAL;  | 
974  | 0  |     }  | 
975  |  |  | 
976  |  |     // Identical-prefix test.  | 
977  | 0  |     const UChar *leftLimit;  | 
978  | 0  |     const UChar *rightLimit;  | 
979  | 0  |     int32_t equalPrefixLength = 0;  | 
980  | 0  |     if(leftLength < 0) { | 
981  | 0  |         leftLimit = NULL;  | 
982  | 0  |         rightLimit = NULL;  | 
983  | 0  |         UChar c;  | 
984  | 0  |         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) { | 
985  | 0  |             if(c == 0) { return UCOL_EQUAL; } | 
986  | 0  |             ++equalPrefixLength;  | 
987  | 0  |         }  | 
988  | 0  |     } else { | 
989  | 0  |         leftLimit = left + leftLength;  | 
990  | 0  |         rightLimit = right + rightLength;  | 
991  | 0  |         for(;;) { | 
992  | 0  |             if(equalPrefixLength == leftLength) { | 
993  | 0  |                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; } | 
994  | 0  |                 break;  | 
995  | 0  |             } else if(equalPrefixLength == rightLength ||  | 
996  | 0  |                       left[equalPrefixLength] != right[equalPrefixLength]) { | 
997  | 0  |                 break;  | 
998  | 0  |             }  | 
999  | 0  |             ++equalPrefixLength;  | 
1000  | 0  |         }  | 
1001  | 0  |     }  | 
1002  |  |  | 
1003  | 0  |     UBool numeric = settings->isNumeric();  | 
1004  | 0  |     if(equalPrefixLength > 0) { | 
1005  | 0  |         if((equalPrefixLength != leftLength &&  | 
1006  | 0  |                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) ||  | 
1007  | 0  |                 (equalPrefixLength != rightLength &&  | 
1008  | 0  |                     data->isUnsafeBackward(right[equalPrefixLength], numeric))) { | 
1009  |  |             // Identical prefix: Back up to the start of a contraction or reordering sequence.  | 
1010  | 0  |             while(--equalPrefixLength > 0 &&  | 
1011  | 0  |                     data->isUnsafeBackward(left[equalPrefixLength], numeric)) {} | 
1012  | 0  |         }  | 
1013  |  |         // Notes:  | 
1014  |  |         // - A longer string can compare equal to a prefix of it if only ignorables follow.  | 
1015  |  |         // - With a backward level, a longer string can compare less-than a prefix of it.  | 
1016  |  |  | 
1017  |  |         // Pass the actual start of each string into the CollationIterators,  | 
1018  |  |         // plus the equalPrefixLength position,  | 
1019  |  |         // so that prefix matches back into the equal prefix work.  | 
1020  | 0  |     }  | 
1021  |  | 
  | 
1022  | 0  |     int32_t result;  | 
1023  | 0  |     int32_t fastLatinOptions = settings->fastLatinOptions;  | 
1024  | 0  |     if(fastLatinOptions >= 0 &&  | 
1025  | 0  |             (equalPrefixLength == leftLength ||  | 
1026  | 0  |                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&  | 
1027  | 0  |             (equalPrefixLength == rightLength ||  | 
1028  | 0  |                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) { | 
1029  | 0  |         if(leftLength >= 0) { | 
1030  | 0  |             result = CollationFastLatin::compareUTF16(data->fastLatinTable,  | 
1031  | 0  |                                                       settings->fastLatinPrimaries,  | 
1032  | 0  |                                                       fastLatinOptions,  | 
1033  | 0  |                                                       left + equalPrefixLength,  | 
1034  | 0  |                                                       leftLength - equalPrefixLength,  | 
1035  | 0  |                                                       right + equalPrefixLength,  | 
1036  | 0  |                                                       rightLength - equalPrefixLength);  | 
1037  | 0  |         } else { | 
1038  | 0  |             result = CollationFastLatin::compareUTF16(data->fastLatinTable,  | 
1039  | 0  |                                                       settings->fastLatinPrimaries,  | 
1040  | 0  |                                                       fastLatinOptions,  | 
1041  | 0  |                                                       left + equalPrefixLength, -1,  | 
1042  | 0  |                                                       right + equalPrefixLength, -1);  | 
1043  | 0  |         }  | 
1044  | 0  |     } else { | 
1045  | 0  |         result = CollationFastLatin::BAIL_OUT_RESULT;  | 
1046  | 0  |     }  | 
1047  |  | 
  | 
1048  | 0  |     if(result == CollationFastLatin::BAIL_OUT_RESULT) { | 
1049  | 0  |         if(settings->dontCheckFCD()) { | 
1050  | 0  |             UTF16CollationIterator leftIter(data, numeric,  | 
1051  | 0  |                                             left, left + equalPrefixLength, leftLimit);  | 
1052  | 0  |             UTF16CollationIterator rightIter(data, numeric,  | 
1053  | 0  |                                             right, right + equalPrefixLength, rightLimit);  | 
1054  | 0  |             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);  | 
1055  | 0  |         } else { | 
1056  | 0  |             FCDUTF16CollationIterator leftIter(data, numeric,  | 
1057  | 0  |                                               left, left + equalPrefixLength, leftLimit);  | 
1058  | 0  |             FCDUTF16CollationIterator rightIter(data, numeric,  | 
1059  | 0  |                                                 right, right + equalPrefixLength, rightLimit);  | 
1060  | 0  |             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);  | 
1061  | 0  |         }  | 
1062  | 0  |     }  | 
1063  | 0  |     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) { | 
1064  | 0  |         return (UCollationResult)result;  | 
1065  | 0  |     }  | 
1066  |  |  | 
1067  |  |     // Note: If NUL-terminated, we could get the actual limits from the iterators now.  | 
1068  |  |     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,  | 
1069  |  |     // and the benefit seems unlikely to be measurable.  | 
1070  |  |  | 
1071  |  |     // Compare identical level.  | 
1072  | 0  |     const Normalizer2Impl &nfcImpl = data->nfcImpl;  | 
1073  | 0  |     left += equalPrefixLength;  | 
1074  | 0  |     right += equalPrefixLength;  | 
1075  | 0  |     if(settings->dontCheckFCD()) { | 
1076  | 0  |         UTF16NFDIterator leftIter(left, leftLimit);  | 
1077  | 0  |         UTF16NFDIterator rightIter(right, rightLimit);  | 
1078  | 0  |         return compareNFDIter(nfcImpl, leftIter, rightIter);  | 
1079  | 0  |     } else { | 
1080  | 0  |         FCDUTF16NFDIterator leftIter(nfcImpl, left, leftLimit);  | 
1081  | 0  |         FCDUTF16NFDIterator rightIter(nfcImpl, right, rightLimit);  | 
1082  | 0  |         return compareNFDIter(nfcImpl, leftIter, rightIter);  | 
1083  | 0  |     }  | 
1084  | 0  | }  | 
1085  |  |  | 
1086  |  | UCollationResult  | 
1087  |  | RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,  | 
1088  |  |                              const uint8_t *right, int32_t rightLength,  | 
1089  | 0  |                              UErrorCode &errorCode) const { | 
1090  |  |     // U_FAILURE(errorCode) checked by caller.  | 
1091  | 0  |     if(left == right && leftLength == rightLength) { | 
1092  | 0  |         return UCOL_EQUAL;  | 
1093  | 0  |     }  | 
1094  |  |  | 
1095  |  |     // Identical-prefix test.  | 
1096  | 0  |     int32_t equalPrefixLength = 0;  | 
1097  | 0  |     if(leftLength < 0) { | 
1098  | 0  |         uint8_t c;  | 
1099  | 0  |         while((c = left[equalPrefixLength]) == right[equalPrefixLength]) { | 
1100  | 0  |             if(c == 0) { return UCOL_EQUAL; } | 
1101  | 0  |             ++equalPrefixLength;  | 
1102  | 0  |         }  | 
1103  | 0  |     } else { | 
1104  | 0  |         for(;;) { | 
1105  | 0  |             if(equalPrefixLength == leftLength) { | 
1106  | 0  |                 if(equalPrefixLength == rightLength) { return UCOL_EQUAL; } | 
1107  | 0  |                 break;  | 
1108  | 0  |             } else if(equalPrefixLength == rightLength ||  | 
1109  | 0  |                       left[equalPrefixLength] != right[equalPrefixLength]) { | 
1110  | 0  |                 break;  | 
1111  | 0  |             }  | 
1112  | 0  |             ++equalPrefixLength;  | 
1113  | 0  |         }  | 
1114  | 0  |     }  | 
1115  |  |     // Back up to the start of a partially-equal code point.  | 
1116  | 0  |     if(equalPrefixLength > 0 &&  | 
1117  | 0  |             ((equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength])) ||  | 
1118  | 0  |             (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength])))) { | 
1119  | 0  |         while(--equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength])) {} | 
1120  | 0  |     }  | 
1121  |  | 
  | 
1122  | 0  |     UBool numeric = settings->isNumeric();  | 
1123  | 0  |     if(equalPrefixLength > 0) { | 
1124  | 0  |         UBool unsafe = FALSE;  | 
1125  | 0  |         if(equalPrefixLength != leftLength) { | 
1126  | 0  |             int32_t i = equalPrefixLength;  | 
1127  | 0  |             UChar32 c;  | 
1128  | 0  |             U8_NEXT_OR_FFFD(left, i, leftLength, c);  | 
1129  | 0  |             unsafe = data->isUnsafeBackward(c, numeric);  | 
1130  | 0  |         }  | 
1131  | 0  |         if(!unsafe && equalPrefixLength != rightLength) { | 
1132  | 0  |             int32_t i = equalPrefixLength;  | 
1133  | 0  |             UChar32 c;  | 
1134  | 0  |             U8_NEXT_OR_FFFD(right, i, rightLength, c);  | 
1135  | 0  |             unsafe = data->isUnsafeBackward(c, numeric);  | 
1136  | 0  |         }  | 
1137  | 0  |         if(unsafe) { | 
1138  |  |             // Identical prefix: Back up to the start of a contraction or reordering sequence.  | 
1139  | 0  |             UChar32 c;  | 
1140  | 0  |             do { | 
1141  | 0  |                 U8_PREV_OR_FFFD(left, 0, equalPrefixLength, c);  | 
1142  | 0  |             } while(equalPrefixLength > 0 && data->isUnsafeBackward(c, numeric));  | 
1143  | 0  |         }  | 
1144  |  |         // See the notes in the UTF-16 version.  | 
1145  |  |  | 
1146  |  |         // Pass the actual start of each string into the CollationIterators,  | 
1147  |  |         // plus the equalPrefixLength position,  | 
1148  |  |         // so that prefix matches back into the equal prefix work.  | 
1149  | 0  |     }  | 
1150  |  | 
  | 
1151  | 0  |     int32_t result;  | 
1152  | 0  |     int32_t fastLatinOptions = settings->fastLatinOptions;  | 
1153  | 0  |     if(fastLatinOptions >= 0 &&  | 
1154  | 0  |             (equalPrefixLength == leftLength ||  | 
1155  | 0  |                 left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&  | 
1156  | 0  |             (equalPrefixLength == rightLength ||  | 
1157  | 0  |                 right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) { | 
1158  | 0  |         if(leftLength >= 0) { | 
1159  | 0  |             result = CollationFastLatin::compareUTF8(data->fastLatinTable,  | 
1160  | 0  |                                                      settings->fastLatinPrimaries,  | 
1161  | 0  |                                                      fastLatinOptions,  | 
1162  | 0  |                                                      left + equalPrefixLength,  | 
1163  | 0  |                                                      leftLength - equalPrefixLength,  | 
1164  | 0  |                                                      right + equalPrefixLength,  | 
1165  | 0  |                                                      rightLength - equalPrefixLength);  | 
1166  | 0  |         } else { | 
1167  | 0  |             result = CollationFastLatin::compareUTF8(data->fastLatinTable,  | 
1168  | 0  |                                                      settings->fastLatinPrimaries,  | 
1169  | 0  |                                                      fastLatinOptions,  | 
1170  | 0  |                                                      left + equalPrefixLength, -1,  | 
1171  | 0  |                                                      right + equalPrefixLength, -1);  | 
1172  | 0  |         }  | 
1173  | 0  |     } else { | 
1174  | 0  |         result = CollationFastLatin::BAIL_OUT_RESULT;  | 
1175  | 0  |     }  | 
1176  |  | 
  | 
1177  | 0  |     if(result == CollationFastLatin::BAIL_OUT_RESULT) { | 
1178  | 0  |         if(settings->dontCheckFCD()) { | 
1179  | 0  |             UTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);  | 
1180  | 0  |             UTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);  | 
1181  | 0  |             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);  | 
1182  | 0  |         } else { | 
1183  | 0  |             FCDUTF8CollationIterator leftIter(data, numeric, left, equalPrefixLength, leftLength);  | 
1184  | 0  |             FCDUTF8CollationIterator rightIter(data, numeric, right, equalPrefixLength, rightLength);  | 
1185  | 0  |             result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);  | 
1186  | 0  |         }  | 
1187  | 0  |     }  | 
1188  | 0  |     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) { | 
1189  | 0  |         return (UCollationResult)result;  | 
1190  | 0  |     }  | 
1191  |  |  | 
1192  |  |     // Note: If NUL-terminated, we could get the actual limits from the iterators now.  | 
1193  |  |     // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,  | 
1194  |  |     // and the benefit seems unlikely to be measurable.  | 
1195  |  |  | 
1196  |  |     // Compare identical level.  | 
1197  | 0  |     const Normalizer2Impl &nfcImpl = data->nfcImpl;  | 
1198  | 0  |     left += equalPrefixLength;  | 
1199  | 0  |     right += equalPrefixLength;  | 
1200  | 0  |     if(leftLength > 0) { | 
1201  | 0  |         leftLength -= equalPrefixLength;  | 
1202  | 0  |         rightLength -= equalPrefixLength;  | 
1203  | 0  |     }  | 
1204  | 0  |     if(settings->dontCheckFCD()) { | 
1205  | 0  |         UTF8NFDIterator leftIter(left, leftLength);  | 
1206  | 0  |         UTF8NFDIterator rightIter(right, rightLength);  | 
1207  | 0  |         return compareNFDIter(nfcImpl, leftIter, rightIter);  | 
1208  | 0  |     } else { | 
1209  | 0  |         FCDUTF8NFDIterator leftIter(data, left, leftLength);  | 
1210  | 0  |         FCDUTF8NFDIterator rightIter(data, right, rightLength);  | 
1211  | 0  |         return compareNFDIter(nfcImpl, leftIter, rightIter);  | 
1212  | 0  |     }  | 
1213  | 0  | }  | 
1214  |  |  | 
1215  |  | UCollationResult  | 
1216  |  | RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,  | 
1217  | 0  |                            UErrorCode &errorCode) const { | 
1218  | 0  |     if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; } | 
1219  | 0  |     UBool numeric = settings->isNumeric();  | 
1220  |  |  | 
1221  |  |     // Identical-prefix test.  | 
1222  | 0  |     int32_t equalPrefixLength = 0;  | 
1223  | 0  |     { | 
1224  | 0  |         UChar32 leftUnit;  | 
1225  | 0  |         UChar32 rightUnit;  | 
1226  | 0  |         while((leftUnit = left.next(&left)) == (rightUnit = right.next(&right))) { | 
1227  | 0  |             if(leftUnit < 0) { return UCOL_EQUAL; } | 
1228  | 0  |             ++equalPrefixLength;  | 
1229  | 0  |         }  | 
1230  |  |  | 
1231  |  |         // Back out the code units that differed, for the real collation comparison.  | 
1232  | 0  |         if(leftUnit >= 0) { left.previous(&left); } | 
1233  | 0  |         if(rightUnit >= 0) { right.previous(&right); } | 
1234  |  | 
  | 
1235  | 0  |         if(equalPrefixLength > 0) { | 
1236  | 0  |             if((leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||  | 
1237  | 0  |                     (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) { | 
1238  |  |                 // Identical prefix: Back up to the start of a contraction or reordering sequence.  | 
1239  | 0  |                 do { | 
1240  | 0  |                     --equalPrefixLength;  | 
1241  | 0  |                     leftUnit = left.previous(&left);  | 
1242  | 0  |                     right.previous(&right);  | 
1243  | 0  |                 } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));  | 
1244  | 0  |             }  | 
1245  |  |             // See the notes in the UTF-16 version.  | 
1246  | 0  |         }  | 
1247  | 0  |     }  | 
1248  |  |  | 
1249  | 0  |     UCollationResult result;  | 
1250  | 0  |     if(settings->dontCheckFCD()) { | 
1251  | 0  |         UIterCollationIterator leftIter(data, numeric, left);  | 
1252  | 0  |         UIterCollationIterator rightIter(data, numeric, right);  | 
1253  | 0  |         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);  | 
1254  | 0  |     } else { | 
1255  | 0  |         FCDUIterCollationIterator leftIter(data, numeric, left, equalPrefixLength);  | 
1256  | 0  |         FCDUIterCollationIterator rightIter(data, numeric, right, equalPrefixLength);  | 
1257  | 0  |         result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);  | 
1258  | 0  |     }  | 
1259  | 0  |     if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) { | 
1260  | 0  |         return result;  | 
1261  | 0  |     }  | 
1262  |  |  | 
1263  |  |     // Compare identical level.  | 
1264  | 0  |     left.move(&left, equalPrefixLength, UITER_ZERO);  | 
1265  | 0  |     right.move(&right, equalPrefixLength, UITER_ZERO);  | 
1266  | 0  |     const Normalizer2Impl &nfcImpl = data->nfcImpl;  | 
1267  | 0  |     if(settings->dontCheckFCD()) { | 
1268  | 0  |         UIterNFDIterator leftIter(left);  | 
1269  | 0  |         UIterNFDIterator rightIter(right);  | 
1270  | 0  |         return compareNFDIter(nfcImpl, leftIter, rightIter);  | 
1271  | 0  |     } else { | 
1272  | 0  |         FCDUIterNFDIterator leftIter(data, left, equalPrefixLength);  | 
1273  | 0  |         FCDUIterNFDIterator rightIter(data, right, equalPrefixLength);  | 
1274  | 0  |         return compareNFDIter(nfcImpl, leftIter, rightIter);  | 
1275  | 0  |     }  | 
1276  | 0  | }  | 
1277  |  |  | 
1278  |  | CollationKey &  | 
1279  |  | RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,  | 
1280  | 0  |                                    UErrorCode &errorCode) const { | 
1281  | 0  |     return getCollationKey(s.getBuffer(), s.length(), key, errorCode);  | 
1282  | 0  | }  | 
1283  |  |  | 
1284  |  | CollationKey &  | 
1285  |  | RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey& key,  | 
1286  | 0  |                                    UErrorCode &errorCode) const { | 
1287  | 0  |     if(U_FAILURE(errorCode)) { | 
1288  | 0  |         return key.setToBogus();  | 
1289  | 0  |     }  | 
1290  | 0  |     if(s == NULL && length != 0) { | 
1291  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
1292  | 0  |         return key.setToBogus();  | 
1293  | 0  |     }  | 
1294  | 0  |     key.reset();  // resets the "bogus" state  | 
1295  | 0  |     CollationKeyByteSink sink(key);  | 
1296  | 0  |     writeSortKey(s, length, sink, errorCode);  | 
1297  | 0  |     if(U_FAILURE(errorCode)) { | 
1298  | 0  |         key.setToBogus();  | 
1299  | 0  |     } else if(key.isBogus()) { | 
1300  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
1301  | 0  |     } else { | 
1302  | 0  |         key.setLength(sink.NumberOfBytesAppended());  | 
1303  | 0  |     }  | 
1304  | 0  |     return key;  | 
1305  | 0  | }  | 
1306  |  |  | 
1307  |  | int32_t  | 
1308  |  | RuleBasedCollator::getSortKey(const UnicodeString &s,  | 
1309  | 0  |                               uint8_t *dest, int32_t capacity) const { | 
1310  | 0  |     return getSortKey(s.getBuffer(), s.length(), dest, capacity);  | 
1311  | 0  | }  | 
1312  |  |  | 
1313  |  | int32_t  | 
1314  |  | RuleBasedCollator::getSortKey(const UChar *s, int32_t length,  | 
1315  | 0  |                               uint8_t *dest, int32_t capacity) const { | 
1316  | 0  |     if((s == NULL && length != 0) || capacity < 0 || (dest == NULL && capacity > 0)) { | 
1317  | 0  |         return 0;  | 
1318  | 0  |     }  | 
1319  | 0  |     uint8_t noDest[1] = { 0 }; | 
1320  | 0  |     if(dest == NULL) { | 
1321  |  |         // Distinguish pure preflighting from an allocation error.  | 
1322  | 0  |         dest = noDest;  | 
1323  | 0  |         capacity = 0;  | 
1324  | 0  |     }  | 
1325  | 0  |     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);  | 
1326  | 0  |     UErrorCode errorCode = U_ZERO_ERROR;  | 
1327  | 0  |     writeSortKey(s, length, sink, errorCode);  | 
1328  | 0  |     return U_SUCCESS(errorCode) ? sink.NumberOfBytesAppended() : 0;  | 
1329  | 0  | }  | 
1330  |  |  | 
1331  |  | void  | 
1332  |  | RuleBasedCollator::writeSortKey(const UChar *s, int32_t length,  | 
1333  | 0  |                                 SortKeyByteSink &sink, UErrorCode &errorCode) const { | 
1334  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
1335  | 0  |     const UChar *limit = (length >= 0) ? s + length : NULL;  | 
1336  | 0  |     UBool numeric = settings->isNumeric();  | 
1337  | 0  |     CollationKeys::LevelCallback callback;  | 
1338  | 0  |     if(settings->dontCheckFCD()) { | 
1339  | 0  |         UTF16CollationIterator iter(data, numeric, s, s, limit);  | 
1340  | 0  |         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,  | 
1341  | 0  |                                                   sink, Collation::PRIMARY_LEVEL,  | 
1342  | 0  |                                                   callback, TRUE, errorCode);  | 
1343  | 0  |     } else { | 
1344  | 0  |         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);  | 
1345  | 0  |         CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,  | 
1346  | 0  |                                                   sink, Collation::PRIMARY_LEVEL,  | 
1347  | 0  |                                                   callback, TRUE, errorCode);  | 
1348  | 0  |     }  | 
1349  | 0  |     if(settings->getStrength() == UCOL_IDENTICAL) { | 
1350  | 0  |         writeIdenticalLevel(s, limit, sink, errorCode);  | 
1351  | 0  |     }  | 
1352  | 0  |     static const char terminator = 0;  // TERMINATOR_BYTE  | 
1353  | 0  |     sink.Append(&terminator, 1);  | 
1354  | 0  | }  | 
1355  |  |  | 
1356  |  | void  | 
1357  |  | RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit,  | 
1358  | 0  |                                        SortKeyByteSink &sink, UErrorCode &errorCode) const { | 
1359  |  |     // NFD quick check  | 
1360  | 0  |     const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCode);  | 
1361  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
1362  | 0  |     sink.Append(Collation::LEVEL_SEPARATOR_BYTE);  | 
1363  | 0  |     UChar32 prev = 0;  | 
1364  | 0  |     if(nfdQCYesLimit != s) { | 
1365  | 0  |         prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);  | 
1366  | 0  |     }  | 
1367  |  |     // Is there non-NFD text?  | 
1368  | 0  |     int32_t destLengthEstimate;  | 
1369  | 0  |     if(limit != NULL) { | 
1370  | 0  |         if(nfdQCYesLimit == limit) { return; } | 
1371  | 0  |         destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);  | 
1372  | 0  |     } else { | 
1373  |  |         // s is NUL-terminated  | 
1374  | 0  |         if(*nfdQCYesLimit == 0) { return; } | 
1375  | 0  |         destLengthEstimate = -1;  | 
1376  | 0  |     }  | 
1377  | 0  |     UnicodeString nfd;  | 
1378  | 0  |     data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);  | 
1379  | 0  |     u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);  | 
1380  | 0  | }  | 
1381  |  |  | 
1382  |  | namespace { | 
1383  |  |  | 
1384  |  | /**  | 
1385  |  |  * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()  | 
1386  |  |  * with an instance of this callback class.  | 
1387  |  |  * When another level is about to be written, the callback  | 
1388  |  |  * records the level and the number of bytes that will be written until  | 
1389  |  |  * the sink (which is actually a FixedSortKeyByteSink) fills up.  | 
1390  |  |  *  | 
1391  |  |  * When internalNextSortKeyPart() is called again, it restarts with the last level  | 
1392  |  |  * and ignores as many bytes as were written previously for that level.  | 
1393  |  |  */  | 
1394  |  | class PartLevelCallback : public CollationKeys::LevelCallback { | 
1395  |  | public:  | 
1396  |  |     PartLevelCallback(const SortKeyByteSink &s)  | 
1397  | 0  |             : sink(s), level(Collation::PRIMARY_LEVEL) { | 
1398  | 0  |         levelCapacity = sink.GetRemainingCapacity();  | 
1399  | 0  |     }  | 
1400  | 0  |     virtual ~PartLevelCallback() {} | 
1401  | 0  |     virtual UBool needToWrite(Collation::Level l) { | 
1402  | 0  |         if(!sink.Overflowed()) { | 
1403  |  |             // Remember a level that will be at least partially written.  | 
1404  | 0  |             level = l;  | 
1405  | 0  |             levelCapacity = sink.GetRemainingCapacity();  | 
1406  | 0  |             return TRUE;  | 
1407  | 0  |         } else { | 
1408  | 0  |             return FALSE;  | 
1409  | 0  |         }  | 
1410  | 0  |     }  | 
1411  | 0  |     Collation::Level getLevel() const { return level; } | 
1412  | 0  |     int32_t getLevelCapacity() const { return levelCapacity; } | 
1413  |  |  | 
1414  |  | private:  | 
1415  |  |     const SortKeyByteSink &sink;  | 
1416  |  |     Collation::Level level;  | 
1417  |  |     int32_t levelCapacity;  | 
1418  |  | };  | 
1419  |  |  | 
1420  |  | }  // namespace  | 
1421  |  |  | 
1422  |  | int32_t  | 
1423  |  | RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],  | 
1424  | 0  |                                            uint8_t *dest, int32_t count, UErrorCode &errorCode) const { | 
1425  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
1426  | 0  |     if(iter == NULL || state == NULL || count < 0 || (count > 0 && dest == NULL)) { | 
1427  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
1428  | 0  |         return 0;  | 
1429  | 0  |     }  | 
1430  | 0  |     if(count == 0) { return 0; } | 
1431  |  |  | 
1432  | 0  |     FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);  | 
1433  | 0  |     sink.IgnoreBytes((int32_t)state[1]);  | 
1434  | 0  |     iter->move(iter, 0, UITER_START);  | 
1435  |  | 
  | 
1436  | 0  |     Collation::Level level = (Collation::Level)state[0];  | 
1437  | 0  |     if(level <= Collation::QUATERNARY_LEVEL) { | 
1438  | 0  |         UBool numeric = settings->isNumeric();  | 
1439  | 0  |         PartLevelCallback callback(sink);  | 
1440  | 0  |         if(settings->dontCheckFCD()) { | 
1441  | 0  |             UIterCollationIterator ci(data, numeric, *iter);  | 
1442  | 0  |             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,  | 
1443  | 0  |                                                       sink, level, callback, FALSE, errorCode);  | 
1444  | 0  |         } else { | 
1445  | 0  |             FCDUIterCollationIterator ci(data, numeric, *iter, 0);  | 
1446  | 0  |             CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,  | 
1447  | 0  |                                                       sink, level, callback, FALSE, errorCode);  | 
1448  | 0  |         }  | 
1449  | 0  |         if(U_FAILURE(errorCode)) { return 0; } | 
1450  | 0  |         if(sink.NumberOfBytesAppended() > count) { | 
1451  | 0  |             state[0] = (uint32_t)callback.getLevel();  | 
1452  | 0  |             state[1] = (uint32_t)callback.getLevelCapacity();  | 
1453  | 0  |             return count;  | 
1454  | 0  |         }  | 
1455  |  |         // All of the normal levels are done.  | 
1456  | 0  |         if(settings->getStrength() == UCOL_IDENTICAL) { | 
1457  | 0  |             level = Collation::IDENTICAL_LEVEL;  | 
1458  | 0  |             iter->move(iter, 0, UITER_START);  | 
1459  | 0  |         }  | 
1460  |  |         // else fall through to setting ZERO_LEVEL  | 
1461  | 0  |     }  | 
1462  |  |  | 
1463  | 0  |     if(level == Collation::IDENTICAL_LEVEL) { | 
1464  | 0  |         int32_t levelCapacity = sink.GetRemainingCapacity();  | 
1465  | 0  |         UnicodeString s;  | 
1466  | 0  |         for(;;) { | 
1467  | 0  |             UChar32 c = iter->next(iter);  | 
1468  | 0  |             if(c < 0) { break; } | 
1469  | 0  |             s.append((UChar)c);  | 
1470  | 0  |         }  | 
1471  | 0  |         const UChar *sArray = s.getBuffer();  | 
1472  | 0  |         writeIdenticalLevel(sArray, sArray + s.length(), sink, errorCode);  | 
1473  | 0  |         if(U_FAILURE(errorCode)) { return 0; } | 
1474  | 0  |         if(sink.NumberOfBytesAppended() > count) { | 
1475  | 0  |             state[0] = (uint32_t)level;  | 
1476  | 0  |             state[1] = (uint32_t)levelCapacity;  | 
1477  | 0  |             return count;  | 
1478  | 0  |         }  | 
1479  | 0  |     }  | 
1480  |  |  | 
1481  |  |     // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.  | 
1482  | 0  |     state[0] = (uint32_t)Collation::ZERO_LEVEL;  | 
1483  | 0  |     state[1] = 0;  | 
1484  | 0  |     int32_t length = sink.NumberOfBytesAppended();  | 
1485  | 0  |     int32_t i = length;  | 
1486  | 0  |     while(i < count) { dest[i++] = 0; } | 
1487  | 0  |     return length;  | 
1488  | 0  | }  | 
1489  |  |  | 
1490  |  | void  | 
1491  |  | RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,  | 
1492  | 0  |                                   UErrorCode &errorCode) const { | 
1493  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
1494  | 0  |     const UChar *s = str.getBuffer();  | 
1495  | 0  |     const UChar *limit = s + str.length();  | 
1496  | 0  |     UBool numeric = settings->isNumeric();  | 
1497  | 0  |     if(settings->dontCheckFCD()) { | 
1498  | 0  |         UTF16CollationIterator iter(data, numeric, s, s, limit);  | 
1499  | 0  |         int64_t ce;  | 
1500  | 0  |         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) { | 
1501  | 0  |             ces.addElement(ce, errorCode);  | 
1502  | 0  |         }  | 
1503  | 0  |     } else { | 
1504  | 0  |         FCDUTF16CollationIterator iter(data, numeric, s, s, limit);  | 
1505  | 0  |         int64_t ce;  | 
1506  | 0  |         while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) { | 
1507  | 0  |             ces.addElement(ce, errorCode);  | 
1508  | 0  |         }  | 
1509  | 0  |     }  | 
1510  | 0  | }  | 
1511  |  |  | 
1512  |  | namespace { | 
1513  |  |  | 
1514  |  | void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,  | 
1515  | 0  |                   UErrorCode &errorCode) { | 
1516  | 0  |     if(U_FAILURE(errorCode) || length == 0) { return; } | 
1517  | 0  |     if(!s.isEmpty()) { | 
1518  | 0  |         s.append('_', errorCode); | 
1519  | 0  |     }  | 
1520  | 0  |     s.append(letter, errorCode);  | 
1521  | 0  |     for(int32_t i = 0; i < length; ++i) { | 
1522  | 0  |         s.append(uprv_toupper(subtag[i]), errorCode);  | 
1523  | 0  |     }  | 
1524  | 0  | }  | 
1525  |  |  | 
1526  |  | void appendAttribute(CharString &s, char letter, UColAttributeValue value,  | 
1527  | 0  |                      UErrorCode &errorCode) { | 
1528  | 0  |     if(U_FAILURE(errorCode)) { return; } | 
1529  | 0  |     if(!s.isEmpty()) { | 
1530  | 0  |         s.append('_', errorCode); | 
1531  | 0  |     }  | 
1532  | 0  |     static const char *valueChars = "1234...........IXO..SN..LU......";  | 
1533  | 0  |     s.append(letter, errorCode);  | 
1534  | 0  |     s.append(valueChars[value], errorCode);  | 
1535  | 0  | }  | 
1536  |  |  | 
1537  |  | }  // namespace  | 
1538  |  |  | 
1539  |  | int32_t  | 
1540  |  | RuleBasedCollator::internalGetShortDefinitionString(const char *locale,  | 
1541  |  |                                                     char *buffer, int32_t capacity,  | 
1542  | 0  |                                                     UErrorCode &errorCode) const { | 
1543  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
1544  | 0  |     if(buffer == NULL ? capacity != 0 : capacity < 0) { | 
1545  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
1546  | 0  |         return 0;  | 
1547  | 0  |     }  | 
1548  | 0  |     if(locale == NULL) { | 
1549  | 0  |         locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);  | 
1550  | 0  |     }  | 
1551  |  | 
  | 
1552  | 0  |     char resultLocale[ULOC_FULLNAME_CAPACITY + 1];  | 
1553  | 0  |     int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,  | 
1554  | 0  |                                                   "collation", locale,  | 
1555  | 0  |                                                   NULL, &errorCode);  | 
1556  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
1557  | 0  |     resultLocale[length] = 0;  | 
1558  |  |  | 
1559  |  |     // Append items in alphabetic order of their short definition letters.  | 
1560  | 0  |     CharString result;  | 
1561  | 0  |     char subtag[ULOC_KEYWORD_AND_VALUES_CAPACITY];  | 
1562  |  | 
  | 
1563  | 0  |     if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) { | 
1564  | 0  |         appendAttribute(result, 'A', getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);  | 
1565  | 0  |     }  | 
1566  |  |     // ATTR_VARIABLE_TOP not supported because 'B' was broken.  | 
1567  |  |     // See ICU tickets #10372 and #10386.  | 
1568  | 0  |     if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) { | 
1569  | 0  |         appendAttribute(result, 'C', getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);  | 
1570  | 0  |     }  | 
1571  | 0  |     if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) { | 
1572  | 0  |         appendAttribute(result, 'D', getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);  | 
1573  | 0  |     }  | 
1574  | 0  |     if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) { | 
1575  | 0  |         appendAttribute(result, 'E', getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);  | 
1576  | 0  |     }  | 
1577  | 0  |     if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) { | 
1578  | 0  |         appendAttribute(result, 'F', getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);  | 
1579  | 0  |     }  | 
1580  |  |     // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.  | 
1581  | 0  |     length = uloc_getKeywordValue(resultLocale, "collation", subtag, UPRV_LENGTHOF(subtag), &errorCode);  | 
1582  | 0  |     appendSubtag(result, 'K', subtag, length, errorCode);  | 
1583  | 0  |     length = uloc_getLanguage(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);  | 
1584  | 0  |     if (length == 0) { | 
1585  | 0  |         appendSubtag(result, 'L', "root", 4, errorCode);  | 
1586  | 0  |     } else { | 
1587  | 0  |         appendSubtag(result, 'L', subtag, length, errorCode);  | 
1588  | 0  |     }  | 
1589  | 0  |     if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) { | 
1590  | 0  |         appendAttribute(result, 'N', getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);  | 
1591  | 0  |     }  | 
1592  | 0  |     length = uloc_getCountry(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);  | 
1593  | 0  |     appendSubtag(result, 'R', subtag, length, errorCode);  | 
1594  | 0  |     if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) { | 
1595  | 0  |         appendAttribute(result, 'S', getAttribute(UCOL_STRENGTH, errorCode), errorCode);  | 
1596  | 0  |     }  | 
1597  | 0  |     length = uloc_getVariant(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);  | 
1598  | 0  |     appendSubtag(result, 'V', subtag, length, errorCode);  | 
1599  | 0  |     length = uloc_getScript(resultLocale, subtag, UPRV_LENGTHOF(subtag), &errorCode);  | 
1600  | 0  |     appendSubtag(result, 'Z', subtag, length, errorCode);  | 
1601  |  | 
  | 
1602  | 0  |     if(U_FAILURE(errorCode)) { return 0; } | 
1603  | 0  |     return result.extract(buffer, capacity, errorCode);  | 
1604  | 0  | }  | 
1605  |  |  | 
1606  |  | UBool  | 
1607  | 0  | RuleBasedCollator::isUnsafe(UChar32 c) const { | 
1608  | 0  |     return data->isUnsafeBackward(c, settings->isNumeric());  | 
1609  | 0  | }  | 
1610  |  |  | 
1611  |  | void U_CALLCONV  | 
1612  | 0  | RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) { | 
1613  | 0  |     t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);  | 
1614  | 0  | }  | 
1615  |  |  | 
1616  |  | UBool  | 
1617  | 0  | RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const { | 
1618  | 0  |     umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);  | 
1619  | 0  |     return U_SUCCESS(errorCode);  | 
1620  | 0  | }  | 
1621  |  |  | 
1622  |  | CollationElementIterator *  | 
1623  | 0  | RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const { | 
1624  | 0  |     UErrorCode errorCode = U_ZERO_ERROR;  | 
1625  | 0  |     if(!initMaxExpansions(errorCode)) { return NULL; } | 
1626  | 0  |     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);  | 
1627  | 0  |     if(U_FAILURE(errorCode)) { | 
1628  | 0  |         delete cei;  | 
1629  | 0  |         return NULL;  | 
1630  | 0  |     }  | 
1631  | 0  |     return cei;  | 
1632  | 0  | }  | 
1633  |  |  | 
1634  |  | CollationElementIterator *  | 
1635  | 0  | RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const { | 
1636  | 0  |     UErrorCode errorCode = U_ZERO_ERROR;  | 
1637  | 0  |     if(!initMaxExpansions(errorCode)) { return NULL; } | 
1638  | 0  |     CollationElementIterator *cei = new CollationElementIterator(source, this, errorCode);  | 
1639  | 0  |     if(U_FAILURE(errorCode)) { | 
1640  | 0  |         delete cei;  | 
1641  | 0  |         return NULL;  | 
1642  | 0  |     }  | 
1643  | 0  |     return cei;  | 
1644  | 0  | }  | 
1645  |  |  | 
1646  |  | int32_t  | 
1647  | 0  | RuleBasedCollator::getMaxExpansion(int32_t order) const { | 
1648  | 0  |     UErrorCode errorCode = U_ZERO_ERROR;  | 
1649  | 0  |     (void)initMaxExpansions(errorCode);  | 
1650  | 0  |     return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);  | 
1651  | 0  | }  | 
1652  |  |  | 
1653  |  | U_NAMESPACE_END  | 
1654  |  |  | 
1655  |  | #endif  // !UCONFIG_NO_COLLATION  |