Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/i18n/rulebasedcollator.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 1996-2015, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* rulebasedcollator.cpp
9
*
10
* (replaced the former tblcoll.cpp)
11
*
12
* created on: 2012feb14 with new and old collation code
13
* created by: Markus W. Scherer
14
*/
15
16
#include "unicode/utypes.h"
17
18
#if !UCONFIG_NO_COLLATION
19
20
#include "unicode/coll.h"
21
#include "unicode/coleitr.h"
22
#include "unicode/localpointer.h"
23
#include "unicode/locid.h"
24
#include "unicode/sortkey.h"
25
#include "unicode/tblcoll.h"
26
#include "unicode/ucol.h"
27
#include "unicode/uiter.h"
28
#include "unicode/uloc.h"
29
#include "unicode/uniset.h"
30
#include "unicode/unistr.h"
31
#include "unicode/usetiter.h"
32
#include "unicode/utf8.h"
33
#include "unicode/uversion.h"
34
#include "bocsu.h"
35
#include "charstr.h"
36
#include "cmemory.h"
37
#include "collation.h"
38
#include "collationcompare.h"
39
#include "collationdata.h"
40
#include "collationdatareader.h"
41
#include "collationfastlatin.h"
42
#include "collationiterator.h"
43
#include "collationkeys.h"
44
#include "collationroot.h"
45
#include "collationsets.h"
46
#include "collationsettings.h"
47
#include "collationtailoring.h"
48
#include "cstring.h"
49
#include "uassert.h"
50
#include "ucol_imp.h"
51
#include "uhash.h"
52
#include "uitercollationiterator.h"
53
#include "ustr_imp.h"
54
#include "utf16collationiterator.h"
55
#include "utf8collationiterator.h"
56
#include "uvectr64.h"
57
58
U_NAMESPACE_BEGIN
59
60
namespace {
61
62
class FixedSortKeyByteSink : public SortKeyByteSink {
63
public:
64
    FixedSortKeyByteSink(char *dest, int32_t destCapacity)
65
0
            : SortKeyByteSink(dest, destCapacity) {}
66
    virtual ~FixedSortKeyByteSink();
67
68
private:
69
    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
70
    virtual UBool Resize(int32_t appendCapacity, int32_t length);
71
};
72
73
// Defined out of line; the destructor body itself has nothing to do.
FixedSortKeyByteSink::~FixedSortKeyByteSink() {
}
74
75
void
76
0
FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
77
    // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
78
    // Fill the buffer completely.
79
0
    int32_t available = capacity_ - length;
80
0
    if (available > 0) {
81
0
        uprv_memcpy(buffer_ + length, bytes, available);
82
0
    }
83
0
}
84
85
/**
 * The destination buffer has a fixed size, so a request to grow is always refused.
 *
 * @return FALSE, unconditionally
 */
UBool
FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
    return FALSE;
}
89
90
}  // namespace
91
92
// Not in an anonymous namespace, so that it can be a friend of CollationKey.
93
class CollationKeyByteSink : public SortKeyByteSink {
94
public:
95
    CollationKeyByteSink(CollationKey &key)
96
0
            : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
97
0
              key_(key) {}
98
    virtual ~CollationKeyByteSink();
99
100
private:
101
    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length);
102
    virtual UBool Resize(int32_t appendCapacity, int32_t length);
103
104
    CollationKey &key_;
105
};
106
107
0
// Defined out of line; the sink only refers to the key and has nothing to release.
CollationKeyByteSink::~CollationKeyByteSink() {
}
108
109
void
110
0
CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
111
    // buffer_ != NULL && bytes != NULL && n > 0 && appended_ > capacity_
112
0
    if (Resize(n, length)) {
113
0
        uprv_memcpy(buffer_ + length, bytes, n);
114
0
    }
115
0
}
116
117
/**
 * Enlarges the key's storage so that at least appendCapacity more bytes fit.
 * The new capacity is the largest of: twice the current capacity,
 * length + 2 * appendCapacity, and 200.
 *
 * @param appendCapacity the number of additional bytes that are needed
 * @param length         the number of bytes already stored, passed on to reallocate()
 * @return TRUE if the buffer was enlarged; FALSE if there is no buffer
 *         or the reallocation failed (the sink is then marked as not ok)
 */
UBool
CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
    if (buffer_ == NULL) {
        return FALSE;  // allocation failed before already
    }
    const int32_t doubled = 2 * capacity_;
    const int32_t needed = length + 2 * appendCapacity;
    int32_t grown = (doubled < needed) ? needed : doubled;
    if (grown < 200) {
        grown = 200;
    }
    uint8_t *reallocated = key_.reallocate(grown, length);
    if (reallocated == NULL) {
        SetNotOk();
        return FALSE;
    }
    capacity_ = grown;
    buffer_ = reinterpret_cast<char *>(reallocated);
    return TRUE;
}
139
140
/**
 * Copy constructor. The copy points at the same data, settings, tailoring and
 * cache entry as the original and adds a reference to the settings and to the
 * cache entry (the same two pointers that the destructor clears).
 */
RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
        : Collator(other),
          data(other.data),
          settings(other.settings),
          tailoring(other.tailoring),
          cacheEntry(other.cacheEntry),
          validLocale(other.validLocale),
          explicitlySetAttributes(other.explicitlySetAttributes),
          actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid)
{
    cacheEntry->addRef();
    settings->addRef();
}
152
153
/**
 * Builds a collator from binary collation data.
 *
 * @param bin       the binary data; must not be NULL
 * @param length    the length of bin; must not be 0
 * @param base      the base collator; must not be NULL and must use the root tailoring
 * @param errorCode in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for missing
 *                  arguments, U_UNSUPPORTED_ERROR if base is not the root collator,
 *                  U_MEMORY_ALLOCATION_ERROR if the new tailoring cannot be created,
 *                  or whatever the data reader reports
 */
RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
                                     const RuleBasedCollator *base, UErrorCode &errorCode)
        : data(NULL),
          settings(NULL),
          tailoring(NULL),
          cacheEntry(NULL),
          validLocale(""),
          explicitlySetAttributes(0),
          actualLocaleIsSameAsValid(FALSE) {
    if(U_FAILURE(errorCode)) { return; }

    const UBool haveArguments = bin != NULL && length != 0 && base != NULL;
    if(!haveArguments) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    // Only the root collator is accepted as the base.
    const CollationTailoring *rootTailoring = CollationRoot::getRoot(errorCode);
    if(U_FAILURE(errorCode)) { return; }
    if(base->tailoring != rootTailoring) {
        errorCode = U_UNSUPPORTED_ERROR;
        return;
    }

    LocalPointer<CollationTailoring> loaded(new CollationTailoring(base->tailoring->settings));
    if(loaded.isNull() || loaded->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    CollationDataReader::read(base->tailoring, bin, length, *loaded, errorCode);
    if(U_FAILURE(errorCode)) { return; }

    loaded->actualLocale.setToBogus();
    // Ownership of the tailoring passes to adoptTailoring().
    adoptTailoring(loaded.orphan(), errorCode);
}
183
184
/**
 * Builds a collator on top of an existing cache entry. Data, settings and
 * tailoring are taken from the entry's tailoring; the valid locale comes from
 * the entry. A reference is added to the settings and to the cache entry.
 */
RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
        : data(entry->tailoring->data),
          settings(entry->tailoring->settings),
          tailoring(entry->tailoring),
          cacheEntry(entry),
          validLocale(entry->validLocale),
          explicitlySetAttributes(0),
          actualLocaleIsSameAsValid(FALSE)
{
    cacheEntry->addRef();
    settings->addRef();
}
195
196
0
/**
 * Destructor. Clears the two shared-object pointers that the constructors and
 * adoptTailoring() added a reference to.
 */
RuleBasedCollator::~RuleBasedCollator()
{
    SharedObject::clearPtr(settings);
    SharedObject::clearPtr(cacheEntry);
}
200
201
void
202
0
RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
203
0
    if(U_FAILURE(errorCode)) {
204
0
        t->deleteIfZeroRefCount();
205
0
        return;
206
0
    }
207
0
    U_ASSERT(settings == NULL && data == NULL && tailoring == NULL && cacheEntry == NULL);
208
0
    cacheEntry = new CollationCacheEntry(t->actualLocale, t);
209
0
    if(cacheEntry == NULL) {
210
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
211
0
        t->deleteIfZeroRefCount();
212
0
        return;
213
0
    }
214
0
    data = t->data;
215
0
    settings = t->settings;
216
0
    settings->addRef();
217
0
    tailoring = t;
218
0
    cacheEntry->addRef();
219
0
    validLocale = t->actualLocale;
220
0
    actualLocaleIsSameAsValid = FALSE;
221
0
}
222
223
/**
 * Creates a heap-allocated copy of this collator via the copy constructor.
 *
 * @return the result of the new-expression for the copy
 */
RuleBasedCollator *
RuleBasedCollator::clone() const {
    RuleBasedCollator *copy = new RuleBasedCollator(*this);
    return copy;
}
227
228
0
/**
 * Assignment operator. Self-assignment is a no-op. Otherwise the settings and
 * cache entry pointers are replaced via SharedObject::copyPtr(), and the plain
 * pointers and scalar fields are copied from other.
 */
RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
    if(this != &other) {
        SharedObject::copyPtr(other.settings, settings);
        tailoring = other.tailoring;
        SharedObject::copyPtr(other.cacheEntry, cacheEntry);
        // data always mirrors the data of the current tailoring.
        data = tailoring->data;
        validLocale = other.validLocale;
        explicitlySetAttributes = other.explicitlySetAttributes;
        actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
    }
    return *this;
}
239
240
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
241
242
bool
243
0
RuleBasedCollator::operator==(const Collator& other) const {
244
0
    if(this == &other) { return TRUE; }
245
0
    if(!Collator::operator==(other)) { return FALSE; }
246
0
    const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
247
0
    if(*settings != *o.settings) { return FALSE; }
248
0
    if(data == o.data) { return TRUE; }
249
0
    UBool thisIsRoot = data->base == NULL;
250
0
    UBool otherIsRoot = o.data->base == NULL;
251
0
    U_ASSERT(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
252
0
    if(thisIsRoot != otherIsRoot) { return FALSE; }
253
0
    if((thisIsRoot || !tailoring->rules.isEmpty()) &&
254
0
            (otherIsRoot || !o.tailoring->rules.isEmpty())) {
255
        // Shortcut: If both collators have valid rule strings, then compare those.
256
0
        if(tailoring->rules == o.tailoring->rules) { return TRUE; }
257
0
    }
258
    // Different rule strings can result in the same or equivalent tailoring.
259
    // The rule strings are optional in ICU resource bundles, although included by default.
260
    // cloneBinary() drops the rule string.
261
0
    UErrorCode errorCode = U_ZERO_ERROR;
262
0
    LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
263
0
    LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
264
0
    if(U_FAILURE(errorCode)) { return FALSE; }
265
0
    if(*thisTailored != *otherTailored) { return FALSE; }
266
    // For completeness, we should compare all of the mappings;
267
    // or we should create a list of strings, sort it with one collator,
268
    // and check if both collators compare adjacent strings the same
269
    // (order & strength, down to quaternary); or similar.
270
    // Testing equality of collators seems unusual.
271
0
    return TRUE;
272
0
}
273
274
/**
 * Computes a hash code from the settings and, for a tailored collator,
 * from the CE32 values of the tailored code points.
 *
 * @return the hash code; 0 if the tailored set cannot be built
 */
int32_t
RuleBasedCollator::hashCode() const {
    int32_t hash = settings->hashCode();
    if(data->base != NULL) {  // not the root collator
        // Do not rely on the rule string, see comments in operator==().
        UErrorCode errorCode = U_ZERO_ERROR;
        LocalPointer<UnicodeSet> tailored(getTailoredSet(errorCode));
        if(U_FAILURE(errorCode)) { return 0; }
        // Iteration stops at the end of the set or at the first string element.
        for(UnicodeSetIterator iter(*tailored); iter.next() && !iter.isString();) {
            hash ^= data->getCE32(iter.getCodepoint());
        }
    }
    return hash;
}
288
289
void
290
RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
291
0
                              const Locale &actual) {
292
0
    if(actual == tailoring->actualLocale) {
293
0
        actualLocaleIsSameAsValid = FALSE;
294
0
    } else {
295
0
        U_ASSERT(actual == valid);
296
0
        actualLocaleIsSameAsValid = TRUE;
297
0
    }
298
    // Do not modify tailoring.actualLocale:
299
    // We cannot be sure that that would be thread-safe.
300
0
    validLocale = valid;
301
0
    (void)requested;  // Ignore, see also ticket #10477.
302
0
}
303
304
/**
 * Returns the actual or the valid locale of this collator.
 *
 * @param type      ULOC_ACTUAL_LOCALE or ULOC_VALID_LOCALE; any other value
 *                  (including ULOC_REQUESTED_LOCALE) is rejected
 * @param errorCode in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for a rejected type
 * @return the requested locale, or the root locale on failure
 */
Locale
RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
    if(U_FAILURE(errorCode)) {
        return Locale::getRoot();
    }
    if(type == ULOC_ACTUAL_LOCALE) {
        return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
    }
    if(type == ULOC_VALID_LOCALE) {
        return validLocale;
    }
    // ULOC_REQUESTED_LOCALE and anything else
    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return Locale::getRoot();
}
320
321
const char *
322
0
RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
323
0
    if(U_FAILURE(errorCode)) {
324
0
        return NULL;
325
0
    }
326
0
    const Locale *result;
327
0
    switch(type) {
328
0
    case ULOC_ACTUAL_LOCALE:
329
0
        result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
330
0
        break;
331
0
    case ULOC_VALID_LOCALE:
332
0
        result = &validLocale;
333
0
        break;
334
0
    case ULOC_REQUESTED_LOCALE:
335
0
    default:
336
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
337
0
        return NULL;
338
0
    }
339
0
    if(result->isBogus()) { return NULL; }
340
0
    const char *id = result->getName();
341
0
    return id[0] == 0 ? "root" : id;
342
0
}
343
344
/**
 * Returns the rule string stored in the tailoring.
 *
 * @return a reference to the tailoring's rule string
 */
const UnicodeString&
RuleBasedCollator::getRules() const {
    const UnicodeString &tailoringRules = tailoring->rules;
    return tailoringRules;
}
348
349
void
350
0
RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
351
0
    if(delta == UCOL_TAILORING_ONLY) {
352
0
        buffer = tailoring->rules;
353
0
        return;
354
0
    }
355
    // UCOL_FULL_RULES
356
0
    buffer.remove();
357
0
    CollationLoader::appendRootRules(buffer);
358
0
    buffer.append(tailoring->rules).getTerminatedBuffer();
359
0
}
360
361
void
362
0
RuleBasedCollator::getVersion(UVersionInfo version) const {
363
0
    uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
364
0
    version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
365
0
}
366
367
/**
 * Creates a new set with the characters and strings tailored by this collator.
 * For the root collator (data->base == NULL) the set is left empty.
 *
 * @param errorCode in/out error code; set to U_MEMORY_ALLOCATION_ERROR if the
 *                  set cannot be allocated
 * @return the new set, which the caller must delete; NULL on failure
 */
UnicodeSet *
RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return NULL; }
    // The LocalPointer deletes the set on every early return below.
    LocalPointer<UnicodeSet> tailored(new UnicodeSet());
    if(tailored.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    if(data->base != NULL) {
        TailoredSet(tailored.getAlias()).forData(data, errorCode);
        if(U_FAILURE(errorCode)) { return NULL; }
    }
    return tailored.orphan();
}
384
385
void
386
RuleBasedCollator::internalGetContractionsAndExpansions(
387
        UnicodeSet *contractions, UnicodeSet *expansions,
388
0
        UBool addPrefixes, UErrorCode &errorCode) const {
389
0
    if(U_FAILURE(errorCode)) { return; }
390
0
    if(contractions != NULL) {
391
0
        contractions->clear();
392
0
    }
393
0
    if(expansions != NULL) {
394
0
        expansions->clear();
395
0
    }
396
0
    ContractionsAndExpansions(contractions, expansions, NULL, addPrefixes).forData(data, errorCode);
397
0
}
398
399
void
400
0
RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
401
0
    if(U_FAILURE(errorCode)) { return; }
402
0
    ContractionsAndExpansions(&set, NULL, NULL, FALSE).forCodePoint(data, c, errorCode);
403
0
}
404
405
/**
 * Returns the settings stored in the tailoring, which serve as this
 * collator's defaults (as opposed to the possibly modified `settings` member).
 */
const CollationSettings &
RuleBasedCollator::getDefaultSettings() const {
    return *(tailoring->settings);
}
409
410
/**
 * Returns the current value of a collation attribute.
 *
 * @param attr      the attribute to query
 * @param errorCode in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for an unknown attribute
 * @return the attribute value; UCOL_DEFAULT on failure
 */
UColAttributeValue
RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return UCOL_DEFAULT; }
    int32_t flag;
    switch(attr) {
    // Attributes whose value comes from a dedicated getter.
    case UCOL_STRENGTH:
        return static_cast<UColAttributeValue>(settings->getStrength());
    case UCOL_ALTERNATE_HANDLING:
        return settings->getAlternateHandling();
    case UCOL_CASE_FIRST:
        return settings->getCaseFirst();
    case UCOL_HIRAGANA_QUATERNARY_MODE:
        // Deprecated attribute, unsettable.
        return UCOL_OFF;
    // On/off attributes that are tested as a bit in settings->options.
    case UCOL_FRENCH_COLLATION:
        flag = CollationSettings::BACKWARD_SECONDARY;
        break;
    case UCOL_CASE_LEVEL:
        flag = CollationSettings::CASE_LEVEL;
        break;
    case UCOL_NORMALIZATION_MODE:
        flag = CollationSettings::CHECK_FCD;
        break;
    case UCOL_NUMERIC_COLLATION:
        flag = CollationSettings::NUMERIC;
        break;
    default:
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return UCOL_DEFAULT;
    }
    return ((settings->options & flag) != 0) ? UCOL_ON : UCOL_OFF;
}
442
443
void
444
RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
445
0
                                UErrorCode &errorCode) {
446
0
    UColAttributeValue oldValue = getAttribute(attr, errorCode);
447
0
    if(U_FAILURE(errorCode)) { return; }
448
0
    if(value == oldValue) {
449
0
        setAttributeExplicitly(attr);
450
0
        return;
451
0
    }
452
0
    const CollationSettings &defaultSettings = getDefaultSettings();
453
0
    if(settings == &defaultSettings) {
454
0
        if(value == UCOL_DEFAULT) {
455
0
            setAttributeDefault(attr);
456
0
            return;
457
0
        }
458
0
    }
459
0
    CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
460
0
    if(ownedSettings == NULL) {
461
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
462
0
        return;
463
0
    }
464
465
0
    switch(attr) {
466
0
    case UCOL_FRENCH_COLLATION:
467
0
        ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
468
0
                               defaultSettings.options, errorCode);
469
0
        break;
470
0
    case UCOL_ALTERNATE_HANDLING:
471
0
        ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
472
0
        break;
473
0
    case UCOL_CASE_FIRST:
474
0
        ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
475
0
        break;
476
0
    case UCOL_CASE_LEVEL:
477
0
        ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
478
0
                               defaultSettings.options, errorCode);
479
0
        break;
480
0
    case UCOL_NORMALIZATION_MODE:
481
0
        ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
482
0
                               defaultSettings.options, errorCode);
483
0
        break;
484
0
    case UCOL_STRENGTH:
485
0
        ownedSettings->setStrength(value, defaultSettings.options, errorCode);
486
0
        break;
487
0
    case UCOL_HIRAGANA_QUATERNARY_MODE:
488
        // Deprecated attribute. Check for valid values but do not change anything.
489
0
        if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
490
0
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
491
0
        }
492
0
        break;
493
0
    case UCOL_NUMERIC_COLLATION:
494
0
        ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
495
0
        break;
496
0
    default:
497
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
498
0
        break;
499
0
    }
500
0
    if(U_FAILURE(errorCode)) { return; }
501
0
    setFastLatinOptions(*ownedSettings);
502
0
    if(value == UCOL_DEFAULT) {
503
0
        setAttributeDefault(attr);
504
0
    } else {
505
0
        setAttributeExplicitly(attr);
506
0
    }
507
0
}
508
509
/**
 * Sets the reordering group whose last primary becomes the variable top.
 *
 * @param group     UCOL_REORDER_CODE_DEFAULT, or a code in the range
 *                  UCOL_REORDER_CODE_FIRST..UCOL_REORDER_CODE_CURRENCY
 * @param errorCode in/out error code; U_ILLEGAL_ARGUMENT_ERROR for any other group,
 *                  U_MEMORY_ALLOCATION_ERROR if writable settings cannot be obtained
 * @return *this
 */
Collator &
RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return *this; }

    // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
    int32_t value;
    if(group == UCOL_REORDER_CODE_DEFAULT) {
        value = UCOL_DEFAULT;
    } else if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    } else {
        value = group - UCOL_REORDER_CODE_FIRST;
    }

    if(value == settings->getMaxVariable()) {
        // Nothing changes; only note that the caller set it explicitly.
        setAttributeExplicitly(ATTR_VARIABLE_TOP);
        return *this;
    }
    const CollationSettings &defaultSettings = getDefaultSettings();
    if(settings == &defaultSettings && value == UCOL_DEFAULT) {
        setAttributeDefault(ATTR_VARIABLE_TOP);
        return *this;
    }
    CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
    if(ownedSettings == NULL) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }

    // For the default, look up the group that the default settings use.
    if(group == UCOL_REORDER_CODE_DEFAULT) {
        group = static_cast<UColReorderCode>(
                UCOL_REORDER_CODE_FIRST + defaultSettings.getMaxVariable());
    }
    const uint32_t groupEnd = data->getLastPrimaryForGroup(group);
    U_ASSERT(groupEnd != 0);
    ownedSettings->setMaxVariable(value, defaultSettings.options, errorCode);
    if(U_FAILURE(errorCode)) { return *this; }
    ownedSettings->variableTop = groupEnd;
    setFastLatinOptions(*ownedSettings);

    if(value != UCOL_DEFAULT) {
        setAttributeExplicitly(ATTR_VARIABLE_TOP);
    } else {
        setAttributeDefault(ATTR_VARIABLE_TOP);
    }
    return *this;
}
556
557
/**
 * Returns the current max-variable setting as a reorder code,
 * i.e. UCOL_REORDER_CODE_FIRST plus the settings' MaxVariable number.
 */
UColReorderCode
RuleBasedCollator::getMaxVariable() const {
    return static_cast<UColReorderCode>(UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());
}
561
562
/**
 * Returns the variable top stored in the current settings.
 * The error code parameter is neither read nor written.
 */
uint32_t
RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
    const uint32_t top = settings->variableTop;
    return top;
}
566
567
/**
 * Sets the variable top from a string that must map to exactly one collation element.
 *
 * @param varTop    the string; may only be NULL if len is 0 (which is then rejected as empty)
 * @param len       the length of varTop, or negative if it is NUL-terminated
 * @param errorCode in/out error code; U_ILLEGAL_ARGUMENT_ERROR for a NULL or empty
 *                  string, U_CE_NOT_FOUND_ERROR if the string yields no CE or more than one
 * @return 0 if this function itself rejects the input; otherwise the variable top
 *         stored in the settings after the uint32_t overload has run
 */
uint32_t
RuleBasedCollator::setVariableTop(const UChar *varTop, int32_t len, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) { return 0; }
    if(varTop == NULL && len != 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(len < 0) { len = u_strlen(varTop); }
    if(len == 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // Fetch the first two CEs, with or without FCD checking as configured.
    const UBool numeric = settings->isNumeric();
    int64_t firstCE;
    int64_t secondCE;
    if(!settings->dontCheckFCD()) {
        FCDUTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
        firstCE = ci.nextCE(errorCode);
        secondCE = ci.nextCE(errorCode);
    } else {
        UTF16CollationIterator ci(data, numeric, varTop, varTop, varTop + len);
        firstCE = ci.nextCE(errorCode);
        secondCE = ci.nextCE(errorCode);
    }

    // Exactly one CE is required: the first must exist, the second must not.
    if(firstCE == Collation::NO_CE || secondCE != Collation::NO_CE) {
        errorCode = U_CE_NOT_FOUND_ERROR;
        return 0;
    }
    // The upper 32 bits of the CE are handed on as the primary weight.
    setVariableTop(static_cast<uint32_t>(firstCE >> 32), errorCode);
    return settings->variableTop;
}
597
598
/**
 * Convenience overload: forwards the string's buffer and length to the
 * (const UChar *, int32_t, UErrorCode &) overload.
 */
uint32_t
RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
    const UChar *text = varTop.getBuffer();
    const int32_t textLength = varTop.length();
    return setVariableTop(text, textLength, errorCode);
}
602
603
void
604
0
RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
605
0
    if(U_FAILURE(errorCode)) { return; }
606
0
    if(varTop != settings->variableTop) {
607
        // Pin the variable top to the end of the reordering group which contains it.
608
        // Only a few special groups are supported.
609
0
        int32_t group = data->getGroupForPrimary(varTop);
610
0
        if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
611
0
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
612
0
            return;
613
0
        }
614
0
        uint32_t v = data->getLastPrimaryForGroup(group);
615
0
        U_ASSERT(v != 0 && v >= varTop);
616
0
        varTop = v;
617
0
        if(varTop != settings->variableTop) {
618
0
            CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
619
0
            if(ownedSettings == NULL) {
620
0
                errorCode = U_MEMORY_ALLOCATION_ERROR;
621
0
                return;
622
0
            }
623
0
            ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
624
0
                                          getDefaultSettings().options, errorCode);
625
0
            if(U_FAILURE(errorCode)) { return; }
626
0
            ownedSettings->variableTop = varTop;
627
0
            setFastLatinOptions(*ownedSettings);
628
0
        }
629
0
    }
630
0
    if(varTop == getDefaultSettings().variableTop) {
631
0
        setAttributeDefault(ATTR_VARIABLE_TOP);
632
0
    } else {
633
0
        setAttributeExplicitly(ATTR_VARIABLE_TOP);
634
0
    }
635
0
}
636
637
/**
 * Copies the current reorder codes into dest.
 *
 * @param dest      the output array; may be NULL only if capacity is 0
 * @param capacity  the number of int32_t elements that dest can hold; must not be negative
 * @param errorCode in/out error code; U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *                  U_BUFFER_OVERFLOW_ERROR if dest is too small
 * @return the number of reorder codes (also when dest is too small); 0 on other failures
 */
int32_t
RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
                                   UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return 0; }
    if(capacity < 0 || (dest == NULL && capacity > 0)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    const int32_t count = settings->reorderCodesLength;
    if(count == 0) { return 0; }
    if(count <= capacity) {
        // Each reorder code is a 4-byte int32_t.
        uprv_memcpy(dest, settings->reorderCodes, count * 4);
    } else {
        errorCode = U_BUFFER_OVERFLOW_ERROR;
    }
    return count;
}
654
655
void
656
RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
657
0
                                   UErrorCode &errorCode) {
658
0
    if(U_FAILURE(errorCode)) { return; }
659
0
    if(length < 0 || (reorderCodes == NULL && length > 0)) {
660
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
661
0
        return;
662
0
    }
663
0
    if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {
664
0
        length = 0;
665
0
    }
666
0
    if(length == settings->reorderCodesLength &&
667
0
            uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
668
0
        return;
669
0
    }
670
0
    const CollationSettings &defaultSettings = getDefaultSettings();
671
0
    if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
672
0
        if(settings != &defaultSettings) {
673
0
            CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
674
0
            if(ownedSettings == NULL) {
675
0
                errorCode = U_MEMORY_ALLOCATION_ERROR;
676
0
                return;
677
0
            }
678
0
            ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
679
0
            setFastLatinOptions(*ownedSettings);
680
0
        }
681
0
        return;
682
0
    }
683
0
    CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
684
0
    if(ownedSettings == NULL) {
685
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
686
0
        return;
687
0
    }
688
0
    ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
689
0
    setFastLatinOptions(*ownedSettings);
690
0
}
691
692
void
693
0
RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
694
0
    ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
695
0
            data, ownedSettings,
696
0
            ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));
697
0
}
698
699
/**
 * Compares two strings.
 *
 * @param errorCode in/out error code
 * @return the comparison result from doCompare(); UCOL_EQUAL if errorCode already indicates failure
 */
UCollationResult
RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
                           UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) {
        return UCOL_EQUAL;
    }
    const int32_t leftLength = left.length();
    const int32_t rightLength = right.length();
    return doCompare(left.getBuffer(), leftLength, right.getBuffer(), rightLength, errorCode);
}
706
707
/**
 * Compares at most the first `length` code units of two strings.
 *
 * @param length    the maximum number of code units of each string to compare;
 *                  0 yields UCOL_EQUAL, a negative value is an error
 * @param errorCode in/out error code; U_ILLEGAL_ARGUMENT_ERROR for a negative length
 * @return the comparison result from doCompare(); UCOL_EQUAL on failure
 */
UCollationResult
RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
                           int32_t length, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode) || length == 0) { return UCOL_EQUAL; }
    if(length < 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return UCOL_EQUAL;
    }
    // Clamp both lengths to the requested limit.
    const int32_t leftLength = (left.length() > length) ? length : left.length();
    const int32_t rightLength = (right.length() > length) ? length : right.length();
    return doCompare(left.getBuffer(), leftLength,
                     right.getBuffer(), rightLength, errorCode);
}
722
723
/**
 * Compares two UTF-16 strings given as pointer and length.
 *
 * @param left        the left string; may be NULL only if leftLength is 0
 * @param leftLength  the length of left, or negative if it is NUL-terminated
 * @param right       the right string; may be NULL only if rightLength is 0
 * @param rightLength the length of right, or negative if it is NUL-terminated
 * @param errorCode   in/out error code; U_ILLEGAL_ARGUMENT_ERROR for a NULL string
 *                    with a non-zero length
 * @return the comparison result from doCompare(); UCOL_EQUAL on failure
 */
UCollationResult
RuleBasedCollator::compare(const UChar *left, int32_t leftLength,
                           const UChar *right, int32_t rightLength,
                           UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return UCOL_EQUAL;
    }
    // Make sure both or neither strings have a known length.
    // We do not optimize for mixed length/termination.
    if(leftLength >= 0 && rightLength < 0) {
        rightLength = u_strlen(right);
    } else if(leftLength < 0 && rightLength >= 0) {
        leftLength = u_strlen(left);
    }
    return doCompare(left, leftLength, right, rightLength, errorCode);
}
741
742
/**
 * Compares two UTF-8 strings given as StringPieces.
 *
 * @param errorCode in/out error code; U_ILLEGAL_ARGUMENT_ERROR if a piece has
 *                  a NULL data pointer but is not empty
 * @return the comparison result from doCompare(); UCOL_EQUAL on failure
 */
UCollationResult
RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
                               UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
    const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
    const UBool leftIsBad = leftBytes == NULL && !left.empty();
    const UBool rightIsBad = rightBytes == NULL && !right.empty();
    if(leftIsBad || rightIsBad) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return UCOL_EQUAL;
    }
    return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
}
754
755
/**
 * Compares two UTF-8 strings given as char pointer and length.
 *
 * @param left        the left string; may be NULL only if leftLength is 0
 * @param leftLength  the length of left in bytes, or negative if it is NUL-terminated
 * @param right       the right string; may be NULL only if rightLength is 0
 * @param rightLength the length of right in bytes, or negative if it is NUL-terminated
 * @param errorCode   in/out error code; U_ILLEGAL_ARGUMENT_ERROR for a NULL string
 *                    with a non-zero length
 * @return the comparison result from doCompare(); UCOL_EQUAL on failure
 */
UCollationResult
RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
                                       const char *right, int32_t rightLength,
                                       UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    if((left == NULL && leftLength != 0) || (right == NULL && rightLength != 0)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return UCOL_EQUAL;
    }
    // Make sure both or neither strings have a known length.
    // We do not optimize for mixed length/termination.
    if(leftLength >= 0 && rightLength < 0) {
        rightLength = static_cast<int32_t>(uprv_strlen(right));
    } else if(leftLength < 0 && rightLength >= 0) {
        leftLength = static_cast<int32_t>(uprv_strlen(left));
    }
    const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left);
    const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right);
    return doCompare(leftBytes, leftLength, rightBytes, rightLength, errorCode);
}
774
775
namespace {
776
777
/**
778
 * Abstract iterator for identical-level string comparisons.
779
 * Returns FCD code points and handles temporary switching to NFD.
780
 */
781
class NFDIterator : public UObject {
782
public:
783
0
    NFDIterator() : index(-1), length(0) {}
784
0
    virtual ~NFDIterator() {}
785
    /**
786
     * Returns the next code point from the internal normalization buffer,
787
     * or else the next text code point.
788
     * Returns -1 at the end of the text.
789
     */
790
0
    UChar32 nextCodePoint() {
791
0
        if(index >= 0) {
792
0
            if(index == length) {
793
0
                index = -1;
794
0
            } else {
795
0
                UChar32 c;
796
0
                U16_NEXT_UNSAFE(decomp, index, c);
797
0
                return c;
798
0
            }
799
0
        }
800
0
        return nextRawCodePoint();
801
0
    }
802
    /**
803
     * @param nfcImpl
804
     * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
805
     * @return the first code point in c's decomposition,
806
     *         or c itself if it was decomposed already or if it does not decompose
807
     */
808
0
    UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
809
0
        if(index >= 0) { return c; }
810
0
        decomp = nfcImpl.getDecomposition(c, buffer, length);
811
0
        if(decomp == NULL) { return c; }
812
0
        index = 0;
813
0
        U16_NEXT_UNSAFE(decomp, index, c);
814
0
        return c;
815
0
    }
816
protected:
817
    /**
818
     * Returns the next text code point in FCD order.
819
     * Returns -1 at the end of the text.
820
     */
821
    virtual UChar32 nextRawCodePoint() = 0;
822
private:
823
    const UChar *decomp;
824
    UChar buffer[4];
825
    int32_t index;
826
    int32_t length;
827
};
828
829
class UTF16NFDIterator : public NFDIterator {
830
public:
831
0
    UTF16NFDIterator(const UChar *text, const UChar *textLimit) : s(text), limit(textLimit) {}
832
protected:
833
0
    virtual UChar32 nextRawCodePoint() {
834
0
        if(s == limit) { return U_SENTINEL; }
835
0
        UChar32 c = *s++;
836
0
        if(limit == NULL && c == 0) {
837
0
            s = NULL;
838
0
            return U_SENTINEL;
839
0
        }
840
0
        UChar trail;
841
0
        if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
842
0
            ++s;
843
0
            c = U16_GET_SUPPLEMENTARY(c, trail);
844
0
        }
845
0
        return c;
846
0
    }
847
848
    const UChar *s;
849
    const UChar *limit;
850
};
851
852
class FCDUTF16NFDIterator : public UTF16NFDIterator {
853
public:
854
    FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const UChar *text, const UChar *textLimit)
855
0
            : UTF16NFDIterator(NULL, NULL) {
856
0
        UErrorCode errorCode = U_ZERO_ERROR;
857
0
        const UChar *spanLimit = nfcImpl.makeFCD(text, textLimit, NULL, errorCode);
858
0
        if(U_FAILURE(errorCode)) { return; }
859
0
        if(spanLimit == textLimit || (textLimit == NULL && *spanLimit == 0)) {
860
0
            s = text;
861
0
            limit = spanLimit;
862
0
        } else {
863
0
            str.setTo(text, (int32_t)(spanLimit - text));
864
0
            {
865
0
                ReorderingBuffer r_buffer(nfcImpl, str);
866
0
                if(r_buffer.init(str.length(), errorCode)) {
867
0
                    nfcImpl.makeFCD(spanLimit, textLimit, &r_buffer, errorCode);
868
0
                }
869
0
            }
870
0
            if(U_SUCCESS(errorCode)) {
871
0
                s = str.getBuffer();
872
0
                limit = s + str.length();
873
0
            }
874
0
        }
875
0
    }
876
private:
877
    UnicodeString str;
878
};
879
880
class UTF8NFDIterator : public NFDIterator {
881
public:
882
    UTF8NFDIterator(const uint8_t *text, int32_t textLength)
883
0
        : s(text), pos(0), length(textLength) {}
884
protected:
885
0
    virtual UChar32 nextRawCodePoint() {
886
0
        if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
887
0
        UChar32 c;
888
0
        U8_NEXT_OR_FFFD(s, pos, length, c);
889
0
        return c;
890
0
    }
891
892
    const uint8_t *s;
893
    int32_t pos;
894
    int32_t length;
895
};
896
897
class FCDUTF8NFDIterator : public NFDIterator {
898
public:
899
    FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
900
0
            : u8ci(data, FALSE, text, 0, textLength) {}
901
protected:
902
0
    virtual UChar32 nextRawCodePoint() {
903
0
        UErrorCode errorCode = U_ZERO_ERROR;
904
0
        return u8ci.nextCodePoint(errorCode);
905
0
    }
906
private:
907
    FCDUTF8CollationIterator u8ci;
908
};
909
910
class UIterNFDIterator : public NFDIterator {
911
public:
912
0
    UIterNFDIterator(UCharIterator &it) : iter(it) {}
913
protected:
914
0
    virtual UChar32 nextRawCodePoint() {
915
0
        return uiter_next32(&iter);
916
0
    }
917
private:
918
    UCharIterator &iter;
919
};
920
921
class FCDUIterNFDIterator : public NFDIterator {
922
public:
923
    FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
924
0
            : uici(data, FALSE, it, startIndex) {}
925
protected:
926
0
    virtual UChar32 nextRawCodePoint() {
927
0
        UErrorCode errorCode = U_ZERO_ERROR;
928
0
        return uici.nextCodePoint(errorCode);
929
0
    }
930
private:
931
    FCDUIterCollationIterator uici;
932
};
933
934
UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
935
0
                                NFDIterator &left, NFDIterator &right) {
936
0
    for(;;) {
937
        // Fetch the next FCD code point from each string.
938
0
        UChar32 leftCp = left.nextCodePoint();
939
0
        UChar32 rightCp = right.nextCodePoint();
940
0
        if(leftCp == rightCp) {
941
0
            if(leftCp < 0) { break; }
942
0
            continue;
943
0
        }
944
        // If they are different, then decompose each and compare again.
945
0
        if(leftCp < 0) {
946
0
            leftCp = -2;  // end of string
947
0
        } else if(leftCp == 0xfffe) {
948
0
            leftCp = -1;  // U+FFFE: merge separator
949
0
        } else {
950
0
            leftCp = left.nextDecomposedCodePoint(nfcImpl, leftCp);
951
0
        }
952
0
        if(rightCp < 0) {
953
0
            rightCp = -2;  // end of string
954
0
        } else if(rightCp == 0xfffe) {
955
0
            rightCp = -1;  // U+FFFE: merge separator
956
0
        } else {
957
0
            rightCp = right.nextDecomposedCodePoint(nfcImpl, rightCp);
958
0
        }
959
0
        if(leftCp < rightCp) { return UCOL_LESS; }
960
0
        if(leftCp > rightCp) { return UCOL_GREATER; }
961
0
    }
962
0
    return UCOL_EQUAL;
963
0
}
964
965
}  // namespace
966
967
/**
 * Compares two UTF-16 strings.
 * The caller must have checked U_FAILURE(errorCode).
 * A negative leftLength selects the NUL-terminated code paths for both strings.
 *
 * Steps: skip the identical prefix, back up out of an unsafe position,
 * try the Latin fast path, fall back to the full comparison up to the
 * quaternary level, and finally compare the identical level
 * if the strength is at least UCOL_IDENTICAL.
 */
UCollationResult
RuleBasedCollator::doCompare(const UChar *left, int32_t leftLength,
                             const UChar *right, int32_t rightLength,
                             UErrorCode &errorCode) const {
    // U_FAILURE(errorCode) checked by caller.
    if(leftLength == rightLength && left == right) {
        return UCOL_EQUAL;
    }

    // Identical-prefix test.
    const UChar *leftLimit = NULL;
    const UChar *rightLimit = NULL;
    int32_t prefix = 0;
    if(leftLength >= 0) {
        leftLimit = left + leftLength;
        rightLimit = right + rightLength;
        while(prefix != leftLength && prefix != rightLength &&
                left[prefix] == right[prefix]) {
            ++prefix;
        }
        if(prefix == leftLength && prefix == rightLength) { return UCOL_EQUAL; }
    } else {
        // NUL-terminated: the limits stay NULL.
        for(;; ++prefix) {
            const UChar unit = left[prefix];
            if(unit != right[prefix]) { break; }
            if(unit == 0) { return UCOL_EQUAL; }
        }
    }

    const UBool numeric = settings->isNumeric();
    if(prefix > 0) {
        UBool unsafe = prefix != leftLength &&
                data->isUnsafeBackward(left[prefix], numeric);
        if(!unsafe && prefix != rightLength) {
            unsafe = data->isUnsafeBackward(right[prefix], numeric);
        }
        if(unsafe) {
            // Identical prefix: Back up to the start of a contraction or reordering sequence.
            do {
                --prefix;
            } while(prefix > 0 && data->isUnsafeBackward(left[prefix], numeric));
        }
        // Notes:
        // - A longer string can compare equal to a prefix of it if only ignorables follow.
        // - With a backward level, a longer string can compare less-than a prefix of it.

        // Pass the actual start of each string into the CollationIterators,
        // plus the prefix position,
        // so that prefix matches back into the equal prefix work.
    }

    // Latin fast path: only if it is enabled and both strings continue
    // (or end) within its range.
    int32_t result = CollationFastLatin::BAIL_OUT_RESULT;
    const int32_t fastLatinOptions = settings->fastLatinOptions;
    if(fastLatinOptions >= 0 &&
            (prefix == leftLength || left[prefix] <= CollationFastLatin::LATIN_MAX) &&
            (prefix == rightLength || right[prefix] <= CollationFastLatin::LATIN_MAX)) {
        const bool haveLengths = leftLength >= 0;
        const int32_t leftRest = haveLengths ? leftLength - prefix : -1;
        const int32_t rightRest = haveLengths ? rightLength - prefix : -1;
        result = CollationFastLatin::compareUTF16(data->fastLatinTable,
                                                  settings->fastLatinPrimaries,
                                                  fastLatinOptions,
                                                  left + prefix, leftRest,
                                                  right + prefix, rightRest);
    }

    if(result == CollationFastLatin::BAIL_OUT_RESULT) {
        if(settings->dontCheckFCD()) {
            UTF16CollationIterator leftIter(data, numeric, left, left + prefix, leftLimit);
            UTF16CollationIterator rightIter(data, numeric, right, right + prefix, rightLimit);
            result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
        } else {
            FCDUTF16CollationIterator leftIter(data, numeric, left, left + prefix, leftLimit);
            FCDUTF16CollationIterator rightIter(data, numeric, right, right + prefix, rightLimit);
            result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
        }
    }
    if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
        return static_cast<UCollationResult>(result);
    }

    // Note: If NUL-terminated, we could get the actual limits from the iterators now.
    // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
    // and the benefit seems unlikely to be measurable.

    // Compare identical level.
    const Normalizer2Impl &nfcImpl = data->nfcImpl;
    const UChar *leftRest = left + prefix;
    const UChar *rightRest = right + prefix;
    if(settings->dontCheckFCD()) {
        UTF16NFDIterator leftIter(leftRest, leftLimit);
        UTF16NFDIterator rightIter(rightRest, rightLimit);
        return compareNFDIter(nfcImpl, leftIter, rightIter);
    }
    FCDUTF16NFDIterator leftIter(nfcImpl, leftRest, leftLimit);
    FCDUTF16NFDIterator rightIter(nfcImpl, rightRest, rightLimit);
    return compareNFDIter(nfcImpl, leftIter, rightIter);
}
1085
1086
/**
 * Compares two UTF-8 strings.
 * The caller must have checked U_FAILURE(errorCode).
 * A negative leftLength selects the NUL-terminated code paths for both strings.
 *
 * Steps: skip the identical prefix, back up to a code point boundary and
 * out of an unsafe position, try the Latin fast path, fall back to the
 * full comparison up to the quaternary level, and finally compare the
 * identical level if the strength is at least UCOL_IDENTICAL.
 */
UCollationResult
RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
                             const uint8_t *right, int32_t rightLength,
                             UErrorCode &errorCode) const {
    // U_FAILURE(errorCode) checked by caller.
    if(leftLength == rightLength && left == right) {
        return UCOL_EQUAL;
    }

    // Identical-prefix test.
    int32_t prefix = 0;
    if(leftLength >= 0) {
        while(prefix != leftLength && prefix != rightLength &&
                left[prefix] == right[prefix]) {
            ++prefix;
        }
        if(prefix == leftLength && prefix == rightLength) { return UCOL_EQUAL; }
    } else {
        // NUL-terminated.
        for(;; ++prefix) {
            const uint8_t b = left[prefix];
            if(b != right[prefix]) { break; }
            if(b == 0) { return UCOL_EQUAL; }
        }
    }
    // Back up to the start of a partially-equal code point.
    if(prefix > 0 &&
            ((prefix != leftLength && U8_IS_TRAIL(left[prefix])) ||
            (prefix != rightLength && U8_IS_TRAIL(right[prefix])))) {
        do {
            --prefix;
        } while(prefix > 0 && U8_IS_TRAIL(left[prefix]));
    }

    const UBool numeric = settings->isNumeric();
    if(prefix > 0) {
        // Look at the first differing code point of each string, left one first.
        UBool unsafe = FALSE;
        if(prefix != leftLength) {
            int32_t next = prefix;
            UChar32 cp;
            U8_NEXT_OR_FFFD(left, next, leftLength, cp);
            unsafe = data->isUnsafeBackward(cp, numeric);
        }
        if(!unsafe && prefix != rightLength) {
            int32_t next = prefix;
            UChar32 cp;
            U8_NEXT_OR_FFFD(right, next, rightLength, cp);
            unsafe = data->isUnsafeBackward(cp, numeric);
        }
        if(unsafe) {
            // Identical prefix: Back up to the start of a contraction or reordering sequence.
            UChar32 cp;
            do {
                U8_PREV_OR_FFFD(left, 0, prefix, cp);
            } while(prefix > 0 && data->isUnsafeBackward(cp, numeric));
        }
        // See the notes in the UTF-16 version.

        // Pass the actual start of each string into the CollationIterators,
        // plus the prefix position,
        // so that prefix matches back into the equal prefix work.
    }

    // Latin fast path: only if it is enabled and both strings continue
    // (or end) within its range of lead bytes.
    int32_t result = CollationFastLatin::BAIL_OUT_RESULT;
    const int32_t fastLatinOptions = settings->fastLatinOptions;
    if(fastLatinOptions >= 0 &&
            (prefix == leftLength ||
                left[prefix] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
            (prefix == rightLength ||
                right[prefix] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
        const bool haveLengths = leftLength >= 0;
        const int32_t leftRest = haveLengths ? leftLength - prefix : -1;
        const int32_t rightRest = haveLengths ? rightLength - prefix : -1;
        result = CollationFastLatin::compareUTF8(data->fastLatinTable,
                                                 settings->fastLatinPrimaries,
                                                 fastLatinOptions,
                                                 left + prefix, leftRest,
                                                 right + prefix, rightRest);
    }

    if(result == CollationFastLatin::BAIL_OUT_RESULT) {
        if(settings->dontCheckFCD()) {
            UTF8CollationIterator leftIter(data, numeric, left, prefix, leftLength);
            UTF8CollationIterator rightIter(data, numeric, right, prefix, rightLength);
            result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
        } else {
            FCDUTF8CollationIterator leftIter(data, numeric, left, prefix, leftLength);
            FCDUTF8CollationIterator rightIter(data, numeric, right, prefix, rightLength);
            result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
        }
    }
    if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
        return static_cast<UCollationResult>(result);
    }

    // Note: If NUL-terminated, we could get the actual limits from the iterators now.
    // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
    // and the benefit seems unlikely to be measurable.

    // Compare identical level.
    const Normalizer2Impl &nfcImpl = data->nfcImpl;
    left += prefix;
    right += prefix;
    if(leftLength > 0) {
        // Known lengths: shorten them by the skipped prefix.
        leftLength -= prefix;
        rightLength -= prefix;
    }
    if(settings->dontCheckFCD()) {
        UTF8NFDIterator leftIter(left, leftLength);
        UTF8NFDIterator rightIter(right, rightLength);
        return compareNFDIter(nfcImpl, leftIter, rightIter);
    }
    FCDUTF8NFDIterator leftIter(data, left, leftLength);
    FCDUTF8NFDIterator rightIter(data, right, rightLength);
    return compareNFDIter(nfcImpl, leftIter, rightIter);
}
1214
1215
/**
 * Compares the text of two UCharIterators.
 * Returns UCOL_EQUAL right away if errorCode already indicates a failure
 * or if both references denote the same iterator object.
 *
 * Both iterators are advanced over their identical prefix first,
 * then moved back as needed before the real comparison starts.
 */
UCollationResult
RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
                           UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode) || &left == &right) { return UCOL_EQUAL; }
    const UBool numeric = settings->isNumeric();

    // Identical-prefix test.
    int32_t prefix = 0;
    {
        UChar32 leftUnit;
        UChar32 rightUnit;
        for(;;) {
            leftUnit = left.next(&left);
            rightUnit = right.next(&right);
            if(leftUnit != rightUnit) { break; }
            if(leftUnit < 0) { return UCOL_EQUAL; }
            ++prefix;
        }

        // Back out the code units that differed, for the real collation comparison.
        if(leftUnit >= 0) { left.previous(&left); }
        if(rightUnit >= 0) { right.previous(&right); }

        if(prefix > 0) {
            const bool unsafe =
                (leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric)) ||
                (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric));
            if(unsafe) {
                // Identical prefix: Back up to the start of a contraction or reordering sequence.
                for(;;) {
                    --prefix;
                    leftUnit = left.previous(&left);
                    right.previous(&right);
                    if(prefix <= 0 || !data->isUnsafeBackward(leftUnit, numeric)) { break; }
                }
            }
            // See the notes in the UTF-16 version.
        }
    }

    UCollationResult result;
    if(settings->dontCheckFCD()) {
        UIterCollationIterator leftIter(data, numeric, left);
        UIterCollationIterator rightIter(data, numeric, right);
        result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    } else {
        FCDUIterCollationIterator leftIter(data, numeric, left, prefix);
        FCDUIterCollationIterator rightIter(data, numeric, right, prefix);
        result = CollationCompare::compareUpToQuaternary(leftIter, rightIter, *settings, errorCode);
    }
    if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
        return result;
    }

    // Compare identical level.
    // Reposition both iterators to the end of the skipped prefix.
    left.move(&left, prefix, UITER_ZERO);
    right.move(&right, prefix, UITER_ZERO);
    const Normalizer2Impl &nfcImpl = data->nfcImpl;
    if(settings->dontCheckFCD()) {
        UIterNFDIterator leftIter(left);
        UIterNFDIterator rightIter(right);
        return compareNFDIter(nfcImpl, leftIter, rightIter);
    }
    FCDUIterNFDIterator leftIter(data, left, prefix);
    FCDUIterNFDIterator rightIter(data, right, prefix);
    return compareNFDIter(nfcImpl, leftIter, rightIter);
}
1277
1278
/**
 * Convenience overload: forwards the UnicodeString's buffer and length
 * to the pointer-and-length overload of getCollationKey().
 */
CollationKey &
RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
                                   UErrorCode &errorCode) const {
    const UChar *text = s.getBuffer();
    const int32_t textLength = s.length();
    return getCollationKey(text, textLength, key, errorCode);
}
1283
1284
/**
 * Writes the sort key for s into key and returns key.
 *
 * The key is set to bogus if errorCode indicates a failure on entry or
 * after writing, or if s is NULL while length is not 0
 * (which also sets U_ILLEGAL_ARGUMENT_ERROR).
 * If the key turns out bogus after writing without an error having been set,
 * then U_MEMORY_ALLOCATION_ERROR is set.
 */
CollationKey &
RuleBasedCollator::getCollationKey(const UChar *s, int32_t length, CollationKey& key,
                                   UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) {
        return key.setToBogus();
    }
    if(length != 0 && s == NULL) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return key.setToBogus();
    }
    key.reset();  // resets the "bogus" state
    CollationKeyByteSink sink(key);
    writeSortKey(s, length, sink, errorCode);
    if(U_FAILURE(errorCode)) {
        key.setToBogus();
        return key;
    }
    if(key.isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return key;
    }
    key.setLength(sink.NumberOfBytesAppended());
    return key;
}
1306
1307
/**
 * Convenience overload: forwards the UnicodeString's buffer and length
 * to the pointer-and-length overload of getSortKey().
 */
int32_t
RuleBasedCollator::getSortKey(const UnicodeString &s,
                              uint8_t *dest, int32_t capacity) const {
    const UChar *text = s.getBuffer();
    const int32_t textLength = s.length();
    return getSortKey(text, textLength, dest, capacity);
}
1312
1313
/**
 * Writes the sort key for s into dest, which has room for capacity bytes.
 *
 * Returns 0 for invalid arguments (NULL s with a non-zero length,
 * negative capacity, NULL dest with a positive capacity) and when writing fails.
 * Otherwise returns the sink's NumberOfBytesAppended().
 */
int32_t
RuleBasedCollator::getSortKey(const UChar *s, int32_t length,
                              uint8_t *dest, int32_t capacity) const {
    if(s == NULL && length != 0) { return 0; }
    if(capacity < 0) { return 0; }
    if(dest == NULL && capacity > 0) { return 0; }

    uint8_t noDest[1] = { 0 };
    if(dest == NULL) {
        // Distinguish pure preflighting from an allocation error.
        dest = noDest;
        capacity = 0;
    }
    FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), capacity);
    UErrorCode status = U_ZERO_ERROR;
    writeSortKey(s, length, sink, status);
    if(U_FAILURE(status)) {
        return 0;
    }
    return sink.NumberOfBytesAppended();
}
1330
1331
void
1332
RuleBasedCollator::writeSortKey(const UChar *s, int32_t length,
1333
0
                                SortKeyByteSink &sink, UErrorCode &errorCode) const {
1334
0
    if(U_FAILURE(errorCode)) { return; }
1335
0
    const UChar *limit = (length >= 0) ? s + length : NULL;
1336
0
    UBool numeric = settings->isNumeric();
1337
0
    CollationKeys::LevelCallback callback;
1338
0
    if(settings->dontCheckFCD()) {
1339
0
        UTF16CollationIterator iter(data, numeric, s, s, limit);
1340
0
        CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1341
0
                                                  sink, Collation::PRIMARY_LEVEL,
1342
0
                                                  callback, TRUE, errorCode);
1343
0
    } else {
1344
0
        FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1345
0
        CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1346
0
                                                  sink, Collation::PRIMARY_LEVEL,
1347
0
                                                  callback, TRUE, errorCode);
1348
0
    }
1349
0
    if(settings->getStrength() == UCOL_IDENTICAL) {
1350
0
        writeIdenticalLevel(s, limit, sink, errorCode);
1351
0
    }
1352
0
    static const char terminator = 0;  // TERMINATOR_BYTE
1353
0
    sink.Append(&terminator, 1);
1354
0
}
1355
1356
void
1357
RuleBasedCollator::writeIdenticalLevel(const UChar *s, const UChar *limit,
1358
0
                                       SortKeyByteSink &sink, UErrorCode &errorCode) const {
1359
    // NFD quick check
1360
0
    const UChar *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, NULL, errorCode);
1361
0
    if(U_FAILURE(errorCode)) { return; }
1362
0
    sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
1363
0
    UChar32 prev = 0;
1364
0
    if(nfdQCYesLimit != s) {
1365
0
        prev = u_writeIdenticalLevelRun(prev, s, (int32_t)(nfdQCYesLimit - s), sink);
1366
0
    }
1367
    // Is there non-NFD text?
1368
0
    int32_t destLengthEstimate;
1369
0
    if(limit != NULL) {
1370
0
        if(nfdQCYesLimit == limit) { return; }
1371
0
        destLengthEstimate = (int32_t)(limit - nfdQCYesLimit);
1372
0
    } else {
1373
        // s is NUL-terminated
1374
0
        if(*nfdQCYesLimit == 0) { return; }
1375
0
        destLengthEstimate = -1;
1376
0
    }
1377
0
    UnicodeString nfd;
1378
0
    data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
1379
0
    u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
1380
0
}
1381
1382
namespace {
1383
1384
/**
1385
 * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
1386
 * with an instance of this callback class.
1387
 * When another level is about to be written, the callback
1388
 * records the level and the number of bytes that will be written until
1389
 * the sink (which is actually a FixedSortKeyByteSink) fills up.
1390
 *
1391
 * When internalNextSortKeyPart() is called again, it restarts with the last level
1392
 * and ignores as many bytes as were written previously for that level.
1393
 */
1394
class PartLevelCallback : public CollationKeys::LevelCallback {
1395
public:
1396
    PartLevelCallback(const SortKeyByteSink &s)
1397
0
            : sink(s), level(Collation::PRIMARY_LEVEL) {
1398
0
        levelCapacity = sink.GetRemainingCapacity();
1399
0
    }
1400
0
    virtual ~PartLevelCallback() {}
1401
0
    virtual UBool needToWrite(Collation::Level l) {
1402
0
        if(!sink.Overflowed()) {
1403
            // Remember a level that will be at least partially written.
1404
0
            level = l;
1405
0
            levelCapacity = sink.GetRemainingCapacity();
1406
0
            return TRUE;
1407
0
        } else {
1408
0
            return FALSE;
1409
0
        }
1410
0
    }
1411
0
    Collation::Level getLevel() const { return level; }
1412
0
    int32_t getLevelCapacity() const { return levelCapacity; }
1413
1414
private:
1415
    const SortKeyByteSink &sink;
1416
    Collation::Level level;
1417
    int32_t levelCapacity;
1418
};
1419
1420
}  // namespace
1421
1422
/**
 * Writes the next part of a sort key, at most count bytes, into dest.
 *
 * state[0] holds the level to restart with and state[1] the number of bytes
 * of that level to ignore; both are updated for the next call.
 * Returns count if the sink received more than count bytes,
 * otherwise the number of bytes appended, after zero-filling the rest of dest.
 * Returns 0 on failure, for invalid arguments (with U_ILLEGAL_ARGUMENT_ERROR),
 * and for count == 0.
 */
int32_t
RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
                                           uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return 0; }
    if(iter == NULL || state == NULL || count < 0 || (dest == NULL && count > 0)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(count == 0) { return 0; }

    FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
    sink.IgnoreBytes(static_cast<int32_t>(state[1]));
    iter->move(iter, 0, UITER_START);

    Collation::Level level = static_cast<Collation::Level>(state[0]);
    if(level <= Collation::QUATERNARY_LEVEL) {
        const UBool numeric = settings->isNumeric();
        PartLevelCallback callback(sink);
        if(settings->dontCheckFCD()) {
            UIterCollationIterator ci(data, numeric, *iter);
            CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
                                                      sink, level, callback, FALSE, errorCode);
        } else {
            FCDUIterCollationIterator ci(data, numeric, *iter, 0);
            CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
                                                      sink, level, callback, FALSE, errorCode);
        }
        if(U_FAILURE(errorCode)) { return 0; }
        if(sink.NumberOfBytesAppended() > count) {
            // dest is full: remember where to continue.
            state[0] = static_cast<uint32_t>(callback.getLevel());
            state[1] = static_cast<uint32_t>(callback.getLevelCapacity());
            return count;
        }
        // All of the normal levels are done.
        if(settings->getStrength() == UCOL_IDENTICAL) {
            level = Collation::IDENTICAL_LEVEL;
            iter->move(iter, 0, UITER_START);
        }
        // else fall through to setting ZERO_LEVEL
    }

    if(level == Collation::IDENTICAL_LEVEL) {
        const int32_t levelCapacity = sink.GetRemainingCapacity();
        // Collect the whole text from the iterator.
        UnicodeString text;
        for(UChar32 unit = iter->next(iter); unit >= 0; unit = iter->next(iter)) {
            text.append(static_cast<UChar>(unit));
        }
        const UChar *textStart = text.getBuffer();
        writeIdenticalLevel(textStart, textStart + text.length(), sink, errorCode);
        if(U_FAILURE(errorCode)) { return 0; }
        if(sink.NumberOfBytesAppended() > count) {
            state[0] = static_cast<uint32_t>(level);
            state[1] = static_cast<uint32_t>(levelCapacity);
            return count;
        }
    }

    // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
    state[0] = static_cast<uint32_t>(Collation::ZERO_LEVEL);
    state[1] = 0;
    const int32_t length = sink.NumberOfBytesAppended();
    for(int32_t i = length; i < count; ++i) {
        dest[i] = 0;
    }
    return length;
}
1489
1490
void
1491
RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
1492
0
                                  UErrorCode &errorCode) const {
1493
0
    if(U_FAILURE(errorCode)) { return; }
1494
0
    const UChar *s = str.getBuffer();
1495
0
    const UChar *limit = s + str.length();
1496
0
    UBool numeric = settings->isNumeric();
1497
0
    if(settings->dontCheckFCD()) {
1498
0
        UTF16CollationIterator iter(data, numeric, s, s, limit);
1499
0
        int64_t ce;
1500
0
        while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1501
0
            ces.addElement(ce, errorCode);
1502
0
        }
1503
0
    } else {
1504
0
        FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1505
0
        int64_t ce;
1506
0
        while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1507
0
            ces.addElement(ce, errorCode);
1508
0
        }
1509
0
    }
1510
0
}
1511
1512
namespace {
1513
1514
void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
1515
0
                  UErrorCode &errorCode) {
1516
0
    if(U_FAILURE(errorCode) || length == 0) { return; }
1517
0
    if(!s.isEmpty()) {
1518
0
        s.append('_', errorCode);
1519
0
    }
1520
0
    s.append(letter, errorCode);
1521
0
    for(int32_t i = 0; i < length; ++i) {
1522
0
        s.append(uprv_toupper(subtag[i]), errorCode);
1523
0
    }
1524
0
}
1525
1526
void appendAttribute(CharString &s, char letter, UColAttributeValue value,
1527
0
                     UErrorCode &errorCode) {
1528
0
    if(U_FAILURE(errorCode)) { return; }
1529
0
    if(!s.isEmpty()) {
1530
0
        s.append('_', errorCode);
1531
0
    }
1532
0
    static const char *valueChars = "1234...........IXO..SN..LU......";
1533
0
    s.append(letter, errorCode);
1534
0
    s.append(valueChars[value], errorCode);
1535
0
}
1536
1537
}  // namespace
1538
1539
/**
 * Builds the short definition string for this collator and extracts it
 * into buffer.
 *
 * The string is a sequence of items separated by '_' (see appendSubtag()
 * and appendAttribute()): attribute items are emitted only for attributes
 * that attributeHasBeenSetExplicitly() reports, and locale items are
 * taken from the functionally equivalent collation locale of `locale`.
 * Items are appended in alphabetic order of their letters.
 *
 * @param locale    locale ID to describe; if NULL, the ID returned by
 *                  internalGetLocaleID(ULOC_VALID_LOCALE, ...) is used
 * @param buffer    output buffer; may be NULL only if capacity is 0
 * @param capacity  size of buffer; must not be negative
 * @param errorCode in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR
 *                  for an invalid buffer/capacity combination
 * @return 0 on failure, otherwise the value returned by
 *         CharString::extract() (presumably the string length)
 */
int32_t
RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
                                                    char *buffer, int32_t capacity,
                                                    UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return 0; }
    // A NULL buffer is only acceptable with a zero capacity;
    // a non-NULL buffer needs a non-negative capacity.
    const UBool badArguments = (buffer == NULL) ? (capacity != 0) : (capacity < 0);
    if(badArguments) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(locale == NULL) {
        locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
    }

    // One extra char beyond the capacity passed to the API leaves room
    // for the terminator written below.
    char equivalent[ULOC_FULLNAME_CAPACITY + 1];
    int32_t len = ucol_getFunctionalEquivalent(equivalent, ULOC_FULLNAME_CAPACITY,
                                               "collation", locale,
                                               NULL, &errorCode);
    if(U_FAILURE(errorCode)) { return 0; }
    equivalent[len] = 0;

    // Append items in alphabetic order of their short definition letters.
    CharString definition;
    char piece[ULOC_KEYWORD_AND_VALUES_CAPACITY];

    if(attributeHasBeenSetExplicitly(UCOL_ALTERNATE_HANDLING)) {
        appendAttribute(definition, 'A',
                        getAttribute(UCOL_ALTERNATE_HANDLING, errorCode), errorCode);
    }
    // ATTR_VARIABLE_TOP not supported because 'B' was broken.
    // See ICU tickets #10372 and #10386.
    if(attributeHasBeenSetExplicitly(UCOL_CASE_FIRST)) {
        appendAttribute(definition, 'C',
                        getAttribute(UCOL_CASE_FIRST, errorCode), errorCode);
    }
    if(attributeHasBeenSetExplicitly(UCOL_NUMERIC_COLLATION)) {
        appendAttribute(definition, 'D',
                        getAttribute(UCOL_NUMERIC_COLLATION, errorCode), errorCode);
    }
    if(attributeHasBeenSetExplicitly(UCOL_CASE_LEVEL)) {
        appendAttribute(definition, 'E',
                        getAttribute(UCOL_CASE_LEVEL, errorCode), errorCode);
    }
    if(attributeHasBeenSetExplicitly(UCOL_FRENCH_COLLATION)) {
        appendAttribute(definition, 'F',
                        getAttribute(UCOL_FRENCH_COLLATION, errorCode), errorCode);
    }
    // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.

    // K: collation keyword value.
    len = uloc_getKeywordValue(equivalent, "collation", piece, UPRV_LENGTHOF(piece), &errorCode);
    appendSubtag(definition, 'K', piece, len, errorCode);

    // L: language; an empty language is written as "root".
    len = uloc_getLanguage(equivalent, piece, UPRV_LENGTHOF(piece), &errorCode);
    if(len != 0) {
        appendSubtag(definition, 'L', piece, len, errorCode);
    } else {
        appendSubtag(definition, 'L', "root", 4, errorCode);
    }

    if(attributeHasBeenSetExplicitly(UCOL_NORMALIZATION_MODE)) {
        appendAttribute(definition, 'N',
                        getAttribute(UCOL_NORMALIZATION_MODE, errorCode), errorCode);
    }

    // R: country/region.
    len = uloc_getCountry(equivalent, piece, UPRV_LENGTHOF(piece), &errorCode);
    appendSubtag(definition, 'R', piece, len, errorCode);

    if(attributeHasBeenSetExplicitly(UCOL_STRENGTH)) {
        appendAttribute(definition, 'S',
                        getAttribute(UCOL_STRENGTH, errorCode), errorCode);
    }

    // V: variant.
    len = uloc_getVariant(equivalent, piece, UPRV_LENGTHOF(piece), &errorCode);
    appendSubtag(definition, 'V', piece, len, errorCode);

    // Z: script.
    len = uloc_getScript(equivalent, piece, UPRV_LENGTHOF(piece), &errorCode);
    appendSubtag(definition, 'Z', piece, len, errorCode);

    if(U_FAILURE(errorCode)) { return 0; }
    return definition.extract(buffer, capacity, errorCode);
}
1605
1606
/**
 * Reports whether c is "unsafe" by forwarding to
 * CollationData::isUnsafeBackward() with this collator's numeric setting.
 */
UBool
RuleBasedCollator::isUnsafe(UChar32 c) const {
    const UBool isNumeric = settings->isNumeric();
    return data->isUnsafeBackward(c, isNumeric);
}
1610
1611
void U_CALLCONV
1612
0
RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
1613
0
    t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
1614
0
}
1615
1616
/**
 * Makes sure the tailoring's max-expansions table has been set up by
 * running computeMaxExpansions() through umtx_initOnce() with the
 * tailoring's maxExpansionsInitOnce guard.
 *
 * @param errorCode in/out ICU error code
 * @return whether errorCode indicates success after the call
 */
UBool
RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
    umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
    return !U_FAILURE(errorCode);
}
1621
1622
/**
 * Creates a CollationElementIterator over a UnicodeString for this
 * collator.
 *
 * @param source the text to iterate over
 * @return the new iterator, or NULL if the max-expansions
 *         initialization or the iterator construction reported a failure
 */
CollationElementIterator *
RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
    UErrorCode status = U_ZERO_ERROR;
    // The max-expansions data is initialized before any iterator is handed out.
    if(!initMaxExpansions(status)) { return NULL; }
    CollationElementIterator *iterator = new CollationElementIterator(source, this, status);
    if(U_SUCCESS(status)) { return iterator; }
    // Construction failed: discard the iterator instead of returning it.
    delete iterator;
    return NULL;
}
1633
1634
/**
 * Creates a CollationElementIterator over a CharacterIterator for this
 * collator.
 *
 * @param source the character iterator supplying the text
 * @return the new iterator, or NULL if the max-expansions
 *         initialization or the iterator construction reported a failure
 */
CollationElementIterator *
RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
    UErrorCode status = U_ZERO_ERROR;
    // The max-expansions data is initialized before any iterator is handed out.
    if(!initMaxExpansions(status)) { return NULL; }
    CollationElementIterator *iterator = new CollationElementIterator(source, this, status);
    if(U_SUCCESS(status)) { return iterator; }
    // Construction failed: discard the iterator instead of returning it.
    delete iterator;
    return NULL;
}
1645
1646
/**
 * Returns the maximum expansion for the given collation order by
 * forwarding to CollationElementIterator::getMaxExpansion() with the
 * tailoring's max-expansions table.
 *
 * @param order the collation order to look up
 */
int32_t
RuleBasedCollator::getMaxExpansion(int32_t order) const {
    UErrorCode status = U_ZERO_ERROR;
    // The initialization result is deliberately ignored; the lookup below
    // runs with whatever tailoring->maxExpansions holds afterwards.
    (void)initMaxExpansions(status);
    return CollationElementIterator::getMaxExpansion(tailoring->maxExpansions, order);
}
1652
1653
U_NAMESPACE_END
1654
1655
#endif  // !UCONFIG_NO_COLLATION