Coverage Report

Created: 2025-10-24 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/i18n/rulebasedcollator.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 1996-2015, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* rulebasedcollator.cpp
9
*
10
* (replaced the former tblcoll.cpp)
11
*
12
* created on: 2012feb14 with new and old collation code
13
* created by: Markus W. Scherer
14
*/
15
16
#include "unicode/utypes.h"
17
18
#if !UCONFIG_NO_COLLATION
19
20
#include "unicode/coll.h"
21
#include "unicode/coleitr.h"
22
#include "unicode/localpointer.h"
23
#include "unicode/locid.h"
24
#include "unicode/sortkey.h"
25
#include "unicode/tblcoll.h"
26
#include "unicode/ucol.h"
27
#include "unicode/uiter.h"
28
#include "unicode/uloc.h"
29
#include "unicode/uniset.h"
30
#include "unicode/unistr.h"
31
#include "unicode/usetiter.h"
32
#include "unicode/utf8.h"
33
#include "unicode/uversion.h"
34
#include "bocsu.h"
35
#include "charstr.h"
36
#include "cmemory.h"
37
#include "collation.h"
38
#include "collationcompare.h"
39
#include "collationdata.h"
40
#include "collationdatareader.h"
41
#include "collationfastlatin.h"
42
#include "collationiterator.h"
43
#include "collationkeys.h"
44
#include "collationroot.h"
45
#include "collationsets.h"
46
#include "collationsettings.h"
47
#include "collationtailoring.h"
48
#include "cstring.h"
49
#include "uassert.h"
50
#include "ucol_imp.h"
51
#include "uhash.h"
52
#include "uitercollationiterator.h"
53
#include "ulocimp.h"
54
#include "ustr_imp.h"
55
#include "utf16collationiterator.h"
56
#include "utf8collationiterator.h"
57
#include "uvectr64.h"
58
59
U_NAMESPACE_BEGIN
60
61
namespace {
62
63
class FixedSortKeyByteSink : public SortKeyByteSink {
64
public:
65
    FixedSortKeyByteSink(char *dest, int32_t destCapacity)
66
4.06k
            : SortKeyByteSink(dest, destCapacity) {}
67
    virtual ~FixedSortKeyByteSink();
68
69
private:
70
    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
71
    virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
72
};
73
74
FixedSortKeyByteSink::~FixedSortKeyByteSink() {}
75
76
void
77
902k
FixedSortKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t /*n*/, int32_t length) {
78
    // buffer_ != nullptr && bytes != nullptr && n > 0 && appended_ > capacity_
79
    // Fill the buffer completely.
80
902k
    int32_t available = capacity_ - length;
81
902k
    if (available > 0) {
82
0
        uprv_memcpy(buffer_ + length, bytes, available);
83
0
    }
84
902k
}
85
86
UBool
87
1.11M
FixedSortKeyByteSink::Resize(int32_t /*appendCapacity*/, int32_t /*length*/) {
88
1.11M
    return false;
89
1.11M
}
90
91
}  // namespace
92
93
// Not in an anonymous namespace, so that it can be a friend of CollationKey.
94
class CollationKeyByteSink : public SortKeyByteSink {
95
public:
96
    CollationKeyByteSink(CollationKey &key)
97
4.06k
            : SortKeyByteSink(reinterpret_cast<char *>(key.getBytes()), key.getCapacity()),
98
4.06k
              key_(key) {}
99
    virtual ~CollationKeyByteSink();
100
101
private:
102
    virtual void AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) override;
103
    virtual UBool Resize(int32_t appendCapacity, int32_t length) override;
104
105
    CollationKey &key_;
106
};
107
108
4.06k
CollationKeyByteSink::~CollationKeyByteSink() {}
109
110
void
111
3.26k
CollationKeyByteSink::AppendBeyondCapacity(const char *bytes, int32_t n, int32_t length) {
112
    // buffer_ != nullptr && bytes != nullptr && n > 0 && appended_ > capacity_
113
3.26k
    if (Resize(n, length)) {
114
3.26k
        uprv_memcpy(buffer_ + length, bytes, n);
115
3.26k
    }
116
3.26k
}
117
118
/**
 * Asks the CollationKey for a larger buffer.
 *
 * The requested capacity is the largest of: twice the current capacity,
 * length + 2 * appendCapacity, and 200. The sizes are computed in 64 bits
 * because the doubling and the sum can exceed the int32_t range, which would
 * be undefined behavior in int32_t arithmetic. A request beyond INT32_MAX is
 * clamped to INT32_MAX if the pending append still fits; otherwise the sink
 * is marked not-OK and the resize fails.
 *
 * @param appendCapacity number of additional bytes the caller wants room for
 * @param length         number of bytes already in the buffer; passed on to key_.reallocate()
 * @return true if the buffer was reallocated (buffer_ and capacity_ are updated);
 *         false if there is no buffer, the request cannot be represented,
 *         or key_.reallocate() returned nullptr
 */
UBool
CollationKeyByteSink::Resize(int32_t appendCapacity, int32_t length) {
    if(buffer_ == nullptr) {
        return false;  // allocation failed before already
    }
    int64_t wanted = 2 * static_cast<int64_t>(capacity_);
    const int64_t altCapacity =
        static_cast<int64_t>(length) + 2 * static_cast<int64_t>(appendCapacity);
    if(wanted < altCapacity) {
        wanted = altCapacity;
    }
    if(wanted < 200) {
        wanted = 200;
    }
    if(wanted > INT32_MAX) {
        // Clamp only if the bytes that are about to be appended still fit;
        // otherwise the caller would write past the end of the new buffer.
        if(static_cast<int64_t>(length) + appendCapacity > INT32_MAX) {
            SetNotOk();
            return false;
        }
        wanted = INT32_MAX;
    }
    const int32_t newCapacity = static_cast<int32_t>(wanted);
    uint8_t *newBuffer = key_.reallocate(newCapacity, length);
    if(newBuffer == nullptr) {
        SetNotOk();
        return false;
    }
    buffer_ = reinterpret_cast<char *>(newBuffer);
    capacity_ = newCapacity;
    return true;
}
140
141
/**
 * Copy constructor: copies the other collator's pointers and flags,
 * and adds a reference to the settings and to the cache entry.
 */
RuleBasedCollator::RuleBasedCollator(const RuleBasedCollator &other)
        : Collator(other),
          data(other.data), settings(other.settings),
          tailoring(other.tailoring), cacheEntry(other.cacheEntry),
          validLocale(other.validLocale),
          explicitlySetAttributes(other.explicitlySetAttributes),
          actualLocaleIsSameAsValid(other.actualLocaleIsSameAsValid) {
    // This object now also refers to both objects.
    cacheEntry->addRef();
    settings->addRef();
}
153
154
/**
 * Builds a collator from a binary image, on top of a base collator.
 *
 * @param bin       the binary data; must not be nullptr
 * @param length    length argument for bin; must not be 0
 * @param base      the base collator; must not be nullptr, and its tailoring
 *                  must be the root tailoring (otherwise U_UNSUPPORTED_ERROR)
 * @param errorCode in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR for
 *                  missing arguments and U_MEMORY_ALLOCATION_ERROR when the new
 *                  tailoring cannot be created
 */
RuleBasedCollator::RuleBasedCollator(const uint8_t *bin, int32_t length,
                                     const RuleBasedCollator *base, UErrorCode &errorCode)
        : data(nullptr), settings(nullptr), tailoring(nullptr), cacheEntry(nullptr),
          validLocale(""),
          explicitlySetAttributes(0),
          actualLocaleIsSameAsValid(false) {
    if (U_FAILURE(errorCode)) {
        return;
    }
    const bool haveAllArguments = bin != nullptr && length != 0 && base != nullptr;
    if (!haveAllArguments) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    const CollationTailoring *rootTailoring = CollationRoot::getRoot(errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }
    if (base->tailoring != rootTailoring) {
        // Only the root collator is accepted as the base.
        errorCode = U_UNSUPPORTED_ERROR;
        return;
    }
    LocalPointer<CollationTailoring> newTailoring(
            new CollationTailoring(base->tailoring->settings));
    if (newTailoring.isNull() || newTailoring->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    CollationDataReader::read(base->tailoring, bin, length, *newTailoring, errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }
    newTailoring->actualLocale.setToBogus();
    // Hand the tailoring over to adoptTailoring().
    adoptTailoring(newTailoring.orphan(), errorCode);
}
184
185
/**
 * Builds a collator from a cache entry: uses the data and settings of the
 * entry's tailoring and the entry's valid locale, and adds a reference
 * to the settings and to the entry.
 */
RuleBasedCollator::RuleBasedCollator(const CollationCacheEntry *entry)
        : data(entry->tailoring->data), settings(entry->tailoring->settings),
          tailoring(entry->tailoring), cacheEntry(entry),
          validLocale(entry->validLocale),
          explicitlySetAttributes(0),
          actualLocaleIsSameAsValid(false) {
    // This object now also refers to both objects.
    cacheEntry->addRef();
    settings->addRef();
}
196
197
21.5k
/** Gives up this collator's hold on its settings and its cache entry. */
RuleBasedCollator::~RuleBasedCollator() {
    // The settings first, then the cache entry.
    SharedObject::clearPtr(settings);
    SharedObject::clearPtr(cacheEntry);
}
201
202
void
203
4.06k
RuleBasedCollator::adoptTailoring(CollationTailoring *t, UErrorCode &errorCode) {
204
4.06k
    if(U_FAILURE(errorCode)) {
205
0
        t->deleteIfZeroRefCount();
206
0
        return;
207
0
    }
208
4.06k
    U_ASSERT(settings == nullptr && data == nullptr && tailoring == nullptr && cacheEntry == nullptr);
209
4.06k
    cacheEntry = new CollationCacheEntry(t->actualLocale, t);
210
4.06k
    if(cacheEntry == nullptr) {
211
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
212
0
        t->deleteIfZeroRefCount();
213
0
        return;
214
0
    }
215
4.06k
    data = t->data;
216
4.06k
    settings = t->settings;
217
4.06k
    settings->addRef();
218
4.06k
    tailoring = t;
219
4.06k
    cacheEntry->addRef();
220
4.06k
    validLocale = t->actualLocale;
221
4.06k
    actualLocaleIsSameAsValid = false;
222
4.06k
}
223
224
/** @return a new, heap-allocated copy of this collator, made with the copy constructor */
RuleBasedCollator *
RuleBasedCollator::clone() const {
    RuleBasedCollator *copy = new RuleBasedCollator(*this);
    return copy;
}
228
229
0
/**
 * Assignment: takes over the other collator's settings, tailoring, cache entry,
 * valid locale and attribute flags. Self-assignment is a no-op.
 */
RuleBasedCollator &RuleBasedCollator::operator=(const RuleBasedCollator &other) {
    if (this != &other) {
        // copyPtr() updates the shared-object pointers.
        SharedObject::copyPtr(other.settings, settings);
        tailoring = other.tailoring;
        SharedObject::copyPtr(other.cacheEntry, cacheEntry);
        data = tailoring->data;

        // Plain value members.
        validLocale = other.validLocale;
        explicitlySetAttributes = other.explicitlySetAttributes;
        actualLocaleIsSameAsValid = other.actualLocaleIsSameAsValid;
    }
    return *this;
}
240
241
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(RuleBasedCollator)
242
243
bool
244
0
RuleBasedCollator::operator==(const Collator& other) const {
245
0
    if(this == &other) { return true; }
246
0
    if(!Collator::operator==(other)) { return false; }
247
0
    const RuleBasedCollator &o = static_cast<const RuleBasedCollator &>(other);
248
0
    if(*settings != *o.settings) { return false; }
249
0
    if(data == o.data) { return true; }
250
0
    UBool thisIsRoot = data->base == nullptr;
251
0
    UBool otherIsRoot = o.data->base == nullptr;
252
0
    U_ASSERT(!thisIsRoot || !otherIsRoot);  // otherwise their data pointers should be ==
253
0
    if(thisIsRoot != otherIsRoot) { return false; }
254
0
    if((thisIsRoot || !tailoring->rules.isEmpty()) &&
255
0
            (otherIsRoot || !o.tailoring->rules.isEmpty())) {
256
        // Shortcut: If both collators have valid rule strings, then compare those.
257
0
        if(tailoring->rules == o.tailoring->rules) { return true; }
258
0
    }
259
    // Different rule strings can result in the same or equivalent tailoring.
260
    // The rule strings are optional in ICU resource bundles, although included by default.
261
    // cloneBinary() drops the rule string.
262
0
    UErrorCode errorCode = U_ZERO_ERROR;
263
0
    LocalPointer<UnicodeSet> thisTailored(getTailoredSet(errorCode));
264
0
    LocalPointer<UnicodeSet> otherTailored(o.getTailoredSet(errorCode));
265
0
    if(U_FAILURE(errorCode)) { return false; }
266
0
    if(*thisTailored != *otherTailored) { return false; }
267
    // For completeness, we should compare all of the mappings;
268
    // or we should create a list of strings, sort it with one collator,
269
    // and check if both collators compare adjacent strings the same
270
    // (order & strength, down to quaternary); or similar.
271
    // Testing equality of collators seems unusual.
272
0
    return true;
273
0
}
274
275
/**
 * @return the settings' hash code; for a non-root collator, XORed with the CE32
 *         values of the code points that the iterator over the tailored set
 *         yields before its first string (or its end);
 *         0 if the tailored set cannot be computed
 */
int32_t
RuleBasedCollator::hashCode() const {
    int32_t hash = settings->hashCode();
    const bool isRoot = data->base == nullptr;
    if (isRoot) {
        return hash;  // root collator
    }
    // Do not rely on the rule string, see comments in operator==().
    UErrorCode status = U_ZERO_ERROR;
    LocalPointer<UnicodeSet> tailoredSet(getTailoredSet(status));
    if (U_FAILURE(status)) {
        return 0;
    }
    UnicodeSetIterator iter(*tailoredSet);
    for (;;) {
        if (!iter.next() || iter.isString()) {
            break;
        }
        hash ^= data->getCE32(iter.getCodepoint());
    }
    return hash;
}
289
290
void
291
RuleBasedCollator::setLocales(const Locale &requested, const Locale &valid,
292
0
                              const Locale &actual) {
293
0
    if(actual == tailoring->actualLocale) {
294
0
        actualLocaleIsSameAsValid = false;
295
0
    } else {
296
0
        U_ASSERT(actual == valid);
297
0
        actualLocaleIsSameAsValid = true;
298
0
    }
299
    // Do not modify tailoring.actualLocale:
300
    // We cannot be sure that that would be thread-safe.
301
0
    validLocale = valid;
302
0
    (void)requested;  // Ignore, see also ticket #10477.
303
0
}
304
305
/**
 * @param type      ULOC_ACTUAL_LOCALE or ULOC_VALID_LOCALE
 * @param errorCode in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR
 *                  for any other type, including ULOC_REQUESTED_LOCALE
 * @return the actual or valid locale; the root locale on error
 */
Locale
RuleBasedCollator::getLocale(ULocDataLocaleType type, UErrorCode& errorCode) const {
    if (U_FAILURE(errorCode)) {
        return Locale::getRoot();
    }
    if (type == ULOC_ACTUAL_LOCALE) {
        return actualLocaleIsSameAsValid ? validLocale : tailoring->actualLocale;
    }
    if (type == ULOC_VALID_LOCALE) {
        return validLocale;
    }
    // ULOC_REQUESTED_LOCALE and everything else
    errorCode = U_ILLEGAL_ARGUMENT_ERROR;
    return Locale::getRoot();
}
321
322
const char *
323
0
RuleBasedCollator::internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const {
324
0
    if(U_FAILURE(errorCode)) {
325
0
        return nullptr;
326
0
    }
327
0
    const Locale *result;
328
0
    switch(type) {
329
0
    case ULOC_ACTUAL_LOCALE:
330
0
        result = actualLocaleIsSameAsValid ? &validLocale : &tailoring->actualLocale;
331
0
        break;
332
0
    case ULOC_VALID_LOCALE:
333
0
        result = &validLocale;
334
0
        break;
335
0
    case ULOC_REQUESTED_LOCALE:
336
0
    default:
337
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
338
0
        return nullptr;
339
0
    }
340
0
    if(result->isBogus()) { return nullptr; }
341
0
    const char *id = result->getName();
342
0
    return id[0] == 0 ? "root" : id;
343
0
}
344
345
/** @return a reference to the tailoring's rule string */
const UnicodeString&
RuleBasedCollator::getRules() const {
    const UnicodeString &rules = tailoring->rules;
    return rules;
}
349
350
void
351
0
RuleBasedCollator::getRules(UColRuleOption delta, UnicodeString &buffer) const {
352
0
    if(delta == UCOL_TAILORING_ONLY) {
353
0
        buffer = tailoring->rules;
354
0
        return;
355
0
    }
356
    // UCOL_FULL_RULES
357
0
    buffer.remove();
358
0
    CollationLoader::appendRootRules(buffer);
359
0
    buffer.append(tailoring->rules).getTerminatedBuffer();
360
0
}
361
362
void
363
0
RuleBasedCollator::getVersion(UVersionInfo version) const {
364
0
    uprv_memcpy(version, tailoring->version, U_MAX_VERSION_LENGTH);
365
0
    version[0] += (UCOL_RUNTIME_VERSION << 4) + (UCOL_RUNTIME_VERSION >> 4);
366
0
}
367
368
/**
 * Creates the set of tailored characters and strings.
 * For a collator whose data has no base, the set is left empty.
 *
 * @param errorCode in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR
 *                  if the set cannot be allocated
 * @return a new UnicodeSet owned by the caller, or nullptr on failure
 */
UnicodeSet *
RuleBasedCollator::getTailoredSet(UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return nullptr;
    }
    // The LocalPointer deletes the set on the failure path below.
    LocalPointer<UnicodeSet> tailored(new UnicodeSet());
    if (tailored.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    if (data->base != nullptr) {
        TailoredSet(tailored.getAlias()).forData(data, errorCode);
        if (U_FAILURE(errorCode)) {
            return nullptr;
        }
    }
    return tailored.orphan();
}
385
386
void
387
RuleBasedCollator::internalGetContractionsAndExpansions(
388
        UnicodeSet *contractions, UnicodeSet *expansions,
389
0
        UBool addPrefixes, UErrorCode &errorCode) const {
390
0
    if(U_FAILURE(errorCode)) { return; }
391
0
    if(contractions != nullptr) {
392
0
        contractions->clear();
393
0
    }
394
0
    if(expansions != nullptr) {
395
0
        expansions->clear();
396
0
    }
397
0
    ContractionsAndExpansions(contractions, expansions, nullptr, addPrefixes).forData(data, errorCode);
398
0
}
399
400
void
401
0
RuleBasedCollator::internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const {
402
0
    if(U_FAILURE(errorCode)) { return; }
403
0
    ContractionsAndExpansions(&set, nullptr, nullptr, false).forCodePoint(data, c, errorCode);
404
0
}
405
406
/** @return the settings object of the tailoring, which serves as this collator's defaults */
const CollationSettings &
RuleBasedCollator::getDefaultSettings() const {
    const CollationSettings &defaults = *tailoring->settings;
    return defaults;
}
410
411
/**
 * Reads one collation attribute from the current settings.
 *
 * @param attr      the attribute to read
 * @param errorCode in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR
 *                  for an unknown attribute
 * @return the attribute value; UCOL_DEFAULT on error
 */
UColAttributeValue
RuleBasedCollator::getAttribute(UColAttribute attr, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return UCOL_DEFAULT;
    }
    // Attributes with their own getter return right away;
    // the on/off attributes select a bit in settings->options.
    int32_t flag;
    switch (attr) {
    case UCOL_ALTERNATE_HANDLING:
        return settings->getAlternateHandling();
    case UCOL_CASE_FIRST:
        return settings->getCaseFirst();
    case UCOL_STRENGTH:
        return static_cast<UColAttributeValue>(settings->getStrength());
    case UCOL_HIRAGANA_QUATERNARY_MODE:
        // Deprecated attribute, unsettable.
        return UCOL_OFF;
    case UCOL_FRENCH_COLLATION:
        flag = CollationSettings::BACKWARD_SECONDARY;
        break;
    case UCOL_CASE_LEVEL:
        flag = CollationSettings::CASE_LEVEL;
        break;
    case UCOL_NORMALIZATION_MODE:
        flag = CollationSettings::CHECK_FCD;
        break;
    case UCOL_NUMERIC_COLLATION:
        flag = CollationSettings::NUMERIC;
        break;
    default:
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return UCOL_DEFAULT;
    }
    return ((settings->options & flag) != 0) ? UCOL_ON : UCOL_OFF;
}
443
444
void
445
RuleBasedCollator::setAttribute(UColAttribute attr, UColAttributeValue value,
446
7.89k
                                UErrorCode &errorCode) {
447
7.89k
    UColAttributeValue oldValue = getAttribute(attr, errorCode);
448
7.89k
    if(U_FAILURE(errorCode)) { return; }
449
7.89k
    if(value == oldValue) {
450
1.29k
        setAttributeExplicitly(attr);
451
1.29k
        return;
452
1.29k
    }
453
6.59k
    const CollationSettings &defaultSettings = getDefaultSettings();
454
6.59k
    if(settings == &defaultSettings) {
455
6.52k
        if(value == UCOL_DEFAULT) {
456
0
            setAttributeDefault(attr);
457
0
            return;
458
0
        }
459
6.52k
    }
460
6.59k
    CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
461
6.59k
    if(ownedSettings == nullptr) {
462
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
463
0
        return;
464
0
    }
465
466
6.59k
    switch(attr) {
467
27
    case UCOL_FRENCH_COLLATION:
468
27
        ownedSettings->setFlag(CollationSettings::BACKWARD_SECONDARY, value,
469
27
                               defaultSettings.options, errorCode);
470
27
        break;
471
11
    case UCOL_ALTERNATE_HANDLING:
472
11
        ownedSettings->setAlternateHandling(value, defaultSettings.options, errorCode);
473
11
        break;
474
15
    case UCOL_CASE_FIRST:
475
15
        ownedSettings->setCaseFirst(value, defaultSettings.options, errorCode);
476
15
        break;
477
33
    case UCOL_CASE_LEVEL:
478
33
        ownedSettings->setFlag(CollationSettings::CASE_LEVEL, value,
479
33
                               defaultSettings.options, errorCode);
480
33
        break;
481
31
    case UCOL_NORMALIZATION_MODE:
482
31
        ownedSettings->setFlag(CollationSettings::CHECK_FCD, value,
483
31
                               defaultSettings.options, errorCode);
484
31
        break;
485
6.44k
    case UCOL_STRENGTH:
486
6.44k
        ownedSettings->setStrength(value, defaultSettings.options, errorCode);
487
6.44k
        break;
488
0
    case UCOL_HIRAGANA_QUATERNARY_MODE:
489
        // Deprecated attribute. Check for valid values but do not change anything.
490
0
        if(value != UCOL_OFF && value != UCOL_ON && value != UCOL_DEFAULT) {
491
0
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
492
0
        }
493
0
        break;
494
35
    case UCOL_NUMERIC_COLLATION:
495
35
        ownedSettings->setFlag(CollationSettings::NUMERIC, value, defaultSettings.options, errorCode);
496
35
        break;
497
0
    default:
498
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
499
0
        break;
500
6.59k
    }
501
6.59k
    if(U_FAILURE(errorCode)) { return; }
502
6.54k
    setFastLatinOptions(*ownedSettings);
503
6.54k
    if(value == UCOL_DEFAULT) {
504
0
        setAttributeDefault(attr);
505
6.54k
    } else {
506
6.54k
        setAttributeExplicitly(attr);
507
6.54k
    }
508
6.54k
}
509
510
/**
 * Sets the maximum variable reordering group, and sets the variable top to
 * the last primary of that group.
 *
 * @param group     UCOL_REORDER_CODE_DEFAULT, or a code in the range
 *                  UCOL_REORDER_CODE_FIRST..UCOL_REORDER_CODE_CURRENCY
 * @param errorCode in/out ICU error code; U_ILLEGAL_ARGUMENT_ERROR for any other group,
 *                  U_MEMORY_ALLOCATION_ERROR if copyOnWrite() fails
 * @return *this
 */
Collator &
RuleBasedCollator::setMaxVariable(UColReorderCode group, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return *this;
    }
    // Convert the reorder code into a MaxVariable number, or UCOL_DEFAULT=-1.
    int32_t maxVarNumber;
    if (group == UCOL_REORDER_CODE_DEFAULT) {
        maxVarNumber = UCOL_DEFAULT;
    } else if (group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    } else {
        maxVarNumber = group - UCOL_REORDER_CODE_FIRST;
    }
    const CollationSettings::MaxVariable currentMaxVar = settings->getMaxVariable();
    if (maxVarNumber == currentMaxVar) {
        // No change; only mark the attribute as explicitly set.
        setAttributeExplicitly(ATTR_VARIABLE_TOP);
        return *this;
    }
    const CollationSettings &defaults = getDefaultSettings();
    if (settings == &defaults && maxVarNumber == UCOL_DEFAULT) {
        setAttributeDefault(ATTR_VARIABLE_TOP);
        return *this;
    }
    CollationSettings *writable = SharedObject::copyOnWrite(settings);
    if (writable == nullptr) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }

    if (group == UCOL_REORDER_CODE_DEFAULT) {
        // Look up the group that the default settings use.
        group = static_cast<UColReorderCode>(
            UCOL_REORDER_CODE_FIRST + int32_t{defaults.getMaxVariable()});
    }
    const uint32_t lastPrimary = data->getLastPrimaryForGroup(group);
    U_ASSERT(lastPrimary != 0);
    writable->setMaxVariable(maxVarNumber, defaults.options, errorCode);
    if (U_FAILURE(errorCode)) {
        return *this;
    }
    writable->variableTop = lastPrimary;
    setFastLatinOptions(*writable);
    if (maxVarNumber != UCOL_DEFAULT) {
        setAttributeExplicitly(ATTR_VARIABLE_TOP);
    } else {
        setAttributeDefault(ATTR_VARIABLE_TOP);
    }
    return *this;
}
558
559
/** @return the settings' MaxVariable number, offset by UCOL_REORDER_CODE_FIRST, as a reorder code */
UColReorderCode
RuleBasedCollator::getMaxVariable() const {
    const int32_t maxVarNumber = int32_t{settings->getMaxVariable()};
    return static_cast<UColReorderCode>(UCOL_REORDER_CODE_FIRST + maxVarNumber);
}
563
564
uint32_t
565
4.06k
RuleBasedCollator::getVariableTop(UErrorCode & /*errorCode*/) const {
566
4.06k
    return settings->variableTop;
567
4.06k
}
568
569
/**
 * Sets the variable top from a string that must map to exactly one collation element.
 *
 * @param varTop    the string; must not be nullptr unless len is 0
 * @param len       length of varTop, or negative if it is NUL-terminated; an empty string is rejected
 * @param errorCode in/out ICU error code; U_ILLEGAL_ARGUMENT_ERROR for a missing or empty
 *                  string, U_CE_NOT_FOUND_ERROR if the string yields no CE or more than one
 * @return the resulting settings->variableTop, or 0 on the errors listed above
 */
uint32_t
RuleBasedCollator::setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return 0;
    }
    if (varTop == nullptr && len != 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if (len < 0) {
        len = u_strlen(varTop);
    }
    if (len == 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    const char16_t *const limit = varTop + len;
    const UBool numeric = settings->isNumeric();
    // Fetch the first two CEs, with or without FCD checking.
    int64_t firstCE;
    int64_t secondCE;
    if (settings->dontCheckFCD()) {
        UTF16CollationIterator iter(data, numeric, varTop, varTop, limit);
        firstCE = iter.nextCE(errorCode);
        secondCE = iter.nextCE(errorCode);
    } else {
        FCDUTF16CollationIterator iter(data, numeric, varTop, varTop, limit);
        firstCE = iter.nextCE(errorCode);
        secondCE = iter.nextCE(errorCode);
    }
    const bool exactlyOneCE = firstCE != Collation::NO_CE && secondCE == Collation::NO_CE;
    if (!exactlyOneCE) {
        errorCode = U_CE_NOT_FOUND_ERROR;
        return 0;
    }
    // The upper 32 bits of the CE go to the uint32_t overload.
    setVariableTop(static_cast<uint32_t>(firstCE >> 32), errorCode);
    return settings->variableTop;
}
599
600
/** Forwards the string's buffer and length to the (const char16_t *, int32_t) overload. */
uint32_t
RuleBasedCollator::setVariableTop(const UnicodeString &varTop, UErrorCode &errorCode) {
    const char16_t *chars = varTop.getBuffer();
    return setVariableTop(chars, varTop.length(), errorCode);
}
604
605
void
606
0
RuleBasedCollator::setVariableTop(uint32_t varTop, UErrorCode &errorCode) {
607
0
    if(U_FAILURE(errorCode)) { return; }
608
0
    if(varTop != settings->variableTop) {
609
        // Pin the variable top to the end of the reordering group which contains it.
610
        // Only a few special groups are supported.
611
0
        int32_t group = data->getGroupForPrimary(varTop);
612
0
        if(group < UCOL_REORDER_CODE_FIRST || UCOL_REORDER_CODE_CURRENCY < group) {
613
0
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
614
0
            return;
615
0
        }
616
0
        uint32_t v = data->getLastPrimaryForGroup(group);
617
0
        U_ASSERT(v != 0 && v >= varTop);
618
0
        varTop = v;
619
0
        if(varTop != settings->variableTop) {
620
0
            CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
621
0
            if(ownedSettings == nullptr) {
622
0
                errorCode = U_MEMORY_ALLOCATION_ERROR;
623
0
                return;
624
0
            }
625
0
            ownedSettings->setMaxVariable(group - UCOL_REORDER_CODE_FIRST,
626
0
                                          getDefaultSettings().options, errorCode);
627
0
            if(U_FAILURE(errorCode)) { return; }
628
0
            ownedSettings->variableTop = varTop;
629
0
            setFastLatinOptions(*ownedSettings);
630
0
        }
631
0
    }
632
0
    if(varTop == getDefaultSettings().variableTop) {
633
0
        setAttributeDefault(ATTR_VARIABLE_TOP);
634
0
    } else {
635
0
        setAttributeExplicitly(ATTR_VARIABLE_TOP);
636
0
    }
637
0
}
638
639
/**
 * Copies the current reorder codes into dest.
 *
 * @param dest      output array; may be nullptr only if capacity is 0
 * @param capacity  number of int32_t elements that fit into dest; must not be negative
 * @param errorCode in/out ICU error code; U_ILLEGAL_ARGUMENT_ERROR for bad arguments,
 *                  U_BUFFER_OVERFLOW_ERROR if the codes do not fit
 * @return the number of reorder codes, even when they did not fit; 0 on other errors
 */
int32_t
RuleBasedCollator::getReorderCodes(int32_t *dest, int32_t capacity,
                                   UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return 0;
    }
    const bool badArguments = capacity < 0 || (dest == nullptr && capacity > 0);
    if (badArguments) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    const int32_t count = settings->reorderCodesLength;
    if (count > capacity) {
        errorCode = U_BUFFER_OVERFLOW_ERROR;
    } else if (count > 0) {
        // Each reorder code is copied as 4 bytes.
        uprv_memcpy(dest, settings->reorderCodes, count * 4);
    }
    return count;
}
656
657
void
658
RuleBasedCollator::setReorderCodes(const int32_t *reorderCodes, int32_t length,
659
205
                                   UErrorCode &errorCode) {
660
205
    if(U_FAILURE(errorCode)) { return; }
661
205
    if(length < 0 || (reorderCodes == nullptr && length > 0)) {
662
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
663
0
        return;
664
0
    }
665
205
    if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_NONE) {
666
2
        length = 0;
667
2
    }
668
205
    if(length == settings->reorderCodesLength &&
669
205
            uprv_memcmp(reorderCodes, settings->reorderCodes, length * 4) == 0) {
670
1
        return;
671
1
    }
672
204
    const CollationSettings &defaultSettings = getDefaultSettings();
673
204
    if(length == 1 && reorderCodes[0] == UCOL_REORDER_CODE_DEFAULT) {
674
0
        if(settings != &defaultSettings) {
675
0
            CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
676
0
            if(ownedSettings == nullptr) {
677
0
                errorCode = U_MEMORY_ALLOCATION_ERROR;
678
0
                return;
679
0
            }
680
0
            ownedSettings->copyReorderingFrom(defaultSettings, errorCode);
681
0
            setFastLatinOptions(*ownedSettings);
682
0
        }
683
0
        return;
684
0
    }
685
204
    CollationSettings *ownedSettings = SharedObject::copyOnWrite(settings);
686
204
    if(ownedSettings == nullptr) {
687
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
688
0
        return;
689
0
    }
690
204
    ownedSettings->setReordering(*data, reorderCodes, length, errorCode);
691
204
    setFastLatinOptions(*ownedSettings);
692
204
}
693
694
void
695
6.76k
RuleBasedCollator::setFastLatinOptions(CollationSettings &ownedSettings) const {
696
6.76k
    ownedSettings.fastLatinOptions = CollationFastLatin::getOptions(
697
6.76k
            data, ownedSettings,
698
6.76k
            ownedSettings.fastLatinPrimaries, UPRV_LENGTHOF(ownedSettings.fastLatinPrimaries));
699
6.76k
}
700
701
/**
 * Compares two strings by passing their buffers and lengths to doCompare().
 *
 * @return the doCompare() result; UCOL_EQUAL if errorCode already indicates a failure
 */
UCollationResult
RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
                           UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return UCOL_EQUAL;
    }
    const UCollationResult result = doCompare(left.getBuffer(), left.length(),
                                              right.getBuffer(), right.length(), errorCode);
    return result;
}
708
709
/**
 * Compares at most the first `length` code units of each string.
 *
 * @param length    the limit; 0 yields UCOL_EQUAL, a negative value is an error
 * @param errorCode in/out ICU error code; U_ILLEGAL_ARGUMENT_ERROR for a negative length
 * @return the doCompare() result; UCOL_EQUAL on error or for length 0
 */
UCollationResult
RuleBasedCollator::compare(const UnicodeString &left, const UnicodeString &right,
                           int32_t length, UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode) || length == 0) {
        return UCOL_EQUAL;
    }
    if (length < 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return UCOL_EQUAL;
    }
    // Cap both lengths at the limit.
    const int32_t fullLeft = left.length();
    const int32_t fullRight = right.length();
    const int32_t cappedLeft = fullLeft > length ? length : fullLeft;
    const int32_t cappedRight = fullRight > length ? length : fullRight;
    return doCompare(left.getBuffer(), cappedLeft,
                     right.getBuffer(), cappedRight, errorCode);
}
724
725
/**
 * Compares two UTF-16 strings.
 *
 * @param left        first string; may be nullptr only if leftLength is 0
 * @param leftLength  length of left, or negative if it is NUL-terminated
 * @param right       second string; may be nullptr only if rightLength is 0
 * @param rightLength length of right, or negative if it is NUL-terminated
 * @param errorCode   in/out ICU error code; U_ILLEGAL_ARGUMENT_ERROR for a nullptr
 *                    string with a non-zero length
 * @return the doCompare() result; UCOL_EQUAL on error
 */
UCollationResult
RuleBasedCollator::compare(const char16_t *left, int32_t leftLength,
                           const char16_t *right, int32_t rightLength,
                           UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return UCOL_EQUAL;
    }
    const bool leftMissing = left == nullptr && leftLength != 0;
    if (leftMissing || (right == nullptr && rightLength != 0)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return UCOL_EQUAL;
    }
    // Make sure both or neither strings have a known length.
    // We do not optimize for mixed length/termination.
    if (leftLength >= 0 && rightLength < 0) {
        rightLength = u_strlen(right);
    } else if (leftLength < 0 && rightLength >= 0) {
        leftLength = u_strlen(left);
    }
    return doCompare(left, leftLength, right, rightLength, errorCode);
}
743
744
/**
 * Compares two UTF-8 strings given as StringPieces.
 *
 * @param errorCode in/out ICU error code; U_ILLEGAL_ARGUMENT_ERROR if a non-empty
 *                  StringPiece has a nullptr data pointer
 * @return the doCompare() result; UCOL_EQUAL on error
 */
UCollationResult
RuleBasedCollator::compareUTF8(const StringPiece &left, const StringPiece &right,
                               UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return UCOL_EQUAL;
    }
    const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left.data());
    const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right.data());
    const bool leftMissing = leftBytes == nullptr && !left.empty();
    if (leftMissing || (rightBytes == nullptr && !right.empty())) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return UCOL_EQUAL;
    }
    return doCompare(leftBytes, left.length(), rightBytes, right.length(), errorCode);
}
756
757
/**
 * Compares two UTF-8 strings given as char pointers.
 *
 * @param left        first string; may be nullptr only if leftLength is 0
 * @param leftLength  length of left, or negative if it is NUL-terminated
 * @param right       second string; may be nullptr only if rightLength is 0
 * @param rightLength length of right, or negative if it is NUL-terminated
 * @param errorCode   in/out ICU error code; U_ILLEGAL_ARGUMENT_ERROR for a nullptr
 *                    string with a non-zero length
 * @return the doCompare() result; UCOL_EQUAL on error
 */
UCollationResult
RuleBasedCollator::internalCompareUTF8(const char *left, int32_t leftLength,
                                       const char *right, int32_t rightLength,
                                       UErrorCode &errorCode) const {
    if (U_FAILURE(errorCode)) {
        return UCOL_EQUAL;
    }
    const bool leftMissing = left == nullptr && leftLength != 0;
    if (leftMissing || (right == nullptr && rightLength != 0)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return UCOL_EQUAL;
    }
    // Make sure both or neither strings have a known length.
    // We do not optimize for mixed length/termination.
    if (leftLength >= 0 && rightLength < 0) {
        rightLength = static_cast<int32_t>(uprv_strlen(right));
    } else if (leftLength < 0 && rightLength >= 0) {
        leftLength = static_cast<int32_t>(uprv_strlen(left));
    }
    const uint8_t *leftBytes = reinterpret_cast<const uint8_t *>(left);
    const uint8_t *rightBytes = reinterpret_cast<const uint8_t *>(right);
    return doCompare(leftBytes, leftLength, rightBytes, rightLength, errorCode);
}
776
777
namespace {
778
779
/**
780
 * Abstract iterator for identical-level string comparisons.
781
 * Returns FCD code points and handles temporary switching to NFD.
782
 */
783
class NFDIterator : public UObject {
784
public:
785
1.67k
    NFDIterator() : index(-1), length(0) {}
786
0
    virtual ~NFDIterator() {}
787
    /**
788
     * Returns the next code point from the internal normalization buffer,
789
     * or else the next text code point.
790
     * Returns -1 at the end of the text.
791
     */
792
23.0k
    UChar32 nextCodePoint() {
793
23.0k
        if(index >= 0) {
794
1.65k
            if(index == length) {
795
1.50k
                index = -1;
796
1.50k
            } else {
797
146
                UChar32 c;
798
146
                U16_NEXT_UNSAFE(decomp, index, c);
799
146
                return c;
800
146
            }
801
1.65k
        }
802
22.9k
        return nextRawCodePoint();
803
23.0k
    }
804
    /**
805
     * @param nfcImpl
806
     * @param c the last code point returned by nextCodePoint() or nextDecomposedCodePoint()
807
     * @return the first code point in c's decomposition,
808
     *         or c itself if it was decomposed already or if it does not decompose
809
     */
810
4.37k
    UChar32 nextDecomposedCodePoint(const Normalizer2Impl &nfcImpl, UChar32 c) {
811
4.37k
        if(index >= 0) { return c; }
812
4.32k
        decomp = nfcImpl.getDecomposition(c, buffer, length);
813
4.32k
        if(decomp == nullptr) { return c; }
814
1.61k
        index = 0;
815
1.61k
        U16_NEXT_UNSAFE(decomp, index, c);
816
1.61k
        return c;
817
4.32k
    }
818
protected:
819
    /**
820
     * Returns the next text code point in FCD order.
821
     * Returns -1 at the end of the text.
822
     */
823
    virtual UChar32 nextRawCodePoint() = 0;
824
private:
825
    const char16_t *decomp;
826
    char16_t buffer[4];
827
    int32_t index;
828
    int32_t length;
829
};
830
831
class UTF16NFDIterator : public NFDIterator {
832
public:
833
1.67k
    UTF16NFDIterator(const char16_t *text, const char16_t *textLimit) : s(text), limit(textLimit) {}
834
protected:
835
22.9k
    virtual UChar32 nextRawCodePoint() override {
836
22.9k
        if(s == limit) { return U_SENTINEL; }
837
22.6k
        UChar32 c = *s++;
838
22.6k
        if(limit == nullptr && c == 0) {
839
0
            s = nullptr;
840
0
            return U_SENTINEL;
841
0
        }
842
22.6k
        char16_t trail;
843
22.6k
        if(U16_IS_LEAD(c) && s != limit && U16_IS_TRAIL(trail = *s)) {
844
968
            ++s;
845
968
            c = U16_GET_SUPPLEMENTARY(c, trail);
846
968
        }
847
22.6k
        return c;
848
22.6k
    }
849
850
    const char16_t *s;
851
    const char16_t *limit;
852
};
853
854
class FCDUTF16NFDIterator : public UTF16NFDIterator {
855
public:
856
    FCDUTF16NFDIterator(const Normalizer2Impl &nfcImpl, const char16_t *text, const char16_t *textLimit)
857
1.17k
            : UTF16NFDIterator(nullptr, nullptr) {
858
1.17k
        UErrorCode errorCode = U_ZERO_ERROR;
859
1.17k
        const char16_t *spanLimit = nfcImpl.makeFCD(text, textLimit, nullptr, errorCode);
860
1.17k
        if(U_FAILURE(errorCode)) { return; }
861
1.17k
        if(spanLimit == textLimit || (textLimit == nullptr && *spanLimit == 0)) {
862
450
            s = text;
863
450
            limit = spanLimit;
864
726
        } else {
865
726
            str.setTo(text, static_cast<int32_t>(spanLimit - text));
866
726
            {
867
726
                ReorderingBuffer r_buffer(nfcImpl, str);
868
726
                if(r_buffer.init(str.length(), errorCode)) {
869
726
                    nfcImpl.makeFCD(spanLimit, textLimit, &r_buffer, errorCode);
870
726
                }
871
726
            }
872
726
            if(U_SUCCESS(errorCode)) {
873
726
                s = str.getBuffer();
874
726
                limit = s + str.length();
875
726
            }
876
726
        }
877
1.17k
    }
878
private:
879
    UnicodeString str;
880
};
881
882
class UTF8NFDIterator : public NFDIterator {
883
public:
884
    UTF8NFDIterator(const uint8_t *text, int32_t textLength)
885
0
        : s(text), pos(0), length(textLength) {}
886
protected:
887
0
    virtual UChar32 nextRawCodePoint() override {
888
0
        if(pos == length || (s[pos] == 0 && length < 0)) { return U_SENTINEL; }
889
0
        UChar32 c;
890
0
        U8_NEXT_OR_FFFD(s, pos, length, c);
891
0
        return c;
892
0
    }
893
894
    const uint8_t *s;
895
    int32_t pos;
896
    int32_t length;
897
};
898
899
class FCDUTF8NFDIterator : public NFDIterator {
900
public:
901
    FCDUTF8NFDIterator(const CollationData *data, const uint8_t *text, int32_t textLength)
902
0
            : u8ci(data, false, text, 0, textLength) {}
903
protected:
904
0
    virtual UChar32 nextRawCodePoint() override {
905
0
        UErrorCode errorCode = U_ZERO_ERROR;
906
0
        return u8ci.nextCodePoint(errorCode);
907
0
    }
908
private:
909
    FCDUTF8CollationIterator u8ci;
910
};
911
912
class UIterNFDIterator : public NFDIterator {
913
public:
914
0
    UIterNFDIterator(UCharIterator &it) : iter(it) {}
915
protected:
916
0
    virtual UChar32 nextRawCodePoint() override {
917
0
        return uiter_next32(&iter);
918
0
    }
919
private:
920
    UCharIterator &iter;
921
};
922
923
class FCDUIterNFDIterator : public NFDIterator {
924
public:
925
    FCDUIterNFDIterator(const CollationData *data, UCharIterator &it, int32_t startIndex)
926
0
            : uici(data, false, it, startIndex) {}
927
protected:
928
0
    virtual UChar32 nextRawCodePoint() override {
929
0
        UErrorCode errorCode = U_ZERO_ERROR;
930
0
        return uici.nextCodePoint(errorCode);
931
0
    }
932
private:
933
    FCDUIterCollationIterator uici;
934
};
935
936
/**
 * Identical-level comparison: Walks both iterators in parallel and
 * compares code points, decomposing them only where the two sides differ.
 *
 * @return UCOL_EQUAL if both iterators reach their ends together without a difference,
 *         otherwise UCOL_LESS or UCOL_GREATER according to the first difference
 */
UCollationResult compareNFDIter(const Normalizer2Impl &nfcImpl,
                                NFDIterator &left, NFDIterator &right) {
    for(;;) {
        // Fetch the next FCD code point from each string.
        UChar32 l = left.nextCodePoint();
        UChar32 r = right.nextCodePoint();
        if(l == r) {
            if(l < 0) {
                // Both strings ended together.
                return UCOL_EQUAL;
            }
            continue;
        }
        // If they are different, then decompose each and compare again.
        // Special values: -2 for the end of the string, -1 for the U+FFFE merge separator,
        // so that both are lower than every real code point.
        l = (l < 0) ? -2 :
            (l == 0xfffe) ? -1 :
            left.nextDecomposedCodePoint(nfcImpl, l);
        r = (r < 0) ? -2 :
            (r == 0xfffe) ? -1 :
            right.nextDecomposedCodePoint(nfcImpl, r);
        if(l != r) {
            return (l < r) ? UCOL_LESS : UCOL_GREATER;
        }
    }
}
966
967
}  // namespace
968
969
/**
 * Compares two UTF-16 strings.
 * A negative leftLength means that both strings are treated as NUL-terminated.
 *
 * Steps: skip the identical prefix, try the Latin fast path, fall back to
 * the collation iterators, and finally compare the identical level if
 * the strength is UCOL_IDENTICAL and the strings are equal so far.
 */
UCollationResult
RuleBasedCollator::doCompare(const char16_t *left, int32_t leftLength,
                             const char16_t *right, int32_t rightLength,
                             UErrorCode &errorCode) const {
    // U_FAILURE(errorCode) checked by caller.
    if(left == right && leftLength == rightLength) {
        return UCOL_EQUAL;
    }

    // Identical-prefix test.
    const char16_t *leftLimit = nullptr;
    const char16_t *rightLimit = nullptr;
    int32_t equalPrefixLength = 0;
    if(leftLength >= 0) {
        leftLimit = left + leftLength;
        rightLimit = right + rightLength;
        while(equalPrefixLength != leftLength && equalPrefixLength != rightLength &&
                left[equalPrefixLength] == right[equalPrefixLength]) {
            ++equalPrefixLength;
        }
        if(equalPrefixLength == leftLength && equalPrefixLength == rightLength) {
            return UCOL_EQUAL;
        }
    } else {
        // NUL-terminated: The limits remain nullptr.
        for(;; ++equalPrefixLength) {
            const char16_t unit = left[equalPrefixLength];
            if(unit != right[equalPrefixLength]) { break; }
            if(unit == 0) { return UCOL_EQUAL; }
        }
    }

    const UBool numeric = settings->isNumeric();
    if(equalPrefixLength > 0) {
        const bool leftUnsafe = equalPrefixLength != leftLength &&
            data->isUnsafeBackward(left[equalPrefixLength], numeric);
        if(leftUnsafe ||
                (equalPrefixLength != rightLength &&
                    data->isUnsafeBackward(right[equalPrefixLength], numeric))) {
            // Identical prefix: Back up to the start of a contraction or reordering sequence.
            do {
                --equalPrefixLength;
            } while(equalPrefixLength > 0 &&
                    data->isUnsafeBackward(left[equalPrefixLength], numeric));
        }
        // Notes:
        // - A longer string can compare equal to a prefix of it if only ignorables follow.
        // - With a backward level, a longer string can compare less-than a prefix of it.

        // Pass the actual start of each string into the CollationIterators,
        // plus the equalPrefixLength position,
        // so that prefix matches back into the equal prefix work.
    }

    // Try the Latin fast path if it is enabled and
    // the first differing unit on each side (if any) is in its range.
    int32_t result = CollationFastLatin::BAIL_OUT_RESULT;
    const int32_t fastLatinOptions = settings->fastLatinOptions;
    if(fastLatinOptions >= 0 &&
            (equalPrefixLength == leftLength ||
                left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX) &&
            (equalPrefixLength == rightLength ||
                right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX)) {
        // Remaining lengths, or -1 for NUL-terminated input.
        const int32_t leftRemaining = (leftLength >= 0) ? leftLength - equalPrefixLength : -1;
        const int32_t rightRemaining = (leftLength >= 0) ? rightLength - equalPrefixLength : -1;
        result = CollationFastLatin::compareUTF16(data->fastLatinTable,
                                                  settings->fastLatinPrimaries,
                                                  fastLatinOptions,
                                                  left + equalPrefixLength, leftRemaining,
                                                  right + equalPrefixLength, rightRemaining);
    }

    const UBool skipFCDCheck = settings->dontCheckFCD();
    if(result == CollationFastLatin::BAIL_OUT_RESULT) {
        // Regular comparison via collation iterators.
        if(skipFCDCheck) {
            UTF16CollationIterator leftCEs(data, numeric,
                                           left, left + equalPrefixLength, leftLimit);
            UTF16CollationIterator rightCEs(data, numeric,
                                            right, right + equalPrefixLength, rightLimit);
            result = CollationCompare::compareUpToQuaternary(leftCEs, rightCEs, *settings, errorCode);
        } else {
            FCDUTF16CollationIterator leftCEs(data, numeric,
                                              left, left + equalPrefixLength, leftLimit);
            FCDUTF16CollationIterator rightCEs(data, numeric,
                                               right, right + equalPrefixLength, rightLimit);
            result = CollationCompare::compareUpToQuaternary(leftCEs, rightCEs, *settings, errorCode);
        }
    }
    if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
        return static_cast<UCollationResult>(result);
    }

    // Note: If NUL-terminated, we could get the actual limits from the iterators now.
    // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
    // and the benefit seems unlikely to be measurable.

    // Compare identical level, starting after the equal prefix.
    const Normalizer2Impl &nfcImpl = data->nfcImpl;
    const char16_t *leftRest = left + equalPrefixLength;
    const char16_t *rightRest = right + equalPrefixLength;
    if(skipFCDCheck) {
        UTF16NFDIterator leftNFD(leftRest, leftLimit);
        UTF16NFDIterator rightNFD(rightRest, rightLimit);
        return compareNFDIter(nfcImpl, leftNFD, rightNFD);
    }
    FCDUTF16NFDIterator leftNFD(nfcImpl, leftRest, leftLimit);
    FCDUTF16NFDIterator rightNFD(nfcImpl, rightRest, rightLimit);
    return compareNFDIter(nfcImpl, leftNFD, rightNFD);
}
1087
1088
/**
 * Compares two UTF-8 strings.
 * A negative leftLength means that both strings are treated as NUL-terminated.
 *
 * Steps: skip the identical prefix (backing up to a code point boundary),
 * try the Latin fast path, fall back to the collation iterators, and finally
 * compare the identical level if the strength is UCOL_IDENTICAL and
 * the strings are equal so far.
 */
UCollationResult
RuleBasedCollator::doCompare(const uint8_t *left, int32_t leftLength,
                             const uint8_t *right, int32_t rightLength,
                             UErrorCode &errorCode) const {
    // U_FAILURE(errorCode) checked by caller.
    if(left == right && leftLength == rightLength) {
        return UCOL_EQUAL;
    }

    // Identical-prefix test.
    int32_t equalPrefixLength = 0;
    if(leftLength >= 0) {
        while(equalPrefixLength != leftLength && equalPrefixLength != rightLength &&
                left[equalPrefixLength] == right[equalPrefixLength]) {
            ++equalPrefixLength;
        }
        if(equalPrefixLength == leftLength && equalPrefixLength == rightLength) {
            return UCOL_EQUAL;
        }
    } else {
        // NUL-terminated
        for(;; ++equalPrefixLength) {
            const uint8_t b = left[equalPrefixLength];
            if(b != right[equalPrefixLength]) { break; }
            if(b == 0) { return UCOL_EQUAL; }
        }
    }
    // Back up to the start of a partially-equal code point.
    if(equalPrefixLength > 0) {
        const bool leftInsideCodePoint =
            equalPrefixLength != leftLength && U8_IS_TRAIL(left[equalPrefixLength]);
        if(leftInsideCodePoint ||
                (equalPrefixLength != rightLength && U8_IS_TRAIL(right[equalPrefixLength]))) {
            do {
                --equalPrefixLength;
            } while(equalPrefixLength > 0 && U8_IS_TRAIL(left[equalPrefixLength]));
        }
    }

    const UBool numeric = settings->isNumeric();
    if(equalPrefixLength > 0) {
        // Look at the first code point after the equal prefix on each side.
        UBool unsafe = false;
        if(equalPrefixLength != leftLength) {
            int32_t next = equalPrefixLength;
            UChar32 cp;
            U8_NEXT_OR_FFFD(left, next, leftLength, cp);
            unsafe = data->isUnsafeBackward(cp, numeric);
        }
        if(!unsafe && equalPrefixLength != rightLength) {
            int32_t next = equalPrefixLength;
            UChar32 cp;
            U8_NEXT_OR_FFFD(right, next, rightLength, cp);
            unsafe = data->isUnsafeBackward(cp, numeric);
        }
        if(unsafe) {
            // Identical prefix: Back up to the start of a contraction or reordering sequence.
            UChar32 cp;
            do {
                U8_PREV_OR_FFFD(left, 0, equalPrefixLength, cp);
            } while(equalPrefixLength > 0 && data->isUnsafeBackward(cp, numeric));
        }
        // See the notes in the UTF-16 version.

        // Pass the actual start of each string into the CollationIterators,
        // plus the equalPrefixLength position,
        // so that prefix matches back into the equal prefix work.
    }

    // Try the Latin fast path if it is enabled and
    // the first differing byte on each side (if any) is in its range.
    int32_t result = CollationFastLatin::BAIL_OUT_RESULT;
    const int32_t fastLatinOptions = settings->fastLatinOptions;
    if(fastLatinOptions >= 0 &&
            (equalPrefixLength == leftLength ||
                left[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD) &&
            (equalPrefixLength == rightLength ||
                right[equalPrefixLength] <= CollationFastLatin::LATIN_MAX_UTF8_LEAD)) {
        // Remaining lengths, or -1 for NUL-terminated input.
        const int32_t leftRemaining = (leftLength >= 0) ? leftLength - equalPrefixLength : -1;
        const int32_t rightRemaining = (leftLength >= 0) ? rightLength - equalPrefixLength : -1;
        result = CollationFastLatin::compareUTF8(data->fastLatinTable,
                                                 settings->fastLatinPrimaries,
                                                 fastLatinOptions,
                                                 left + equalPrefixLength, leftRemaining,
                                                 right + equalPrefixLength, rightRemaining);
    }

    const UBool skipFCDCheck = settings->dontCheckFCD();
    if(result == CollationFastLatin::BAIL_OUT_RESULT) {
        // Regular comparison via collation iterators.
        if(skipFCDCheck) {
            UTF8CollationIterator leftCEs(data, numeric, left, equalPrefixLength, leftLength);
            UTF8CollationIterator rightCEs(data, numeric, right, equalPrefixLength, rightLength);
            result = CollationCompare::compareUpToQuaternary(leftCEs, rightCEs, *settings, errorCode);
        } else {
            FCDUTF8CollationIterator leftCEs(data, numeric, left, equalPrefixLength, leftLength);
            FCDUTF8CollationIterator rightCEs(data, numeric, right, equalPrefixLength, rightLength);
            result = CollationCompare::compareUpToQuaternary(leftCEs, rightCEs, *settings, errorCode);
        }
    }
    if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
        return static_cast<UCollationResult>(result);
    }

    // Note: If NUL-terminated, we could get the actual limits from the iterators now.
    // That would complicate the iterators a bit, NUL-terminated strings are only a C convenience,
    // and the benefit seems unlikely to be measurable.

    // Compare identical level, starting after the equal prefix.
    const Normalizer2Impl &nfcImpl = data->nfcImpl;
    const uint8_t *leftRest = left + equalPrefixLength;
    const uint8_t *rightRest = right + equalPrefixLength;
    int32_t leftRestLength = leftLength;
    int32_t rightRestLength = rightLength;
    if(leftLength > 0) {
        leftRestLength -= equalPrefixLength;
        rightRestLength -= equalPrefixLength;
    }
    if(skipFCDCheck) {
        UTF8NFDIterator leftNFD(leftRest, leftRestLength);
        UTF8NFDIterator rightNFD(rightRest, rightRestLength);
        return compareNFDIter(nfcImpl, leftNFD, rightNFD);
    }
    FCDUTF8NFDIterator leftNFD(data, leftRest, leftRestLength);
    FCDUTF8NFDIterator rightNFD(data, rightRest, rightRestLength);
    return compareNFDIter(nfcImpl, leftNFD, rightNFD);
}
1216
1217
/**
 * Compares the texts of two UCharIterators.
 *
 * Returns UCOL_EQUAL right away if errorCode already indicates a failure
 * or if both arguments are the same iterator object.
 * Both iterators are read from their current positions and are moved by this function.
 */
UCollationResult
RuleBasedCollator::compare(UCharIterator &left, UCharIterator &right,
                           UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return UCOL_EQUAL; }
    if(&left == &right) { return UCOL_EQUAL; }
    const UBool numeric = settings->isNumeric();

    // Identical-prefix test.
    int32_t equalPrefixLength = 0;
    {
        UChar32 leftUnit;
        UChar32 rightUnit;
        for(;;) {
            leftUnit = left.next(&left);
            rightUnit = right.next(&right);
            if(leftUnit != rightUnit) { break; }
            // Equal units: A negative value means that both iterators are at their ends.
            if(leftUnit < 0) { return UCOL_EQUAL; }
            ++equalPrefixLength;
        }

        // Back out the code units that differed, for the real collation comparison.
        if(leftUnit >= 0) { left.previous(&left); }
        if(rightUnit >= 0) { right.previous(&right); }

        if(equalPrefixLength > 0) {
            const bool leftUnsafe = leftUnit >= 0 && data->isUnsafeBackward(leftUnit, numeric);
            if(leftUnsafe ||
                    (rightUnit >= 0 && data->isUnsafeBackward(rightUnit, numeric))) {
                // Identical prefix: Back up to the start of a contraction or reordering sequence.
                // Both iterators step back together, but only the left unit is tested.
                do {
                    --equalPrefixLength;
                    leftUnit = left.previous(&left);
                    right.previous(&right);
                } while(equalPrefixLength > 0 && data->isUnsafeBackward(leftUnit, numeric));
            }
            // See the notes in the UTF-16 version.
        }
    }

    const UBool skipFCDCheck = settings->dontCheckFCD();
    UCollationResult result;
    if(skipFCDCheck) {
        UIterCollationIterator leftCEs(data, numeric, left);
        UIterCollationIterator rightCEs(data, numeric, right);
        result = CollationCompare::compareUpToQuaternary(leftCEs, rightCEs, *settings, errorCode);
    } else {
        FCDUIterCollationIterator leftCEs(data, numeric, left, equalPrefixLength);
        FCDUIterCollationIterator rightCEs(data, numeric, right, equalPrefixLength);
        result = CollationCompare::compareUpToQuaternary(leftCEs, rightCEs, *settings, errorCode);
    }
    if(result != UCOL_EQUAL || settings->getStrength() < UCOL_IDENTICAL || U_FAILURE(errorCode)) {
        return result;
    }

    // Compare identical level: Reposition both iterators using equalPrefixLength first.
    left.move(&left, equalPrefixLength, UITER_ZERO);
    right.move(&right, equalPrefixLength, UITER_ZERO);
    const Normalizer2Impl &nfcImpl = data->nfcImpl;
    if(skipFCDCheck) {
        UIterNFDIterator leftNFD(left);
        UIterNFDIterator rightNFD(right);
        return compareNFDIter(nfcImpl, leftNFD, rightNFD);
    }
    FCDUIterNFDIterator leftNFD(data, left, equalPrefixLength);
    FCDUIterNFDIterator rightNFD(data, right, equalPrefixLength);
    return compareNFDIter(nfcImpl, leftNFD, rightNFD);
}
1279
1280
/**
 * Convenience overload: Forwards the buffer and length of s
 * to the (const char16_t *, int32_t) overload.
 */
CollationKey &
RuleBasedCollator::getCollationKey(const UnicodeString &s, CollationKey &key,
                                   UErrorCode &errorCode) const {
    const char16_t *chars = s.getBuffer();
    const int32_t numChars = s.length();
    return getCollationKey(chars, numChars, key, errorCode);
}
1285
1286
/**
 * Writes the sort key for s into key and returns key.
 *
 * key is set to bogus if errorCode already indicates a failure on entry,
 * if s is null while length is not 0 (U_ILLEGAL_ARGUMENT_ERROR),
 * or if writing the sort key fails.
 * If key turns bogus while it is being written, then U_MEMORY_ALLOCATION_ERROR is set.
 */
CollationKey &
RuleBasedCollator::getCollationKey(const char16_t *s, int32_t length, CollationKey& key,
                                   UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) {
        return key.setToBogus();
    }
    if(s == nullptr && length != 0) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return key.setToBogus();
    }
    key.reset();  // resets the "bogus" state
    CollationKeyByteSink keySink(key);
    writeSortKey(s, length, keySink, errorCode);
    if(U_FAILURE(errorCode)) {
        key.setToBogus();
        return key;
    }
    if(key.isBogus()) {
        // The sink left the key bogus without an error code being set.
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return key;
    }
    key.setLength(keySink.NumberOfBytesAppended());
    return key;
}
1308
1309
/**
 * Convenience overload: Forwards the buffer and length of s
 * to the (const char16_t *, int32_t) overload.
 */
int32_t
RuleBasedCollator::getSortKey(const UnicodeString &s,
                              uint8_t *dest, int32_t capacity) const {
    const char16_t *chars = s.getBuffer();
    const int32_t numChars = s.length();
    return getSortKey(chars, numChars, dest, capacity);
}
1314
1315
/**
 * Writes the sort key for s into dest[capacity].
 *
 * @return the number of bytes appended to the sink as reported by NumberOfBytesAppended(),
 *         or 0 for invalid arguments or if writing the sort key fails
 */
int32_t
RuleBasedCollator::getSortKey(const char16_t *s, int32_t length,
                              uint8_t *dest, int32_t capacity) const {
    // Argument checks: Each violation yields 0.
    if(s == nullptr && length != 0) { return 0; }
    if(capacity < 0) { return 0; }
    if(dest == nullptr && capacity > 0) { return 0; }

    uint8_t noDest[1] = { 0 };
    if(dest == nullptr) {
        // Distinguish pure preflighting from an allocation error.
        dest = noDest;
        capacity = 0;
    }
    FixedSortKeyByteSink keySink(reinterpret_cast<char *>(dest), capacity);
    UErrorCode status = U_ZERO_ERROR;
    writeSortKey(s, length, keySink, status);
    if(U_FAILURE(status)) {
        return 0;
    }
    return keySink.NumberOfBytesAppended();
}
1332
1333
void
1334
RuleBasedCollator::writeSortKey(const char16_t *s, int32_t length,
1335
8.12k
                                SortKeyByteSink &sink, UErrorCode &errorCode) const {
1336
8.12k
    if(U_FAILURE(errorCode)) { return; }
1337
8.12k
    const char16_t *limit = (length >= 0) ? s + length : nullptr;
1338
8.12k
    UBool numeric = settings->isNumeric();
1339
8.12k
    CollationKeys::LevelCallback callback;
1340
8.12k
    if(settings->dontCheckFCD()) {
1341
7.19k
        UTF16CollationIterator iter(data, numeric, s, s, limit);
1342
7.19k
        CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1343
7.19k
                                                  sink, Collation::PRIMARY_LEVEL,
1344
7.19k
                                                  callback, true, errorCode);
1345
7.19k
    } else {
1346
930
        FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1347
930
        CollationKeys::writeSortKeyUpToQuaternary(iter, data->compressibleBytes, *settings,
1348
930
                                                  sink, Collation::PRIMARY_LEVEL,
1349
930
                                                  callback, true, errorCode);
1350
930
    }
1351
8.12k
    if(settings->getStrength() == UCOL_IDENTICAL) {
1352
424
        writeIdenticalLevel(s, limit, sink, errorCode);
1353
424
    }
1354
8.12k
    static const char terminator = 0;  // TERMINATOR_BYTE
1355
8.12k
    sink.Append(&terminator, 1);
1356
8.12k
}
1357
1358
void
1359
RuleBasedCollator::writeIdenticalLevel(const char16_t *s, const char16_t *limit,
1360
424
                                       SortKeyByteSink &sink, UErrorCode &errorCode) const {
1361
    // NFD quick check
1362
424
    const char16_t *nfdQCYesLimit = data->nfcImpl.decompose(s, limit, nullptr, errorCode);
1363
424
    if(U_FAILURE(errorCode)) { return; }
1364
424
    sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
1365
424
    UChar32 prev = 0;
1366
424
    if(nfdQCYesLimit != s) {
1367
424
        prev = u_writeIdenticalLevelRun(prev, s, static_cast<int32_t>(nfdQCYesLimit - s), sink);
1368
424
    }
1369
    // Is there non-NFD text?
1370
424
    int32_t destLengthEstimate;
1371
424
    if(limit != nullptr) {
1372
424
        if(nfdQCYesLimit == limit) { return; }
1373
382
        destLengthEstimate = static_cast<int32_t>(limit - nfdQCYesLimit);
1374
382
    } else {
1375
        // s is NUL-terminated
1376
0
        if(*nfdQCYesLimit == 0) { return; }
1377
0
        destLengthEstimate = -1;
1378
0
    }
1379
382
    UnicodeString nfd;
1380
382
    data->nfcImpl.decompose(nfdQCYesLimit, limit, nfd, destLengthEstimate, errorCode);
1381
382
    u_writeIdenticalLevelRun(prev, nfd.getBuffer(), nfd.length(), sink);
1382
382
}
1383
1384
namespace {
1385
1386
/**
1387
 * internalNextSortKeyPart() calls CollationKeys::writeSortKeyUpToQuaternary()
1388
 * with an instance of this callback class.
1389
 * When another level is about to be written, the callback
1390
 * records the level and the number of bytes that will be written until
1391
 * the sink (which is actually a FixedSortKeyByteSink) fills up.
1392
 *
1393
 * When internalNextSortKeyPart() is called again, it restarts with the last level
1394
 * and ignores as many bytes as were written previously for that level.
1395
 */
1396
class PartLevelCallback : public CollationKeys::LevelCallback {
1397
public:
1398
    PartLevelCallback(const SortKeyByteSink &s)
1399
0
            : sink(s), level(Collation::PRIMARY_LEVEL) {
1400
0
        levelCapacity = sink.GetRemainingCapacity();
1401
0
    }
1402
0
    virtual ~PartLevelCallback() {}
1403
0
    virtual UBool needToWrite(Collation::Level l) override {
1404
0
        if(!sink.Overflowed()) {
1405
            // Remember a level that will be at least partially written.
1406
0
            level = l;
1407
0
            levelCapacity = sink.GetRemainingCapacity();
1408
0
            return true;
1409
0
        } else {
1410
0
            return false;
1411
0
        }
1412
0
    }
1413
0
    Collation::Level getLevel() const { return level; }
1414
0
    int32_t getLevelCapacity() const { return levelCapacity; }
1415
1416
private:
1417
    const SortKeyByteSink &sink;
1418
    Collation::Level level;
1419
    int32_t levelCapacity;
1420
};
1421
1422
}  // namespace
1423
1424
/**
 * Writes the next part of an incrementally generated sort key into dest.
 *
 * state[0] holds the level to resume with and state[1] the number of bytes of
 * that level to skip. Both are updated before returning.
 *
 * @param iter      text iterator; it is rewound to the start before use
 * @param state     in/out resume state (level, bytes to ignore)
 * @param dest      output buffer; must not be nullptr when count > 0
 * @param count     capacity of dest in bytes
 * @param errorCode in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR
 *                  for invalid arguments
 * @return count if more bytes were produced than fit into dest; otherwise the
 *         number of bytes appended, with the rest of dest filled with 00 bytes;
 *         0 on error or when count is 0
 */
int32_t
RuleBasedCollator::internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2],
                                           uint8_t *dest, int32_t count, UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return 0; }
    const bool missingDest = count > 0 && dest == nullptr;
    if(iter == nullptr || state == nullptr || count < 0 || missingDest) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(count == 0) { return 0; }

    // Skip the bytes of the resume level that were already delivered.
    FixedSortKeyByteSink sink(reinterpret_cast<char *>(dest), count);
    sink.IgnoreBytes(static_cast<int32_t>(state[1]));
    iter->move(iter, 0, UITER_START);

    Collation::Level level = static_cast<Collation::Level>(state[0]);
    if(level <= Collation::QUATERNARY_LEVEL) {
        const UBool numeric = settings->isNumeric();
        PartLevelCallback callback(sink);
        if(settings->dontCheckFCD()) {
            UIterCollationIterator ci(data, numeric, *iter);
            CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
                                                      sink, level, callback, false, errorCode);
        } else {
            FCDUIterCollationIterator ci(data, numeric, *iter, 0);
            CollationKeys::writeSortKeyUpToQuaternary(ci, data->compressibleBytes, *settings,
                                                      sink, level, callback, false, errorCode);
        }
        if(U_FAILURE(errorCode)) { return 0; }
        if(sink.NumberOfBytesAppended() > count) {
            // dest is full: save where the callback says the last level started.
            state[0] = static_cast<uint32_t>(callback.getLevel());
            state[1] = static_cast<uint32_t>(callback.getLevelCapacity());
            return count;
        }
        // All of the normal levels are done.
        if(settings->getStrength() == UCOL_IDENTICAL) {
            level = Collation::IDENTICAL_LEVEL;
            iter->move(iter, 0, UITER_START);
        }
        // else fall through to setting ZERO_LEVEL
    }

    if(level == Collation::IDENTICAL_LEVEL) {
        // Capture the capacity before writing so that a resume can skip the same bytes.
        const int32_t capacityBeforeLevel = sink.GetRemainingCapacity();
        UnicodeString text;
        for(UChar32 c; (c = iter->next(iter)) >= 0;) {
            text.append(static_cast<char16_t>(c));
        }
        const char16_t *textStart = text.getBuffer();
        writeIdenticalLevel(textStart, textStart + text.length(), sink, errorCode);
        if(U_FAILURE(errorCode)) { return 0; }
        if(sink.NumberOfBytesAppended() > count) {
            state[0] = static_cast<uint32_t>(level);
            state[1] = static_cast<uint32_t>(capacityBeforeLevel);
            return count;
        }
    }

    // ZERO_LEVEL: Fill the remainder of dest with 00 bytes.
    state[0] = static_cast<uint32_t>(Collation::ZERO_LEVEL);
    state[1] = 0;
    const int32_t length = sink.NumberOfBytesAppended();
    for(int32_t i = length; i < count; ++i) {
        dest[i] = 0;
    }
    return length;
}
1491
1492
void
1493
RuleBasedCollator::internalGetCEs(const UnicodeString &str, UVector64 &ces,
1494
0
                                  UErrorCode &errorCode) const {
1495
0
    if(U_FAILURE(errorCode)) { return; }
1496
0
    const char16_t *s = str.getBuffer();
1497
0
    const char16_t *limit = s + str.length();
1498
0
    UBool numeric = settings->isNumeric();
1499
0
    if(settings->dontCheckFCD()) {
1500
0
        UTF16CollationIterator iter(data, numeric, s, s, limit);
1501
0
        int64_t ce;
1502
0
        while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1503
0
            ces.addElement(ce, errorCode);
1504
0
        }
1505
0
    } else {
1506
0
        FCDUTF16CollationIterator iter(data, numeric, s, s, limit);
1507
0
        int64_t ce;
1508
0
        while((ce = iter.nextCE(errorCode)) != Collation::NO_CE) {
1509
0
            ces.addElement(ce, errorCode);
1510
0
        }
1511
0
    }
1512
0
}
1513
1514
namespace {
1515
1516
void appendSubtag(CharString &s, char letter, const char *subtag, int32_t length,
1517
0
                  UErrorCode &errorCode) {
1518
0
    if(U_FAILURE(errorCode) || length == 0) { return; }
1519
0
    if(!s.isEmpty()) {
1520
0
        s.append('_', errorCode);
1521
0
    }
1522
0
    s.append(letter, errorCode);
1523
0
    for(int32_t i = 0; i < length; ++i) {
1524
0
        s.append(uprv_toupper(subtag[i]), errorCode);
1525
0
    }
1526
0
}
1527
1528
void appendAttribute(CharString &s, char letter, UColAttributeValue value,
1529
0
                     UErrorCode &errorCode) {
1530
0
    if(U_FAILURE(errorCode)) { return; }
1531
0
    if(!s.isEmpty()) {
1532
0
        s.append('_', errorCode);
1533
0
    }
1534
0
    static const char *valueChars = "1234...........IXO..SN..LU......";
1535
0
    s.append(letter, errorCode);
1536
0
    s.append(valueChars[value], errorCode);
1537
0
}
1538
1539
}  // namespace
1540
1541
/**
 * Builds the short definition string for this collator and copies it to buffer.
 *
 * The string is made of underscore-separated items, each a letter followed by
 * a value, appended in alphabetic order of the letters:
 * explicitly set attributes (A, C, D, E, F, N, S), and the collation keyword
 * (K), language (L), region (R), variant (V) and script (Z) of the functionally
 * equivalent locale.
 *
 * @param locale    locale ID, or nullptr to use this collator's valid locale
 * @param buffer    output buffer; may be nullptr only if capacity is 0
 * @param capacity  capacity of buffer; must not be negative
 * @param errorCode in/out ICU error code; set to U_ILLEGAL_ARGUMENT_ERROR
 *                  for an invalid buffer/capacity combination
 * @return the value returned by CharString::extract(), or 0 on failure
 */
int32_t
RuleBasedCollator::internalGetShortDefinitionString(const char *locale,
                                                    char *buffer, int32_t capacity,
                                                    UErrorCode &errorCode) const {
    if(U_FAILURE(errorCode)) { return 0; }
    if((buffer == nullptr && capacity != 0) || (buffer != nullptr && capacity < 0)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    if(locale == nullptr) {
        locale = internalGetLocaleID(ULOC_VALID_LOCALE, errorCode);
    }

    // One extra char so that the result can always be NUL-terminated below.
    char resultLocale[ULOC_FULLNAME_CAPACITY + 1];
    const int32_t length = ucol_getFunctionalEquivalent(resultLocale, ULOC_FULLNAME_CAPACITY,
                                                        "collation", locale,
                                                        nullptr, &errorCode);
    if(U_FAILURE(errorCode)) { return 0; }
    resultLocale[length] = 0;

    // Append items in alphabetic order of their short definition letters.
    CharString result;

    // Appends an attribute item only if the attribute was set explicitly.
    const auto appendIfExplicit = [&](char letter, UColAttribute attr) {
        if(attributeHasBeenSetExplicitly(attr)) {
            appendAttribute(result, letter, getAttribute(attr, errorCode), errorCode);
        }
    };

    appendIfExplicit('A', UCOL_ALTERNATE_HANDLING);
    // ATTR_VARIABLE_TOP not supported because 'B' was broken.
    // See ICU tickets #10372 and #10386.
    appendIfExplicit('C', UCOL_CASE_FIRST);
    appendIfExplicit('D', UCOL_NUMERIC_COLLATION);
    appendIfExplicit('E', UCOL_CASE_LEVEL);
    appendIfExplicit('F', UCOL_FRENCH_COLLATION);
    // Note: UCOL_HIRAGANA_QUATERNARY_MODE is deprecated and never changes away from default.

    CharString collation = ulocimp_getKeywordValue(resultLocale, "collation", errorCode);
    appendSubtag(result, 'K', collation.data(), collation.length(), errorCode);

    CharString language;
    CharString script;
    CharString region;
    CharString variant;
    ulocimp_getSubtags(resultLocale, &language, &script, &region, &variant, nullptr, errorCode);

    // An empty language is written as "root".
    const bool isRoot = language.isEmpty();
    appendSubtag(result, 'L',
                 isRoot ? "root" : language.data(),
                 isRoot ? 4 : language.length(),
                 errorCode);
    appendIfExplicit('N', UCOL_NORMALIZATION_MODE);
    appendSubtag(result, 'R', region.data(), region.length(), errorCode);
    appendIfExplicit('S', UCOL_STRENGTH);
    appendSubtag(result, 'V', variant.data(), variant.length(), errorCode);
    appendSubtag(result, 'Z', script.data(), script.length(), errorCode);

    if(U_FAILURE(errorCode)) { return 0; }
    return result.extract(buffer, capacity, errorCode);
}
1607
1608
/**
 * Returns the result of the collation data's isUnsafeBackward() test for c,
 * taking the collator's numeric setting into account.
 */
UBool
RuleBasedCollator::isUnsafe(UChar32 c) const {
    const UBool numeric = settings->isNumeric();
    return data->isUnsafeBackward(c, numeric);
}
1612
1613
void U_CALLCONV
1614
0
RuleBasedCollator::computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode) {
1615
0
    t->maxExpansions = CollationElementIterator::computeMaxExpansions(t->data, errorCode);
1616
0
}
1617
1618
/**
 * Runs computeMaxExpansions() for this collator's tailoring through
 * umtx_initOnce().
 *
 * @param errorCode in/out ICU error code
 * @return true if errorCode indicates success afterwards
 */
UBool
RuleBasedCollator::initMaxExpansions(UErrorCode &errorCode) const {
    umtx_initOnce(tailoring->maxExpansionsInitOnce, computeMaxExpansions, tailoring, errorCode);
    if(U_FAILURE(errorCode)) { return false; }
    return true;
}
1623
1624
/**
 * Creates a CollationElementIterator over a string.
 *
 * @param source the text to iterate over
 * @return a new iterator owned by the caller, or nullptr if the max-expansions
 *         table could not be initialized or the iterator could not be created
 */
CollationElementIterator *
RuleBasedCollator::createCollationElementIterator(const UnicodeString& source) const {
    UErrorCode errorCode = U_ZERO_ERROR;
    if(!initMaxExpansions(errorCode)) { return nullptr; }
    // The LocalPointer deletes the iterator again if its construction failed.
    LocalPointer<CollationElementIterator> cei(
            new CollationElementIterator(source, this, errorCode), errorCode);
    if(U_FAILURE(errorCode)) { return nullptr; }
    return cei.orphan();
}
1635
1636
/**
 * Creates a CollationElementIterator over the text of a CharacterIterator.
 *
 * @param source the text to iterate over
 * @return a new iterator owned by the caller, or nullptr if the max-expansions
 *         table could not be initialized or the iterator could not be created
 */
CollationElementIterator *
RuleBasedCollator::createCollationElementIterator(const CharacterIterator& source) const {
    UErrorCode errorCode = U_ZERO_ERROR;
    if(!initMaxExpansions(errorCode)) { return nullptr; }
    // The LocalPointer deletes the iterator again if its construction failed.
    LocalPointer<CollationElementIterator> cei(
            new CollationElementIterator(source, this, errorCode), errorCode);
    if(U_FAILURE(errorCode)) { return nullptr; }
    return cei.orphan();
}
1647
1648
/**
 * Looks up the maximum expansion for a collation order in the tailoring's
 * max-expansions table.
 *
 * @param order the collation order to look up
 * @return the result of CollationElementIterator::getMaxExpansion()
 */
int32_t
RuleBasedCollator::getMaxExpansion(int32_t order) const {
    UErrorCode errorCode = U_ZERO_ERROR;
    // The init result is ignored; the lookup runs with whatever table is set.
    (void)initMaxExpansions(errorCode);
    auto *table = tailoring->maxExpansions;
    return CollationElementIterator::getMaxExpansion(table, order);
}
1654
1655
U_NAMESPACE_END
1656
1657
#endif  // !UCONFIG_NO_COLLATION