Coverage Report

Created: 2026-01-22 06:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/i18n/collationkeys.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2012-2015, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* collationkeys.cpp
9
*
10
* created on: 2012sep02
11
* created by: Markus W. Scherer
12
*/
13
14
#include "unicode/utypes.h"
15
16
#if !UCONFIG_NO_COLLATION
17
18
#include "unicode/bytestream.h"
19
#include "collation.h"
20
#include "collationiterator.h"
21
#include "collationkeys.h"
22
#include "collationsettings.h"
23
#include "uassert.h"
24
25
U_NAMESPACE_BEGIN
26
27
8.18k
/** Out-of-line destructor; it performs no work beyond the implicit member/base cleanup. */
SortKeyByteSink::~SortKeyByteSink() = default;
28
29
void
30
1.74M
SortKeyByteSink::Append(const char *bytes, int32_t n) {
31
1.74M
    if (n <= 0 || bytes == nullptr) {
32
266
        return;
33
266
    }
34
1.74M
    if (ignore_ > 0) {
35
0
        int32_t ignoreRest = ignore_ - n;
36
0
        if (ignoreRest >= 0) {
37
0
            ignore_ = ignoreRest;
38
0
            return;
39
0
        } else {
40
0
            bytes += ignore_;
41
0
            n = -ignoreRest;
42
0
            ignore_ = 0;
43
0
        }
44
0
    }
45
1.74M
    int32_t length = appended_;
46
1.74M
    appended_ += n;
47
1.74M
    if ((buffer_ + length) == bytes) {
48
493
        return;  // the caller used GetAppendBuffer() and wrote the bytes already
49
493
    }
50
1.74M
    int32_t available = capacity_ - length;
51
1.74M
    if (n <= available) {
52
866k
        uprv_memcpy(buffer_ + length, bytes, n);
53
875k
    } else {
54
875k
        AppendBeyondCapacity(bytes, n, length);
55
875k
    }
56
1.74M
}
57
58
char *
59
SortKeyByteSink::GetAppendBuffer(int32_t min_capacity,
60
                                 int32_t desired_capacity_hint,
61
                                 char *scratch,
62
                                 int32_t scratch_capacity,
63
2.97k
                                 int32_t *result_capacity) {
64
2.97k
    if (min_capacity < 1 || scratch_capacity < min_capacity) {
65
0
        *result_capacity = 0;
66
0
        return nullptr;
67
0
    }
68
2.97k
    if (ignore_ > 0) {
69
        // Do not write ignored bytes right at the end of the buffer.
70
0
        *result_capacity = scratch_capacity;
71
0
        return scratch;
72
0
    }
73
2.97k
    int32_t available = capacity_ - appended_;
74
2.97k
    if (available >= min_capacity) {
75
620
        *result_capacity = available;
76
620
        return buffer_ + appended_;
77
2.35k
    } else if (Resize(desired_capacity_hint, appended_)) {
78
2
        *result_capacity = capacity_ - appended_;
79
2
        return buffer_ + appended_;
80
2.35k
    } else {
81
2.35k
        *result_capacity = scratch_capacity;
82
2.35k
        return scratch;
83
2.35k
    }
84
2.97k
}
85
86
namespace {
87
88
/**
89
 * uint8_t byte buffer, similar to CharString but simpler.
90
 */
91
class SortKeyLevel : public UMemory {
92
public:
93
32.7k
    SortKeyLevel() : len(0), ok(true) {}
94
32.7k
    ~SortKeyLevel() {}
95
96
    /** @return false if memory allocation failed */
97
16.7k
    UBool isOk() const { return ok; }
98
268
    UBool isEmpty() const { return len == 0; }
99
2.14k
    int32_t length() const { return len; }
100
0
    const uint8_t *data() const { return buffer.getAlias(); }
101
0
    uint8_t operator[](int32_t index) const { return buffer[index]; }
102
103
1.07k
    uint8_t *data() { return buffer.getAlias(); }
104
105
    void appendByte(uint32_t b);
106
    void appendWeight16(uint32_t w);
107
    void appendWeight32(uint32_t w);
108
    void appendReverseWeight16(uint32_t w);
109
110
    /** Appends all but the last byte to the sink. The last byte should be the 01 terminator. */
111
16.7k
    void appendTo(ByteSink &sink) const {
112
16.7k
        U_ASSERT(len > 0 && buffer[len - 1] == 1);
113
16.7k
        sink.Append(reinterpret_cast<const char *>(buffer.getAlias()), len - 1);
114
16.7k
    }
115
116
private:
117
    MaybeStackArray<uint8_t, 40> buffer;
118
    int32_t len;
119
    UBool ok;
120
121
    UBool ensureCapacity(int32_t appendCapacity);
122
123
    SortKeyLevel(const SortKeyLevel &other); // forbid copying of this class
124
    SortKeyLevel &operator=(const SortKeyLevel &other); // forbid copying of this class
125
};
126
127
911k
void SortKeyLevel::appendByte(uint32_t b) {
128
911k
    if(len < buffer.getCapacity() || ensureCapacity(1)) {
129
911k
        buffer[len++] = static_cast<uint8_t>(b);
130
911k
    }
131
911k
}
132
133
void
134
1.07M
SortKeyLevel::appendWeight16(uint32_t w) {
135
1.07M
    U_ASSERT((w & 0xffff) != 0);
136
1.07M
    uint8_t b0 = static_cast<uint8_t>(w >> 8);
137
1.07M
    uint8_t b1 = static_cast<uint8_t>(w);
138
1.07M
    int32_t appendLength = (b1 == 0) ? 1 : 2;
139
1.07M
    if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
140
1.07M
        buffer[len++] = b0;
141
1.07M
        if(b1 != 0) {
142
51.8k
            buffer[len++] = b1;
143
51.8k
        }
144
1.07M
    }
145
1.07M
}
146
147
void
148
33.5k
SortKeyLevel::appendWeight32(uint32_t w) {
149
33.5k
    U_ASSERT(w != 0);
150
33.5k
    uint8_t bytes[4] = {
151
33.5k
        static_cast<uint8_t>(w >> 24),
152
33.5k
        static_cast<uint8_t>(w >> 16),
153
33.5k
        static_cast<uint8_t>(w >> 8),
154
33.5k
        static_cast<uint8_t>(w)
155
33.5k
    };
156
33.5k
    int32_t appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4;
157
33.5k
    if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
158
33.5k
        buffer[len++] = bytes[0];
159
33.5k
        if(bytes[1] != 0) {
160
27.9k
            buffer[len++] = bytes[1];
161
27.9k
            if(bytes[2] != 0) {
162
7.94k
                buffer[len++] = bytes[2];
163
7.94k
                if(bytes[3] != 0) {
164
4.49k
                    buffer[len++] = bytes[3];
165
4.49k
                }
166
7.94k
            }
167
27.9k
        }
168
33.5k
    }
169
33.5k
}
170
171
void
172
137k
SortKeyLevel::appendReverseWeight16(uint32_t w) {
173
137k
    U_ASSERT((w & 0xffff) != 0);
174
137k
    uint8_t b0 = static_cast<uint8_t>(w >> 8);
175
137k
    uint8_t b1 = static_cast<uint8_t>(w);
176
137k
    int32_t appendLength = (b1 == 0) ? 1 : 2;
177
137k
    if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
178
137k
        if(b1 == 0) {
179
132k
            buffer[len++] = b0;
180
132k
        } else {
181
4.47k
            buffer[len] = b1;
182
4.47k
            buffer[len + 1] = b0;
183
4.47k
            len += 2;
184
4.47k
        }
185
137k
    }
186
137k
}
187
188
5.00k
/**
 * Grows the buffer so that appendCapacity more bytes fit.
 * The new capacity is the largest of: twice the current capacity,
 * len + 2 * appendCapacity, and 200. The first len bytes are preserved.
 *
 * @return true on success; false if an earlier or this allocation failed
 *         (the failure is remembered in ok)
 */
UBool SortKeyLevel::ensureCapacity(int32_t appendCapacity) {
    if(!ok) {
        return false;
    }
    int32_t target = 2 * buffer.getCapacity();
    const int32_t minimum = len + 2 * appendCapacity;
    if(target < minimum) {
        target = minimum;
    }
    if(target < 200) {
        target = 200;
    }
    if(buffer.resize(target, len) != nullptr) {
        return true;
    }
    ok = false;
    return false;
}
205
206
}  // namespace
207
208
8.18k
/** Out-of-line destructor; it performs no work of its own. */
CollationKeys::LevelCallback::~LevelCallback() = default;
209
210
/**
 * Default implementation: every level is to be written.
 * @return true regardless of the level
 */
UBool
CollationKeys::LevelCallback::needToWrite(Collation::Level /*level*/) {
    return true;
}
212
213
/**
214
 * Map from collation strength (UColAttributeValue)
215
 * to a mask of Collation::Level bits up to that strength,
216
 * excluding the CASE_LEVEL which is independent of the strength,
217
 * and excluding IDENTICAL_LEVEL which this function does not write.
218
 */
219
static const uint32_t levelMasks[UCOL_STRENGTH_LIMIT] = {
220
    2,          // UCOL_PRIMARY -> PRIMARY_LEVEL
221
    6,          // UCOL_SECONDARY -> up to SECONDARY_LEVEL
222
    0x16,       // UCOL_TERTIARY -> up to TERTIARY_LEVEL
223
    0x36,       // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL
224
    0, 0, 0, 0,
225
    0, 0, 0, 0,
226
    0, 0, 0,
227
    0x36        // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL
228
};
229
230
void
231
CollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
232
                                          const UBool *compressibleBytes,
233
                                          const CollationSettings &settings,
234
                                          SortKeyByteSink &sink,
235
                                          Collation::Level minLevel, LevelCallback &callback,
236
8.18k
                                          UBool preflight, UErrorCode &errorCode) {
237
8.18k
    if(U_FAILURE(errorCode)) { return; }
238
239
8.18k
    int32_t options = settings.options;
240
    // Set of levels to process and write.
241
8.18k
    uint32_t levels = levelMasks[CollationSettings::getStrength(options)];
242
8.18k
    if((options & CollationSettings::CASE_LEVEL) != 0) {
243
0
        levels |= Collation::CASE_LEVEL_FLAG;
244
0
    }
245
    // Minus the levels below minLevel.
246
8.18k
    levels &= ~((static_cast<uint32_t>(1) << minLevel) - 1);
247
8.18k
    if(levels == 0) { return; }
248
249
8.18k
    uint32_t variableTop;
250
8.18k
    if((options & CollationSettings::ALTERNATE_MASK) == 0) {
251
7.38k
        variableTop = 0;
252
7.38k
    } else {
253
        // +1 so that we can use "<" and primary ignorables test out early.
254
796
        variableTop = settings.variableTop + 1;
255
796
    }
256
257
8.18k
    uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options);
258
259
8.18k
    SortKeyLevel cases;
260
8.18k
    SortKeyLevel secondaries;
261
8.18k
    SortKeyLevel tertiaries;
262
8.18k
    SortKeyLevel quaternaries;
263
264
8.18k
    uint32_t prevReorderedPrimary = 0;  // 0==no compression
265
8.18k
    int32_t commonCases = 0;
266
8.18k
    int32_t commonSecondaries = 0;
267
8.18k
    int32_t commonTertiaries = 0;
268
8.18k
    int32_t commonQuaternaries = 0;
269
270
8.18k
    uint32_t prevSecondary = 0;
271
8.18k
    int32_t secSegmentStart = 0;
272
273
2.53M
    for(;;) {
274
        // No need to keep all CEs in the buffer when we write a sort key.
275
2.53M
        iter.clearCEsIfNoneRemaining();
276
2.53M
        int64_t ce = iter.nextCE(errorCode);
277
2.53M
        uint32_t p = static_cast<uint32_t>(ce >> 32);
278
2.53M
        if(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY) {
279
            // Variable CE, shift it to quaternary level.
280
            // Ignore all following primary ignorables, and shift further variable CEs.
281
46.9k
            if(commonQuaternaries != 0) {
282
23.2k
                --commonQuaternaries;
283
23.8k
                while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
284
612
                    quaternaries.appendByte(QUAT_COMMON_MIDDLE);
285
612
                    commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
286
612
                }
287
                // Shifted primary weights are lower than the common weight.
288
23.2k
                quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries);
289
23.2k
                commonQuaternaries = 0;
290
23.2k
            }
291
61.8k
            do {
292
61.8k
                if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
293
33.5k
                    if(settings.hasReordering()) {
294
33.5k
                        p = settings.reorder(p);
295
33.5k
                    }
296
33.5k
                    if((p >> 24) >= QUAT_SHIFTED_LIMIT_BYTE) {
297
                        // Prevent shifted primary lead bytes from
298
                        // overlapping with the common compression range.
299
0
                        quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
300
0
                    }
301
33.5k
                    quaternaries.appendWeight32(p);
302
33.5k
                }
303
65.7k
                do {
304
65.7k
                    ce = iter.nextCE(errorCode);
305
65.7k
                    p = static_cast<uint32_t>(ce >> 32);
306
65.7k
                } while(p == 0);
307
61.8k
            } while(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY);
308
46.9k
        }
309
        // ce could be primary ignorable, or NO_CE, or the merge separator,
310
        // or a regular primary CE, but it is not variable.
311
        // If ce==NO_CE, then write nothing for the primary level but
312
        // terminate compression on all levels and then exit the loop.
313
2.53M
        if(p > Collation::NO_CE_PRIMARY && (levels & Collation::PRIMARY_LEVEL_FLAG) != 0) {
314
            // Test the un-reordered primary for compressibility.
315
2.11M
            UBool isCompressible = compressibleBytes[p >> 24];
316
2.11M
            if(settings.hasReordering()) {
317
763k
                p = settings.reorder(p);
318
763k
            }
319
2.11M
            uint32_t p1 = p >> 24;
320
2.11M
            if(!isCompressible || p1 != (prevReorderedPrimary >> 24)) {
321
1.76M
                if(prevReorderedPrimary != 0) {
322
393k
                    if(p < prevReorderedPrimary) {
323
                        // No primary compression terminator
324
                        // at the end of the level or merged segment.
325
269k
                        if(p1 > Collation::MERGE_SEPARATOR_BYTE) {
326
268k
                            sink.Append(Collation::PRIMARY_COMPRESSION_LOW_BYTE);
327
268k
                        }
328
269k
                    } else {
329
124k
                        sink.Append(Collation::PRIMARY_COMPRESSION_HIGH_BYTE);
330
124k
                    }
331
393k
                }
332
1.76M
                sink.Append(p1);
333
1.76M
                if(isCompressible) {
334
396k
                    prevReorderedPrimary = p;
335
1.36M
                } else {
336
1.36M
                    prevReorderedPrimary = 0;
337
1.36M
                }
338
1.76M
            }
339
2.11M
            char p2 = static_cast<char>(p >> 16);
340
2.11M
            if(p2 != 0) {
341
1.71M
                char buffer[3] = {p2, static_cast<char>(p >> 8), static_cast<char>(p)};
342
1.71M
                sink.Append(buffer, (buffer[1] == 0) ? 1 : (buffer[2] == 0) ? 2 : 3);
343
1.71M
            }
344
            // Optimization for internalNextSortKeyPart():
345
            // When the primary level overflows we can stop because we need not
346
            // calculate (preflight) the whole sort key length.
347
2.11M
            if(!preflight && sink.Overflowed()) {
348
0
                if(U_SUCCESS(errorCode) && !sink.IsOk()) {
349
0
                    errorCode = U_MEMORY_ALLOCATION_ERROR;
350
0
                }
351
0
                return;
352
0
            }
353
2.11M
        }
354
355
2.53M
        uint32_t lower32 = static_cast<uint32_t>(ce);
356
2.53M
        if(lower32 == 0) { continue; }  // completely ignorable, no secondary/case/tertiary/quaternary
357
358
2.50M
        if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
359
2.50M
            uint32_t s = lower32 >> 16;
360
2.50M
            if(s == 0) {
361
                // secondary ignorable
362
2.50M
            } else if(s == Collation::COMMON_WEIGHT16 &&
363
2.03M
                    ((options & CollationSettings::BACKWARD_SECONDARY) == 0 ||
364
2.03M
                        p != Collation::MERGE_SEPARATOR_PRIMARY)) {
365
                // s is a common secondary weight, and
366
                // backwards-secondary is off or the ce is not the merge separator.
367
2.03M
                ++commonSecondaries;
368
2.03M
            } else if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
369
327k
                if(commonSecondaries != 0) {
370
283k
                    --commonSecondaries;
371
302k
                    while(commonSecondaries >= SEC_COMMON_MAX_COUNT) {
372
18.9k
                        secondaries.appendByte(SEC_COMMON_MIDDLE);
373
18.9k
                        commonSecondaries -= SEC_COMMON_MAX_COUNT;
374
18.9k
                    }
375
283k
                    uint32_t b;
376
283k
                    if(s < Collation::COMMON_WEIGHT16) {
377
6.35k
                        b = SEC_COMMON_LOW + commonSecondaries;
378
276k
                    } else {
379
276k
                        b = SEC_COMMON_HIGH - commonSecondaries;
380
276k
                    }
381
283k
                    secondaries.appendByte(b);
382
283k
                    commonSecondaries = 0;
383
283k
                }
384
327k
                secondaries.appendWeight16(s);
385
327k
            } else {
386
138k
                if(commonSecondaries != 0) {
387
136k
                    --commonSecondaries;
388
                    // Append reverse weights. The level will be re-reversed later.
389
136k
                    int32_t remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;
390
136k
                    uint32_t b;
391
136k
                    if(prevSecondary < Collation::COMMON_WEIGHT16) {
392
1.54k
                        b = SEC_COMMON_LOW + remainder;
393
135k
                    } else {
394
135k
                        b = SEC_COMMON_HIGH - remainder;
395
135k
                    }
396
136k
                    secondaries.appendByte(b);
397
136k
                    commonSecondaries -= remainder;
398
                    // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.
399
141k
                    while(commonSecondaries > 0) {  // same as >= SEC_COMMON_MAX_COUNT
400
5.31k
                        secondaries.appendByte(SEC_COMMON_MIDDLE);
401
5.31k
                        commonSecondaries -= SEC_COMMON_MAX_COUNT;
402
5.31k
                    }
403
                    // commonSecondaries == 0
404
136k
                }
405
138k
                if(0 < p && p <= Collation::MERGE_SEPARATOR_PRIMARY) {
406
                    // The backwards secondary level compares secondary weights backwards
407
                    // within segments separated by the merge separator (U+FFFE).
408
1.07k
                    uint8_t *secs = secondaries.data();
409
1.07k
                    int32_t last = secondaries.length() - 1;
410
1.07k
                    if(secSegmentStart < last) {
411
590
                        uint8_t *q = secs + secSegmentStart;
412
590
                        uint8_t *r = secs + last;
413
141k
                        do {
414
141k
                            uint8_t b = *q;
415
141k
                            *q++ = *r;
416
141k
                            *r-- = b;
417
141k
                        } while(q < r);
418
590
                    }
419
1.07k
                    secondaries.appendByte(p == Collation::NO_CE_PRIMARY ?
420
732
                        Collation::LEVEL_SEPARATOR_BYTE : Collation::MERGE_SEPARATOR_BYTE);
421
1.07k
                    prevSecondary = 0;
422
1.07k
                    secSegmentStart = secondaries.length();
423
137k
                } else {
424
137k
                    secondaries.appendReverseWeight16(s);
425
137k
                    prevSecondary = s;
426
137k
                }
427
138k
            }
428
2.50M
        }
429
430
2.50M
        if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
431
0
            if((CollationSettings::getStrength(options) == UCOL_PRIMARY) ?
432
0
                    p == 0 : lower32 <= 0xffff) {
433
                // Primary+caseLevel: Ignore case level weights of primary ignorables.
434
                // Otherwise: Ignore case level weights of secondary ignorables.
435
                // For details see the comments in the CollationCompare class.
436
0
            } else {
437
0
                uint32_t c = (lower32 >> 8) & 0xff;  // case bits & tertiary lead byte
438
0
                U_ASSERT((c & 0xc0) != 0xc0);
439
0
                if((c & 0xc0) == 0 && c > Collation::LEVEL_SEPARATOR_BYTE) {
440
0
                    ++commonCases;
441
0
                } else {
442
0
                    if((options & CollationSettings::UPPER_FIRST) == 0) {
443
                        // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, upper=15.
444
                        // If there are only common (=lowest) weights in the whole level,
445
                        // then we need not write anything.
446
                        // Level length differences are handled already on the next-higher level.
447
0
                        if(commonCases != 0 &&
448
0
                                (c > Collation::LEVEL_SEPARATOR_BYTE || !cases.isEmpty())) {
449
0
                            --commonCases;
450
0
                            while(commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
451
0
                                cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
452
0
                                commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
453
0
                            }
454
0
                            uint32_t b;
455
0
                            if(c <= Collation::LEVEL_SEPARATOR_BYTE) {
456
0
                                b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
457
0
                            } else {
458
0
                                b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
459
0
                            }
460
0
                            cases.appendByte(b << 4);
461
0
                            commonCases = 0;
462
0
                        }
463
0
                        if(c > Collation::LEVEL_SEPARATOR_BYTE) {
464
0
                            c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >> 6)) << 4;  // 14 or 15
465
0
                        }
466
0
                    } else {
467
                        // upperFirst: Compress common weights to nibbles 3..15, mixed=2, upper=1.
468
                        // The compressed common case weights only go up from the "low" value
469
                        // because with upperFirst the common weight is the highest one.
470
0
                        if(commonCases != 0) {
471
0
                            --commonCases;
472
0
                            while(commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) {
473
0
                                cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4);
474
0
                                commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT;
475
0
                            }
476
0
                            cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
477
0
                            commonCases = 0;
478
0
                        }
479
0
                        if(c > Collation::LEVEL_SEPARATOR_BYTE) {
480
0
                            c = (CASE_UPPER_FIRST_COMMON_LOW - (c >> 6)) << 4;  // 2 or 1
481
0
                        }
482
0
                    }
483
                    // c is a separator byte 01,
484
                    // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
485
0
                    cases.appendByte(c);
486
0
                }
487
0
            }
488
0
        }
489
490
2.50M
        if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
491
2.50M
            uint32_t t = lower32 & tertiaryMask;
492
2.50M
            U_ASSERT((lower32 & 0xc000) != 0xc000);
493
2.50M
            if(t == Collation::COMMON_WEIGHT16) {
494
1.75M
                ++commonTertiaries;
495
1.75M
            } else if((tertiaryMask & 0x8000) == 0) {
496
                // Tertiary weights without case bits.
497
                // Move lead bytes 06..3F to C6..FF for a large common-weight range.
498
623k
                if(commonTertiaries != 0) {
499
286k
                    --commonTertiaries;
500
292k
                    while(commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) {
501
6.03k
                        tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE);
502
6.03k
                        commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT;
503
6.03k
                    }
504
286k
                    uint32_t b;
505
286k
                    if(t < Collation::COMMON_WEIGHT16) {
506
7.77k
                        b = TER_ONLY_COMMON_LOW + commonTertiaries;
507
279k
                    } else {
508
279k
                        b = TER_ONLY_COMMON_HIGH - commonTertiaries;
509
279k
                    }
510
286k
                    tertiaries.appendByte(b);
511
286k
                    commonTertiaries = 0;
512
286k
                }
513
623k
                if(t > Collation::COMMON_WEIGHT16) { t += 0xc000; }
514
623k
                tertiaries.appendWeight16(t);
515
623k
            } else if((options & CollationSettings::UPPER_FIRST) == 0) {
516
                // Tertiary weights with caseFirst=lowerFirst.
517
                // Move lead bytes 06..BF to 46..FF for the common-weight range.
518
0
                if(commonTertiaries != 0) {
519
0
                    --commonTertiaries;
520
0
                    while(commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) {
521
0
                        tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE);
522
0
                        commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT;
523
0
                    }
524
0
                    uint32_t b;
525
0
                    if(t < Collation::COMMON_WEIGHT16) {
526
0
                        b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries;
527
0
                    } else {
528
0
                        b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries;
529
0
                    }
530
0
                    tertiaries.appendByte(b);
531
0
                    commonTertiaries = 0;
532
0
                }
533
0
                if(t > Collation::COMMON_WEIGHT16) { t += 0x4000; }
534
0
                tertiaries.appendWeight16(t);
535
122k
            } else {
536
                // Tertiary weights with caseFirst=upperFirst.
537
                // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
538
                // to keep tertiary CEs well-formed.
539
                // Their case+tertiary weights must be greater than those of
540
                // primary and secondary CEs.
541
                //
542
                // Separator         01 -> 01      (unchanged)
543
                // Lowercase     02..04 -> 82..84  (includes uncased)
544
                // Common weight     05 -> 85..C5  (common-weight compression range)
545
                // Lowercase     06..3F -> C6..FF
546
                // Mixed case    42..7F -> 42..7F
547
                // Uppercase     82..BF -> 02..3F
548
                // Tertiary CE   86..BF -> C6..FF
549
122k
                if(t <= Collation::NO_CE_WEIGHT16) {
550
                    // Keep separators unchanged.
551
122k
                } else if(lower32 > 0xffff) {
552
                    // Invert case bits of primary & secondary CEs.
553
120k
                    t ^= 0xc000;
554
120k
                    if(t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) {
555
4.02k
                        t -= 0x4000;
556
4.02k
                    }
557
120k
                } else {
558
                    // Keep uppercase bits of tertiary CEs.
559
2.35k
                    U_ASSERT(0x8600 <= t && t <= 0xbfff);
560
2.35k
                    t += 0x4000;
561
2.35k
                }
562
122k
                if(commonTertiaries != 0) {
563
118k
                    --commonTertiaries;
564
123k
                    while(commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) {
565
5.79k
                        tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE);
566
5.79k
                        commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT;
567
5.79k
                    }
568
118k
                    uint32_t b;
569
118k
                    if(t < (TER_UPPER_FIRST_COMMON_LOW << 8)) {
570
3.71k
                        b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries;
571
114k
                    } else {
572
114k
                        b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries;
573
114k
                    }
574
118k
                    tertiaries.appendByte(b);
575
118k
                    commonTertiaries = 0;
576
118k
                }
577
122k
                tertiaries.appendWeight16(t);
578
122k
            }
579
2.50M
        }
580
581
2.50M
        if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
582
556k
            uint32_t q = lower32 & 0xffff;
583
556k
            if((q & 0xc0) == 0 && q > Collation::NO_CE_WEIGHT16) {
584
542k
                ++commonQuaternaries;
585
542k
            } else if(q == Collation::NO_CE_WEIGHT16 &&
586
454
                    (options & CollationSettings::ALTERNATE_MASK) == 0 &&
587
268
                    quaternaries.isEmpty()) {
588
                // If alternate=non-ignorable and there are only common quaternary weights,
589
                // then we need not write anything.
590
                // The only weights greater than the merge separator and less than the common weight
591
                // are shifted primary weights, which are not generated for alternate=non-ignorable.
592
                // There are also exactly as many quaternary weights as tertiary weights,
593
                // so level length differences are handled already on tertiary level.
594
                // Any above-common quaternary weight will compare greater regardless.
595
212
                quaternaries.appendByte(Collation::LEVEL_SEPARATOR_BYTE);
596
13.9k
            } else {
597
13.9k
                if(q == Collation::NO_CE_WEIGHT16) {
598
242
                    q = Collation::LEVEL_SEPARATOR_BYTE;
599
13.7k
                } else {
600
13.7k
                    q = 0xfc + ((q >> 6) & 3);
601
13.7k
                }
602
13.9k
                if(commonQuaternaries != 0) {
603
10.2k
                    --commonQuaternaries;
604
11.6k
                    while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
605
1.40k
                        quaternaries.appendByte(QUAT_COMMON_MIDDLE);
606
1.40k
                        commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
607
1.40k
                    }
608
10.2k
                    uint32_t b;
609
10.2k
                    if(q < QUAT_COMMON_LOW) {
610
222
                        b = QUAT_COMMON_LOW + commonQuaternaries;
611
9.99k
                    } else {
612
9.99k
                        b = QUAT_COMMON_HIGH - commonQuaternaries;
613
9.99k
                    }
614
10.2k
                    quaternaries.appendByte(b);
615
10.2k
                    commonQuaternaries = 0;
616
10.2k
                }
617
13.9k
                quaternaries.appendByte(q);
618
13.9k
            }
619
556k
        }
620
621
2.50M
        if((lower32 >> 24) == Collation::LEVEL_SEPARATOR_BYTE) { break; }  // ce == NO_CE
622
2.50M
    }
623
624
8.18k
    if(U_FAILURE(errorCode)) { return; }
625
626
    // Append the beyond-primary levels.
627
8.18k
    UBool ok = true;
628
8.18k
    if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
629
8.16k
        if(!callback.needToWrite(Collation::SECONDARY_LEVEL)) { return; }
630
8.16k
        ok &= secondaries.isOk();
631
8.16k
        sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
632
8.16k
        secondaries.appendTo(sink);
633
8.16k
    }
634
635
8.18k
    if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
636
0
        if(!callback.needToWrite(Collation::CASE_LEVEL)) { return; }
637
0
        ok &= cases.isOk();
638
0
        sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
639
        // Write pairs of nibbles as bytes, except separator bytes as themselves.
640
0
        int32_t length = cases.length() - 1;  // Ignore the trailing NO_CE.
641
0
        uint8_t b = 0;
642
0
        for(int32_t i = 0; i < length; ++i) {
643
0
            uint8_t c = cases[i];
644
0
            U_ASSERT((c & 0xf) == 0 && c != 0);
645
0
            if(b == 0) {
646
0
                b = c;
647
0
            } else {
648
0
                sink.Append(b | (c >> 4));
649
0
                b = 0;
650
0
            }
651
0
        }
652
0
        if(b != 0) {
653
0
            sink.Append(b);
654
0
        }
655
0
    }
656
657
8.18k
    if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
658
8.16k
        if(!callback.needToWrite(Collation::TERTIARY_LEVEL)) { return; }
659
8.16k
        ok &= tertiaries.isOk();
660
8.16k
        sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
661
8.16k
        tertiaries.appendTo(sink);
662
8.16k
    }
663
664
8.18k
    if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
665
454
        if(!callback.needToWrite(Collation::QUATERNARY_LEVEL)) { return; }
666
454
        ok &= quaternaries.isOk();
667
454
        sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
668
454
        quaternaries.appendTo(sink);
669
454
    }
670
671
8.18k
    if(!ok || !sink.IsOk()) {
672
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
673
0
    }
674
8.18k
}
675
676
U_NAMESPACE_END
677
678
#endif  // !UCONFIG_NO_COLLATION