Coverage Report

Created: 2026-01-25 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/umutablecptrie.cpp
Line
Count
Source
1
// © 2017 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// umutablecptrie.cpp (inspired by utrie2_builder.cpp)
5
// created: 2017dec29 Markus W. Scherer
6
7
// #define UCPTRIE_DEBUG
8
#ifdef UCPTRIE_DEBUG
9
#   include <stdio.h>
10
#endif
11
12
#include "unicode/utypes.h"
13
#include "unicode/ucptrie.h"
14
#include "unicode/umutablecptrie.h"
15
#include "unicode/uobject.h"
16
#include "unicode/utf16.h"
17
#include "cmemory.h"
18
#include "uassert.h"
19
#include "ucptrie_impl.h"
20
21
// ICU-20235 In case Microsoft math.h has defined this, undefine it.
22
#ifdef OVERFLOW
23
#undef OVERFLOW
24
#endif
25
26
U_NAMESPACE_BEGIN
27
28
namespace {
29
30
constexpr int32_t MAX_UNICODE = 0x10ffff;
31
32
constexpr int32_t UNICODE_LIMIT = 0x110000;
33
constexpr int32_t BMP_LIMIT = 0x10000;
34
constexpr int32_t ASCII_LIMIT = 0x80;
35
36
constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3;
37
constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3;
38
constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3;
39
40
constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3));
41
42
// Flag values for data blocks.
43
constexpr uint8_t ALL_SAME = 0;
44
constexpr uint8_t MIXED = 1;
45
constexpr uint8_t SAME_AS = 2;
46
47
/** Start with allocation of 16k data entries. */
48
constexpr int32_t INITIAL_DATA_LENGTH = static_cast<int32_t>(1) << 14;
49
50
/** Grow about 8x each time. */
51
constexpr int32_t MEDIUM_DATA_LENGTH = static_cast<int32_t>(1) << 17;
52
53
/**
54
 * Maximum length of the build-time data array.
55
 * One entry per 0x110000 code points.
56
 */
57
constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT;
58
59
// Flag values for index-3 blocks while compacting/building.
60
constexpr uint8_t I3_NULL = 0;
61
constexpr uint8_t I3_BMP = 1;
62
constexpr uint8_t I3_16 = 2;
63
constexpr uint8_t I3_18 = 3;
64
65
constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8;
66
67
class AllSameBlocks;
68
class MixedBlocks;
69
70
class MutableCodePointTrie : public UMemory {
71
public:
72
    MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode);
73
    MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode);
74
    MutableCodePointTrie(const MutableCodePointTrie &other) = delete;
75
    ~MutableCodePointTrie();
76
77
    MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete;
78
79
    static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode);
80
    static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode);
81
82
    uint32_t get(UChar32 c) const;
83
    int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context,
84
                     uint32_t *pValue) const;
85
86
    void set(UChar32 c, uint32_t value, UErrorCode &errorCode);
87
    void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode);
88
89
    UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode);
90
91
private:
92
    void clear();
93
94
    bool ensureHighStart(UChar32 c);
95
    int32_t allocDataBlock(int32_t blockLength);
96
    int32_t getDataBlock(int32_t i);
97
98
    void maskValues(uint32_t mask);
99
    UChar32 findHighStart() const;
100
    int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks);
101
    int32_t compactData(
102
            int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
103
            int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
104
    int32_t compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
105
    int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode);
106
107
    uint32_t *index = nullptr;
108
    int32_t indexCapacity = 0;
109
    int32_t index3NullOffset = -1;
110
    uint32_t *data = nullptr;
111
    int32_t dataCapacity = 0;
112
    int32_t dataLength = 0;
113
    int32_t dataNullOffset = -1;
114
115
    uint32_t origInitialValue;
116
    uint32_t initialValue;
117
    uint32_t errorValue;
118
    UChar32 highStart;
119
    uint32_t highValue;
120
#ifdef UCPTRIE_DEBUG
121
public:
122
    const char *name;
123
#endif
124
private:
125
    /** Temporary array while building the final data. */
126
    uint16_t *index16 = nullptr;
127
    uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3];
128
};
129
130
MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) :
131
2.59k
        origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue),
132
2.59k
        highStart(0), highValue(initialValue)
133
#ifdef UCPTRIE_DEBUG
134
        , name("open")
135
#endif
136
2.59k
        {
137
2.59k
    if (U_FAILURE(errorCode)) { return; }
138
2.59k
    index = static_cast<uint32_t*>(uprv_malloc(BMP_I_LIMIT * 4));
139
2.59k
    data = static_cast<uint32_t*>(uprv_malloc(INITIAL_DATA_LENGTH * 4));
140
2.59k
    if (index == nullptr || data == nullptr) {
141
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
142
0
        return;
143
0
    }
144
2.59k
    indexCapacity = BMP_I_LIMIT;
145
2.59k
    dataCapacity = INITIAL_DATA_LENGTH;
146
2.59k
}
147
148
MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) :
149
0
        index3NullOffset(other.index3NullOffset),
150
0
        dataNullOffset(other.dataNullOffset),
151
0
        origInitialValue(other.origInitialValue), initialValue(other.initialValue),
152
0
        errorValue(other.errorValue),
153
0
        highStart(other.highStart), highValue(other.highValue)
154
#ifdef UCPTRIE_DEBUG
155
        , name("mutable clone")
156
#endif
157
0
        {
158
0
    if (U_FAILURE(errorCode)) { return; }
159
0
    int32_t iCapacity = highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT;
160
0
    index = static_cast<uint32_t*>(uprv_malloc(iCapacity * 4));
161
0
    data = static_cast<uint32_t*>(uprv_malloc(other.dataCapacity * 4));
162
0
    if (index == nullptr || data == nullptr) {
163
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
164
0
        return;
165
0
    }
166
0
    indexCapacity = iCapacity;
167
0
    dataCapacity = other.dataCapacity;
168
169
0
    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
170
0
    uprv_memcpy(flags, other.flags, iLimit);
171
0
    uprv_memcpy(index, other.index, iLimit * 4);
172
0
    uprv_memcpy(data, other.data, (size_t)other.dataLength * 4);
173
0
    dataLength = other.dataLength;
174
0
    U_ASSERT(other.index16 == nullptr);
175
0
}
176
177
2.59k
MutableCodePointTrie::~MutableCodePointTrie() {
178
2.59k
    uprv_free(index);
179
2.59k
    uprv_free(data);
180
2.59k
    uprv_free(index16);
181
2.59k
}
182
183
0
MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) {
184
    // Use the highValue as the initialValue to reduce the highStart.
185
0
    uint32_t errorValue = ucpmap_get(map, -1);
186
0
    uint32_t initialValue = ucpmap_get(map, 0x10ffff);
187
0
    LocalPointer<MutableCodePointTrie> mutableTrie(
188
0
        new MutableCodePointTrie(initialValue, errorValue, errorCode),
189
0
        errorCode);
190
0
    if (U_FAILURE(errorCode)) {
191
0
        return nullptr;
192
0
    }
193
0
    UChar32 start = 0, end;
194
0
    uint32_t value;
195
0
    while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
196
0
                                  nullptr, nullptr, &value)) >= 0) {
197
0
        if (value != initialValue) {
198
0
            if (start == end) {
199
0
                mutableTrie->set(start, value, errorCode);
200
0
            } else {
201
0
                mutableTrie->setRange(start, end, value, errorCode);
202
0
            }
203
0
        }
204
0
        start = end + 1;
205
0
    }
206
0
    if (U_SUCCESS(errorCode)) {
207
0
        return mutableTrie.orphan();
208
0
    } else {
209
0
        return nullptr;
210
0
    }
211
0
}
212
213
0
MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) {
214
    // Use the highValue as the initialValue to reduce the highStart.
215
0
    uint32_t errorValue;
216
0
    uint32_t initialValue;
217
0
    switch (trie->valueWidth) {
218
0
    case UCPTRIE_VALUE_BITS_16:
219
0
        errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
220
0
        initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
221
0
        break;
222
0
    case UCPTRIE_VALUE_BITS_32:
223
0
        errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
224
0
        initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
225
0
        break;
226
0
    case UCPTRIE_VALUE_BITS_8:
227
0
        errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
228
0
        initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
229
0
        break;
230
0
    default:
231
        // Unreachable if the trie is properly initialized.
232
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
233
0
        return nullptr;
234
0
    }
235
0
    LocalPointer<MutableCodePointTrie> mutableTrie(
236
0
        new MutableCodePointTrie(initialValue, errorValue, errorCode),
237
0
        errorCode);
238
0
    if (U_FAILURE(errorCode)) {
239
0
        return nullptr;
240
0
    }
241
0
    UChar32 start = 0, end;
242
0
    uint32_t value;
243
0
    while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
244
0
                                   nullptr, nullptr, &value)) >= 0) {
245
0
        if (value != initialValue) {
246
0
            if (start == end) {
247
0
                mutableTrie->set(start, value, errorCode);
248
0
            } else {
249
0
                mutableTrie->setRange(start, end, value, errorCode);
250
0
            }
251
0
        }
252
0
        start = end + 1;
253
0
    }
254
0
    if (U_SUCCESS(errorCode)) {
255
0
        return mutableTrie.orphan();
256
0
    } else {
257
0
        return nullptr;
258
0
    }
259
0
}
260
261
2.59k
void MutableCodePointTrie::clear() {
262
2.59k
    index3NullOffset = dataNullOffset = -1;
263
2.59k
    dataLength = 0;
264
2.59k
    highValue = initialValue = origInitialValue;
265
2.59k
    highStart = 0;
266
2.59k
    uprv_free(index16);
267
2.59k
    index16 = nullptr;
268
2.59k
}
269
270
341k
uint32_t MutableCodePointTrie::get(UChar32 c) const {
271
341k
    if (static_cast<uint32_t>(c) > MAX_UNICODE) {
272
0
        return errorValue;
273
0
    }
274
341k
    if (c >= highStart) {
275
1.88k
        return highValue;
276
1.88k
    }
277
340k
    int32_t i = c >> UCPTRIE_SHIFT_3;
278
340k
    if (flags[i] == ALL_SAME) {
279
166k
        return index[i];
280
173k
    } else {
281
173k
        return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)];
282
173k
    }
283
340k
}
284
285
inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue,
286
0
                                 UCPMapValueFilter *filter, const void *context) {
287
0
    if (value == initialValue) {
288
0
        value = nullValue;
289
0
    } else if (filter != nullptr) {
290
0
        value = filter(context, value);
291
0
    }
292
0
    return value;
293
0
}
294
295
UChar32 MutableCodePointTrie::getRange(
296
        UChar32 start, UCPMapValueFilter *filter, const void *context,
297
0
        uint32_t *pValue) const {
298
0
    if (static_cast<uint32_t>(start) > MAX_UNICODE) {
299
0
        return U_SENTINEL;
300
0
    }
301
0
    if (start >= highStart) {
302
0
        if (pValue != nullptr) {
303
0
            uint32_t value = highValue;
304
0
            if (filter != nullptr) { value = filter(context, value); }
305
0
            *pValue = value;
306
0
        }
307
0
        return MAX_UNICODE;
308
0
    }
309
0
    uint32_t nullValue = initialValue;
310
0
    if (filter != nullptr) { nullValue = filter(context, nullValue); }
311
0
    UChar32 c = start;
312
0
    uint32_t trieValue, value;
313
0
    bool haveValue = false;
314
0
    int32_t i = c >> UCPTRIE_SHIFT_3;
315
0
    do {
316
0
        if (flags[i] == ALL_SAME) {
317
0
            uint32_t trieValue2 = index[i];
318
0
            if (haveValue) {
319
0
                if (trieValue2 != trieValue) {
320
0
                    if (filter == nullptr ||
321
0
                            maybeFilterValue(trieValue2, initialValue, nullValue,
322
0
                                             filter, context) != value) {
323
0
                        return c - 1;
324
0
                    }
325
0
                    trieValue = trieValue2;  // may or may not help
326
0
                }
327
0
            } else {
328
0
                trieValue = trieValue2;
329
0
                value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
330
0
                if (pValue != nullptr) { *pValue = value; }
331
0
                haveValue = true;
332
0
            }
333
0
            c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK;
334
0
        } else /* MIXED */ {
335
0
            int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK);
336
0
            uint32_t trieValue2 = data[di];
337
0
            if (haveValue) {
338
0
                if (trieValue2 != trieValue) {
339
0
                    if (filter == nullptr ||
340
0
                            maybeFilterValue(trieValue2, initialValue, nullValue,
341
0
                                             filter, context) != value) {
342
0
                        return c - 1;
343
0
                    }
344
0
                    trieValue = trieValue2;  // may or may not help
345
0
                }
346
0
            } else {
347
0
                trieValue = trieValue2;
348
0
                value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
349
0
                if (pValue != nullptr) { *pValue = value; }
350
0
                haveValue = true;
351
0
            }
352
0
            while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) {
353
0
                trieValue2 = data[++di];
354
0
                if (trieValue2 != trieValue) {
355
0
                    if (filter == nullptr ||
356
0
                            maybeFilterValue(trieValue2, initialValue, nullValue,
357
0
                                             filter, context) != value) {
358
0
                        return c - 1;
359
0
                    }
360
0
                }
361
0
                trieValue = trieValue2;  // may or may not help
362
0
            }
363
0
        }
364
0
        ++i;
365
0
    } while (c < highStart);
366
0
    U_ASSERT(haveValue);
367
0
    if (maybeFilterValue(highValue, initialValue, nullValue,
368
0
                         filter, context) != value) {
369
0
        return c - 1;
370
0
    } else {
371
0
        return MAX_UNICODE;
372
0
    }
373
0
}
374
375
void
376
606k
writeBlock(uint32_t *block, uint32_t value) {
377
606k
    uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
378
10.3M
    while (block < limit) {
379
9.70M
        *block++ = value;
380
9.70M
    }
381
606k
}
382
383
834k
bool MutableCodePointTrie::ensureHighStart(UChar32 c) {
384
834k
    if (c >= highStart) {
385
        // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction.
386
79.5k
        c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
387
79.5k
        int32_t i = highStart >> UCPTRIE_SHIFT_3;
388
79.5k
        int32_t iLimit = c >> UCPTRIE_SHIFT_3;
389
79.5k
        if (iLimit > indexCapacity) {
390
2.59k
            uint32_t* newIndex = static_cast<uint32_t*>(uprv_malloc(I_LIMIT * 4));
391
2.59k
            if (newIndex == nullptr) { return false; }
392
2.59k
            uprv_memcpy(newIndex, index, i * 4);
393
2.59k
            uprv_free(index);
394
2.59k
            index = newIndex;
395
2.59k
            indexCapacity = I_LIMIT;
396
2.59k
        }
397
179M
        do {
398
179M
            flags[i] = ALL_SAME;
399
179M
            index[i] = initialValue;
400
179M
        } while(++i < iLimit);
401
79.5k
        highStart = c;
402
79.5k
    }
403
834k
    return true;
404
834k
}
405
406
242k
int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) {
407
242k
    int32_t newBlock = dataLength;
408
242k
    int32_t newTop = newBlock + blockLength;
409
242k
    if (newTop > dataCapacity) {
410
101
        int32_t capacity;
411
101
        if (dataCapacity < MEDIUM_DATA_LENGTH) {
412
101
            capacity = MEDIUM_DATA_LENGTH;
413
101
        } else if (dataCapacity < MAX_DATA_LENGTH) {
414
0
            capacity = MAX_DATA_LENGTH;
415
0
        } else {
416
            // Should never occur.
417
            // Either MAX_DATA_LENGTH is incorrect,
418
            // or the code writes more values than should be possible.
419
0
            return -1;
420
0
        }
421
101
        uint32_t* newData = static_cast<uint32_t*>(uprv_malloc(capacity * 4));
422
101
        if (newData == nullptr) {
423
0
            return -1;
424
0
        }
425
101
        uprv_memcpy(newData, data, (size_t)dataLength * 4);
426
101
        uprv_free(data);
427
101
        data = newData;
428
101
        dataCapacity = capacity;
429
101
    }
430
242k
    dataLength = newTop;
431
242k
    return newBlock;
432
242k
}
433
434
/**
435
 * No error checking for illegal arguments.
436
 *
437
 * @return -1 if no new data block available (out of memory in data array)
438
 * @internal
439
 */
440
975k
int32_t MutableCodePointTrie::getDataBlock(int32_t i) {
441
975k
    if (flags[i] == MIXED) {
442
733k
        return index[i];
443
733k
    }
444
242k
    if (i < BMP_I_LIMIT) {
445
121k
        int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH);
446
121k
        if (newBlock < 0) { return newBlock; }
447
121k
        int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1);
448
121k
        int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
449
485k
        do {
450
485k
            U_ASSERT(flags[iStart] == ALL_SAME);
451
485k
            writeBlock(data + newBlock, index[iStart]);
452
485k
            flags[iStart] = MIXED;
453
485k
            index[iStart++] = newBlock;
454
485k
            newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
455
485k
        } while (iStart < iLimit);
456
121k
        return index[i];
457
121k
    } else {
458
121k
        int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH);
459
121k
        if (newBlock < 0) { return newBlock; }
460
121k
        writeBlock(data + newBlock, index[i]);
461
121k
        flags[i] = MIXED;
462
121k
        index[i] = newBlock;
463
121k
        return newBlock;
464
121k
    }
465
242k
}
466
467
4.83k
void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) {
468
4.83k
    if (U_FAILURE(errorCode)) {
469
0
        return;
470
0
    }
471
4.83k
    if (static_cast<uint32_t>(c) > MAX_UNICODE) {
472
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
473
0
        return;
474
0
    }
475
476
4.83k
    int32_t block;
477
4.83k
    if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) {
478
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
479
0
        return;
480
0
    }
481
482
4.83k
    data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value;
483
4.83k
}
484
485
void
486
1.12M
fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) {
487
1.12M
    uint32_t *pLimit = block + limit;
488
1.12M
    block += start;
489
8.44M
    while (block < pLimit) {
490
7.32M
        *block++ = value;
491
7.32M
    }
492
1.12M
}
493
494
829k
void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) {
495
829k
    if (U_FAILURE(errorCode)) {
496
0
        return;
497
0
    }
498
829k
    if (static_cast<uint32_t>(start) > MAX_UNICODE || static_cast<uint32_t>(end) > MAX_UNICODE || start > end) {
499
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
500
0
        return;
501
0
    }
502
829k
    if (!ensureHighStart(end)) {
503
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
504
0
        return;
505
0
    }
506
507
829k
    UChar32 limit = end + 1;
508
829k
    if (start & UCPTRIE_SMALL_DATA_MASK) {
509
        // Set partial block at [start..following block boundary[.
510
662k
        int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
511
662k
        if (block < 0) {
512
0
            errorCode = U_MEMORY_ALLOCATION_ERROR;
513
0
            return;
514
0
        }
515
516
662k
        UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK;
517
662k
        if (nextStart <= limit) {
518
305k
            fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH,
519
305k
                      value);
520
305k
            start = nextStart;
521
357k
        } else {
522
357k
            fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK,
523
357k
                      value);
524
357k
            return;
525
357k
        }
526
662k
    }
527
528
    // Number of positions in the last, partial block.
529
471k
    int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK;
530
531
    // Round down limit to a block boundary.
532
471k
    limit &= ~UCPTRIE_SMALL_DATA_MASK;
533
534
    // Iterate over all-value blocks.
535
179M
    while (start < limit) {
536
178M
        int32_t i = start >> UCPTRIE_SHIFT_3;
537
178M
        if (flags[i] == ALL_SAME) {
538
178M
            index[i] = value;
539
178M
        } else /* MIXED */ {
540
152k
            fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value);
541
152k
        }
542
178M
        start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
543
178M
    }
544
545
471k
    if (rest > 0) {
546
        // Set partial block at [last block boundary..limit[.
547
306k
        int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
548
306k
        if (block < 0) {
549
0
            errorCode = U_MEMORY_ALLOCATION_ERROR;
550
0
            return;
551
0
        }
552
553
306k
        fillBlock(data + block, 0, rest, value);
554
306k
    }
555
471k
}
556
557
/* compaction --------------------------------------------------------------- */
558
559
2.59k
void MutableCodePointTrie::maskValues(uint32_t mask) {
560
2.59k
    initialValue &= mask;
561
2.59k
    errorValue &= mask;
562
2.59k
    highValue &= mask;
563
2.59k
    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
564
179M
    for (int32_t i = 0; i < iLimit; ++i) {
565
179M
        if (flags[i] == ALL_SAME) {
566
179M
            index[i] &= mask;
567
179M
        }
568
179M
    }
569
9.55M
    for (int32_t i = 0; i < dataLength; ++i) {
570
9.55M
        data[i] &= mask;
571
9.55M
    }
572
2.59k
}
573
574
template<typename UIntA, typename UIntB>
575
11.9M
bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) {
576
150M
    while (length > 0 && *s == *t) {
577
138M
        ++s;
578
138M
        ++t;
579
138M
        --length;
580
138M
    }
581
11.9M
    return length == 0;
582
11.9M
}
umutablecptrie.cpp:bool icu_79::(anonymous namespace)::equalBlocks<unsigned int, unsigned int>(unsigned int const*, unsigned int const*, int)
Line
Count
Source
575
9.63M
bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) {
576
121M
    while (length > 0 && *s == *t) {
577
111M
        ++s;
578
111M
        ++t;
579
111M
        --length;
580
111M
    }
581
9.63M
    return length == 0;
582
9.63M
}
umutablecptrie.cpp:bool icu_79::(anonymous namespace)::equalBlocks<unsigned short, unsigned short>(unsigned short const*, unsigned short const*, int)
Line
Count
Source
575
1.49M
bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) {
576
21.7M
    while (length > 0 && *s == *t) {
577
20.2M
        ++s;
578
20.2M
        ++t;
579
20.2M
        --length;
580
20.2M
    }
581
1.49M
    return length == 0;
582
1.49M
}
umutablecptrie.cpp:bool icu_79::(anonymous namespace)::equalBlocks<unsigned short, unsigned int>(unsigned short const*, unsigned int const*, int)
Line
Count
Source
575
822k
bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) {
576
7.30M
    while (length > 0 && *s == *t) {
577
6.48M
        ++s;
578
6.48M
        ++t;
579
6.48M
        --length;
580
6.48M
    }
581
822k
    return length == 0;
582
822k
}
583
584
249k
bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) {
585
249k
    const uint32_t *pLimit = p + length;
586
3.96M
    while (p < pLimit && *p == value) { ++p; }
587
249k
    return p == pLimit;
588
249k
}
589
590
/** Search for an identical block. */
591
int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
592
825
                      const uint16_t *q, int32_t qStart, int32_t blockLength) {
593
    // Ensure that we do not even partially get past length.
594
825
    length -= blockLength;
595
596
825
    q += qStart;
597
735k
    while (pStart <= length) {
598
734k
        if (equalBlocks(p + pStart, q, blockLength)) {
599
9
            return pStart;
600
9
        }
601
734k
        ++pStart;
602
734k
    }
603
816
    return -1;
604
825
}
605
606
int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit,
607
60
                         uint32_t value, int32_t blockLength) {
608
    // Ensure that we do not even partially get past limit.
609
60
    limit -= blockLength;
610
611
124k
    for (int32_t block = start; block <= limit; ++block) {
612
124k
        if (p[block] == value) {
613
5.85k
            for (int32_t i = 1;; ++i) {
614
5.85k
                if (i == blockLength) {
615
38
                    return block;
616
38
                }
617
5.81k
                if (p[block + i] != value) {
618
1.62k
                    block += i;
619
1.62k
                    break;
620
1.62k
                }
621
5.81k
            }
622
1.66k
        }
623
124k
    }
624
22
    return -1;
625
60
}
626
627
/**
628
 * Look for maximum overlap of the beginning of the other block
629
 * with the previous, adjacent block.
630
 */
631
template<typename UIntA, typename UIntB>
632
int32_t getOverlap(const UIntA *p, int32_t length,
633
151k
                   const UIntB *q, int32_t qStart, int32_t blockLength) {
634
151k
    int32_t overlap = blockLength - 1;
635
151k
    U_ASSERT(overlap <= length);
636
151k
    q += qStart;
637
6.09M
    while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
638
5.94M
        --overlap;
639
5.94M
    }
640
151k
    return overlap;
641
151k
}
umutablecptrie.cpp:int icu_79::(anonymous namespace)::getOverlap<unsigned int, unsigned int>(unsigned int const*, int, unsigned int const*, int, int)
Line
Count
Source
633
122k
                   const UIntB *q, int32_t qStart, int32_t blockLength) {
634
122k
    int32_t overlap = blockLength - 1;
635
122k
    U_ASSERT(overlap <= length);
636
122k
    q += qStart;
637
5.30M
    while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
638
5.18M
        --overlap;
639
5.18M
    }
640
122k
    return overlap;
641
122k
}
umutablecptrie.cpp:int icu_79::(anonymous namespace)::getOverlap<unsigned short, unsigned int>(unsigned short const*, int, unsigned int const*, int, int)
Line
Count
Source
633
24.1k
                   const UIntB *q, int32_t qStart, int32_t blockLength) {
634
24.1k
    int32_t overlap = blockLength - 1;
635
24.1k
    U_ASSERT(overlap <= length);
636
24.1k
    q += qStart;
637
659k
    while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
638
634k
        --overlap;
639
634k
    }
640
24.1k
    return overlap;
641
24.1k
}
umutablecptrie.cpp:int icu_79::(anonymous namespace)::getOverlap<unsigned short, unsigned short>(unsigned short const*, int, unsigned short const*, int, int)
Line
Count
Source
633
5.55k
                   const UIntB *q, int32_t qStart, int32_t blockLength) {
634
5.55k
    int32_t overlap = blockLength - 1;
635
5.55k
    U_ASSERT(overlap <= length);
636
5.55k
    q += qStart;
637
132k
    while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
638
127k
        --overlap;
639
127k
    }
640
5.55k
    return overlap;
641
5.55k
}
642
643
int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value,
644
2.19k
                          int32_t blockLength) {
645
2.19k
    int32_t min = length - (blockLength - 1);
646
2.19k
    int32_t i = length;
647
34.2k
    while (min < i && p[i - 1] == value) { --i; }
648
2.19k
    return length - i;
649
2.19k
}
650
651
125
bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) {
652
78.1k
    for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
653
78.1k
        if (index[i] == dataOffset) {
654
60
            return true;
655
60
        }
656
78.1k
    }
657
65
    return false;
658
125
}
659
660
/**
661
 * Finds the start of the last range in the trie by enumerating backward.
662
 * Indexes for code points higher than this will be omitted.
663
 */
664
2.59k
UChar32 MutableCodePointTrie::findHighStart() const {
665
2.59k
    int32_t i = highStart >> UCPTRIE_SHIFT_3;
666
138M
    while (i > 0) {
667
138M
        bool match;
668
138M
        if (flags[--i] == ALL_SAME) {
669
138M
            match = index[i] == highValue;
670
138M
        } else /* MIXED */ {
671
7.86k
            const uint32_t *p = data + index[i];
672
112k
            for (int32_t j = 0;; ++j) {
673
112k
                if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) {
674
5.47k
                    match = true;
675
5.47k
                    break;
676
5.47k
                }
677
106k
                if (p[j] != highValue) {
678
2.38k
                    match = false;
679
2.38k
                    break;
680
2.38k
                }
681
106k
            }
682
7.86k
        }
683
138M
        if (!match) {
684
2.55k
            return (i + 1) << UCPTRIE_SHIFT_3;
685
2.55k
        }
686
138M
    }
687
44
    return 0;
688
2.59k
}
689
690
class AllSameBlocks {
691
public:
692
    static constexpr int32_t NEW_UNIQUE = -1;
693
    static constexpr int32_t OVERFLOW = -2;
694
695
2.59k
    AllSameBlocks() : length(0), mostRecent(-1) {}
696
697
35.0M
    int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) {
698
35.0M
        if (mostRecent >= 0 && values[mostRecent] == value) {
699
34.9M
            refCounts[mostRecent] += count;
700
34.9M
            return indexes[mostRecent];
701
34.9M
        }
702
178k
        for (int32_t i = 0; i < length; ++i) {
703
173k
            if (values[i] == value) {
704
68.1k
                mostRecent = i;
705
68.1k
                refCounts[i] += count;
706
68.1k
                return indexes[i];
707
68.1k
            }
708
173k
        }
709
4.41k
        if (length == CAPACITY) {
710
443
            return OVERFLOW;
711
443
        }
712
3.97k
        mostRecent = length;
713
3.97k
        indexes[length] = index;
714
3.97k
        values[length] = value;
715
3.97k
        refCounts[length++] = count;
716
3.97k
        return NEW_UNIQUE;
717
4.41k
    }
718
719
    /** Replaces the block which has the lowest reference count. */
720
443
    void add(int32_t index, int32_t count, uint32_t value) {
721
443
        U_ASSERT(length == CAPACITY);
722
443
        int32_t least = -1;
723
443
        int32_t leastCount = I_LIMIT;
724
14.6k
        for (int32_t i = 0; i < length; ++i) {
725
14.1k
            U_ASSERT(values[i] != value);
726
14.1k
            if (refCounts[i] < leastCount) {
727
1.57k
                least = i;
728
1.57k
                leastCount = refCounts[i];
729
1.57k
            }
730
14.1k
        }
731
443
        U_ASSERT(least >= 0);
732
443
        mostRecent = least;
733
443
        indexes[least] = index;
734
443
        values[least] = value;
735
443
        refCounts[least] = count;
736
443
    }
737
738
2.59k
    int32_t findMostUsed() const {
739
2.59k
        if (length == 0) { return -1; }
740
2.59k
        int32_t max = -1;
741
2.59k
        int32_t maxCount = 0;
742
6.56k
        for (int32_t i = 0; i < length; ++i) {
743
3.97k
            if (refCounts[i] > maxCount) {
744
3.08k
                max = i;
745
3.08k
                maxCount = refCounts[i];
746
3.08k
            }
747
3.97k
        }
748
2.59k
        return indexes[max];
749
2.59k
    }
750
751
private:
752
    static constexpr int32_t CAPACITY = 32;
753
754
    int32_t length;
755
    int32_t mostRecent;
756
757
    int32_t indexes[CAPACITY];
758
    uint32_t values[CAPACITY];
759
    int32_t refCounts[CAPACITY];
760
};
761
762
// Custom hash table for mixed-value blocks to be found anywhere in the
763
// compacted data or index so far.
764
class MixedBlocks {
765
public:
766
3.54k
    MixedBlocks() {}
767
3.54k
    ~MixedBlocks() {
768
3.54k
        uprv_free(table);
769
3.54k
    }
770
771
5.43k
    bool init(int32_t maxLength, int32_t newBlockLength) {
772
        // We store actual data indexes + 1 to reserve 0 for empty entries.
773
5.43k
        int32_t maxDataIndex = maxLength - newBlockLength + 1;
774
5.43k
        int32_t newLength;
775
5.43k
        if (maxDataIndex <= 0xfff) {  // 4k
776
3.97k
            newLength = 6007;
777
3.97k
            shift = 12;
778
3.97k
            mask = 0xfff;
779
3.97k
        } else if (maxDataIndex <= 0x7fff) {  // 32k
780
1.38k
            newLength = 50021;
781
1.38k
            shift = 15;
782
1.38k
            mask = 0x7fff;
783
1.38k
        } else if (maxDataIndex <= 0x1ffff) {  // 128k
784
72
            newLength = 200003;
785
72
            shift = 17;
786
72
            mask = 0x1ffff;
787
72
        } else {
788
            // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M
789
0
            newLength = 1500007;
790
0
            shift = 21;
791
0
            mask = 0x1fffff;
792
0
        }
793
5.43k
        if (newLength > capacity) {
794
2.66k
            uprv_free(table);
795
2.66k
            table = static_cast<uint32_t*>(uprv_malloc(newLength * 4));
796
2.66k
            if (table == nullptr) {
797
0
                return false;
798
0
            }
799
2.66k
            capacity = newLength;
800
2.66k
        }
801
5.43k
        length = newLength;
802
5.43k
        uprv_memset(table, 0, length * 4);
803
804
5.43k
        blockLength = newBlockLength;
805
5.43k
        return true;
806
5.43k
    }
807
808
    template<typename UInt>
809
159k
    void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) {
810
159k
        int32_t start = prevDataLength - blockLength;
811
159k
        if (start >= minStart) {
812
154k
            ++start;  // Skip the last block that we added last time.
813
154k
        } else {
814
5.43k
            start = minStart;  // Begin with the first full block.
815
5.43k
        }
816
11.2M
        for (int32_t end = newDataLength - blockLength; start <= end; ++start) {
817
11.1M
            uint32_t hashCode = makeHashCode(data, start);
818
11.1M
            addEntry(data, start, hashCode, start);
819
11.1M
        }
820
159k
    }
umutablecptrie.cpp:void icu_79::(anonymous namespace)::MixedBlocks::extend<unsigned int>(unsigned int const*, int, int, int)
Line
Count
Source
809
127k
    void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) {
810
127k
        int32_t start = prevDataLength - blockLength;
811
127k
        if (start >= minStart) {
812
124k
            ++start;  // Skip the last block that we added last time.
813
124k
        } else {
814
3.54k
            start = minStart;  // Begin with the first full block.
815
3.54k
        }
816
9.56M
        for (int32_t end = newDataLength - blockLength; start <= end; ++start) {
817
9.43M
            uint32_t hashCode = makeHashCode(data, start);
818
9.43M
            addEntry(data, start, hashCode, start);
819
9.43M
        }
820
127k
    }
umutablecptrie.cpp:void icu_79::(anonymous namespace)::MixedBlocks::extend<unsigned short>(unsigned short const*, int, int, int)
Line
Count
Source
809
31.6k
    void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) {
810
31.6k
        int32_t start = prevDataLength - blockLength;
811
31.6k
        if (start >= minStart) {
812
29.7k
            ++start;  // Skip the last block that we added last time.
813
29.7k
        } else {
814
1.89k
            start = minStart;  // Begin with the first full block.
815
1.89k
        }
816
1.73M
        for (int32_t end = newDataLength - blockLength; start <= end; ++start) {
817
1.70M
            uint32_t hashCode = makeHashCode(data, start);
818
1.70M
            addEntry(data, start, hashCode, start);
819
1.70M
        }
820
31.6k
    }
821
822
    template<typename UIntA, typename UIntB>
823
538k
    int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const {
824
538k
        uint32_t hashCode = makeHashCode(blockData, blockStart);
825
538k
        int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode);
826
538k
        if (entryIndex >= 0) {
827
288k
            return (table[entryIndex] & mask) - 1;
828
288k
        } else {
829
250k
            return -1;
830
250k
        }
831
538k
    }
umutablecptrie.cpp:int icu_79::(anonymous namespace)::MixedBlocks::findBlock<unsigned int, unsigned int>(unsigned int const*, unsigned int const*, int) const
Line
Count
Source
823
238k
    int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const {
824
238k
        uint32_t hashCode = makeHashCode(blockData, blockStart);
825
238k
        int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode);
826
238k
        if (entryIndex >= 0) {
827
116k
            return (table[entryIndex] & mask) - 1;
828
122k
        } else {
829
122k
            return -1;
830
122k
        }
831
238k
    }
umutablecptrie.cpp:int icu_79::(anonymous namespace)::MixedBlocks::findBlock<unsigned short, unsigned int>(unsigned short const*, unsigned int const*, int) const
Line
Count
Source
823
268k
    int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const {
824
268k
        uint32_t hashCode = makeHashCode(blockData, blockStart);
825
268k
        int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode);
826
268k
        if (entryIndex >= 0) {
827
145k
            return (table[entryIndex] & mask) - 1;
828
145k
        } else {
829
123k
            return -1;
830
123k
        }
831
268k
    }
umutablecptrie.cpp:int icu_79::(anonymous namespace)::MixedBlocks::findBlock<unsigned short, unsigned short>(unsigned short const*, unsigned short const*, int) const
Line
Count
Source
823
31.4k
    int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const {
824
31.4k
        uint32_t hashCode = makeHashCode(blockData, blockStart);
825
31.4k
        int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode);
826
31.4k
        if (entryIndex >= 0) {
827
26.6k
            return (table[entryIndex] & mask) - 1;
828
26.6k
        } else {
829
4.75k
            return -1;
830
4.75k
        }
831
31.4k
    }
832
833
2.74k
    int32_t findAllSameBlock(const uint32_t *data, uint32_t blockValue) const {
834
2.74k
        uint32_t hashCode = makeHashCode(blockValue);
835
2.74k
        int32_t entryIndex = findEntry(data, blockValue, hashCode);
836
2.74k
        if (entryIndex >= 0) {
837
572
            return (table[entryIndex] & mask) - 1;
838
2.17k
        } else {
839
2.17k
            return -1;
840
2.17k
        }
841
2.74k
    }
842
843
private:
844
    template<typename UInt>
845
11.6M
    uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const {
846
11.6M
        int32_t blockLimit = blockStart + blockLength;
847
11.6M
        uint32_t hashCode = blockData[blockStart++];
848
462M
        do {
849
462M
            hashCode = 37 * hashCode + blockData[blockStart++];
850
462M
        } while (blockStart < blockLimit);
851
11.6M
        return hashCode;
852
11.6M
    }
umutablecptrie.cpp:unsigned int icu_79::(anonymous namespace)::MixedBlocks::makeHashCode<unsigned int>(unsigned int const*, int) const
Line
Count
Source
845
9.93M
    uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const {
846
9.93M
        int32_t blockLimit = blockStart + blockLength;
847
9.93M
        uint32_t hashCode = blockData[blockStart++];
848
408M
        do {
849
408M
            hashCode = 37 * hashCode + blockData[blockStart++];
850
408M
        } while (blockStart < blockLimit);
851
9.93M
        return hashCode;
852
9.93M
    }
umutablecptrie.cpp:unsigned int icu_79::(anonymous namespace)::MixedBlocks::makeHashCode<unsigned short>(unsigned short const*, int) const
Line
Count
Source
845
1.73M
    uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const {
846
1.73M
        int32_t blockLimit = blockStart + blockLength;
847
1.73M
        uint32_t hashCode = blockData[blockStart++];
848
53.8M
        do {
849
53.8M
            hashCode = 37 * hashCode + blockData[blockStart++];
850
53.8M
        } while (blockStart < blockLimit);
851
1.73M
        return hashCode;
852
1.73M
    }
853
854
2.74k
    uint32_t makeHashCode(uint32_t blockValue) const {
855
2.74k
        uint32_t hashCode = blockValue;
856
134k
        for (int32_t i = 1; i < blockLength; ++i) {
857
131k
            hashCode = 37 * hashCode + blockValue;
858
131k
        }
859
2.74k
        return hashCode;
860
2.74k
    }
861
862
    template<typename UInt>
863
11.1M
    void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) {
864
11.1M
        U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask);
865
11.1M
        int32_t entryIndex = findEntry(data, data, blockStart, hashCode);
866
11.1M
        if (entryIndex < 0) {
867
7.47M
            table[~entryIndex] = (hashCode << shift) | (dataIndex + 1);
868
7.47M
        }
869
11.1M
    }
umutablecptrie.cpp:void icu_79::(anonymous namespace)::MixedBlocks::addEntry<unsigned int>(unsigned int const*, int, unsigned int, int)
Line
Count
Source
863
9.43M
    void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) {
864
9.43M
        U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask);
865
9.43M
        int32_t entryIndex = findEntry(data, data, blockStart, hashCode);
866
9.43M
        if (entryIndex < 0) {
867
6.34M
            table[~entryIndex] = (hashCode << shift) | (dataIndex + 1);
868
6.34M
        }
869
9.43M
    }
umutablecptrie.cpp:void icu_79::(anonymous namespace)::MixedBlocks::addEntry<unsigned short>(unsigned short const*, int, unsigned int, int)
Line
Count
Source
863
1.70M
    void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) {
864
1.70M
        U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask);
865
1.70M
        int32_t entryIndex = findEntry(data, data, blockStart, hashCode);
866
1.70M
        if (entryIndex < 0) {
867
1.13M
            table[~entryIndex] = (hashCode << shift) | (dataIndex + 1);
868
1.13M
        }
869
1.70M
    }
870
871
    template<typename UIntA, typename UIntB>
872
    int32_t findEntry(const UIntA *data, const UIntB *blockData, int32_t blockStart,
873
11.6M
                      uint32_t hashCode) const {
874
11.6M
        uint32_t shiftedHashCode = hashCode << shift;
875
11.6M
        int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1
876
13.7M
        for (int32_t entryIndex = initialEntryIndex;;) {
877
13.7M
            uint32_t entry = table[entryIndex];
878
13.7M
            if (entry == 0) {
879
7.72M
                return ~entryIndex;
880
7.72M
            }
881
6.01M
            if ((entry & ~mask) == shiftedHashCode) {
882
5.16M
                int32_t dataIndex = (entry & mask) - 1;
883
5.16M
                if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) {
884
3.94M
                    return entryIndex;
885
3.94M
                }
886
5.16M
            }
887
2.06M
            entryIndex = nextIndex(initialEntryIndex, entryIndex);
888
2.06M
        }
889
11.6M
    }
umutablecptrie.cpp:int icu_79::(anonymous namespace)::MixedBlocks::findEntry<unsigned int, unsigned int>(unsigned int const*, unsigned int const*, int, unsigned int) const
Line
Count
Source
873
9.67M
                      uint32_t hashCode) const {
874
9.67M
        uint32_t shiftedHashCode = hashCode << shift;
875
9.67M
        int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1
876
11.5M
        for (int32_t entryIndex = initialEntryIndex;;) {
877
11.5M
            uint32_t entry = table[entryIndex];
878
11.5M
            if (entry == 0) {
879
6.46M
                return ~entryIndex;
880
6.46M
            }
881
5.10M
            if ((entry & ~mask) == shiftedHashCode) {
882
4.35M
                int32_t dataIndex = (entry & mask) - 1;
883
4.35M
                if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) {
884
3.20M
                    return entryIndex;
885
3.20M
                }
886
4.35M
            }
887
1.89M
            entryIndex = nextIndex(initialEntryIndex, entryIndex);
888
1.89M
        }
889
9.67M
    }
umutablecptrie.cpp:int icu_79::(anonymous namespace)::MixedBlocks::findEntry<unsigned short, unsigned short>(unsigned short const*, unsigned short const*, int, unsigned int) const
Line
Count
Source
873
1.73M
                      uint32_t hashCode) const {
874
1.73M
        uint32_t shiftedHashCode = hashCode << shift;
875
1.73M
        int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1
876
1.86M
        for (int32_t entryIndex = initialEntryIndex;;) {
877
1.86M
            uint32_t entry = table[entryIndex];
878
1.86M
            if (entry == 0) {
879
1.14M
                return ~entryIndex;
880
1.14M
            }
881
718k
            if ((entry & ~mask) == shiftedHashCode) {
882
630k
                int32_t dataIndex = (entry & mask) - 1;
883
630k
                if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) {
884
594k
                    return entryIndex;
885
594k
                }
886
630k
            }
887
123k
            entryIndex = nextIndex(initialEntryIndex, entryIndex);
888
123k
        }
889
1.73M
    }
umutablecptrie.cpp:int icu_79::(anonymous namespace)::MixedBlocks::findEntry<unsigned short, unsigned int>(unsigned short const*, unsigned int const*, int, unsigned int) const
Line
Count
Source
873
268k
                      uint32_t hashCode) const {
874
268k
        uint32_t shiftedHashCode = hashCode << shift;
875
268k
        int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1
876
313k
        for (int32_t entryIndex = initialEntryIndex;;) {
877
313k
            uint32_t entry = table[entryIndex];
878
313k
            if (entry == 0) {
879
123k
                return ~entryIndex;
880
123k
            }
881
190k
            if ((entry & ~mask) == shiftedHashCode) {
882
176k
                int32_t dataIndex = (entry & mask) - 1;
883
176k
                if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) {
884
145k
                    return entryIndex;
885
145k
                }
886
176k
            }
887
45.2k
            entryIndex = nextIndex(initialEntryIndex, entryIndex);
888
45.2k
        }
889
268k
    }
890
891
2.74k
    int32_t findEntry(const uint32_t *data, uint32_t blockValue, uint32_t hashCode) const {
892
2.74k
        uint32_t shiftedHashCode = hashCode << shift;
893
2.74k
        int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1
894
3.42k
        for (int32_t entryIndex = initialEntryIndex;;) {
895
3.42k
            uint32_t entry = table[entryIndex];
896
3.42k
            if (entry == 0) {
897
2.17k
                return ~entryIndex;
898
2.17k
            }
899
1.24k
            if ((entry & ~mask) == shiftedHashCode) {
900
857
                int32_t dataIndex = (entry & mask) - 1;
901
857
                if (allValuesSameAs(data + dataIndex, blockLength, blockValue)) {
902
572
                    return entryIndex;
903
572
                }
904
857
            }
905
677
            entryIndex = nextIndex(initialEntryIndex, entryIndex);
906
677
        }
907
2.74k
    }
908
909
2.06M
    inline int32_t nextIndex(int32_t initialEntryIndex, int32_t entryIndex) const {
910
        // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length);
911
2.06M
        return (entryIndex + initialEntryIndex) % length;
912
2.06M
    }
913
914
    // Hash table.
915
    // The length is a prime number, larger than the maximum data length.
916
    // The "shift" lower bits store a data index + 1.
917
    // The remaining upper bits store a partial hashCode of the block data values.
918
    uint32_t *table = nullptr;
919
    int32_t capacity = 0;
920
    int32_t length = 0;
921
    int32_t shift = 0;
922
    uint32_t mask = 0;
923
924
    int32_t blockLength = 0;
925
};
926
927
2.59k
int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) {
928
#ifdef UCPTRIE_DEBUG
929
    bool overflow = false;
930
#endif
931
932
    // ASCII data will be stored as a linear table, even if the following code
933
    // does not yet count it that way.
934
2.59k
    int32_t newDataCapacity = ASCII_LIMIT;
935
    // Add room for a small data null block in case it would match the start of
936
    // a fast data block where dataNullOffset must not be set in that case.
937
2.59k
    newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
938
    // Add room for special values (errorValue, highValue) and padding.
939
2.59k
    newDataCapacity += 4;
940
2.59k
    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
941
2.59k
    int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
942
2.59k
    int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
943
35.2M
    for (int32_t i = 0; i < iLimit; i += inc) {
944
35.2M
        if (i == fastILimit) {
945
946
            blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
946
946
            inc = 1;
947
946
        }
948
35.2M
        uint32_t value = index[i];
949
35.2M
        if (flags[i] == MIXED) {
950
            // Really mixed?
951
248k
            const uint32_t *p = data + value;
952
248k
            value = *p;
953
248k
            if (allValuesSameAs(p + 1, blockLength - 1, value)) {
954
8.66k
                flags[i] = ALL_SAME;
955
8.66k
                index[i] = value;
956
                // Fall through to ALL_SAME handling.
957
239k
            } else {
958
239k
                newDataCapacity += blockLength;
959
239k
                continue;
960
239k
            }
961
35.0M
        } else {
962
35.0M
            U_ASSERT(flags[i] == ALL_SAME);
963
35.0M
            if (inc > 1) {
964
                // Do all of the fast-range data block's ALL_SAME parts have the same value?
965
2.51M
                bool allSame = true;
966
2.51M
                int32_t next_i = i + inc;
967
10.0M
                for (int32_t j = i + 1; j < next_i; ++j) {
968
7.54M
                    U_ASSERT(flags[j] == ALL_SAME);
969
7.54M
                    if (index[j] != value) {
970
1.36k
                        allSame = false;
971
1.36k
                        break;
972
1.36k
                    }
973
7.54M
                }
974
2.51M
                if (!allSame) {
975
                    // Turn it into a MIXED block.
976
1.36k
                    if (getDataBlock(i) < 0) {
977
0
                        return -1;
978
0
                    }
979
1.36k
                    newDataCapacity += blockLength;
980
1.36k
                    continue;
981
1.36k
                }
982
2.51M
            }
983
35.0M
        }
984
        // Is there another ALL_SAME block with the same value?
985
35.0M
        int32_t other = allSameBlocks.findOrAdd(i, inc, value);
986
35.0M
        if (other == AllSameBlocks::OVERFLOW) {
987
            // The fixed-size array overflowed. Slow check for a duplicate block.
988
#ifdef UCPTRIE_DEBUG
989
            if (!overflow) {
990
                puts("UCPTrie AllSameBlocks overflow");
991
                overflow = true;
992
            }
993
#endif
994
443
            int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
995
1.86M
            for (int32_t j = 0;; j += jInc) {
996
1.86M
                if (j == i) {
997
431
                    allSameBlocks.add(i, inc, value);
998
431
                    break;
999
431
                }
1000
1.86M
                if (j == fastILimit) {
1001
442
                    jInc = 1;
1002
442
                }
1003
1.86M
                if (flags[j] == ALL_SAME && index[j] == value) {
1004
12
                    allSameBlocks.add(j, jInc + inc, value);
1005
12
                    other = j;
1006
12
                    break;
1007
                    // We could keep counting blocks with the same value
1008
                    // before we add the first one, which may improve compaction in rare cases,
1009
                    // but it would make it slower.
1010
12
                }
1011
1.86M
            }
1012
443
        }
1013
35.0M
        if (other >= 0) {
1014
35.0M
            flags[i] = SAME_AS;
1015
35.0M
            index[i] = other;
1016
35.0M
        } else {
1017
            // New unique same-value block.
1018
4.40k
            newDataCapacity += blockLength;
1019
4.40k
        }
1020
35.0M
    }
1021
2.59k
    return newDataCapacity;
1022
2.59k
}
1023
1024
#ifdef UCPTRIE_DEBUG
1025
#   define DEBUG_DO(expr) expr
1026
#else
1027
#   define DEBUG_DO(expr)
1028
#endif
1029
1030
#ifdef UCPTRIE_DEBUG
1031
// Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF
1032
int32_t appendValue(char s[], int32_t length, uint32_t value) {
1033
    value ^= value >> 16;
1034
    value ^= value >> 8;
1035
    s[length] = 0xE2;
1036
    s[length + 1] = (char)(0xA0 + ((value >> 6) & 3));
1037
    s[length + 2] = (char)(0x80 + (value & 0x3F));
1038
    return length + 3;
1039
}
1040
1041
void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value,
1042
                UChar32 start, int32_t overlap, uint32_t initialValue) {
1043
    char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3];
1044
    int32_t length = 0;
1045
    int32_t i;
1046
    for (i = 0; i < overlap; ++i) {
1047
        length = appendValue(s, length, 0);  // Braille blank
1048
    }
1049
    s[length++] = '|';
1050
    for (; i < blockLength; ++i) {
1051
        if (block != nullptr) {
1052
            value = block[i];
1053
        }
1054
        if (value == initialValue) {
1055
            value = 0x40;  // Braille lower left dot
1056
        }
1057
        length = appendValue(s, length, value);
1058
    }
1059
    s[length] = 0;
1060
    start += overlap;
1061
    if (start <= 0xffff) {
1062
        printf("    %04lX  %s|\n", (long)start, s);
1063
    } else if (start <= 0xfffff) {
1064
        printf("   %5lX  %s|\n", (long)start, s);
1065
    } else {
1066
        printf("  %6lX  %s|\n", (long)start, s);
1067
    }
1068
}
1069
#endif
1070
1071
/**
1072
 * Compacts a build-time trie.
1073
 *
1074
 * The compaction
1075
 * - removes blocks that are identical with earlier ones
1076
 * - overlaps each new non-duplicate block as much as possible with the previously-written one
1077
 * - works with fast-range data blocks whose length is a multiple of that of
1078
 *   higher-code-point data blocks
1079
 *
1080
 * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
1081
 */
1082
int32_t MutableCodePointTrie::compactData(
1083
        int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
1084
2.59k
        int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) {
1085
#ifdef UCPTRIE_DEBUG
1086
    int32_t countSame=0, sumOverlaps=0;
1087
    bool printData = dataLength == 29088 /* line.brk */ ||
1088
        // dataLength == 30048 /* CanonIterData */ ||
1089
        dataLength == 50400 /* zh.txt~stroke */;
1090
#endif
1091
1092
    // The linear ASCII data has been copied into newData already.
1093
2.59k
    int32_t newDataLength = 0;
1094
7.78k
    for (int32_t i = 0; newDataLength < ASCII_LIMIT;
1095
5.18k
            newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
1096
5.18k
        index[i] = newDataLength;
1097
#ifdef UCPTRIE_DEBUG
1098
        if (printData) {
1099
            printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue);
1100
        }
1101
#endif
1102
5.18k
    }
1103
1104
2.59k
    int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
1105
2.59k
    if (!mixedBlocks.init(newDataCapacity, blockLength)) {
1106
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1107
0
        return 0;
1108
0
    }
1109
2.59k
    mixedBlocks.extend(newData, 0, 0, newDataLength);
1110
1111
2.59k
    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
1112
2.59k
    int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
1113
2.59k
    int32_t fastLength = 0;
1114
35.2M
    for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) {
1115
35.2M
        if (i == fastILimit) {
1116
946
            blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
1117
946
            inc = 1;
1118
946
            fastLength = newDataLength;
1119
946
            if (!mixedBlocks.init(newDataCapacity, blockLength)) {
1120
0
                errorCode = U_MEMORY_ALLOCATION_ERROR;
1121
0
                return 0;
1122
0
            }
1123
946
            mixedBlocks.extend(newData, 0, 0, newDataLength);
1124
946
        }
1125
35.2M
        if (flags[i] == ALL_SAME) {
1126
2.74k
            uint32_t value = index[i];
1127
            // Find an earlier part of the data array of length blockLength
1128
            // that is filled with this value.
1129
2.74k
            int32_t n = mixedBlocks.findAllSameBlock(newData, value);
1130
            // If we find a match, and the current block is the data null block,
1131
            // and it is not a fast block but matches the start of a fast block,
1132
            // then we need to continue looking.
1133
            // This is because this small block is shorter than the fast block,
1134
            // and not all of the rest of the fast block is filled with this value.
1135
            // Otherwise trie.getRange() would detect that the fast block starts at
1136
            // dataNullOffset and assume incorrectly that it is filled with the null value.
1137
2.80k
            while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength &&
1138
125
                    isStartOfSomeFastBlock(n, index, fastILimit)) {
1139
60
                n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength);
1140
60
            }
1141
2.74k
            if (n >= 0) {
1142
550
                DEBUG_DO(++countSame);
1143
550
                index[i] = n;
1144
2.19k
            } else {
1145
2.19k
                n = getAllSameOverlap(newData, newDataLength, value, blockLength);
1146
2.19k
                DEBUG_DO(sumOverlaps += n);
1147
#ifdef UCPTRIE_DEBUG
1148
                if (printData) {
1149
                    printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue);
1150
                }
1151
#endif
1152
2.19k
                index[i] = newDataLength - n;
1153
2.19k
                int32_t prevDataLength = newDataLength;
1154
79.6k
                while (n < blockLength) {
1155
77.4k
                    newData[newDataLength++] = value;
1156
77.4k
                    ++n;
1157
77.4k
                }
1158
2.19k
                mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
1159
2.19k
            }
1160
35.2M
        } else if (flags[i] == MIXED) {
1161
238k
            const uint32_t *block = data + index[i];
1162
238k
            int32_t n = mixedBlocks.findBlock(newData, block, 0);
1163
238k
            if (n >= 0) {
1164
116k
                DEBUG_DO(++countSame);
1165
116k
                index[i] = n;
1166
122k
            } else {
1167
122k
                n = getOverlap(newData, newDataLength, block, 0, blockLength);
1168
122k
                DEBUG_DO(sumOverlaps += n);
1169
#ifdef UCPTRIE_DEBUG
1170
                if (printData) {
1171
                    printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue);
1172
                }
1173
#endif
1174
122k
                index[i] = newDataLength - n;
1175
122k
                int32_t prevDataLength = newDataLength;
1176
5.43M
                while (n < blockLength) {
1177
5.30M
                    newData[newDataLength++] = block[n++];
1178
5.30M
                }
1179
122k
                mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
1180
122k
            }
1181
35.0M
        } else /* SAME_AS */ {
1182
35.0M
            uint32_t j = index[i];
1183
35.0M
            index[i] = index[j];
1184
35.0M
        }
1185
35.2M
    }
1186
1187
#ifdef UCPTRIE_DEBUG
1188
    /* we saved some space */
1189
    printf("compacting UCPTrie: count of 32-bit data words %lu->%lu  countSame=%ld  sumOverlaps=%ld\n",
1190
            (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps);
1191
#endif
1192
2.59k
    return newDataLength;
1193
2.59k
}
1194
1195
int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks,
1196
2.59k
                                           UErrorCode &errorCode) {
1197
2.59k
    int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3);
1198
2.59k
    if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) {
1199
        // Only the linear fast index, no multi-stage index tables.
1200
1.64k
        index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
1201
1.64k
        return fastIndexLength;
1202
1.64k
    }
1203
1204
    // Condense the fast index table.
1205
    // Also, does it contain an index-3 block with all dataNullOffset?
1206
946
    uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH];  // fastIndexLength
1207
946
    int32_t i3FirstNull = -1;
1208
943k
    for (int32_t i = 0, j = 0; i < fastILimit; ++j) {
1209
942k
        uint32_t i3 = index[i];
1210
942k
        fastIndex[j] = static_cast<uint16_t>(i3);
1211
942k
        if (i3 == static_cast<uint32_t>(dataNullOffset)) {
1212
599k
            if (i3FirstNull < 0) {
1213
15.6k
                i3FirstNull = j;
1214
583k
            } else if (index3NullOffset < 0 &&
1215
48.9k
                    (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1216
793
                index3NullOffset = i3FirstNull;
1217
793
            }
1218
599k
        } else {
1219
343k
            i3FirstNull = -1;
1220
343k
        }
1221
        // Set the index entries that compactData() skipped.
1222
        // Needed when the multi-stage index covers the fast index range as well.
1223
942k
        int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
1224
3.77M
        while (++i < iNext) {
1225
2.82M
            i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
1226
2.82M
            index[i] = i3;
1227
2.82M
        }
1228
942k
    }
1229
1230
946
    if (!mixedBlocks.init(fastIndexLength, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1231
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1232
0
        return 0;
1233
0
    }
1234
946
    mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength);
1235
1236
    // Examine index-3 blocks. For each determine one of:
1237
    // - same as the index-3 null block
1238
    // - same as a fast-index block
1239
    // - 16-bit indexes
1240
    // - 18-bit indexes
1241
    // We store this in the first flags entry for the index-3 block.
1242
    //
1243
    // Also determine an upper limit for the index-3 table length.
1244
946
    int32_t index3Capacity = 0;
1245
946
    i3FirstNull = index3NullOffset;
1246
946
    bool hasLongI3Blocks = false;
1247
    // If the fast index covers the whole BMP, then
1248
    // the multi-stage index is only for supplementary code points.
1249
    // Otherwise, the multi-stage index covers all of Unicode.
1250
946
    int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT;
1251
946
    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
1252
1.02M
    for (int32_t i = iStart; i < iLimit;) {
1253
1.01M
        int32_t j = i;
1254
1.01M
        int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
1255
1.01M
        uint32_t oredI3 = 0;
1256
1.01M
        bool isNull = true;
1257
32.6M
        do {
1258
32.6M
            uint32_t i3 = index[j];
1259
32.6M
            oredI3 |= i3;
1260
32.6M
            if (i3 != static_cast<uint32_t>(dataNullOffset)) {
1261
5.10M
                isNull = false;
1262
5.10M
            }
1263
32.6M
        } while (++j < jLimit);
1264
1.01M
        if (isNull) {
1265
849k
            flags[i] = I3_NULL;
1266
849k
            if (i3FirstNull < 0) {
1267
153
                if (oredI3 <= 0xffff) {
1268
153
                    index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
1269
153
                } else {
1270
0
                    index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
1271
0
                    hasLongI3Blocks = true;
1272
0
                }
1273
153
                i3FirstNull = 0;
1274
153
            }
1275
849k
        } else {
1276
170k
            if (oredI3 <= 0xffff) {
1277
170k
                int32_t n = mixedBlocks.findBlock(fastIndex, index, i);
1278
170k
                if (n >= 0) {
1279
72.1k
                    flags[i] = I3_BMP;
1280
72.1k
                    index[i] = n;
1281
98.1k
                } else {
1282
98.1k
                    flags[i] = I3_16;
1283
98.1k
                    index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
1284
98.1k
                }
1285
170k
            } else {
1286
0
                flags[i] = I3_18;
1287
0
                index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
1288
0
                hasLongI3Blocks = true;
1289
0
            }
1290
170k
        }
1291
1.01M
        i = j;
1292
1.01M
    }
1293
1294
946
    int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3;
1295
1296
    // Length of the index-1 table, rounded up.
1297
946
    int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2;
1298
1299
    // Index table: Fast index, index-1, index-3, index-2.
1300
    // +1 for possible index table padding.
1301
946
    int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1;
1302
946
    index16 = static_cast<uint16_t*>(uprv_malloc(index16Capacity * 2));
1303
946
    if (index16 == nullptr) {
1304
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1305
0
        return 0;
1306
0
    }
1307
946
    uprv_memcpy(index16, fastIndex, fastIndexLength * 2);
1308
1309
946
    if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1310
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1311
0
        return 0;
1312
0
    }
1313
946
    MixedBlocks longI3Blocks;
1314
946
    if (hasLongI3Blocks) {
1315
0
        if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) {
1316
0
            errorCode = U_MEMORY_ALLOCATION_ERROR;
1317
0
            return 0;
1318
0
        }
1319
0
    }
1320
1321
    // Compact the index-3 table and write an uncompacted version of the index-2 table.
1322
946
    uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2];  // index2Capacity
1323
946
    int32_t i2Length = 0;
1324
946
    i3FirstNull = index3NullOffset;
1325
946
    int32_t index3Start = fastIndexLength + index1Length;
1326
946
    int32_t indexLength = index3Start;
1327
1.02M
    for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1328
1.01M
        int32_t i3;
1329
1.01M
        uint8_t f = flags[i];
1330
1.01M
        if (f == I3_NULL && i3FirstNull < 0) {
1331
            // First index-3 null block. Write & overlap it like a normal block, then remember it.
1332
153
            f = dataNullOffset <= 0xffff ? I3_16 : I3_18;
1333
153
            i3FirstNull = 0;
1334
153
        }
1335
1.01M
        if (f == I3_NULL) {
1336
849k
            i3 = index3NullOffset;
1337
849k
        } else if (f == I3_BMP) {
1338
72.1k
            i3 = index[i];
1339
98.3k
        } else if (f == I3_16) {
1340
98.3k
            int32_t n = mixedBlocks.findBlock(index16, index, i);
1341
98.3k
            if (n >= 0) {
1342
73.2k
                i3 = n;
1343
73.2k
            } else {
1344
25.1k
                if (indexLength == index3Start) {
1345
                    // No overlap at the boundary between the index-1 and index-3 tables.
1346
937
                    n = 0;
1347
24.1k
                } else {
1348
24.1k
                    n = getOverlap(index16, indexLength,
1349
24.1k
                                   index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
1350
24.1k
                }
1351
25.1k
                i3 = indexLength - n;
1352
25.1k
                int32_t prevIndexLength = indexLength;
1353
714k
                while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1354
689k
                    index16[indexLength++] = index[i + n++];
1355
689k
                }
1356
25.1k
                mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1357
25.1k
                if (hasLongI3Blocks) {
1358
0
                    longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
1359
0
                }
1360
25.1k
            }
1361
98.3k
        } else {
1362
0
            U_ASSERT(f == I3_18);
1363
0
            U_ASSERT(hasLongI3Blocks);
1364
            // Encode an index-3 block that contains one or more data indexes exceeding 16 bits.
1365
0
            int32_t j = i;
1366
0
            int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
1367
0
            int32_t k = indexLength;
1368
0
            do {
1369
0
                ++k;
1370
0
                uint32_t v = index[j++];
1371
0
                uint32_t upperBits = (v & 0x30000) >> 2;
1372
0
                index16[k++] = v;
1373
0
                v = index[j++];
1374
0
                upperBits |= (v & 0x30000) >> 4;
1375
0
                index16[k++] = v;
1376
0
                v = index[j++];
1377
0
                upperBits |= (v & 0x30000) >> 6;
1378
0
                index16[k++] = v;
1379
0
                v = index[j++];
1380
0
                upperBits |= (v & 0x30000) >> 8;
1381
0
                index16[k++] = v;
1382
0
                v = index[j++];
1383
0
                upperBits |= (v & 0x30000) >> 10;
1384
0
                index16[k++] = v;
1385
0
                v = index[j++];
1386
0
                upperBits |= (v & 0x30000) >> 12;
1387
0
                index16[k++] = v;
1388
0
                v = index[j++];
1389
0
                upperBits |= (v & 0x30000) >> 14;
1390
0
                index16[k++] = v;
1391
0
                v = index[j++];
1392
0
                upperBits |= (v & 0x30000) >> 16;
1393
0
                index16[k++] = v;
1394
0
                index16[k - 9] = upperBits;
1395
0
            } while (j < jLimit);
1396
0
            int32_t n = longI3Blocks.findBlock(index16, index16, indexLength);
1397
0
            if (n >= 0) {
1398
0
                i3 = n | 0x8000;
1399
0
            } else {
1400
0
                if (indexLength == index3Start) {
1401
                    // No overlap at the boundary between the index-1 and index-3 tables.
1402
0
                    n = 0;
1403
0
                } else {
1404
0
                    n = getOverlap(index16, indexLength,
1405
0
                                   index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
1406
0
                }
1407
0
                i3 = (indexLength - n) | 0x8000;
1408
0
                int32_t prevIndexLength = indexLength;
1409
0
                if (n > 0) {
1410
0
                    int32_t start = indexLength;
1411
0
                    while (n < INDEX_3_18BIT_BLOCK_LENGTH) {
1412
0
                        index16[indexLength++] = index16[start + n++];
1413
0
                    }
1414
0
                } else {
1415
0
                    indexLength += INDEX_3_18BIT_BLOCK_LENGTH;
1416
0
                }
1417
0
                mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1418
0
                if (hasLongI3Blocks) {
1419
0
                    longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
1420
0
                }
1421
0
            }
1422
0
        }
1423
1.01M
        if (index3NullOffset < 0 && i3FirstNull >= 0) {
1424
153
            index3NullOffset = i3;
1425
153
        }
1426
        // Set the index-2 table entry.
1427
1.01M
        index2[i2Length++] = i3;
1428
1.01M
    }
1429
946
    U_ASSERT(i2Length == index2Capacity);
1430
946
    U_ASSERT(indexLength <= index3Start + index3Capacity);
1431
1432
946
    if (index3NullOffset < 0) {
1433
0
        index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
1434
0
    }
1435
946
    if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1436
        // The index-3 offsets exceed 15 bits, or
1437
        // the last one cannot be distinguished from the no-null-block value.
1438
0
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1439
0
        return 0;
1440
0
    }
1441
1442
    // Compact the index-2 table and write the index-1 table.
1443
946
    static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH,
1444
946
                  "must re-init mixedBlocks");
1445
946
    int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH;
1446
946
    int32_t i1 = fastIndexLength;
1447
33.1k
    for (int32_t i = 0; i < i2Length; i += blockLength) {
1448
32.2k
        int32_t n;
1449
32.2k
        if ((i2Length - i) >= blockLength) {
1450
            // normal block
1451
31.4k
            U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH);
1452
31.4k
            n = mixedBlocks.findBlock(index16, index2, i);
1453
31.4k
        } else {
1454
            // highStart is inside the last index-2 block. Shorten it.
1455
825
            blockLength = i2Length - i;
1456
825
            n = findSameBlock(index16, index3Start, indexLength,
1457
825
                              index2, i, blockLength);
1458
825
        }
1459
32.2k
        int32_t i2;
1460
32.2k
        if (n >= 0) {
1461
26.6k
            i2 = n;
1462
26.6k
        } else {
1463
5.56k
            if (indexLength == index3Start) {
1464
                // No overlap at the boundary between the index-1 and index-3/2 tables.
1465
9
                n = 0;
1466
5.55k
            } else {
1467
5.55k
                n = getOverlap(index16, indexLength, index2, i, blockLength);
1468
5.55k
            }
1469
5.56k
            i2 = indexLength - n;
1470
5.56k
            int32_t prevIndexLength = indexLength;
1471
138k
            while (n < blockLength) {
1472
132k
                index16[indexLength++] = index2[i + n++];
1473
132k
            }
1474
5.56k
            mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1475
5.56k
        }
1476
        // Set the index-1 table entry.
1477
32.2k
        index16[i1++] = i2;
1478
32.2k
    }
1479
946
    U_ASSERT(i1 == index3Start);
1480
946
    U_ASSERT(indexLength <= index16Capacity);
1481
1482
#ifdef UCPTRIE_DEBUG
1483
    /* we saved some space */
1484
    printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n",
1485
            (long)iLimit, (long)indexLength);
1486
#endif
1487
1488
946
    return indexLength;
1489
946
}
1490
1491
2.59k
int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) {
1492
    // Find the real highStart and round it up.
1493
2.59k
    U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
1494
2.59k
    highValue = get(MAX_UNICODE);
1495
2.59k
    int32_t realHighStart = findHighStart();
1496
2.59k
    realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) &
1497
2.59k
        ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
1498
2.59k
    if (realHighStart == UNICODE_LIMIT) {
1499
76
        highValue = initialValue;
1500
76
    }
1501
1502
#ifdef UCPTRIE_DEBUG
1503
    printf("UCPTrie: highStart U+%06lx  highValue 0x%lx  initialValue 0x%lx\n",
1504
            (long)realHighStart, (long)highValue, (long)initialValue);
1505
#endif
1506
1507
    // We always store indexes and data values for the fast range.
1508
    // Pin highStart to the top of that range while building.
1509
2.59k
    UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3;
1510
2.59k
    if (realHighStart < fastLimit) {
1511
2.05M
        for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) {
1512
2.05M
            flags[i] = ALL_SAME;
1513
2.05M
            index[i] = highValue;
1514
2.05M
        }
1515
887
        highStart = fastLimit;
1516
1.70k
    } else {
1517
1.70k
        highStart = realHighStart;
1518
1.70k
    }
1519
1520
2.59k
    uint32_t asciiData[ASCII_LIMIT];
1521
334k
    for (int32_t i = 0; i < ASCII_LIMIT; ++i) {
1522
332k
        asciiData[i] = get(i);
1523
332k
    }
1524
1525
    // First we look for which data blocks have the same value repeated over the whole block,
1526
    // deduplicate such blocks, find a good null data block (for faster enumeration),
1527
    // and get an upper bound for the necessary data array length.
1528
2.59k
    AllSameBlocks allSameBlocks;
1529
2.59k
    int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks);
1530
2.59k
    if (newDataCapacity < 0) {
1531
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1532
0
        return 0;
1533
0
    }
1534
2.59k
    uint32_t* newData = static_cast<uint32_t*>(uprv_malloc(newDataCapacity * 4));
1535
2.59k
    if (newData == nullptr) {
1536
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1537
0
        return 0;
1538
0
    }
1539
2.59k
    uprv_memcpy(newData, asciiData, sizeof(asciiData));
1540
1541
2.59k
    int32_t dataNullIndex = allSameBlocks.findMostUsed();
1542
1543
2.59k
    MixedBlocks mixedBlocks;
1544
2.59k
    int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity,
1545
2.59k
                                        dataNullIndex, mixedBlocks, errorCode);
1546
2.59k
    if (U_FAILURE(errorCode)) { return 0; }
1547
2.59k
    U_ASSERT(newDataLength <= newDataCapacity);
1548
2.59k
    uprv_free(data);
1549
2.59k
    data = newData;
1550
2.59k
    dataCapacity = newDataCapacity;
1551
2.59k
    dataLength = newDataLength;
1552
2.59k
    if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) {
1553
        // The offset of the last data block is too high to be stored in the index table.
1554
0
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1555
0
        return 0;
1556
0
    }
1557
1558
2.59k
    if (dataNullIndex >= 0) {
1559
2.59k
        dataNullOffset = index[dataNullIndex];
1560
#ifdef UCPTRIE_DEBUG
1561
        if (data[dataNullOffset] != initialValue) {
1562
            printf("UCPTrie initialValue %lx -> more common nullValue %lx\n",
1563
                   (long)initialValue, (long)data[dataNullOffset]);
1564
        }
1565
#endif
1566
2.59k
        initialValue = data[dataNullOffset];
1567
2.59k
    } else {
1568
0
        dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET;
1569
0
    }
1570
1571
2.59k
    int32_t indexLength = compactIndex(fastILimit, mixedBlocks, errorCode);
1572
2.59k
    highStart = realHighStart;
1573
2.59k
    return indexLength;
1574
2.59k
}
1575
1576
2.59k
UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) {
1577
2.59k
    if (U_FAILURE(errorCode)) {
1578
0
        return nullptr;
1579
0
    }
1580
2.59k
    if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type ||
1581
2.59k
            valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) {
1582
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1583
0
        return nullptr;
1584
0
    }
1585
1586
    // The mutable trie always stores 32-bit values.
1587
    // When we build a UCPTrie for a smaller value width, we first mask off unused bits
1588
    // before compacting the data.
1589
2.59k
    switch (valueWidth) {
1590
2
    case UCPTRIE_VALUE_BITS_32:
1591
2
        break;
1592
64
    case UCPTRIE_VALUE_BITS_16:
1593
64
        maskValues(0xffff);
1594
64
        break;
1595
2.52k
    case UCPTRIE_VALUE_BITS_8:
1596
2.52k
        maskValues(0xff);
1597
2.52k
        break;
1598
0
    default:
1599
0
        break;
1600
2.59k
    }
1601
1602
2.59k
    UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT;
1603
2.59k
    int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode);
1604
2.59k
    if (U_FAILURE(errorCode)) {
1605
0
        clear();
1606
0
        return nullptr;
1607
0
    }
1608
1609
    // Ensure data table alignment: The index length must be even for uint32_t data.
1610
2.59k
    if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) {
1611
0
        index16[indexLength++] = 0xffee;  // arbitrary value
1612
0
    }
1613
1614
    // Make the total trie structure length a multiple of 4 bytes by padding the data table,
1615
    // and store special values as the last two data values.
1616
2.59k
    int32_t length = indexLength * 2;
1617
2.59k
    if (valueWidth == UCPTRIE_VALUE_BITS_16) {
1618
64
        if (((indexLength ^ dataLength) & 1) != 0) {
1619
            // padding
1620
38
            data[dataLength++] = errorValue;
1621
38
        }
1622
64
        if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
1623
48
            data[dataLength++] = highValue;
1624
48
            data[dataLength++] = errorValue;
1625
48
        }
1626
64
        length += dataLength * 2;
1627
2.53k
    } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
1628
        // 32-bit data words never need padding to a multiple of 4 bytes.
1629
2
        if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
1630
0
            if (data[dataLength - 1] != highValue) {
1631
0
                data[dataLength++] = highValue;
1632
0
            }
1633
0
            data[dataLength++] = errorValue;
1634
0
        }
1635
2
        length += dataLength * 4;
1636
2.52k
    } else {
1637
2.52k
        int32_t and3 = (length + dataLength) & 3;
1638
2.52k
        if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) {
1639
            // all set
1640
2.52k
        } else if(and3 == 3 && data[dataLength - 1] == highValue) {
1641
334
            data[dataLength++] = errorValue;
1642
2.18k
        } else {
1643
5.01k
            while (and3 != 2) {
1644
2.82k
                data[dataLength++] = highValue;
1645
2.82k
                and3 = (and3 + 1) & 3;
1646
2.82k
            }
1647
2.18k
            data[dataLength++] = highValue;
1648
2.18k
            data[dataLength++] = errorValue;
1649
2.18k
        }
1650
2.52k
        length += dataLength;
1651
2.52k
    }
1652
1653
    // Calculate the total length of the UCPTrie as a single memory block.
1654
2.59k
    length += sizeof(UCPTrie);
1655
2.59k
    U_ASSERT((length & 3) == 0);
1656
1657
2.59k
    uint8_t* bytes = static_cast<uint8_t*>(uprv_malloc(length));
1658
2.59k
    if (bytes == nullptr) {
1659
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1660
0
        clear();
1661
0
        return nullptr;
1662
0
    }
1663
2.59k
    UCPTrie *trie = reinterpret_cast<UCPTrie *>(bytes);
1664
2.59k
    uprv_memset(trie, 0, sizeof(UCPTrie));
1665
2.59k
    trie->indexLength = indexLength;
1666
2.59k
    trie->dataLength = dataLength;
1667
1668
2.59k
    trie->highStart = highStart;
1669
    // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes.
1670
    // Runtime code needs to then test for the real highStart as well.
1671
2.59k
    trie->shifted12HighStart = (highStart + 0xfff) >> 12;
1672
2.59k
    trie->type = type;
1673
2.59k
    trie->valueWidth = valueWidth;
1674
1675
2.59k
    trie->index3NullOffset = index3NullOffset;
1676
2.59k
    trie->dataNullOffset = dataNullOffset;
1677
2.59k
    trie->nullValue = initialValue;
1678
1679
2.59k
    bytes += sizeof(UCPTrie);
1680
1681
    // Fill the index and data arrays.
1682
2.59k
    uint16_t* dest16 = reinterpret_cast<uint16_t*>(bytes);
1683
2.59k
    trie->index = dest16;
1684
1685
2.59k
    if (highStart <= fastLimit) {
1686
        // Condense only the fast index from the mutable-trie index.
1687
1.68M
        for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) {
1688
1.68M
            *dest16++ = static_cast<uint16_t>(index[i]); // dest16[j]
1689
1.68M
        }
1690
1.64k
    } else {
1691
946
        uprv_memcpy(dest16, index16, indexLength * 2);
1692
946
        dest16 += indexLength;
1693
946
    }
1694
2.59k
    bytes += indexLength * 2;
1695
1696
    // Write the data array.
1697
2.59k
    const uint32_t *p = data;
1698
2.59k
    switch (valueWidth) {
1699
64
    case UCPTRIE_VALUE_BITS_16:
1700
        // Write 16-bit data values.
1701
64
        trie->data.ptr16 = dest16;
1702
787k
        for (int32_t i = dataLength; i > 0; --i) {
1703
787k
            *dest16++ = static_cast<uint16_t>(*p++);
1704
787k
        }
1705
64
        break;
1706
2
    case UCPTRIE_VALUE_BITS_32:
1707
        // Write 32-bit data values.
1708
2
        trie->data.ptr32 = reinterpret_cast<uint32_t*>(bytes);
1709
2
        uprv_memcpy(bytes, p, (size_t)dataLength * 4);
1710
2
        break;
1711
2.52k
    case UCPTRIE_VALUE_BITS_8:
1712
        // Write 8-bit data values.
1713
2.52k
        trie->data.ptr8 = bytes;
1714
4.91M
        for (int32_t i = dataLength; i > 0; --i) {
1715
4.91M
            *bytes++ = static_cast<uint8_t>(*p++);
1716
4.91M
        }
1717
2.52k
        break;
1718
0
    default:
1719
        // Will not occur, valueWidth checked at the beginning.
1720
0
        break;
1721
2.59k
    }
1722
1723
#ifdef UCPTRIE_DEBUG
1724
    trie->name = name;
1725
1726
    ucptrie_printLengths(trie, "");
1727
#endif
1728
1729
2.59k
    clear();
1730
2.59k
    return trie;
1731
2.59k
}
1732
1733
}  // namespace
1734
1735
U_NAMESPACE_END
1736
1737
U_NAMESPACE_USE
1738
1739
U_CAPI UMutableCPTrie * U_EXPORT2
1740
2.63k
umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
1741
2.63k
    if (U_FAILURE(*pErrorCode)) {
1742
43
        return nullptr;
1743
43
    }
1744
2.59k
    LocalPointer<MutableCodePointTrie> trie(
1745
2.59k
        new MutableCodePointTrie(initialValue, errorValue, *pErrorCode), *pErrorCode);
1746
2.59k
    if (U_FAILURE(*pErrorCode)) {
1747
0
        return nullptr;
1748
0
    }
1749
2.59k
    return reinterpret_cast<UMutableCPTrie *>(trie.orphan());
1750
2.59k
}
1751
1752
U_CAPI UMutableCPTrie * U_EXPORT2
1753
0
umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) {
1754
0
    if (U_FAILURE(*pErrorCode)) {
1755
0
        return nullptr;
1756
0
    }
1757
0
    if (other == nullptr) {
1758
0
        return nullptr;
1759
0
    }
1760
0
    LocalPointer<MutableCodePointTrie> clone(
1761
0
        new MutableCodePointTrie(*reinterpret_cast<const MutableCodePointTrie *>(other), *pErrorCode), *pErrorCode);
1762
0
    if (U_FAILURE(*pErrorCode)) {
1763
0
        return nullptr;
1764
0
    }
1765
0
    return reinterpret_cast<UMutableCPTrie *>(clone.orphan());
1766
0
}
1767
1768
U_CAPI void U_EXPORT2
1769
7.63k
umutablecptrie_close(UMutableCPTrie *trie) {
1770
7.63k
    delete reinterpret_cast<MutableCodePointTrie *>(trie);
1771
7.63k
}
1772
1773
U_CAPI UMutableCPTrie * U_EXPORT2
1774
0
umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) {
1775
0
    if (U_FAILURE(*pErrorCode)) {
1776
0
        return nullptr;
1777
0
    }
1778
0
    if (map == nullptr) {
1779
0
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
1780
0
        return nullptr;
1781
0
    }
1782
0
    return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPMap(map, *pErrorCode));
1783
0
}
1784
1785
U_CAPI UMutableCPTrie * U_EXPORT2
1786
0
umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) {
1787
0
    if (U_FAILURE(*pErrorCode)) {
1788
0
        return nullptr;
1789
0
    }
1790
0
    if (trie == nullptr) {
1791
0
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
1792
0
        return nullptr;
1793
0
    }
1794
0
    return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode));
1795
0
}
1796
1797
U_CAPI uint32_t U_EXPORT2
1798
7.34k
umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) {
1799
7.34k
    return reinterpret_cast<const MutableCodePointTrie *>(trie)->get(c);
1800
7.34k
}
1801
1802
namespace {
1803
1804
UChar32 getRange(const void *trie, UChar32 start,
1805
0
                 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
1806
0
    return reinterpret_cast<const MutableCodePointTrie *>(trie)->
1807
0
        getRange(start, filter, context, pValue);
1808
0
}
1809
1810
}  // namespace
1811
1812
U_CAPI UChar32 U_EXPORT2
1813
umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
1814
                        UCPMapRangeOption option, uint32_t surrogateValue,
1815
0
                        UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
1816
0
    return ucptrie_internalGetRange(getRange, trie, start,
1817
0
                                    option, surrogateValue,
1818
0
                                    filter, context, pValue);
1819
0
}
1820
1821
U_CAPI void U_EXPORT2
1822
4.83k
umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
1823
4.83k
    if (U_FAILURE(*pErrorCode)) {
1824
0
        return;
1825
0
    }
1826
4.83k
    reinterpret_cast<MutableCodePointTrie *>(trie)->set(c, value, *pErrorCode);
1827
4.83k
}
1828
1829
U_CAPI void U_EXPORT2
1830
umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end,
1831
829k
                   uint32_t value, UErrorCode *pErrorCode) {
1832
829k
    if (U_FAILURE(*pErrorCode)) {
1833
0
        return;
1834
0
    }
1835
829k
    reinterpret_cast<MutableCodePointTrie *>(trie)->setRange(start, end, value, *pErrorCode);
1836
829k
}
1837
1838
/* Compact and internally serialize the trie. */
1839
U_CAPI UCPTrie * U_EXPORT2
1840
umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth,
1841
2.59k
                              UErrorCode *pErrorCode) {
1842
2.59k
    if (U_FAILURE(*pErrorCode)) {
1843
0
        return nullptr;
1844
0
    }
1845
2.59k
    return reinterpret_cast<MutableCodePointTrie *>(trie)->build(type, valueWidth, *pErrorCode);
1846
2.59k
}
1847
1848
#ifdef UCPTRIE_DEBUG
1849
U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) {
1850
    reinterpret_cast<MutableCodePointTrie *>(trie)->name = name;
1851
}
1852
#endif