Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/umutablecptrie.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2017 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// umutablecptrie.cpp (inspired by utrie2_builder.cpp)
5
// created: 2017dec29 Markus W. Scherer
6
7
// #define UCPTRIE_DEBUG
8
#ifdef UCPTRIE_DEBUG
9
#   include <stdio.h>
10
#endif
11
12
#include "unicode/utypes.h"
13
#include "unicode/ucptrie.h"
14
#include "unicode/umutablecptrie.h"
15
#include "unicode/uobject.h"
16
#include "unicode/utf16.h"
17
#include "cmemory.h"
18
#include "uassert.h"
19
#include "ucptrie_impl.h"
20
21
// ICU-20235 In case Microsoft math.h has defined this, undefine it.
22
#ifdef OVERFLOW
23
#undef OVERFLOW
24
#endif
25
26
U_NAMESPACE_BEGIN
27
28
namespace {
29
30
constexpr int32_t MAX_UNICODE = 0x10ffff;
31
32
constexpr int32_t UNICODE_LIMIT = 0x110000;
33
constexpr int32_t BMP_LIMIT = 0x10000;
34
constexpr int32_t ASCII_LIMIT = 0x80;
35
36
constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3;
37
constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3;
38
constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3;
39
40
constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3));
41
42
// Flag values for data blocks.
43
constexpr uint8_t ALL_SAME = 0;
44
constexpr uint8_t MIXED = 1;
45
constexpr uint8_t SAME_AS = 2;
46
47
/** Start with allocation of 16k data entries. */
48
constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14);
49
50
/** Grow about 8x each time. */
51
constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17);
52
53
/**
54
 * Maximum length of the build-time data array.
55
 * One entry per 0x110000 code points.
56
 */
57
constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT;
58
59
// Flag values for index-3 blocks while compacting/building.
60
constexpr uint8_t I3_NULL = 0;
61
constexpr uint8_t I3_BMP = 1;
62
constexpr uint8_t I3_16 = 2;
63
constexpr uint8_t I3_18 = 3;
64
65
constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8;
66
67
class AllSameBlocks;
68
class MixedBlocks;
69
70
class MutableCodePointTrie : public UMemory {
71
public:
72
    MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode);
73
    MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode);
74
    MutableCodePointTrie(const MutableCodePointTrie &other) = delete;
75
    ~MutableCodePointTrie();
76
77
    MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete;
78
79
    static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode);
80
    static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode);
81
82
    uint32_t get(UChar32 c) const;
83
    int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context,
84
                     uint32_t *pValue) const;
85
86
    void set(UChar32 c, uint32_t value, UErrorCode &errorCode);
87
    void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode);
88
89
    UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode);
90
91
private:
92
    void clear();
93
94
    bool ensureHighStart(UChar32 c);
95
    int32_t allocDataBlock(int32_t blockLength);
96
    int32_t getDataBlock(int32_t i);
97
98
    void maskValues(uint32_t mask);
99
    UChar32 findHighStart() const;
100
    int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks);
101
    int32_t compactData(
102
            int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
103
            int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
104
    int32_t compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, UErrorCode &errorCode);
105
    int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode);
106
107
    uint32_t *index = nullptr;
108
    int32_t indexCapacity = 0;
109
    int32_t index3NullOffset = -1;
110
    uint32_t *data = nullptr;
111
    int32_t dataCapacity = 0;
112
    int32_t dataLength = 0;
113
    int32_t dataNullOffset = -1;
114
115
    uint32_t origInitialValue;
116
    uint32_t initialValue;
117
    uint32_t errorValue;
118
    UChar32 highStart;
119
    uint32_t highValue;
120
#ifdef UCPTRIE_DEBUG
121
public:
122
    const char *name;
123
#endif
124
private:
125
    /** Temporary array while building the final data. */
126
    uint16_t *index16 = nullptr;
127
    uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3];
128
};
129
130
MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) :
131
0
        origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue),
132
0
        highStart(0), highValue(initialValue)
133
#ifdef UCPTRIE_DEBUG
134
        , name("open")
135
#endif
136
0
        {
137
0
    if (U_FAILURE(errorCode)) { return; }
138
0
    index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4);
139
0
    data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4);
140
0
    if (index == nullptr || data == nullptr) {
141
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
142
0
        return;
143
0
    }
144
0
    indexCapacity = BMP_I_LIMIT;
145
0
    dataCapacity = INITIAL_DATA_LENGTH;
146
0
}
147
148
MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) :
149
0
        index3NullOffset(other.index3NullOffset),
150
0
        dataNullOffset(other.dataNullOffset),
151
0
        origInitialValue(other.origInitialValue), initialValue(other.initialValue),
152
0
        errorValue(other.errorValue),
153
0
        highStart(other.highStart), highValue(other.highValue)
154
#ifdef UCPTRIE_DEBUG
155
        , name("mutable clone")
156
#endif
157
0
        {
158
0
    if (U_FAILURE(errorCode)) { return; }
159
0
    int32_t iCapacity = highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT;
160
0
    index = (uint32_t *)uprv_malloc(iCapacity * 4);
161
0
    data = (uint32_t *)uprv_malloc(other.dataCapacity * 4);
162
0
    if (index == nullptr || data == nullptr) {
163
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
164
0
        return;
165
0
    }
166
0
    indexCapacity = iCapacity;
167
0
    dataCapacity = other.dataCapacity;
168
169
0
    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
170
0
    uprv_memcpy(flags, other.flags, iLimit);
171
0
    uprv_memcpy(index, other.index, iLimit * 4);
172
0
    uprv_memcpy(data, other.data, (size_t)other.dataLength * 4);
173
0
    dataLength = other.dataLength;
174
0
    U_ASSERT(other.index16 == nullptr);
175
0
}
176
177
0
MutableCodePointTrie::~MutableCodePointTrie() {
178
0
    uprv_free(index);
179
0
    uprv_free(data);
180
0
    uprv_free(index16);
181
0
}
182
183
0
MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) {
184
    // Use the highValue as the initialValue to reduce the highStart.
185
0
    uint32_t errorValue = ucpmap_get(map, -1);
186
0
    uint32_t initialValue = ucpmap_get(map, 0x10ffff);
187
0
    LocalPointer<MutableCodePointTrie> mutableTrie(
188
0
        new MutableCodePointTrie(initialValue, errorValue, errorCode),
189
0
        errorCode);
190
0
    if (U_FAILURE(errorCode)) {
191
0
        return nullptr;
192
0
    }
193
0
    UChar32 start = 0, end;
194
0
    uint32_t value;
195
0
    while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
196
0
                                  nullptr, nullptr, &value)) >= 0) {
197
0
        if (value != initialValue) {
198
0
            if (start == end) {
199
0
                mutableTrie->set(start, value, errorCode);
200
0
            } else {
201
0
                mutableTrie->setRange(start, end, value, errorCode);
202
0
            }
203
0
        }
204
0
        start = end + 1;
205
0
    }
206
0
    if (U_SUCCESS(errorCode)) {
207
0
        return mutableTrie.orphan();
208
0
    } else {
209
0
        return nullptr;
210
0
    }
211
0
}
212
213
0
MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) {
214
    // Use the highValue as the initialValue to reduce the highStart.
215
0
    uint32_t errorValue;
216
0
    uint32_t initialValue;
217
0
    switch (trie->valueWidth) {
218
0
    case UCPTRIE_VALUE_BITS_16:
219
0
        errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
220
0
        initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
221
0
        break;
222
0
    case UCPTRIE_VALUE_BITS_32:
223
0
        errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
224
0
        initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
225
0
        break;
226
0
    case UCPTRIE_VALUE_BITS_8:
227
0
        errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
228
0
        initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
229
0
        break;
230
0
    default:
231
        // Unreachable if the trie is properly initialized.
232
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
233
0
        return nullptr;
234
0
    }
235
0
    LocalPointer<MutableCodePointTrie> mutableTrie(
236
0
        new MutableCodePointTrie(initialValue, errorValue, errorCode),
237
0
        errorCode);
238
0
    if (U_FAILURE(errorCode)) {
239
0
        return nullptr;
240
0
    }
241
0
    UChar32 start = 0, end;
242
0
    uint32_t value;
243
0
    while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
244
0
                                   nullptr, nullptr, &value)) >= 0) {
245
0
        if (value != initialValue) {
246
0
            if (start == end) {
247
0
                mutableTrie->set(start, value, errorCode);
248
0
            } else {
249
0
                mutableTrie->setRange(start, end, value, errorCode);
250
0
            }
251
0
        }
252
0
        start = end + 1;
253
0
    }
254
0
    if (U_SUCCESS(errorCode)) {
255
0
        return mutableTrie.orphan();
256
0
    } else {
257
0
        return nullptr;
258
0
    }
259
0
}
260
261
0
void MutableCodePointTrie::clear() {
262
0
    index3NullOffset = dataNullOffset = -1;
263
0
    dataLength = 0;
264
0
    highValue = initialValue = origInitialValue;
265
0
    highStart = 0;
266
0
    uprv_free(index16);
267
0
    index16 = nullptr;
268
0
}
269
270
0
uint32_t MutableCodePointTrie::get(UChar32 c) const {
271
0
    if ((uint32_t)c > MAX_UNICODE) {
272
0
        return errorValue;
273
0
    }
274
0
    if (c >= highStart) {
275
0
        return highValue;
276
0
    }
277
0
    int32_t i = c >> UCPTRIE_SHIFT_3;
278
0
    if (flags[i] == ALL_SAME) {
279
0
        return index[i];
280
0
    } else {
281
0
        return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)];
282
0
    }
283
0
}
284
285
inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue,
286
0
                                 UCPMapValueFilter *filter, const void *context) {
287
0
    if (value == initialValue) {
288
0
        value = nullValue;
289
0
    } else if (filter != nullptr) {
290
0
        value = filter(context, value);
291
0
    }
292
0
    return value;
293
0
}
294
295
UChar32 MutableCodePointTrie::getRange(
296
        UChar32 start, UCPMapValueFilter *filter, const void *context,
297
0
        uint32_t *pValue) const {
298
0
    if ((uint32_t)start > MAX_UNICODE) {
299
0
        return U_SENTINEL;
300
0
    }
301
0
    if (start >= highStart) {
302
0
        if (pValue != nullptr) {
303
0
            uint32_t value = highValue;
304
0
            if (filter != nullptr) { value = filter(context, value); }
305
0
            *pValue = value;
306
0
        }
307
0
        return MAX_UNICODE;
308
0
    }
309
0
    uint32_t nullValue = initialValue;
310
0
    if (filter != nullptr) { nullValue = filter(context, nullValue); }
311
0
    UChar32 c = start;
312
0
    uint32_t trieValue, value;
313
0
    bool haveValue = false;
314
0
    int32_t i = c >> UCPTRIE_SHIFT_3;
315
0
    do {
316
0
        if (flags[i] == ALL_SAME) {
317
0
            uint32_t trieValue2 = index[i];
318
0
            if (haveValue) {
319
0
                if (trieValue2 != trieValue) {
320
0
                    if (filter == nullptr ||
321
0
                            maybeFilterValue(trieValue2, initialValue, nullValue,
322
0
                                             filter, context) != value) {
323
0
                        return c - 1;
324
0
                    }
325
0
                    trieValue = trieValue2;  // may or may not help
326
0
                }
327
0
            } else {
328
0
                trieValue = trieValue2;
329
0
                value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
330
0
                if (pValue != nullptr) { *pValue = value; }
331
0
                haveValue = true;
332
0
            }
333
0
            c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK;
334
0
        } else /* MIXED */ {
335
0
            int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK);
336
0
            uint32_t trieValue2 = data[di];
337
0
            if (haveValue) {
338
0
                if (trieValue2 != trieValue) {
339
0
                    if (filter == nullptr ||
340
0
                            maybeFilterValue(trieValue2, initialValue, nullValue,
341
0
                                             filter, context) != value) {
342
0
                        return c - 1;
343
0
                    }
344
0
                    trieValue = trieValue2;  // may or may not help
345
0
                }
346
0
            } else {
347
0
                trieValue = trieValue2;
348
0
                value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);
349
0
                if (pValue != nullptr) { *pValue = value; }
350
0
                haveValue = true;
351
0
            }
352
0
            while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) {
353
0
                trieValue2 = data[++di];
354
0
                if (trieValue2 != trieValue) {
355
0
                    if (filter == nullptr ||
356
0
                            maybeFilterValue(trieValue2, initialValue, nullValue,
357
0
                                             filter, context) != value) {
358
0
                        return c - 1;
359
0
                    }
360
0
                }
361
0
                trieValue = trieValue2;  // may or may not help
362
0
            }
363
0
        }
364
0
        ++i;
365
0
    } while (c < highStart);
366
0
    U_ASSERT(haveValue);
367
0
    if (maybeFilterValue(highValue, initialValue, nullValue,
368
0
                         filter, context) != value) {
369
0
        return c - 1;
370
0
    } else {
371
0
        return MAX_UNICODE;
372
0
    }
373
0
}
374
375
void
376
0
writeBlock(uint32_t *block, uint32_t value) {
377
0
    uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
378
0
    while (block < limit) {
379
0
        *block++ = value;
380
0
    }
381
0
}
382
383
0
bool MutableCodePointTrie::ensureHighStart(UChar32 c) {
384
0
    if (c >= highStart) {
385
        // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction.
386
0
        c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
387
0
        int32_t i = highStart >> UCPTRIE_SHIFT_3;
388
0
        int32_t iLimit = c >> UCPTRIE_SHIFT_3;
389
0
        if (iLimit > indexCapacity) {
390
0
            uint32_t *newIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4);
391
0
            if (newIndex == nullptr) { return false; }
392
0
            uprv_memcpy(newIndex, index, i * 4);
393
0
            uprv_free(index);
394
0
            index = newIndex;
395
0
            indexCapacity = I_LIMIT;
396
0
        }
397
0
        do {
398
0
            flags[i] = ALL_SAME;
399
0
            index[i] = initialValue;
400
0
        } while(++i < iLimit);
401
0
        highStart = c;
402
0
    }
403
0
    return true;
404
0
}
405
406
0
int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) {
407
0
    int32_t newBlock = dataLength;
408
0
    int32_t newTop = newBlock + blockLength;
409
0
    if (newTop > dataCapacity) {
410
0
        int32_t capacity;
411
0
        if (dataCapacity < MEDIUM_DATA_LENGTH) {
412
0
            capacity = MEDIUM_DATA_LENGTH;
413
0
        } else if (dataCapacity < MAX_DATA_LENGTH) {
414
0
            capacity = MAX_DATA_LENGTH;
415
0
        } else {
416
            // Should never occur.
417
            // Either MAX_DATA_LENGTH is incorrect,
418
            // or the code writes more values than should be possible.
419
0
            return -1;
420
0
        }
421
0
        uint32_t *newData = (uint32_t *)uprv_malloc(capacity * 4);
422
0
        if (newData == nullptr) {
423
0
            return -1;
424
0
        }
425
0
        uprv_memcpy(newData, data, (size_t)dataLength * 4);
426
0
        uprv_free(data);
427
0
        data = newData;
428
0
        dataCapacity = capacity;
429
0
    }
430
0
    dataLength = newTop;
431
0
    return newBlock;
432
0
}
433
434
/**
435
 * No error checking for illegal arguments.
436
 *
437
 * @return -1 if no new data block available (out of memory in data array)
438
 * @internal
439
 */
440
0
int32_t MutableCodePointTrie::getDataBlock(int32_t i) {
441
0
    if (flags[i] == MIXED) {
442
0
        return index[i];
443
0
    }
444
0
    if (i < BMP_I_LIMIT) {
445
0
        int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH);
446
0
        if (newBlock < 0) { return newBlock; }
447
0
        int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1);
448
0
        int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
449
0
        do {
450
0
            U_ASSERT(flags[iStart] == ALL_SAME);
451
0
            writeBlock(data + newBlock, index[iStart]);
452
0
            flags[iStart] = MIXED;
453
0
            index[iStart++] = newBlock;
454
0
            newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
455
0
        } while (iStart < iLimit);
456
0
        return index[i];
457
0
    } else {
458
0
        int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH);
459
0
        if (newBlock < 0) { return newBlock; }
460
0
        writeBlock(data + newBlock, index[i]);
461
0
        flags[i] = MIXED;
462
0
        index[i] = newBlock;
463
0
        return newBlock;
464
0
    }
465
0
}
466
467
0
void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) {
468
0
    if (U_FAILURE(errorCode)) {
469
0
        return;
470
0
    }
471
0
    if ((uint32_t)c > MAX_UNICODE) {
472
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
473
0
        return;
474
0
    }
475
476
0
    int32_t block;
477
0
    if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) {
478
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
479
0
        return;
480
0
    }
481
482
0
    data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value;
483
0
}
484
485
void
486
0
fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) {
487
0
    uint32_t *pLimit = block + limit;
488
0
    block += start;
489
0
    while (block < pLimit) {
490
0
        *block++ = value;
491
0
    }
492
0
}
493
494
0
void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) {
495
0
    if (U_FAILURE(errorCode)) {
496
0
        return;
497
0
    }
498
0
    if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) {
499
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
500
0
        return;
501
0
    }
502
0
    if (!ensureHighStart(end)) {
503
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
504
0
        return;
505
0
    }
506
507
0
    UChar32 limit = end + 1;
508
0
    if (start & UCPTRIE_SMALL_DATA_MASK) {
509
        // Set partial block at [start..following block boundary[.
510
0
        int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
511
0
        if (block < 0) {
512
0
            errorCode = U_MEMORY_ALLOCATION_ERROR;
513
0
            return;
514
0
        }
515
516
0
        UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK;
517
0
        if (nextStart <= limit) {
518
0
            fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH,
519
0
                      value);
520
0
            start = nextStart;
521
0
        } else {
522
0
            fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK,
523
0
                      value);
524
0
            return;
525
0
        }
526
0
    }
527
528
    // Number of positions in the last, partial block.
529
0
    int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK;
530
531
    // Round down limit to a block boundary.
532
0
    limit &= ~UCPTRIE_SMALL_DATA_MASK;
533
534
    // Iterate over all-value blocks.
535
0
    while (start < limit) {
536
0
        int32_t i = start >> UCPTRIE_SHIFT_3;
537
0
        if (flags[i] == ALL_SAME) {
538
0
            index[i] = value;
539
0
        } else /* MIXED */ {
540
0
            fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value);
541
0
        }
542
0
        start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
543
0
    }
544
545
0
    if (rest > 0) {
546
        // Set partial block at [last block boundary..limit[.
547
0
        int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
548
0
        if (block < 0) {
549
0
            errorCode = U_MEMORY_ALLOCATION_ERROR;
550
0
            return;
551
0
        }
552
553
0
        fillBlock(data + block, 0, rest, value);
554
0
    }
555
0
}
556
557
/* compaction --------------------------------------------------------------- */
558
559
0
void MutableCodePointTrie::maskValues(uint32_t mask) {
560
0
    initialValue &= mask;
561
0
    errorValue &= mask;
562
0
    highValue &= mask;
563
0
    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
564
0
    for (int32_t i = 0; i < iLimit; ++i) {
565
0
        if (flags[i] == ALL_SAME) {
566
0
            index[i] &= mask;
567
0
        }
568
0
    }
569
0
    for (int32_t i = 0; i < dataLength; ++i) {
570
0
        data[i] &= mask;
571
0
    }
572
0
}
573
574
template<typename UIntA, typename UIntB>
575
0
bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) {
576
0
    while (length > 0 && *s == *t) {
577
0
        ++s;
578
0
        ++t;
579
0
        --length;
580
0
    }
581
0
    return length == 0;
582
0
}
Unexecuted instantiation: umutablecptrie.cpp:bool icu_70::(anonymous namespace)::equalBlocks<unsigned int, unsigned int>(unsigned int const*, unsigned int const*, int)
Unexecuted instantiation: umutablecptrie.cpp:bool icu_70::(anonymous namespace)::equalBlocks<unsigned short, unsigned short>(unsigned short const*, unsigned short const*, int)
Unexecuted instantiation: umutablecptrie.cpp:bool icu_70::(anonymous namespace)::equalBlocks<unsigned short, unsigned int>(unsigned short const*, unsigned int const*, int)
583
584
0
bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) {
585
0
    const uint32_t *pLimit = p + length;
586
0
    while (p < pLimit && *p == value) { ++p; }
587
0
    return p == pLimit;
588
0
}
589
590
/** Search for an identical block. */
591
int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
592
0
                      const uint16_t *q, int32_t qStart, int32_t blockLength) {
593
    // Ensure that we do not even partially get past length.
594
0
    length -= blockLength;
595
596
0
    q += qStart;
597
0
    while (pStart <= length) {
598
0
        if (equalBlocks(p + pStart, q, blockLength)) {
599
0
            return pStart;
600
0
        }
601
0
        ++pStart;
602
0
    }
603
0
    return -1;
604
0
}
605
606
int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit,
607
0
                         uint32_t value, int32_t blockLength) {
608
    // Ensure that we do not even partially get past limit.
609
0
    limit -= blockLength;
610
611
0
    for (int32_t block = start; block <= limit; ++block) {
612
0
        if (p[block] == value) {
613
0
            for (int32_t i = 1;; ++i) {
614
0
                if (i == blockLength) {
615
0
                    return block;
616
0
                }
617
0
                if (p[block + i] != value) {
618
0
                    block += i;
619
0
                    break;
620
0
                }
621
0
            }
622
0
        }
623
0
    }
624
0
    return -1;
625
0
}
626
627
/**
628
 * Look for maximum overlap of the beginning of the other block
629
 * with the previous, adjacent block.
630
 */
631
template<typename UIntA, typename UIntB>
632
int32_t getOverlap(const UIntA *p, int32_t length,
633
0
                   const UIntB *q, int32_t qStart, int32_t blockLength) {
634
0
    int32_t overlap = blockLength - 1;
635
0
    U_ASSERT(overlap <= length);
636
0
    q += qStart;
637
0
    while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
638
0
        --overlap;
639
0
    }
640
0
    return overlap;
641
0
}
Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::getOverlap<unsigned int, unsigned int>(unsigned int const*, int, unsigned int const*, int, int)
Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::getOverlap<unsigned short, unsigned int>(unsigned short const*, int, unsigned int const*, int, int)
Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::getOverlap<unsigned short, unsigned short>(unsigned short const*, int, unsigned short const*, int, int)
642
643
int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value,
644
0
                          int32_t blockLength) {
645
0
    int32_t min = length - (blockLength - 1);
646
0
    int32_t i = length;
647
0
    while (min < i && p[i - 1] == value) { --i; }
648
0
    return length - i;
649
0
}
650
651
0
bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) {
652
0
    for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
653
0
        if (index[i] == dataOffset) {
654
0
            return true;
655
0
        }
656
0
    }
657
0
    return false;
658
0
}
659
660
/**
661
 * Finds the start of the last range in the trie by enumerating backward.
662
 * Indexes for code points higher than this will be omitted.
663
 */
664
0
UChar32 MutableCodePointTrie::findHighStart() const {
665
0
    int32_t i = highStart >> UCPTRIE_SHIFT_3;
666
0
    while (i > 0) {
667
0
        bool match;
668
0
        if (flags[--i] == ALL_SAME) {
669
0
            match = index[i] == highValue;
670
0
        } else /* MIXED */ {
671
0
            const uint32_t *p = data + index[i];
672
0
            for (int32_t j = 0;; ++j) {
673
0
                if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) {
674
0
                    match = true;
675
0
                    break;
676
0
                }
677
0
                if (p[j] != highValue) {
678
0
                    match = false;
679
0
                    break;
680
0
                }
681
0
            }
682
0
        }
683
0
        if (!match) {
684
0
            return (i + 1) << UCPTRIE_SHIFT_3;
685
0
        }
686
0
    }
687
0
    return 0;
688
0
}
689
690
class AllSameBlocks {
691
public:
692
    static constexpr int32_t NEW_UNIQUE = -1;
693
    static constexpr int32_t OVERFLOW = -2;
694
695
0
    AllSameBlocks() : length(0), mostRecent(-1) {}
696
697
0
    int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) {
698
0
        if (mostRecent >= 0 && values[mostRecent] == value) {
699
0
            refCounts[mostRecent] += count;
700
0
            return indexes[mostRecent];
701
0
        }
702
0
        for (int32_t i = 0; i < length; ++i) {
703
0
            if (values[i] == value) {
704
0
                mostRecent = i;
705
0
                refCounts[i] += count;
706
0
                return indexes[i];
707
0
            }
708
0
        }
709
0
        if (length == CAPACITY) {
710
0
            return OVERFLOW;
711
0
        }
712
0
        mostRecent = length;
713
0
        indexes[length] = index;
714
0
        values[length] = value;
715
0
        refCounts[length++] = count;
716
0
        return NEW_UNIQUE;
717
0
    }
718
719
    /** Replaces the block which has the lowest reference count. */
720
0
    void add(int32_t index, int32_t count, uint32_t value) {
721
0
        U_ASSERT(length == CAPACITY);
722
0
        int32_t least = -1;
723
0
        int32_t leastCount = I_LIMIT;
724
0
        for (int32_t i = 0; i < length; ++i) {
725
0
            U_ASSERT(values[i] != value);
726
0
            if (refCounts[i] < leastCount) {
727
0
                least = i;
728
0
                leastCount = refCounts[i];
729
0
            }
730
0
        }
731
0
        U_ASSERT(least >= 0);
732
0
        mostRecent = least;
733
0
        indexes[least] = index;
734
0
        values[least] = value;
735
0
        refCounts[least] = count;
736
0
    }
737
738
0
    int32_t findMostUsed() const {
739
0
        if (length == 0) { return -1; }
740
0
        int32_t max = -1;
741
0
        int32_t maxCount = 0;
742
0
        for (int32_t i = 0; i < length; ++i) {
743
0
            if (refCounts[i] > maxCount) {
744
0
                max = i;
745
0
                maxCount = refCounts[i];
746
0
            }
747
0
        }
748
0
        return indexes[max];
749
0
    }
750
751
private:
752
    static constexpr int32_t CAPACITY = 32;
753
754
    int32_t length;
755
    int32_t mostRecent;
756
757
    int32_t indexes[CAPACITY];
758
    uint32_t values[CAPACITY];
759
    int32_t refCounts[CAPACITY];
760
};
761
762
// Custom hash table for mixed-value blocks to be found anywhere in the
763
// compacted data or index so far.
764
class MixedBlocks {
765
public:
766
0
    MixedBlocks() {}
767
0
    ~MixedBlocks() {
768
0
        uprv_free(table);
769
0
    }
770
771
0
    bool init(int32_t maxLength, int32_t newBlockLength) {
772
        // We store actual data indexes + 1 to reserve 0 for empty entries.
773
0
        int32_t maxDataIndex = maxLength - newBlockLength + 1;
774
0
        int32_t newLength;
775
0
        if (maxDataIndex <= 0xfff) {  // 4k
776
0
            newLength = 6007;
777
0
            shift = 12;
778
0
            mask = 0xfff;
779
0
        } else if (maxDataIndex <= 0x7fff) {  // 32k
780
0
            newLength = 50021;
781
0
            shift = 15;
782
0
            mask = 0x7fff;
783
0
        } else if (maxDataIndex <= 0x1ffff) {  // 128k
784
0
            newLength = 200003;
785
0
            shift = 17;
786
0
            mask = 0x1ffff;
787
0
        } else {
788
            // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M
789
0
            newLength = 1500007;
790
0
            shift = 21;
791
0
            mask = 0x1fffff;
792
0
        }
793
0
        if (newLength > capacity) {
794
0
            uprv_free(table);
795
0
            table = (uint32_t *)uprv_malloc(newLength * 4);
796
0
            if (table == nullptr) {
797
0
                return false;
798
0
            }
799
0
            capacity = newLength;
800
0
        }
801
0
        length = newLength;
802
0
        uprv_memset(table, 0, length * 4);
803
804
0
        blockLength = newBlockLength;
805
0
        return true;
806
0
    }
807
808
    template<typename UInt>
809
0
    void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) {
810
0
        int32_t start = prevDataLength - blockLength;
811
0
        if (start >= minStart) {
812
0
            ++start;  // Skip the last block that we added last time.
813
0
        } else {
814
0
            start = minStart;  // Begin with the first full block.
815
0
        }
816
0
        for (int32_t end = newDataLength - blockLength; start <= end; ++start) {
817
0
            uint32_t hashCode = makeHashCode(data, start);
818
0
            addEntry(data, start, hashCode, start);
819
0
        }
820
0
    }
Unexecuted instantiation: umutablecptrie.cpp:void icu_70::(anonymous namespace)::MixedBlocks::extend<unsigned int>(unsigned int const*, int, int, int)
Unexecuted instantiation: umutablecptrie.cpp:void icu_70::(anonymous namespace)::MixedBlocks::extend<unsigned short>(unsigned short const*, int, int, int)
821
822
    template<typename UIntA, typename UIntB>
823
0
    int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const {
824
0
        uint32_t hashCode = makeHashCode(blockData, blockStart);
825
0
        int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode);
826
0
        if (entryIndex >= 0) {
827
0
            return (table[entryIndex] & mask) - 1;
828
0
        } else {
829
0
            return -1;
830
0
        }
831
0
    }
Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::MixedBlocks::findBlock<unsigned int, unsigned int>(unsigned int const*, unsigned int const*, int) const
Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::MixedBlocks::findBlock<unsigned short, unsigned int>(unsigned short const*, unsigned int const*, int) const
Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::MixedBlocks::findBlock<unsigned short, unsigned short>(unsigned short const*, unsigned short const*, int) const
832
833
0
    int32_t findAllSameBlock(const uint32_t *data, uint32_t blockValue) const {
834
0
        uint32_t hashCode = makeHashCode(blockValue);
835
0
        int32_t entryIndex = findEntry(data, blockValue, hashCode);
836
0
        if (entryIndex >= 0) {
837
0
            return (table[entryIndex] & mask) - 1;
838
0
        } else {
839
0
            return -1;
840
0
        }
841
0
    }
842
843
private:
844
    template<typename UInt>
845
0
    uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const {
846
0
        int32_t blockLimit = blockStart + blockLength;
847
0
        uint32_t hashCode = blockData[blockStart++];
848
0
        do {
849
0
            hashCode = 37 * hashCode + blockData[blockStart++];
850
0
        } while (blockStart < blockLimit);
851
0
        return hashCode;
852
0
    }
Unexecuted instantiation: umutablecptrie.cpp:unsigned int icu_70::(anonymous namespace)::MixedBlocks::makeHashCode<unsigned int>(unsigned int const*, int) const
Unexecuted instantiation: umutablecptrie.cpp:unsigned int icu_70::(anonymous namespace)::MixedBlocks::makeHashCode<unsigned short>(unsigned short const*, int) const
853
854
0
    uint32_t makeHashCode(uint32_t blockValue) const {
855
0
        uint32_t hashCode = blockValue;
856
0
        for (int32_t i = 1; i < blockLength; ++i) {
857
0
            hashCode = 37 * hashCode + blockValue;
858
0
        }
859
0
        return hashCode;
860
0
    }
861
862
    template<typename UInt>
863
0
    void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) {
864
0
        U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask);
865
0
        int32_t entryIndex = findEntry(data, data, blockStart, hashCode);
866
0
        if (entryIndex < 0) {
867
0
            table[~entryIndex] = (hashCode << shift) | (dataIndex + 1);
868
0
        }
869
0
    }
Unexecuted instantiation: umutablecptrie.cpp:void icu_70::(anonymous namespace)::MixedBlocks::addEntry<unsigned int>(unsigned int const*, int, unsigned int, int)
Unexecuted instantiation: umutablecptrie.cpp:void icu_70::(anonymous namespace)::MixedBlocks::addEntry<unsigned short>(unsigned short const*, int, unsigned int, int)
870
871
    template<typename UIntA, typename UIntB>
872
    int32_t findEntry(const UIntA *data, const UIntB *blockData, int32_t blockStart,
873
0
                      uint32_t hashCode) const {
874
0
        uint32_t shiftedHashCode = hashCode << shift;
875
0
        int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1
876
0
        for (int32_t entryIndex = initialEntryIndex;;) {
877
0
            uint32_t entry = table[entryIndex];
878
0
            if (entry == 0) {
879
0
                return ~entryIndex;
880
0
            }
881
0
            if ((entry & ~mask) == shiftedHashCode) {
882
0
                int32_t dataIndex = (entry & mask) - 1;
883
0
                if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) {
884
0
                    return entryIndex;
885
0
                }
886
0
            }
887
0
            entryIndex = nextIndex(initialEntryIndex, entryIndex);
888
0
        }
889
0
    }
Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::MixedBlocks::findEntry<unsigned int, unsigned int>(unsigned int const*, unsigned int const*, int, unsigned int) const
Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::MixedBlocks::findEntry<unsigned short, unsigned short>(unsigned short const*, unsigned short const*, int, unsigned int) const
Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::MixedBlocks::findEntry<unsigned short, unsigned int>(unsigned short const*, unsigned int const*, int, unsigned int) const
890
891
0
    int32_t findEntry(const uint32_t *data, uint32_t blockValue, uint32_t hashCode) const {
892
0
        uint32_t shiftedHashCode = hashCode << shift;
893
0
        int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1
894
0
        for (int32_t entryIndex = initialEntryIndex;;) {
895
0
            uint32_t entry = table[entryIndex];
896
0
            if (entry == 0) {
897
0
                return ~entryIndex;
898
0
            }
899
0
            if ((entry & ~mask) == shiftedHashCode) {
900
0
                int32_t dataIndex = (entry & mask) - 1;
901
0
                if (allValuesSameAs(data + dataIndex, blockLength, blockValue)) {
902
0
                    return entryIndex;
903
0
                }
904
0
            }
905
0
            entryIndex = nextIndex(initialEntryIndex, entryIndex);
906
0
        }
907
0
    }
908
909
0
    inline int32_t nextIndex(int32_t initialEntryIndex, int32_t entryIndex) const {
910
        // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length);
911
0
        return (entryIndex + initialEntryIndex) % length;
912
0
    }
913
914
    // Hash table.
915
    // The length is a prime number, larger than the maximum data length.
916
    // The "shift" lower bits store a data index + 1.
917
    // The remaining upper bits store a partial hashCode of the block data values.
918
    uint32_t *table = nullptr;
919
    int32_t capacity = 0;
920
    int32_t length = 0;
921
    int32_t shift = 0;
922
    uint32_t mask = 0;
923
924
    int32_t blockLength = 0;
925
};
926
927
0
int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) {
928
#ifdef UCPTRIE_DEBUG
929
    bool overflow = false;
930
#endif
931
932
    // ASCII data will be stored as a linear table, even if the following code
933
    // does not yet count it that way.
934
0
    int32_t newDataCapacity = ASCII_LIMIT;
935
    // Add room for a small data null block in case it would match the start of
936
    // a fast data block where dataNullOffset must not be set in that case.
937
0
    newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
938
    // Add room for special values (errorValue, highValue) and padding.
939
0
    newDataCapacity += 4;
940
0
    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
941
0
    int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
942
0
    int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
943
0
    for (int32_t i = 0; i < iLimit; i += inc) {
944
0
        if (i == fastILimit) {
945
0
            blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
946
0
            inc = 1;
947
0
        }
948
0
        uint32_t value = index[i];
949
0
        if (flags[i] == MIXED) {
950
            // Really mixed?
951
0
            const uint32_t *p = data + value;
952
0
            value = *p;
953
0
            if (allValuesSameAs(p + 1, blockLength - 1, value)) {
954
0
                flags[i] = ALL_SAME;
955
0
                index[i] = value;
956
                // Fall through to ALL_SAME handling.
957
0
            } else {
958
0
                newDataCapacity += blockLength;
959
0
                continue;
960
0
            }
961
0
        } else {
962
0
            U_ASSERT(flags[i] == ALL_SAME);
963
0
            if (inc > 1) {
964
                // Do all of the fast-range data block's ALL_SAME parts have the same value?
965
0
                bool allSame = true;
966
0
                int32_t next_i = i + inc;
967
0
                for (int32_t j = i + 1; j < next_i; ++j) {
968
0
                    U_ASSERT(flags[j] == ALL_SAME);
969
0
                    if (index[j] != value) {
970
0
                        allSame = false;
971
0
                        break;
972
0
                    }
973
0
                }
974
0
                if (!allSame) {
975
                    // Turn it into a MIXED block.
976
0
                    if (getDataBlock(i) < 0) {
977
0
                        return -1;
978
0
                    }
979
0
                    newDataCapacity += blockLength;
980
0
                    continue;
981
0
                }
982
0
            }
983
0
        }
984
        // Is there another ALL_SAME block with the same value?
985
0
        int32_t other = allSameBlocks.findOrAdd(i, inc, value);
986
0
        if (other == AllSameBlocks::OVERFLOW) {
987
            // The fixed-size array overflowed. Slow check for a duplicate block.
988
#ifdef UCPTRIE_DEBUG
989
            if (!overflow) {
990
                puts("UCPTrie AllSameBlocks overflow");
991
                overflow = true;
992
            }
993
#endif
994
0
            int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
995
0
            for (int32_t j = 0;; j += jInc) {
996
0
                if (j == i) {
997
0
                    allSameBlocks.add(i, inc, value);
998
0
                    break;
999
0
                }
1000
0
                if (j == fastILimit) {
1001
0
                    jInc = 1;
1002
0
                }
1003
0
                if (flags[j] == ALL_SAME && index[j] == value) {
1004
0
                    allSameBlocks.add(j, jInc + inc, value);
1005
0
                    other = j;
1006
0
                    break;
1007
                    // We could keep counting blocks with the same value
1008
                    // before we add the first one, which may improve compaction in rare cases,
1009
                    // but it would make it slower.
1010
0
                }
1011
0
            }
1012
0
        }
1013
0
        if (other >= 0) {
1014
0
            flags[i] = SAME_AS;
1015
0
            index[i] = other;
1016
0
        } else {
1017
            // New unique same-value block.
1018
0
            newDataCapacity += blockLength;
1019
0
        }
1020
0
    }
1021
0
    return newDataCapacity;
1022
0
}
1023
1024
#ifdef UCPTRIE_DEBUG
1025
#   define DEBUG_DO(expr) expr
1026
#else
1027
#   define DEBUG_DO(expr)
1028
#endif
1029
1030
#ifdef UCPTRIE_DEBUG
1031
// Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF
1032
int32_t appendValue(char s[], int32_t length, uint32_t value) {
1033
    value ^= value >> 16;
1034
    value ^= value >> 8;
1035
    s[length] = 0xE2;
1036
    s[length + 1] = (char)(0xA0 + ((value >> 6) & 3));
1037
    s[length + 2] = (char)(0x80 + (value & 0x3F));
1038
    return length + 3;
1039
}
1040
1041
void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value,
1042
                UChar32 start, int32_t overlap, uint32_t initialValue) {
1043
    char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3];
1044
    int32_t length = 0;
1045
    int32_t i;
1046
    for (i = 0; i < overlap; ++i) {
1047
        length = appendValue(s, length, 0);  // Braille blank
1048
    }
1049
    s[length++] = '|';
1050
    for (; i < blockLength; ++i) {
1051
        if (block != nullptr) {
1052
            value = block[i];
1053
        }
1054
        if (value == initialValue) {
1055
            value = 0x40;  // Braille lower left dot
1056
        }
1057
        length = appendValue(s, length, value);
1058
    }
1059
    s[length] = 0;
1060
    start += overlap;
1061
    if (start <= 0xffff) {
1062
        printf("    %04lX  %s|\n", (long)start, s);
1063
    } else if (start <= 0xfffff) {
1064
        printf("   %5lX  %s|\n", (long)start, s);
1065
    } else {
1066
        printf("  %6lX  %s|\n", (long)start, s);
1067
    }
1068
}
1069
#endif
1070
1071
/**
1072
 * Compacts a build-time trie.
1073
 *
1074
 * The compaction
1075
 * - removes blocks that are identical with earlier ones
1076
 * - overlaps each new non-duplicate block as much as possible with the previously-written one
1077
 * - works with fast-range data blocks whose length is a multiple of that of
1078
 *   higher-code-point data blocks
1079
 *
1080
 * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
1081
 */
1082
int32_t MutableCodePointTrie::compactData(
1083
        int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,
1084
0
        int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) {
1085
#ifdef UCPTRIE_DEBUG
1086
    int32_t countSame=0, sumOverlaps=0;
1087
    bool printData = dataLength == 29088 /* line.brk */ ||
1088
        // dataLength == 30048 /* CanonIterData */ ||
1089
        dataLength == 50400 /* zh.txt~stroke */;
1090
#endif
1091
1092
    // The linear ASCII data has been copied into newData already.
1093
0
    int32_t newDataLength = 0;
1094
0
    for (int32_t i = 0; newDataLength < ASCII_LIMIT;
1095
0
            newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
1096
0
        index[i] = newDataLength;
1097
#ifdef UCPTRIE_DEBUG
1098
        if (printData) {
1099
            printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue);
1100
        }
1101
#endif
1102
0
    }
1103
1104
0
    int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
1105
0
    if (!mixedBlocks.init(newDataCapacity, blockLength)) {
1106
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1107
0
        return 0;
1108
0
    }
1109
0
    mixedBlocks.extend(newData, 0, 0, newDataLength);
1110
1111
0
    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
1112
0
    int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
1113
0
    int32_t fastLength = 0;
1114
0
    for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) {
1115
0
        if (i == fastILimit) {
1116
0
            blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
1117
0
            inc = 1;
1118
0
            fastLength = newDataLength;
1119
0
            if (!mixedBlocks.init(newDataCapacity, blockLength)) {
1120
0
                errorCode = U_MEMORY_ALLOCATION_ERROR;
1121
0
                return 0;
1122
0
            }
1123
0
            mixedBlocks.extend(newData, 0, 0, newDataLength);
1124
0
        }
1125
0
        if (flags[i] == ALL_SAME) {
1126
0
            uint32_t value = index[i];
1127
            // Find an earlier part of the data array of length blockLength
1128
            // that is filled with this value.
1129
0
            int32_t n = mixedBlocks.findAllSameBlock(newData, value);
1130
            // If we find a match, and the current block is the data null block,
1131
            // and it is not a fast block but matches the start of a fast block,
1132
            // then we need to continue looking.
1133
            // This is because this small block is shorter than the fast block,
1134
            // and not all of the rest of the fast block is filled with this value.
1135
            // Otherwise trie.getRange() would detect that the fast block starts at
1136
            // dataNullOffset and assume incorrectly that it is filled with the null value.
1137
0
            while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength &&
1138
0
                    isStartOfSomeFastBlock(n, index, fastILimit)) {
1139
0
                n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength);
1140
0
            }
1141
0
            if (n >= 0) {
1142
0
                DEBUG_DO(++countSame);
1143
0
                index[i] = n;
1144
0
            } else {
1145
0
                n = getAllSameOverlap(newData, newDataLength, value, blockLength);
1146
0
                DEBUG_DO(sumOverlaps += n);
1147
#ifdef UCPTRIE_DEBUG
1148
                if (printData) {
1149
                    printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue);
1150
                }
1151
#endif
1152
0
                index[i] = newDataLength - n;
1153
0
                int32_t prevDataLength = newDataLength;
1154
0
                while (n < blockLength) {
1155
0
                    newData[newDataLength++] = value;
1156
0
                    ++n;
1157
0
                }
1158
0
                mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
1159
0
            }
1160
0
        } else if (flags[i] == MIXED) {
1161
0
            const uint32_t *block = data + index[i];
1162
0
            int32_t n = mixedBlocks.findBlock(newData, block, 0);
1163
0
            if (n >= 0) {
1164
0
                DEBUG_DO(++countSame);
1165
0
                index[i] = n;
1166
0
            } else {
1167
0
                n = getOverlap(newData, newDataLength, block, 0, blockLength);
1168
0
                DEBUG_DO(sumOverlaps += n);
1169
#ifdef UCPTRIE_DEBUG
1170
                if (printData) {
1171
                    printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue);
1172
                }
1173
#endif
1174
0
                index[i] = newDataLength - n;
1175
0
                int32_t prevDataLength = newDataLength;
1176
0
                while (n < blockLength) {
1177
0
                    newData[newDataLength++] = block[n++];
1178
0
                }
1179
0
                mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);
1180
0
            }
1181
0
        } else /* SAME_AS */ {
1182
0
            uint32_t j = index[i];
1183
0
            index[i] = index[j];
1184
0
        }
1185
0
    }
1186
1187
#ifdef UCPTRIE_DEBUG
1188
    /* we saved some space */
1189
    printf("compacting UCPTrie: count of 32-bit data words %lu->%lu  countSame=%ld  sumOverlaps=%ld\n",
1190
            (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps);
1191
#endif
1192
0
    return newDataLength;
1193
0
}
1194
1195
int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks,
1196
0
                                           UErrorCode &errorCode) {
1197
0
    int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3);
1198
0
    if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) {
1199
        // Only the linear fast index, no multi-stage index tables.
1200
0
        index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
1201
0
        return fastIndexLength;
1202
0
    }
1203
1204
    // Condense the fast index table.
1205
    // Also, does it contain an index-3 block with all dataNullOffset?
1206
0
    uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH];  // fastIndexLength
1207
0
    int32_t i3FirstNull = -1;
1208
0
    for (int32_t i = 0, j = 0; i < fastILimit; ++j) {
1209
0
        uint32_t i3 = index[i];
1210
0
        fastIndex[j] = (uint16_t)i3;
1211
0
        if (i3 == (uint32_t)dataNullOffset) {
1212
0
            if (i3FirstNull < 0) {
1213
0
                i3FirstNull = j;
1214
0
            } else if (index3NullOffset < 0 &&
1215
0
                    (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1216
0
                index3NullOffset = i3FirstNull;
1217
0
            }
1218
0
        } else {
1219
0
            i3FirstNull = -1;
1220
0
        }
1221
        // Set the index entries that compactData() skipped.
1222
        // Needed when the multi-stage index covers the fast index range as well.
1223
0
        int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
1224
0
        while (++i < iNext) {
1225
0
            i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
1226
0
            index[i] = i3;
1227
0
        }
1228
0
    }
1229
1230
0
    if (!mixedBlocks.init(fastIndexLength, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1231
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1232
0
        return 0;
1233
0
    }
1234
0
    mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength);
1235
1236
    // Examine index-3 blocks. For each determine one of:
1237
    // - same as the index-3 null block
1238
    // - same as a fast-index block
1239
    // - 16-bit indexes
1240
    // - 18-bit indexes
1241
    // We store this in the first flags entry for the index-3 block.
1242
    //
1243
    // Also determine an upper limit for the index-3 table length.
1244
0
    int32_t index3Capacity = 0;
1245
0
    i3FirstNull = index3NullOffset;
1246
0
    bool hasLongI3Blocks = false;
1247
    // If the fast index covers the whole BMP, then
1248
    // the multi-stage index is only for supplementary code points.
1249
    // Otherwise, the multi-stage index covers all of Unicode.
1250
0
    int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT;
1251
0
    int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
1252
0
    for (int32_t i = iStart; i < iLimit;) {
1253
0
        int32_t j = i;
1254
0
        int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
1255
0
        uint32_t oredI3 = 0;
1256
0
        bool isNull = true;
1257
0
        do {
1258
0
            uint32_t i3 = index[j];
1259
0
            oredI3 |= i3;
1260
0
            if (i3 != (uint32_t)dataNullOffset) {
1261
0
                isNull = false;
1262
0
            }
1263
0
        } while (++j < jLimit);
1264
0
        if (isNull) {
1265
0
            flags[i] = I3_NULL;
1266
0
            if (i3FirstNull < 0) {
1267
0
                if (oredI3 <= 0xffff) {
1268
0
                    index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
1269
0
                } else {
1270
0
                    index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
1271
0
                    hasLongI3Blocks = true;
1272
0
                }
1273
0
                i3FirstNull = 0;
1274
0
            }
1275
0
        } else {
1276
0
            if (oredI3 <= 0xffff) {
1277
0
                int32_t n = mixedBlocks.findBlock(fastIndex, index, i);
1278
0
                if (n >= 0) {
1279
0
                    flags[i] = I3_BMP;
1280
0
                    index[i] = n;
1281
0
                } else {
1282
0
                    flags[i] = I3_16;
1283
0
                    index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
1284
0
                }
1285
0
            } else {
1286
0
                flags[i] = I3_18;
1287
0
                index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
1288
0
                hasLongI3Blocks = true;
1289
0
            }
1290
0
        }
1291
0
        i = j;
1292
0
    }
1293
1294
0
    int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3;
1295
1296
    // Length of the index-1 table, rounded up.
1297
0
    int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2;
1298
1299
    // Index table: Fast index, index-1, index-3, index-2.
1300
    // +1 for possible index table padding.
1301
0
    int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1;
1302
0
    index16 = (uint16_t *)uprv_malloc(index16Capacity * 2);
1303
0
    if (index16 == nullptr) {
1304
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1305
0
        return 0;
1306
0
    }
1307
0
    uprv_memcpy(index16, fastIndex, fastIndexLength * 2);
1308
1309
0
    if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1310
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1311
0
        return 0;
1312
0
    }
1313
0
    MixedBlocks longI3Blocks;
1314
0
    if (hasLongI3Blocks) {
1315
0
        if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) {
1316
0
            errorCode = U_MEMORY_ALLOCATION_ERROR;
1317
0
            return 0;
1318
0
        }
1319
0
    }
1320
1321
    // Compact the index-3 table and write an uncompacted version of the index-2 table.
1322
0
    uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2];  // index2Capacity
1323
0
    int32_t i2Length = 0;
1324
0
    i3FirstNull = index3NullOffset;
1325
0
    int32_t index3Start = fastIndexLength + index1Length;
1326
0
    int32_t indexLength = index3Start;
1327
0
    for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1328
0
        int32_t i3;
1329
0
        uint8_t f = flags[i];
1330
0
        if (f == I3_NULL && i3FirstNull < 0) {
1331
            // First index-3 null block. Write & overlap it like a normal block, then remember it.
1332
0
            f = dataNullOffset <= 0xffff ? I3_16 : I3_18;
1333
0
            i3FirstNull = 0;
1334
0
        }
1335
0
        if (f == I3_NULL) {
1336
0
            i3 = index3NullOffset;
1337
0
        } else if (f == I3_BMP) {
1338
0
            i3 = index[i];
1339
0
        } else if (f == I3_16) {
1340
0
            int32_t n = mixedBlocks.findBlock(index16, index, i);
1341
0
            if (n >= 0) {
1342
0
                i3 = n;
1343
0
            } else {
1344
0
                if (indexLength == index3Start) {
1345
                    // No overlap at the boundary between the index-1 and index-3 tables.
1346
0
                    n = 0;
1347
0
                } else {
1348
0
                    n = getOverlap(index16, indexLength,
1349
0
                                   index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
1350
0
                }
1351
0
                i3 = indexLength - n;
1352
0
                int32_t prevIndexLength = indexLength;
1353
0
                while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) {
1354
0
                    index16[indexLength++] = index[i + n++];
1355
0
                }
1356
0
                mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1357
0
                if (hasLongI3Blocks) {
1358
0
                    longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
1359
0
                }
1360
0
            }
1361
0
        } else {
1362
0
            U_ASSERT(f == I3_18);
1363
0
            U_ASSERT(hasLongI3Blocks);
1364
            // Encode an index-3 block that contains one or more data indexes exceeding 16 bits.
1365
0
            int32_t j = i;
1366
0
            int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
1367
0
            int32_t k = indexLength;
1368
0
            do {
1369
0
                ++k;
1370
0
                uint32_t v = index[j++];
1371
0
                uint32_t upperBits = (v & 0x30000) >> 2;
1372
0
                index16[k++] = v;
1373
0
                v = index[j++];
1374
0
                upperBits |= (v & 0x30000) >> 4;
1375
0
                index16[k++] = v;
1376
0
                v = index[j++];
1377
0
                upperBits |= (v & 0x30000) >> 6;
1378
0
                index16[k++] = v;
1379
0
                v = index[j++];
1380
0
                upperBits |= (v & 0x30000) >> 8;
1381
0
                index16[k++] = v;
1382
0
                v = index[j++];
1383
0
                upperBits |= (v & 0x30000) >> 10;
1384
0
                index16[k++] = v;
1385
0
                v = index[j++];
1386
0
                upperBits |= (v & 0x30000) >> 12;
1387
0
                index16[k++] = v;
1388
0
                v = index[j++];
1389
0
                upperBits |= (v & 0x30000) >> 14;
1390
0
                index16[k++] = v;
1391
0
                v = index[j++];
1392
0
                upperBits |= (v & 0x30000) >> 16;
1393
0
                index16[k++] = v;
1394
0
                index16[k - 9] = upperBits;
1395
0
            } while (j < jLimit);
1396
0
            int32_t n = longI3Blocks.findBlock(index16, index16, indexLength);
1397
0
            if (n >= 0) {
1398
0
                i3 = n | 0x8000;
1399
0
            } else {
1400
0
                if (indexLength == index3Start) {
1401
                    // No overlap at the boundary between the index-1 and index-3 tables.
1402
0
                    n = 0;
1403
0
                } else {
1404
0
                    n = getOverlap(index16, indexLength,
1405
0
                                   index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
1406
0
                }
1407
0
                i3 = (indexLength - n) | 0x8000;
1408
0
                int32_t prevIndexLength = indexLength;
1409
0
                if (n > 0) {
1410
0
                    int32_t start = indexLength;
1411
0
                    while (n < INDEX_3_18BIT_BLOCK_LENGTH) {
1412
0
                        index16[indexLength++] = index16[start + n++];
1413
0
                    }
1414
0
                } else {
1415
0
                    indexLength += INDEX_3_18BIT_BLOCK_LENGTH;
1416
0
                }
1417
0
                mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1418
0
                if (hasLongI3Blocks) {
1419
0
                    longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);
1420
0
                }
1421
0
            }
1422
0
        }
1423
0
        if (index3NullOffset < 0 && i3FirstNull >= 0) {
1424
0
            index3NullOffset = i3;
1425
0
        }
1426
        // Set the index-2 table entry.
1427
0
        index2[i2Length++] = i3;
1428
0
    }
1429
0
    U_ASSERT(i2Length == index2Capacity);
1430
0
    U_ASSERT(indexLength <= index3Start + index3Capacity);
1431
1432
0
    if (index3NullOffset < 0) {
1433
0
        index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
1434
0
    }
1435
0
    if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
1436
        // The index-3 offsets exceed 15 bits, or
1437
        // the last one cannot be distinguished from the no-null-block value.
1438
0
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1439
0
        return 0;
1440
0
    }
1441
1442
    // Compact the index-2 table and write the index-1 table.
1443
0
    static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH,
1444
0
                  "must re-init mixedBlocks");
1445
0
    int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH;
1446
0
    int32_t i1 = fastIndexLength;
1447
0
    for (int32_t i = 0; i < i2Length; i += blockLength) {
1448
0
        int32_t n;
1449
0
        if ((i2Length - i) >= blockLength) {
1450
            // normal block
1451
0
            U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH);
1452
0
            n = mixedBlocks.findBlock(index16, index2, i);
1453
0
        } else {
1454
            // highStart is inside the last index-2 block. Shorten it.
1455
0
            blockLength = i2Length - i;
1456
0
            n = findSameBlock(index16, index3Start, indexLength,
1457
0
                              index2, i, blockLength);
1458
0
        }
1459
0
        int32_t i2;
1460
0
        if (n >= 0) {
1461
0
            i2 = n;
1462
0
        } else {
1463
0
            if (indexLength == index3Start) {
1464
                // No overlap at the boundary between the index-1 and index-3/2 tables.
1465
0
                n = 0;
1466
0
            } else {
1467
0
                n = getOverlap(index16, indexLength, index2, i, blockLength);
1468
0
            }
1469
0
            i2 = indexLength - n;
1470
0
            int32_t prevIndexLength = indexLength;
1471
0
            while (n < blockLength) {
1472
0
                index16[indexLength++] = index2[i + n++];
1473
0
            }
1474
0
            mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);
1475
0
        }
1476
        // Set the index-1 table entry.
1477
0
        index16[i1++] = i2;
1478
0
    }
1479
0
    U_ASSERT(i1 == index3Start);
1480
0
    U_ASSERT(indexLength <= index16Capacity);
1481
1482
#ifdef UCPTRIE_DEBUG
1483
    /* we saved some space */
1484
    printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n",
1485
            (long)iLimit, (long)indexLength);
1486
#endif
1487
1488
0
    return indexLength;
1489
0
}
1490
1491
0
int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) {
1492
    // Find the real highStart and round it up.
1493
0
    U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
1494
0
    highValue = get(MAX_UNICODE);
1495
0
    int32_t realHighStart = findHighStart();
1496
0
    realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) &
1497
0
        ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
1498
0
    if (realHighStart == UNICODE_LIMIT) {
1499
0
        highValue = initialValue;
1500
0
    }
1501
1502
#ifdef UCPTRIE_DEBUG
1503
    printf("UCPTrie: highStart U+%06lx  highValue 0x%lx  initialValue 0x%lx\n",
1504
            (long)realHighStart, (long)highValue, (long)initialValue);
1505
#endif
1506
1507
    // We always store indexes and data values for the fast range.
1508
    // Pin highStart to the top of that range while building.
1509
0
    UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3;
1510
0
    if (realHighStart < fastLimit) {
1511
0
        for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) {
1512
0
            flags[i] = ALL_SAME;
1513
0
            index[i] = highValue;
1514
0
        }
1515
0
        highStart = fastLimit;
1516
0
    } else {
1517
0
        highStart = realHighStart;
1518
0
    }
1519
1520
0
    uint32_t asciiData[ASCII_LIMIT];
1521
0
    for (int32_t i = 0; i < ASCII_LIMIT; ++i) {
1522
0
        asciiData[i] = get(i);
1523
0
    }
1524
1525
    // First we look for which data blocks have the same value repeated over the whole block,
1526
    // deduplicate such blocks, find a good null data block (for faster enumeration),
1527
    // and get an upper bound for the necessary data array length.
1528
0
    AllSameBlocks allSameBlocks;
1529
0
    int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks);
1530
0
    if (newDataCapacity < 0) {
1531
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1532
0
        return 0;
1533
0
    }
1534
0
    uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4);
1535
0
    if (newData == nullptr) {
1536
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1537
0
        return 0;
1538
0
    }
1539
0
    uprv_memcpy(newData, asciiData, sizeof(asciiData));
1540
1541
0
    int32_t dataNullIndex = allSameBlocks.findMostUsed();
1542
1543
0
    MixedBlocks mixedBlocks;
1544
0
    int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity,
1545
0
                                        dataNullIndex, mixedBlocks, errorCode);
1546
0
    if (U_FAILURE(errorCode)) { return 0; }
1547
0
    U_ASSERT(newDataLength <= newDataCapacity);
1548
0
    uprv_free(data);
1549
0
    data = newData;
1550
0
    dataCapacity = newDataCapacity;
1551
0
    dataLength = newDataLength;
1552
0
    if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) {
1553
        // The offset of the last data block is too high to be stored in the index table.
1554
0
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1555
0
        return 0;
1556
0
    }
1557
1558
0
    if (dataNullIndex >= 0) {
1559
0
        dataNullOffset = index[dataNullIndex];
1560
#ifdef UCPTRIE_DEBUG
1561
        if (data[dataNullOffset] != initialValue) {
1562
            printf("UCPTrie initialValue %lx -> more common nullValue %lx\n",
1563
                   (long)initialValue, (long)data[dataNullOffset]);
1564
        }
1565
#endif
1566
0
        initialValue = data[dataNullOffset];
1567
0
    } else {
1568
0
        dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET;
1569
0
    }
1570
1571
0
    int32_t indexLength = compactIndex(fastILimit, mixedBlocks, errorCode);
1572
0
    highStart = realHighStart;
1573
0
    return indexLength;
1574
0
}
1575
1576
0
UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) {
1577
0
    if (U_FAILURE(errorCode)) {
1578
0
        return nullptr;
1579
0
    }
1580
0
    if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type ||
1581
0
            valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) {
1582
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
1583
0
        return nullptr;
1584
0
    }
1585
1586
    // The mutable trie always stores 32-bit values.
1587
    // When we build a UCPTrie for a smaller value width, we first mask off unused bits
1588
    // before compacting the data.
1589
0
    switch (valueWidth) {
1590
0
    case UCPTRIE_VALUE_BITS_32:
1591
0
        break;
1592
0
    case UCPTRIE_VALUE_BITS_16:
1593
0
        maskValues(0xffff);
1594
0
        break;
1595
0
    case UCPTRIE_VALUE_BITS_8:
1596
0
        maskValues(0xff);
1597
0
        break;
1598
0
    default:
1599
0
        break;
1600
0
    }
1601
1602
0
    UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT;
1603
0
    int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode);
1604
0
    if (U_FAILURE(errorCode)) {
1605
0
        clear();
1606
0
        return nullptr;
1607
0
    }
1608
1609
    // Ensure data table alignment: The index length must be even for uint32_t data.
1610
0
    if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) {
1611
0
        index16[indexLength++] = 0xffee;  // arbitrary value
1612
0
    }
1613
1614
    // Make the total trie structure length a multiple of 4 bytes by padding the data table,
1615
    // and store special values as the last two data values.
1616
0
    int32_t length = indexLength * 2;
1617
0
    if (valueWidth == UCPTRIE_VALUE_BITS_16) {
1618
0
        if (((indexLength ^ dataLength) & 1) != 0) {
1619
            // padding
1620
0
            data[dataLength++] = errorValue;
1621
0
        }
1622
0
        if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
1623
0
            data[dataLength++] = highValue;
1624
0
            data[dataLength++] = errorValue;
1625
0
        }
1626
0
        length += dataLength * 2;
1627
0
    } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
1628
        // 32-bit data words never need padding to a multiple of 4 bytes.
1629
0
        if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
1630
0
            if (data[dataLength - 1] != highValue) {
1631
0
                data[dataLength++] = highValue;
1632
0
            }
1633
0
            data[dataLength++] = errorValue;
1634
0
        }
1635
0
        length += dataLength * 4;
1636
0
    } else {
1637
0
        int32_t and3 = (length + dataLength) & 3;
1638
0
        if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) {
1639
            // all set
1640
0
        } else if(and3 == 3 && data[dataLength - 1] == highValue) {
1641
0
            data[dataLength++] = errorValue;
1642
0
        } else {
1643
0
            while (and3 != 2) {
1644
0
                data[dataLength++] = highValue;
1645
0
                and3 = (and3 + 1) & 3;
1646
0
            }
1647
0
            data[dataLength++] = highValue;
1648
0
            data[dataLength++] = errorValue;
1649
0
        }
1650
0
        length += dataLength;
1651
0
    }
1652
1653
    // Calculate the total length of the UCPTrie as a single memory block.
1654
0
    length += sizeof(UCPTrie);
1655
0
    U_ASSERT((length & 3) == 0);
1656
1657
0
    uint8_t *bytes = (uint8_t *)uprv_malloc(length);
1658
0
    if (bytes == nullptr) {
1659
0
        errorCode = U_MEMORY_ALLOCATION_ERROR;
1660
0
        clear();
1661
0
        return nullptr;
1662
0
    }
1663
0
    UCPTrie *trie = reinterpret_cast<UCPTrie *>(bytes);
1664
0
    uprv_memset(trie, 0, sizeof(UCPTrie));
1665
0
    trie->indexLength = indexLength;
1666
0
    trie->dataLength = dataLength;
1667
1668
0
    trie->highStart = highStart;
1669
    // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes.
1670
    // Runtime code needs to then test for the real highStart as well.
1671
0
    trie->shifted12HighStart = (highStart + 0xfff) >> 12;
1672
0
    trie->type = type;
1673
0
    trie->valueWidth = valueWidth;
1674
1675
0
    trie->index3NullOffset = index3NullOffset;
1676
0
    trie->dataNullOffset = dataNullOffset;
1677
0
    trie->nullValue = initialValue;
1678
1679
0
    bytes += sizeof(UCPTrie);
1680
1681
    // Fill the index and data arrays.
1682
0
    uint16_t *dest16 = (uint16_t *)bytes;
1683
0
    trie->index = dest16;
1684
1685
0
    if (highStart <= fastLimit) {
1686
        // Condense only the fast index from the mutable-trie index.
1687
0
        for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) {
1688
0
            *dest16++ = (uint16_t)index[i];  // dest16[j]
1689
0
        }
1690
0
    } else {
1691
0
        uprv_memcpy(dest16, index16, indexLength * 2);
1692
0
        dest16 += indexLength;
1693
0
    }
1694
0
    bytes += indexLength * 2;
1695
1696
    // Write the data array.
1697
0
    const uint32_t *p = data;
1698
0
    switch (valueWidth) {
1699
0
    case UCPTRIE_VALUE_BITS_16:
1700
        // Write 16-bit data values.
1701
0
        trie->data.ptr16 = dest16;
1702
0
        for (int32_t i = dataLength; i > 0; --i) {
1703
0
            *dest16++ = (uint16_t)*p++;
1704
0
        }
1705
0
        break;
1706
0
    case UCPTRIE_VALUE_BITS_32:
1707
        // Write 32-bit data values.
1708
0
        trie->data.ptr32 = (uint32_t *)bytes;
1709
0
        uprv_memcpy(bytes, p, (size_t)dataLength * 4);
1710
0
        break;
1711
0
    case UCPTRIE_VALUE_BITS_8:
1712
        // Write 8-bit data values.
1713
0
        trie->data.ptr8 = bytes;
1714
0
        for (int32_t i = dataLength; i > 0; --i) {
1715
0
            *bytes++ = (uint8_t)*p++;
1716
0
        }
1717
0
        break;
1718
0
    default:
1719
        // Will not occur, valueWidth checked at the beginning.
1720
0
        break;
1721
0
    }
1722
1723
#ifdef UCPTRIE_DEBUG
1724
    trie->name = name;
1725
1726
    ucptrie_printLengths(trie, "");
1727
#endif
1728
1729
0
    clear();
1730
0
    return trie;
1731
0
}
1732
1733
}  // namespace
1734
1735
U_NAMESPACE_END
1736
1737
U_NAMESPACE_USE
1738
1739
U_CAPI UMutableCPTrie * U_EXPORT2
1740
0
umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
1741
0
    if (U_FAILURE(*pErrorCode)) {
1742
0
        return nullptr;
1743
0
    }
1744
0
    LocalPointer<MutableCodePointTrie> trie(
1745
0
        new MutableCodePointTrie(initialValue, errorValue, *pErrorCode), *pErrorCode);
1746
0
    if (U_FAILURE(*pErrorCode)) {
1747
0
        return nullptr;
1748
0
    }
1749
0
    return reinterpret_cast<UMutableCPTrie *>(trie.orphan());
1750
0
}
1751
1752
U_CAPI UMutableCPTrie * U_EXPORT2
1753
0
umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) {
1754
0
    if (U_FAILURE(*pErrorCode)) {
1755
0
        return nullptr;
1756
0
    }
1757
0
    if (other == nullptr) {
1758
0
        return nullptr;
1759
0
    }
1760
0
    LocalPointer<MutableCodePointTrie> clone(
1761
0
        new MutableCodePointTrie(*reinterpret_cast<const MutableCodePointTrie *>(other), *pErrorCode), *pErrorCode);
1762
0
    if (U_FAILURE(*pErrorCode)) {
1763
0
        return nullptr;
1764
0
    }
1765
0
    return reinterpret_cast<UMutableCPTrie *>(clone.orphan());
1766
0
}
1767
1768
U_CAPI void U_EXPORT2
1769
0
umutablecptrie_close(UMutableCPTrie *trie) {
1770
0
    delete reinterpret_cast<MutableCodePointTrie *>(trie);
1771
0
}
1772
1773
U_CAPI UMutableCPTrie * U_EXPORT2
1774
0
umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) {
1775
0
    if (U_FAILURE(*pErrorCode)) {
1776
0
        return nullptr;
1777
0
    }
1778
0
    if (map == nullptr) {
1779
0
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
1780
0
        return nullptr;
1781
0
    }
1782
0
    return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPMap(map, *pErrorCode));
1783
0
}
1784
1785
U_CAPI UMutableCPTrie * U_EXPORT2
1786
0
umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) {
1787
0
    if (U_FAILURE(*pErrorCode)) {
1788
0
        return nullptr;
1789
0
    }
1790
0
    if (trie == nullptr) {
1791
0
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
1792
0
        return nullptr;
1793
0
    }
1794
0
    return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode));
1795
0
}
1796
1797
U_CAPI uint32_t U_EXPORT2
1798
0
umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) {
1799
0
    return reinterpret_cast<const MutableCodePointTrie *>(trie)->get(c);
1800
0
}
1801
1802
namespace {
1803
1804
UChar32 getRange(const void *trie, UChar32 start,
1805
0
                 UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
1806
0
    return reinterpret_cast<const MutableCodePointTrie *>(trie)->
1807
0
        getRange(start, filter, context, pValue);
1808
0
}
1809
1810
}  // namespace
1811
1812
U_CAPI UChar32 U_EXPORT2
1813
umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
1814
                        UCPMapRangeOption option, uint32_t surrogateValue,
1815
0
                        UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
1816
0
    return ucptrie_internalGetRange(getRange, trie, start,
1817
0
                                    option, surrogateValue,
1818
0
                                    filter, context, pValue);
1819
0
}
1820
1821
U_CAPI void U_EXPORT2
1822
0
umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
1823
0
    if (U_FAILURE(*pErrorCode)) {
1824
0
        return;
1825
0
    }
1826
0
    reinterpret_cast<MutableCodePointTrie *>(trie)->set(c, value, *pErrorCode);
1827
0
}
1828
1829
U_CAPI void U_EXPORT2
1830
umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end,
1831
0
                   uint32_t value, UErrorCode *pErrorCode) {
1832
0
    if (U_FAILURE(*pErrorCode)) {
1833
0
        return;
1834
0
    }
1835
0
    reinterpret_cast<MutableCodePointTrie *>(trie)->setRange(start, end, value, *pErrorCode);
1836
0
}
1837
1838
/* Compact and internally serialize the trie. */
1839
U_CAPI UCPTrie * U_EXPORT2
1840
umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth,
1841
0
                              UErrorCode *pErrorCode) {
1842
0
    if (U_FAILURE(*pErrorCode)) {
1843
0
        return nullptr;
1844
0
    }
1845
0
    return reinterpret_cast<MutableCodePointTrie *>(trie)->build(type, valueWidth, *pErrorCode);
1846
0
}
1847
1848
#ifdef UCPTRIE_DEBUG
1849
U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) {
1850
    reinterpret_cast<MutableCodePointTrie *>(trie)->name = name;
1851
}
1852
#endif