/src/icu/source/common/umutablecptrie.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2017 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  |  | 
4  |  | // umutablecptrie.cpp (inspired by utrie2_builder.cpp)  | 
5  |  | // created: 2017dec29 Markus W. Scherer  | 
6  |  |  | 
7  |  | // #define UCPTRIE_DEBUG  | 
8  |  | #ifdef UCPTRIE_DEBUG  | 
9  |  | #   include <stdio.h>  | 
10  |  | #endif  | 
11  |  |  | 
12  |  | #include "unicode/utypes.h"  | 
13  |  | #include "unicode/ucptrie.h"  | 
14  |  | #include "unicode/umutablecptrie.h"  | 
15  |  | #include "unicode/uobject.h"  | 
16  |  | #include "unicode/utf16.h"  | 
17  |  | #include "cmemory.h"  | 
18  |  | #include "uassert.h"  | 
19  |  | #include "ucptrie_impl.h"  | 
20  |  |  | 
21  |  | // ICU-20235 In case Microsoft math.h has defined this, undefine it.  | 
22  |  | #ifdef OVERFLOW  | 
23  |  | #undef OVERFLOW  | 
24  |  | #endif  | 
25  |  |  | 
26  |  | U_NAMESPACE_BEGIN  | 
27  |  |  | 
28  |  | namespace { | 
29  |  |  | 
30  |  | constexpr int32_t MAX_UNICODE = 0x10ffff;  | 
31  |  |  | 
32  |  | constexpr int32_t UNICODE_LIMIT = 0x110000;  | 
33  |  | constexpr int32_t BMP_LIMIT = 0x10000;  | 
34  |  | constexpr int32_t ASCII_LIMIT = 0x80;  | 
35  |  |  | 
36  |  | constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3;  | 
37  |  | constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3;  | 
38  |  | constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3;  | 
39  |  |  | 
40  |  | constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3));  | 
41  |  |  | 
42  |  | // Flag values for data blocks.  | 
43  |  | constexpr uint8_t ALL_SAME = 0;  | 
44  |  | constexpr uint8_t MIXED = 1;  | 
45  |  | constexpr uint8_t SAME_AS = 2;  | 
46  |  |  | 
47  |  | /** Start with allocation of 16k data entries. */  | 
48  |  | constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14);  | 
49  |  |  | 
50  |  | /** Grow about 8x each time. */  | 
51  |  | constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17);  | 
52  |  |  | 
53  |  | /**  | 
54  |  |  * Maximum length of the build-time data array.  | 
55  |  |  * One entry per 0x110000 code points.  | 
56  |  |  */  | 
57  |  | constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT;  | 
58  |  |  | 
59  |  | // Flag values for index-3 blocks while compacting/building.  | 
60  |  | constexpr uint8_t I3_NULL = 0;  | 
61  |  | constexpr uint8_t I3_BMP = 1;  | 
62  |  | constexpr uint8_t I3_16 = 2;  | 
63  |  | constexpr uint8_t I3_18 = 3;  | 
64  |  |  | 
65  |  | constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8;  | 
66  |  |  | 
67  |  | class AllSameBlocks;  | 
68  |  | class MixedBlocks;  | 
69  |  |  | 
70  |  | class MutableCodePointTrie : public UMemory { | 
71  |  | public:  | 
72  |  |     MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode);  | 
73  |  |     MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode);  | 
74  |  |     MutableCodePointTrie(const MutableCodePointTrie &other) = delete;  | 
75  |  |     ~MutableCodePointTrie();  | 
76  |  |  | 
77  |  |     MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete;  | 
78  |  |  | 
79  |  |     static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode);  | 
80  |  |     static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode);  | 
81  |  |  | 
82  |  |     uint32_t get(UChar32 c) const;  | 
83  |  |     int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context,  | 
84  |  |                      uint32_t *pValue) const;  | 
85  |  |  | 
86  |  |     void set(UChar32 c, uint32_t value, UErrorCode &errorCode);  | 
87  |  |     void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode);  | 
88  |  |  | 
89  |  |     UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode);  | 
90  |  |  | 
91  |  | private:  | 
92  |  |     void clear();  | 
93  |  |  | 
94  |  |     bool ensureHighStart(UChar32 c);  | 
95  |  |     int32_t allocDataBlock(int32_t blockLength);  | 
96  |  |     int32_t getDataBlock(int32_t i);  | 
97  |  |  | 
98  |  |     void maskValues(uint32_t mask);  | 
99  |  |     UChar32 findHighStart() const;  | 
100  |  |     int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks);  | 
101  |  |     int32_t compactData(  | 
102  |  |             int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,  | 
103  |  |             int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode);  | 
104  |  |     int32_t compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks, UErrorCode &errorCode);  | 
105  |  |     int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode);  | 
106  |  |  | 
107  |  |     uint32_t *index = nullptr;  | 
108  |  |     int32_t indexCapacity = 0;  | 
109  |  |     int32_t index3NullOffset = -1;  | 
110  |  |     uint32_t *data = nullptr;  | 
111  |  |     int32_t dataCapacity = 0;  | 
112  |  |     int32_t dataLength = 0;  | 
113  |  |     int32_t dataNullOffset = -1;  | 
114  |  |  | 
115  |  |     uint32_t origInitialValue;  | 
116  |  |     uint32_t initialValue;  | 
117  |  |     uint32_t errorValue;  | 
118  |  |     UChar32 highStart;  | 
119  |  |     uint32_t highValue;  | 
120  |  | #ifdef UCPTRIE_DEBUG  | 
121  |  | public:  | 
122  |  |     const char *name;  | 
123  |  | #endif  | 
124  |  | private:  | 
125  |  |     /** Temporary array while building the final data. */  | 
126  |  |     uint16_t *index16 = nullptr;  | 
127  |  |     uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3];  | 
128  |  | };  | 
129  |  |  | 
130  |  | MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) :  | 
131  | 0  |         origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue),  | 
132  | 0  |         highStart(0), highValue(initialValue)  | 
133  |  | #ifdef UCPTRIE_DEBUG  | 
134  |  |         , name("open") | 
135  |  | #endif  | 
136  | 0  |         { | 
137  | 0  |     if (U_FAILURE(errorCode)) { return; } | 
138  | 0  |     index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4);  | 
139  | 0  |     data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4);  | 
140  | 0  |     if (index == nullptr || data == nullptr) { | 
141  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
142  | 0  |         return;  | 
143  | 0  |     }  | 
144  | 0  |     indexCapacity = BMP_I_LIMIT;  | 
145  | 0  |     dataCapacity = INITIAL_DATA_LENGTH;  | 
146  | 0  | }  | 
147  |  |  | 
148  |  | MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) :  | 
149  | 0  |         index3NullOffset(other.index3NullOffset),  | 
150  | 0  |         dataNullOffset(other.dataNullOffset),  | 
151  | 0  |         origInitialValue(other.origInitialValue), initialValue(other.initialValue),  | 
152  | 0  |         errorValue(other.errorValue),  | 
153  | 0  |         highStart(other.highStart), highValue(other.highValue)  | 
154  |  | #ifdef UCPTRIE_DEBUG  | 
155  |  |         , name("mutable clone") | 
156  |  | #endif  | 
157  | 0  |         { | 
158  | 0  |     if (U_FAILURE(errorCode)) { return; } | 
159  | 0  |     int32_t iCapacity = highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT;  | 
160  | 0  |     index = (uint32_t *)uprv_malloc(iCapacity * 4);  | 
161  | 0  |     data = (uint32_t *)uprv_malloc(other.dataCapacity * 4);  | 
162  | 0  |     if (index == nullptr || data == nullptr) { | 
163  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
164  | 0  |         return;  | 
165  | 0  |     }  | 
166  | 0  |     indexCapacity = iCapacity;  | 
167  | 0  |     dataCapacity = other.dataCapacity;  | 
168  |  | 
  | 
169  | 0  |     int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;  | 
170  | 0  |     uprv_memcpy(flags, other.flags, iLimit);  | 
171  | 0  |     uprv_memcpy(index, other.index, iLimit * 4);  | 
172  | 0  |     uprv_memcpy(data, other.data, (size_t)other.dataLength * 4);  | 
173  | 0  |     dataLength = other.dataLength;  | 
174  | 0  |     U_ASSERT(other.index16 == nullptr);  | 
175  | 0  | }  | 
176  |  |  | 
177  | 0  | MutableCodePointTrie::~MutableCodePointTrie() { | 
178  | 0  |     uprv_free(index);  | 
179  | 0  |     uprv_free(data);  | 
180  | 0  |     uprv_free(index16);  | 
181  | 0  | }  | 
182  |  |  | 
183  | 0  | MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) { | 
184  |  |     // Use the highValue as the initialValue to reduce the highStart.  | 
185  | 0  |     uint32_t errorValue = ucpmap_get(map, -1);  | 
186  | 0  |     uint32_t initialValue = ucpmap_get(map, 0x10ffff);  | 
187  | 0  |     LocalPointer<MutableCodePointTrie> mutableTrie(  | 
188  | 0  |         new MutableCodePointTrie(initialValue, errorValue, errorCode),  | 
189  | 0  |         errorCode);  | 
190  | 0  |     if (U_FAILURE(errorCode)) { | 
191  | 0  |         return nullptr;  | 
192  | 0  |     }  | 
193  | 0  |     UChar32 start = 0, end;  | 
194  | 0  |     uint32_t value;  | 
195  | 0  |     while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,  | 
196  | 0  |                                   nullptr, nullptr, &value)) >= 0) { | 
197  | 0  |         if (value != initialValue) { | 
198  | 0  |             if (start == end) { | 
199  | 0  |                 mutableTrie->set(start, value, errorCode);  | 
200  | 0  |             } else { | 
201  | 0  |                 mutableTrie->setRange(start, end, value, errorCode);  | 
202  | 0  |             }  | 
203  | 0  |         }  | 
204  | 0  |         start = end + 1;  | 
205  | 0  |     }  | 
206  | 0  |     if (U_SUCCESS(errorCode)) { | 
207  | 0  |         return mutableTrie.orphan();  | 
208  | 0  |     } else { | 
209  | 0  |         return nullptr;  | 
210  | 0  |     }  | 
211  | 0  | }  | 
212  |  |  | 
213  | 0  | MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) { | 
214  |  |     // Use the highValue as the initialValue to reduce the highStart.  | 
215  | 0  |     uint32_t errorValue;  | 
216  | 0  |     uint32_t initialValue;  | 
217  | 0  |     switch (trie->valueWidth) { | 
218  | 0  |     case UCPTRIE_VALUE_BITS_16:  | 
219  | 0  |         errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];  | 
220  | 0  |         initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];  | 
221  | 0  |         break;  | 
222  | 0  |     case UCPTRIE_VALUE_BITS_32:  | 
223  | 0  |         errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];  | 
224  | 0  |         initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];  | 
225  | 0  |         break;  | 
226  | 0  |     case UCPTRIE_VALUE_BITS_8:  | 
227  | 0  |         errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];  | 
228  | 0  |         initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];  | 
229  | 0  |         break;  | 
230  | 0  |     default:  | 
231  |  |         // Unreachable if the trie is properly initialized.  | 
232  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
233  | 0  |         return nullptr;  | 
234  | 0  |     }  | 
235  | 0  |     LocalPointer<MutableCodePointTrie> mutableTrie(  | 
236  | 0  |         new MutableCodePointTrie(initialValue, errorValue, errorCode),  | 
237  | 0  |         errorCode);  | 
238  | 0  |     if (U_FAILURE(errorCode)) { | 
239  | 0  |         return nullptr;  | 
240  | 0  |     }  | 
241  | 0  |     UChar32 start = 0, end;  | 
242  | 0  |     uint32_t value;  | 
243  | 0  |     while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,  | 
244  | 0  |                                    nullptr, nullptr, &value)) >= 0) { | 
245  | 0  |         if (value != initialValue) { | 
246  | 0  |             if (start == end) { | 
247  | 0  |                 mutableTrie->set(start, value, errorCode);  | 
248  | 0  |             } else { | 
249  | 0  |                 mutableTrie->setRange(start, end, value, errorCode);  | 
250  | 0  |             }  | 
251  | 0  |         }  | 
252  | 0  |         start = end + 1;  | 
253  | 0  |     }  | 
254  | 0  |     if (U_SUCCESS(errorCode)) { | 
255  | 0  |         return mutableTrie.orphan();  | 
256  | 0  |     } else { | 
257  | 0  |         return nullptr;  | 
258  | 0  |     }  | 
259  | 0  | }  | 
260  |  |  | 
261  | 0  | void MutableCodePointTrie::clear() { | 
262  | 0  |     index3NullOffset = dataNullOffset = -1;  | 
263  | 0  |     dataLength = 0;  | 
264  | 0  |     highValue = initialValue = origInitialValue;  | 
265  | 0  |     highStart = 0;  | 
266  | 0  |     uprv_free(index16);  | 
267  | 0  |     index16 = nullptr;  | 
268  | 0  | }  | 
269  |  |  | 
270  | 0  | uint32_t MutableCodePointTrie::get(UChar32 c) const { | 
271  | 0  |     if ((uint32_t)c > MAX_UNICODE) { | 
272  | 0  |         return errorValue;  | 
273  | 0  |     }  | 
274  | 0  |     if (c >= highStart) { | 
275  | 0  |         return highValue;  | 
276  | 0  |     }  | 
277  | 0  |     int32_t i = c >> UCPTRIE_SHIFT_3;  | 
278  | 0  |     if (flags[i] == ALL_SAME) { | 
279  | 0  |         return index[i];  | 
280  | 0  |     } else { | 
281  | 0  |         return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)];  | 
282  | 0  |     }  | 
283  | 0  | }  | 
284  |  |  | 
285  |  | inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue,  | 
286  | 0  |                                  UCPMapValueFilter *filter, const void *context) { | 
287  | 0  |     if (value == initialValue) { | 
288  | 0  |         value = nullValue;  | 
289  | 0  |     } else if (filter != nullptr) { | 
290  | 0  |         value = filter(context, value);  | 
291  | 0  |     }  | 
292  | 0  |     return value;  | 
293  | 0  | }  | 
294  |  |  | 
295  |  | UChar32 MutableCodePointTrie::getRange(  | 
296  |  |         UChar32 start, UCPMapValueFilter *filter, const void *context,  | 
297  | 0  |         uint32_t *pValue) const { | 
298  | 0  |     if ((uint32_t)start > MAX_UNICODE) { | 
299  | 0  |         return U_SENTINEL;  | 
300  | 0  |     }  | 
301  | 0  |     if (start >= highStart) { | 
302  | 0  |         if (pValue != nullptr) { | 
303  | 0  |             uint32_t value = highValue;  | 
304  | 0  |             if (filter != nullptr) { value = filter(context, value); } | 
305  | 0  |             *pValue = value;  | 
306  | 0  |         }  | 
307  | 0  |         return MAX_UNICODE;  | 
308  | 0  |     }  | 
309  | 0  |     uint32_t nullValue = initialValue;  | 
310  | 0  |     if (filter != nullptr) { nullValue = filter(context, nullValue); } | 
311  | 0  |     UChar32 c = start;  | 
312  | 0  |     uint32_t trieValue, value;  | 
313  | 0  |     bool haveValue = false;  | 
314  | 0  |     int32_t i = c >> UCPTRIE_SHIFT_3;  | 
315  | 0  |     do { | 
316  | 0  |         if (flags[i] == ALL_SAME) { | 
317  | 0  |             uint32_t trieValue2 = index[i];  | 
318  | 0  |             if (haveValue) { | 
319  | 0  |                 if (trieValue2 != trieValue) { | 
320  | 0  |                     if (filter == nullptr ||  | 
321  | 0  |                             maybeFilterValue(trieValue2, initialValue, nullValue,  | 
322  | 0  |                                              filter, context) != value) { | 
323  | 0  |                         return c - 1;  | 
324  | 0  |                     }  | 
325  | 0  |                     trieValue = trieValue2;  // may or may not help  | 
326  | 0  |                 }  | 
327  | 0  |             } else { | 
328  | 0  |                 trieValue = trieValue2;  | 
329  | 0  |                 value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);  | 
330  | 0  |                 if (pValue != nullptr) { *pValue = value; } | 
331  | 0  |                 haveValue = true;  | 
332  | 0  |             }  | 
333  | 0  |             c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK;  | 
334  | 0  |         } else /* MIXED */ { | 
335  | 0  |             int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK);  | 
336  | 0  |             uint32_t trieValue2 = data[di];  | 
337  | 0  |             if (haveValue) { | 
338  | 0  |                 if (trieValue2 != trieValue) { | 
339  | 0  |                     if (filter == nullptr ||  | 
340  | 0  |                             maybeFilterValue(trieValue2, initialValue, nullValue,  | 
341  | 0  |                                              filter, context) != value) { | 
342  | 0  |                         return c - 1;  | 
343  | 0  |                     }  | 
344  | 0  |                     trieValue = trieValue2;  // may or may not help  | 
345  | 0  |                 }  | 
346  | 0  |             } else { | 
347  | 0  |                 trieValue = trieValue2;  | 
348  | 0  |                 value = maybeFilterValue(trieValue2, initialValue, nullValue, filter, context);  | 
349  | 0  |                 if (pValue != nullptr) { *pValue = value; } | 
350  | 0  |                 haveValue = true;  | 
351  | 0  |             }  | 
352  | 0  |             while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) { | 
353  | 0  |                 trieValue2 = data[++di];  | 
354  | 0  |                 if (trieValue2 != trieValue) { | 
355  | 0  |                     if (filter == nullptr ||  | 
356  | 0  |                             maybeFilterValue(trieValue2, initialValue, nullValue,  | 
357  | 0  |                                              filter, context) != value) { | 
358  | 0  |                         return c - 1;  | 
359  | 0  |                     }  | 
360  | 0  |                 }  | 
361  | 0  |                 trieValue = trieValue2;  // may or may not help  | 
362  | 0  |             }  | 
363  | 0  |         }  | 
364  | 0  |         ++i;  | 
365  | 0  |     } while (c < highStart);  | 
366  | 0  |     U_ASSERT(haveValue);  | 
367  | 0  |     if (maybeFilterValue(highValue, initialValue, nullValue,  | 
368  | 0  |                          filter, context) != value) { | 
369  | 0  |         return c - 1;  | 
370  | 0  |     } else { | 
371  | 0  |         return MAX_UNICODE;  | 
372  | 0  |     }  | 
373  | 0  | }  | 
374  |  |  | 
375  |  | void  | 
376  | 0  | writeBlock(uint32_t *block, uint32_t value) { | 
377  | 0  |     uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH;  | 
378  | 0  |     while (block < limit) { | 
379  | 0  |         *block++ = value;  | 
380  | 0  |     }  | 
381  | 0  | }  | 
382  |  |  | 
383  | 0  | bool MutableCodePointTrie::ensureHighStart(UChar32 c) { | 
384  | 0  |     if (c >= highStart) { | 
385  |  |         // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction.  | 
386  | 0  |         c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);  | 
387  | 0  |         int32_t i = highStart >> UCPTRIE_SHIFT_3;  | 
388  | 0  |         int32_t iLimit = c >> UCPTRIE_SHIFT_3;  | 
389  | 0  |         if (iLimit > indexCapacity) { | 
390  | 0  |             uint32_t *newIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4);  | 
391  | 0  |             if (newIndex == nullptr) { return false; } | 
392  | 0  |             uprv_memcpy(newIndex, index, i * 4);  | 
393  | 0  |             uprv_free(index);  | 
394  | 0  |             index = newIndex;  | 
395  | 0  |             indexCapacity = I_LIMIT;  | 
396  | 0  |         }  | 
397  | 0  |         do { | 
398  | 0  |             flags[i] = ALL_SAME;  | 
399  | 0  |             index[i] = initialValue;  | 
400  | 0  |         } while(++i < iLimit);  | 
401  | 0  |         highStart = c;  | 
402  | 0  |     }  | 
403  | 0  |     return true;  | 
404  | 0  | }  | 
405  |  |  | 
406  | 0  | int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) { | 
407  | 0  |     int32_t newBlock = dataLength;  | 
408  | 0  |     int32_t newTop = newBlock + blockLength;  | 
409  | 0  |     if (newTop > dataCapacity) { | 
410  | 0  |         int32_t capacity;  | 
411  | 0  |         if (dataCapacity < MEDIUM_DATA_LENGTH) { | 
412  | 0  |             capacity = MEDIUM_DATA_LENGTH;  | 
413  | 0  |         } else if (dataCapacity < MAX_DATA_LENGTH) { | 
414  | 0  |             capacity = MAX_DATA_LENGTH;  | 
415  | 0  |         } else { | 
416  |  |             // Should never occur.  | 
417  |  |             // Either MAX_DATA_LENGTH is incorrect,  | 
418  |  |             // or the code writes more values than should be possible.  | 
419  | 0  |             return -1;  | 
420  | 0  |         }  | 
421  | 0  |         uint32_t *newData = (uint32_t *)uprv_malloc(capacity * 4);  | 
422  | 0  |         if (newData == nullptr) { | 
423  | 0  |             return -1;  | 
424  | 0  |         }  | 
425  | 0  |         uprv_memcpy(newData, data, (size_t)dataLength * 4);  | 
426  | 0  |         uprv_free(data);  | 
427  | 0  |         data = newData;  | 
428  | 0  |         dataCapacity = capacity;  | 
429  | 0  |     }  | 
430  | 0  |     dataLength = newTop;  | 
431  | 0  |     return newBlock;  | 
432  | 0  | }  | 
433  |  |  | 
434  |  | /**  | 
435  |  |  * No error checking for illegal arguments.  | 
436  |  |  *  | 
437  |  |  * @return -1 if no new data block available (out of memory in data array)  | 
438  |  |  * @internal  | 
439  |  |  */  | 
440  | 0  | int32_t MutableCodePointTrie::getDataBlock(int32_t i) { | 
441  | 0  |     if (flags[i] == MIXED) { | 
442  | 0  |         return index[i];  | 
443  | 0  |     }  | 
444  | 0  |     if (i < BMP_I_LIMIT) { | 
445  | 0  |         int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH);  | 
446  | 0  |         if (newBlock < 0) { return newBlock; } | 
447  | 0  |         int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1);  | 
448  | 0  |         int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;  | 
449  | 0  |         do { | 
450  | 0  |             U_ASSERT(flags[iStart] == ALL_SAME);  | 
451  | 0  |             writeBlock(data + newBlock, index[iStart]);  | 
452  | 0  |             flags[iStart] = MIXED;  | 
453  | 0  |             index[iStart++] = newBlock;  | 
454  | 0  |             newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;  | 
455  | 0  |         } while (iStart < iLimit);  | 
456  | 0  |         return index[i];  | 
457  | 0  |     } else { | 
458  | 0  |         int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH);  | 
459  | 0  |         if (newBlock < 0) { return newBlock; } | 
460  | 0  |         writeBlock(data + newBlock, index[i]);  | 
461  | 0  |         flags[i] = MIXED;  | 
462  | 0  |         index[i] = newBlock;  | 
463  | 0  |         return newBlock;  | 
464  | 0  |     }  | 
465  | 0  | }  | 
466  |  |  | 
467  | 0  | void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) { | 
468  | 0  |     if (U_FAILURE(errorCode)) { | 
469  | 0  |         return;  | 
470  | 0  |     }  | 
471  | 0  |     if ((uint32_t)c > MAX_UNICODE) { | 
472  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
473  | 0  |         return;  | 
474  | 0  |     }  | 
475  |  |  | 
476  | 0  |     int32_t block;  | 
477  | 0  |     if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) { | 
478  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
479  | 0  |         return;  | 
480  | 0  |     }  | 
481  |  |  | 
482  | 0  |     data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value;  | 
483  | 0  | }  | 
484  |  |  | 
485  |  | void  | 
486  | 0  | fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) { | 
487  | 0  |     uint32_t *pLimit = block + limit;  | 
488  | 0  |     block += start;  | 
489  | 0  |     while (block < pLimit) { | 
490  | 0  |         *block++ = value;  | 
491  | 0  |     }  | 
492  | 0  | }  | 
493  |  |  | 
494  | 0  | void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) { | 
495  | 0  |     if (U_FAILURE(errorCode)) { | 
496  | 0  |         return;  | 
497  | 0  |     }  | 
498  | 0  |     if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) { | 
499  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
500  | 0  |         return;  | 
501  | 0  |     }  | 
502  | 0  |     if (!ensureHighStart(end)) { | 
503  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
504  | 0  |         return;  | 
505  | 0  |     }  | 
506  |  |  | 
507  | 0  |     UChar32 limit = end + 1;  | 
508  | 0  |     if (start & UCPTRIE_SMALL_DATA_MASK) { | 
509  |  |         // Set partial block at [start..following block boundary[.  | 
510  | 0  |         int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);  | 
511  | 0  |         if (block < 0) { | 
512  | 0  |             errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
513  | 0  |             return;  | 
514  | 0  |         }  | 
515  |  |  | 
516  | 0  |         UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK;  | 
517  | 0  |         if (nextStart <= limit) { | 
518  | 0  |             fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH,  | 
519  | 0  |                       value);  | 
520  | 0  |             start = nextStart;  | 
521  | 0  |         } else { | 
522  | 0  |             fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK,  | 
523  | 0  |                       value);  | 
524  | 0  |             return;  | 
525  | 0  |         }  | 
526  | 0  |     }  | 
527  |  |  | 
528  |  |     // Number of positions in the last, partial block.  | 
529  | 0  |     int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK;  | 
530  |  |  | 
531  |  |     // Round down limit to a block boundary.  | 
532  | 0  |     limit &= ~UCPTRIE_SMALL_DATA_MASK;  | 
533  |  |  | 
534  |  |     // Iterate over all-value blocks.  | 
535  | 0  |     while (start < limit) { | 
536  | 0  |         int32_t i = start >> UCPTRIE_SHIFT_3;  | 
537  | 0  |         if (flags[i] == ALL_SAME) { | 
538  | 0  |             index[i] = value;  | 
539  | 0  |         } else /* MIXED */ { | 
540  | 0  |             fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value);  | 
541  | 0  |         }  | 
542  | 0  |         start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;  | 
543  | 0  |     }  | 
544  |  | 
  | 
545  | 0  |     if (rest > 0) { | 
546  |  |         // Set partial block at [last block boundary..limit[.  | 
547  | 0  |         int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);  | 
548  | 0  |         if (block < 0) { | 
549  | 0  |             errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
550  | 0  |             return;  | 
551  | 0  |         }  | 
552  |  |  | 
553  | 0  |         fillBlock(data + block, 0, rest, value);  | 
554  | 0  |     }  | 
555  | 0  | }  | 
556  |  |  | 
557  |  | /* compaction --------------------------------------------------------------- */  | 
558  |  |  | 
559  | 0  | void MutableCodePointTrie::maskValues(uint32_t mask) { | 
560  | 0  |     initialValue &= mask;  | 
561  | 0  |     errorValue &= mask;  | 
562  | 0  |     highValue &= mask;  | 
563  | 0  |     int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;  | 
564  | 0  |     for (int32_t i = 0; i < iLimit; ++i) { | 
565  | 0  |         if (flags[i] == ALL_SAME) { | 
566  | 0  |             index[i] &= mask;  | 
567  | 0  |         }  | 
568  | 0  |     }  | 
569  | 0  |     for (int32_t i = 0; i < dataLength; ++i) { | 
570  | 0  |         data[i] &= mask;  | 
571  | 0  |     }  | 
572  | 0  | }  | 
573  |  |  | 
574  |  | template<typename UIntA, typename UIntB>  | 
575  | 0  | bool equalBlocks(const UIntA *s, const UIntB *t, int32_t length) { | 
576  | 0  |     while (length > 0 && *s == *t) { | 
577  | 0  |         ++s;  | 
578  | 0  |         ++t;  | 
579  | 0  |         --length;  | 
580  | 0  |     }  | 
581  | 0  |     return length == 0;  | 
582  | 0  | } Unexecuted instantiation: umutablecptrie.cpp:bool icu_70::(anonymous namespace)::equalBlocks<unsigned int, unsigned int>(unsigned int const*, unsigned int const*, int) Unexecuted instantiation: umutablecptrie.cpp:bool icu_70::(anonymous namespace)::equalBlocks<unsigned short, unsigned short>(unsigned short const*, unsigned short const*, int) Unexecuted instantiation: umutablecptrie.cpp:bool icu_70::(anonymous namespace)::equalBlocks<unsigned short, unsigned int>(unsigned short const*, unsigned int const*, int)  | 
583  |  |  | 
584  | 0  | bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) { | 
585  | 0  |     const uint32_t *pLimit = p + length;  | 
586  | 0  |     while (p < pLimit && *p == value) { ++p; } | 
587  | 0  |     return p == pLimit;  | 
588  | 0  | }  | 
589  |  |  | 
590  |  | /** Search for an identical block. */  | 
591  |  | int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,  | 
592  | 0  |                       const uint16_t *q, int32_t qStart, int32_t blockLength) { | 
593  |  |     // Ensure that we do not even partially get past length.  | 
594  | 0  |     length -= blockLength;  | 
595  |  | 
  | 
596  | 0  |     q += qStart;  | 
597  | 0  |     while (pStart <= length) { | 
598  | 0  |         if (equalBlocks(p + pStart, q, blockLength)) { | 
599  | 0  |             return pStart;  | 
600  | 0  |         }  | 
601  | 0  |         ++pStart;  | 
602  | 0  |     }  | 
603  | 0  |     return -1;  | 
604  | 0  | }  | 
605  |  |  | 
606  |  | int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit,  | 
607  | 0  |                          uint32_t value, int32_t blockLength) { | 
608  |  |     // Ensure that we do not even partially get past limit.  | 
609  | 0  |     limit -= blockLength;  | 
610  |  | 
  | 
611  | 0  |     for (int32_t block = start; block <= limit; ++block) { | 
612  | 0  |         if (p[block] == value) { | 
613  | 0  |             for (int32_t i = 1;; ++i) { | 
614  | 0  |                 if (i == blockLength) { | 
615  | 0  |                     return block;  | 
616  | 0  |                 }  | 
617  | 0  |                 if (p[block + i] != value) { | 
618  | 0  |                     block += i;  | 
619  | 0  |                     break;  | 
620  | 0  |                 }  | 
621  | 0  |             }  | 
622  | 0  |         }  | 
623  | 0  |     }  | 
624  | 0  |     return -1;  | 
625  | 0  | }  | 
626  |  |  | 
627  |  | /**  | 
628  |  |  * Look for maximum overlap of the beginning of the other block  | 
629  |  |  * with the previous, adjacent block.  | 
630  |  |  */  | 
631  |  | template<typename UIntA, typename UIntB>  | 
632  |  | int32_t getOverlap(const UIntA *p, int32_t length,  | 
633  | 0  |                    const UIntB *q, int32_t qStart, int32_t blockLength) { | 
634  | 0  |     int32_t overlap = blockLength - 1;  | 
635  | 0  |     U_ASSERT(overlap <= length);  | 
636  | 0  |     q += qStart;  | 
637  | 0  |     while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) { | 
638  | 0  |         --overlap;  | 
639  | 0  |     }  | 
640  | 0  |     return overlap;  | 
641  | 0  | } Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::getOverlap<unsigned int, unsigned int>(unsigned int const*, int, unsigned int const*, int, int) Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::getOverlap<unsigned short, unsigned int>(unsigned short const*, int, unsigned int const*, int, int) Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::getOverlap<unsigned short, unsigned short>(unsigned short const*, int, unsigned short const*, int, int)  | 
642  |  |  | 
643  |  | int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value,  | 
644  | 0  |                           int32_t blockLength) { | 
645  | 0  |     int32_t min = length - (blockLength - 1);  | 
646  | 0  |     int32_t i = length;  | 
647  | 0  |     while (min < i && p[i - 1] == value) { --i; } | 
648  | 0  |     return length - i;  | 
649  | 0  | }  | 
650  |  |  | 
651  | 0  | bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) { | 
652  | 0  |     for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) { | 
653  | 0  |         if (index[i] == dataOffset) { | 
654  | 0  |             return true;  | 
655  | 0  |         }  | 
656  | 0  |     }  | 
657  | 0  |     return false;  | 
658  | 0  | }  | 
659  |  |  | 
660  |  | /**  | 
661  |  |  * Finds the start of the last range in the trie by enumerating backward.  | 
662  |  |  * Indexes for code points higher than this will be omitted.  | 
663  |  |  */  | 
664  | 0  | UChar32 MutableCodePointTrie::findHighStart() const { | 
665  | 0  |     int32_t i = highStart >> UCPTRIE_SHIFT_3;  | 
666  | 0  |     while (i > 0) { | 
667  | 0  |         bool match;  | 
668  | 0  |         if (flags[--i] == ALL_SAME) { | 
669  | 0  |             match = index[i] == highValue;  | 
670  | 0  |         } else /* MIXED */ { | 
671  | 0  |             const uint32_t *p = data + index[i];  | 
672  | 0  |             for (int32_t j = 0;; ++j) { | 
673  | 0  |                 if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) { | 
674  | 0  |                     match = true;  | 
675  | 0  |                     break;  | 
676  | 0  |                 }  | 
677  | 0  |                 if (p[j] != highValue) { | 
678  | 0  |                     match = false;  | 
679  | 0  |                     break;  | 
680  | 0  |                 }  | 
681  | 0  |             }  | 
682  | 0  |         }  | 
683  | 0  |         if (!match) { | 
684  | 0  |             return (i + 1) << UCPTRIE_SHIFT_3;  | 
685  | 0  |         }  | 
686  | 0  |     }  | 
687  | 0  |     return 0;  | 
688  | 0  | }  | 
689  |  |  | 
690  |  | class AllSameBlocks { | 
691  |  | public:  | 
692  |  |     static constexpr int32_t NEW_UNIQUE = -1;  | 
693  |  |     static constexpr int32_t OVERFLOW = -2;  | 
694  |  |  | 
695  | 0  |     AllSameBlocks() : length(0), mostRecent(-1) {} | 
696  |  |  | 
697  | 0  |     int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) { | 
698  | 0  |         if (mostRecent >= 0 && values[mostRecent] == value) { | 
699  | 0  |             refCounts[mostRecent] += count;  | 
700  | 0  |             return indexes[mostRecent];  | 
701  | 0  |         }  | 
702  | 0  |         for (int32_t i = 0; i < length; ++i) { | 
703  | 0  |             if (values[i] == value) { | 
704  | 0  |                 mostRecent = i;  | 
705  | 0  |                 refCounts[i] += count;  | 
706  | 0  |                 return indexes[i];  | 
707  | 0  |             }  | 
708  | 0  |         }  | 
709  | 0  |         if (length == CAPACITY) { | 
710  | 0  |             return OVERFLOW;  | 
711  | 0  |         }  | 
712  | 0  |         mostRecent = length;  | 
713  | 0  |         indexes[length] = index;  | 
714  | 0  |         values[length] = value;  | 
715  | 0  |         refCounts[length++] = count;  | 
716  | 0  |         return NEW_UNIQUE;  | 
717  | 0  |     }  | 
718  |  |  | 
719  |  |     /** Replaces the block which has the lowest reference count. */  | 
720  | 0  |     void add(int32_t index, int32_t count, uint32_t value) { | 
721  | 0  |         U_ASSERT(length == CAPACITY);  | 
722  | 0  |         int32_t least = -1;  | 
723  | 0  |         int32_t leastCount = I_LIMIT;  | 
724  | 0  |         for (int32_t i = 0; i < length; ++i) { | 
725  | 0  |             U_ASSERT(values[i] != value);  | 
726  | 0  |             if (refCounts[i] < leastCount) { | 
727  | 0  |                 least = i;  | 
728  | 0  |                 leastCount = refCounts[i];  | 
729  | 0  |             }  | 
730  | 0  |         }  | 
731  | 0  |         U_ASSERT(least >= 0);  | 
732  | 0  |         mostRecent = least;  | 
733  | 0  |         indexes[least] = index;  | 
734  | 0  |         values[least] = value;  | 
735  | 0  |         refCounts[least] = count;  | 
736  | 0  |     }  | 
737  |  |  | 
738  | 0  |     int32_t findMostUsed() const { | 
739  | 0  |         if (length == 0) { return -1; } | 
740  | 0  |         int32_t max = -1;  | 
741  | 0  |         int32_t maxCount = 0;  | 
742  | 0  |         for (int32_t i = 0; i < length; ++i) { | 
743  | 0  |             if (refCounts[i] > maxCount) { | 
744  | 0  |                 max = i;  | 
745  | 0  |                 maxCount = refCounts[i];  | 
746  | 0  |             }  | 
747  | 0  |         }  | 
748  | 0  |         return indexes[max];  | 
749  | 0  |     }  | 
750  |  |  | 
751  |  | private:  | 
752  |  |     static constexpr int32_t CAPACITY = 32;  | 
753  |  |  | 
754  |  |     int32_t length;  | 
755  |  |     int32_t mostRecent;  | 
756  |  |  | 
757  |  |     int32_t indexes[CAPACITY];  | 
758  |  |     uint32_t values[CAPACITY];  | 
759  |  |     int32_t refCounts[CAPACITY];  | 
760  |  | };  | 
761  |  |  | 
762  |  | // Custom hash table for mixed-value blocks to be found anywhere in the  | 
763  |  | // compacted data or index so far.  | 
764  |  | class MixedBlocks { | 
765  |  | public:  | 
766  | 0  |     MixedBlocks() {} | 
767  | 0  |     ~MixedBlocks() { | 
768  | 0  |         uprv_free(table);  | 
769  | 0  |     }  | 
770  |  |  | 
771  | 0  |     bool init(int32_t maxLength, int32_t newBlockLength) { | 
772  |  |         // We store actual data indexes + 1 to reserve 0 for empty entries.  | 
773  | 0  |         int32_t maxDataIndex = maxLength - newBlockLength + 1;  | 
774  | 0  |         int32_t newLength;  | 
775  | 0  |         if (maxDataIndex <= 0xfff) {  // 4k | 
776  | 0  |             newLength = 6007;  | 
777  | 0  |             shift = 12;  | 
778  | 0  |             mask = 0xfff;  | 
779  | 0  |         } else if (maxDataIndex <= 0x7fff) {  // 32k | 
780  | 0  |             newLength = 50021;  | 
781  | 0  |             shift = 15;  | 
782  | 0  |             mask = 0x7fff;  | 
783  | 0  |         } else if (maxDataIndex <= 0x1ffff) {  // 128k | 
784  | 0  |             newLength = 200003;  | 
785  | 0  |             shift = 17;  | 
786  | 0  |             mask = 0x1ffff;  | 
787  | 0  |         } else { | 
788  |  |             // maxDataIndex up to around MAX_DATA_LENGTH, ca. 1.1M  | 
789  | 0  |             newLength = 1500007;  | 
790  | 0  |             shift = 21;  | 
791  | 0  |             mask = 0x1fffff;  | 
792  | 0  |         }  | 
793  | 0  |         if (newLength > capacity) { | 
794  | 0  |             uprv_free(table);  | 
795  | 0  |             table = (uint32_t *)uprv_malloc(newLength * 4);  | 
796  | 0  |             if (table == nullptr) { | 
797  | 0  |                 return false;  | 
798  | 0  |             }  | 
799  | 0  |             capacity = newLength;  | 
800  | 0  |         }  | 
801  | 0  |         length = newLength;  | 
802  | 0  |         uprv_memset(table, 0, length * 4);  | 
803  |  | 
  | 
804  | 0  |         blockLength = newBlockLength;  | 
805  | 0  |         return true;  | 
806  | 0  |     }  | 
807  |  |  | 
808  |  |     template<typename UInt>  | 
809  | 0  |     void extend(const UInt *data, int32_t minStart, int32_t prevDataLength, int32_t newDataLength) { | 
810  | 0  |         int32_t start = prevDataLength - blockLength;  | 
811  | 0  |         if (start >= minStart) { | 
812  | 0  |             ++start;  // Skip the last block that we added last time.  | 
813  | 0  |         } else { | 
814  | 0  |             start = minStart;  // Begin with the first full block.  | 
815  | 0  |         }  | 
816  | 0  |         for (int32_t end = newDataLength - blockLength; start <= end; ++start) { | 
817  | 0  |             uint32_t hashCode = makeHashCode(data, start);  | 
818  | 0  |             addEntry(data, start, hashCode, start);  | 
819  | 0  |         }  | 
820  | 0  |     } Unexecuted instantiation: umutablecptrie.cpp:void icu_70::(anonymous namespace)::MixedBlocks::extend<unsigned int>(unsigned int const*, int, int, int) Unexecuted instantiation: umutablecptrie.cpp:void icu_70::(anonymous namespace)::MixedBlocks::extend<unsigned short>(unsigned short const*, int, int, int)  | 
821  |  |  | 
822  |  |     template<typename UIntA, typename UIntB>  | 
823  | 0  |     int32_t findBlock(const UIntA *data, const UIntB *blockData, int32_t blockStart) const { | 
824  | 0  |         uint32_t hashCode = makeHashCode(blockData, blockStart);  | 
825  | 0  |         int32_t entryIndex = findEntry(data, blockData, blockStart, hashCode);  | 
826  | 0  |         if (entryIndex >= 0) { | 
827  | 0  |             return (table[entryIndex] & mask) - 1;  | 
828  | 0  |         } else { | 
829  | 0  |             return -1;  | 
830  | 0  |         }  | 
831  | 0  |     } Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::MixedBlocks::findBlock<unsigned int, unsigned int>(unsigned int const*, unsigned int const*, int) const Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::MixedBlocks::findBlock<unsigned short, unsigned int>(unsigned short const*, unsigned int const*, int) const Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::MixedBlocks::findBlock<unsigned short, unsigned short>(unsigned short const*, unsigned short const*, int) const  | 
832  |  |  | 
833  | 0  |     int32_t findAllSameBlock(const uint32_t *data, uint32_t blockValue) const { | 
834  | 0  |         uint32_t hashCode = makeHashCode(blockValue);  | 
835  | 0  |         int32_t entryIndex = findEntry(data, blockValue, hashCode);  | 
836  | 0  |         if (entryIndex >= 0) { | 
837  | 0  |             return (table[entryIndex] & mask) - 1;  | 
838  | 0  |         } else { | 
839  | 0  |             return -1;  | 
840  | 0  |         }  | 
841  | 0  |     }  | 
842  |  |  | 
843  |  | private:  | 
844  |  |     template<typename UInt>  | 
845  | 0  |     uint32_t makeHashCode(const UInt *blockData, int32_t blockStart) const { | 
846  | 0  |         int32_t blockLimit = blockStart + blockLength;  | 
847  | 0  |         uint32_t hashCode = blockData[blockStart++];  | 
848  | 0  |         do { | 
849  | 0  |             hashCode = 37 * hashCode + blockData[blockStart++];  | 
850  | 0  |         } while (blockStart < blockLimit);  | 
851  | 0  |         return hashCode;  | 
852  | 0  |     } Unexecuted instantiation: umutablecptrie.cpp:unsigned int icu_70::(anonymous namespace)::MixedBlocks::makeHashCode<unsigned int>(unsigned int const*, int) const Unexecuted instantiation: umutablecptrie.cpp:unsigned int icu_70::(anonymous namespace)::MixedBlocks::makeHashCode<unsigned short>(unsigned short const*, int) const  | 
853  |  |  | 
854  | 0  |     uint32_t makeHashCode(uint32_t blockValue) const { | 
855  | 0  |         uint32_t hashCode = blockValue;  | 
856  | 0  |         for (int32_t i = 1; i < blockLength; ++i) { | 
857  | 0  |             hashCode = 37 * hashCode + blockValue;  | 
858  | 0  |         }  | 
859  | 0  |         return hashCode;  | 
860  | 0  |     }  | 
861  |  |  | 
862  |  |     template<typename UInt>  | 
863  | 0  |     void addEntry(const UInt *data, int32_t blockStart, uint32_t hashCode, int32_t dataIndex) { | 
864  | 0  |         U_ASSERT(0 <= dataIndex && dataIndex < (int32_t)mask);  | 
865  | 0  |         int32_t entryIndex = findEntry(data, data, blockStart, hashCode);  | 
866  | 0  |         if (entryIndex < 0) { | 
867  | 0  |             table[~entryIndex] = (hashCode << shift) | (dataIndex + 1);  | 
868  | 0  |         }  | 
869  | 0  |     } Unexecuted instantiation: umutablecptrie.cpp:void icu_70::(anonymous namespace)::MixedBlocks::addEntry<unsigned int>(unsigned int const*, int, unsigned int, int) Unexecuted instantiation: umutablecptrie.cpp:void icu_70::(anonymous namespace)::MixedBlocks::addEntry<unsigned short>(unsigned short const*, int, unsigned int, int)  | 
870  |  |  | 
871  |  |     template<typename UIntA, typename UIntB>  | 
872  |  |     int32_t findEntry(const UIntA *data, const UIntB *blockData, int32_t blockStart,  | 
873  | 0  |                       uint32_t hashCode) const { | 
874  | 0  |         uint32_t shiftedHashCode = hashCode << shift;  | 
875  | 0  |         int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1  | 
876  | 0  |         for (int32_t entryIndex = initialEntryIndex;;) { | 
877  | 0  |             uint32_t entry = table[entryIndex];  | 
878  | 0  |             if (entry == 0) { | 
879  | 0  |                 return ~entryIndex;  | 
880  | 0  |             }  | 
881  | 0  |             if ((entry & ~mask) == shiftedHashCode) { | 
882  | 0  |                 int32_t dataIndex = (entry & mask) - 1;  | 
883  | 0  |                 if (equalBlocks(data + dataIndex, blockData + blockStart, blockLength)) { | 
884  | 0  |                     return entryIndex;  | 
885  | 0  |                 }  | 
886  | 0  |             }  | 
887  | 0  |             entryIndex = nextIndex(initialEntryIndex, entryIndex);  | 
888  | 0  |         }  | 
889  | 0  |     } Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::MixedBlocks::findEntry<unsigned int, unsigned int>(unsigned int const*, unsigned int const*, int, unsigned int) const Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::MixedBlocks::findEntry<unsigned short, unsigned short>(unsigned short const*, unsigned short const*, int, unsigned int) const Unexecuted instantiation: umutablecptrie.cpp:int icu_70::(anonymous namespace)::MixedBlocks::findEntry<unsigned short, unsigned int>(unsigned short const*, unsigned int const*, int, unsigned int) const  | 
890  |  |  | 
891  | 0  |     int32_t findEntry(const uint32_t *data, uint32_t blockValue, uint32_t hashCode) const { | 
892  | 0  |         uint32_t shiftedHashCode = hashCode << shift;  | 
893  | 0  |         int32_t initialEntryIndex = (hashCode % (length - 1)) + 1;  // 1..length-1  | 
894  | 0  |         for (int32_t entryIndex = initialEntryIndex;;) { | 
895  | 0  |             uint32_t entry = table[entryIndex];  | 
896  | 0  |             if (entry == 0) { | 
897  | 0  |                 return ~entryIndex;  | 
898  | 0  |             }  | 
899  | 0  |             if ((entry & ~mask) == shiftedHashCode) { | 
900  | 0  |                 int32_t dataIndex = (entry & mask) - 1;  | 
901  | 0  |                 if (allValuesSameAs(data + dataIndex, blockLength, blockValue)) { | 
902  | 0  |                     return entryIndex;  | 
903  | 0  |                 }  | 
904  | 0  |             }  | 
905  | 0  |             entryIndex = nextIndex(initialEntryIndex, entryIndex);  | 
906  | 0  |         }  | 
907  | 0  |     }  | 
908  |  |  | 
909  | 0  |     inline int32_t nextIndex(int32_t initialEntryIndex, int32_t entryIndex) const { | 
910  |  |         // U_ASSERT(0 < initialEntryIndex && initialEntryIndex < length);  | 
911  | 0  |         return (entryIndex + initialEntryIndex) % length;  | 
912  | 0  |     }  | 
913  |  |  | 
914  |  |     // Hash table.  | 
915  |  |     // The length is a prime number, larger than the maximum data length.  | 
916  |  |     // The "shift" lower bits store a data index + 1.  | 
917  |  |     // The remaining upper bits store a partial hashCode of the block data values.  | 
918  |  |     uint32_t *table = nullptr;  | 
919  |  |     int32_t capacity = 0;  | 
920  |  |     int32_t length = 0;  | 
921  |  |     int32_t shift = 0;  | 
922  |  |     uint32_t mask = 0;  | 
923  |  |  | 
924  |  |     int32_t blockLength = 0;  | 
925  |  | };  | 
926  |  |  | 
927  | 0  | int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) { | 
928  |  | #ifdef UCPTRIE_DEBUG  | 
929  |  |     bool overflow = false;  | 
930  |  | #endif  | 
931  |  |  | 
932  |  |     // ASCII data will be stored as a linear table, even if the following code  | 
933  |  |     // does not yet count it that way.  | 
934  | 0  |     int32_t newDataCapacity = ASCII_LIMIT;  | 
935  |  |     // Add room for a small data null block in case it would match the start of  | 
936  |  |     // a fast data block where dataNullOffset must not be set in that case.  | 
937  | 0  |     newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;  | 
938  |  |     // Add room for special values (errorValue, highValue) and padding.  | 
939  | 0  |     newDataCapacity += 4;  | 
940  | 0  |     int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;  | 
941  | 0  |     int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;  | 
942  | 0  |     int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;  | 
943  | 0  |     for (int32_t i = 0; i < iLimit; i += inc) { | 
944  | 0  |         if (i == fastILimit) { | 
945  | 0  |             blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;  | 
946  | 0  |             inc = 1;  | 
947  | 0  |         }  | 
948  | 0  |         uint32_t value = index[i];  | 
949  | 0  |         if (flags[i] == MIXED) { | 
950  |  |             // Really mixed?  | 
951  | 0  |             const uint32_t *p = data + value;  | 
952  | 0  |             value = *p;  | 
953  | 0  |             if (allValuesSameAs(p + 1, blockLength - 1, value)) { | 
954  | 0  |                 flags[i] = ALL_SAME;  | 
955  | 0  |                 index[i] = value;  | 
956  |  |                 // Fall through to ALL_SAME handling.  | 
957  | 0  |             } else { | 
958  | 0  |                 newDataCapacity += blockLength;  | 
959  | 0  |                 continue;  | 
960  | 0  |             }  | 
961  | 0  |         } else { | 
962  | 0  |             U_ASSERT(flags[i] == ALL_SAME);  | 
963  | 0  |             if (inc > 1) { | 
964  |  |                 // Do all of the fast-range data block's ALL_SAME parts have the same value?  | 
965  | 0  |                 bool allSame = true;  | 
966  | 0  |                 int32_t next_i = i + inc;  | 
967  | 0  |                 for (int32_t j = i + 1; j < next_i; ++j) { | 
968  | 0  |                     U_ASSERT(flags[j] == ALL_SAME);  | 
969  | 0  |                     if (index[j] != value) { | 
970  | 0  |                         allSame = false;  | 
971  | 0  |                         break;  | 
972  | 0  |                     }  | 
973  | 0  |                 }  | 
974  | 0  |                 if (!allSame) { | 
975  |  |                     // Turn it into a MIXED block.  | 
976  | 0  |                     if (getDataBlock(i) < 0) { | 
977  | 0  |                         return -1;  | 
978  | 0  |                     }  | 
979  | 0  |                     newDataCapacity += blockLength;  | 
980  | 0  |                     continue;  | 
981  | 0  |                 }  | 
982  | 0  |             }  | 
983  | 0  |         }  | 
984  |  |         // Is there another ALL_SAME block with the same value?  | 
985  | 0  |         int32_t other = allSameBlocks.findOrAdd(i, inc, value);  | 
986  | 0  |         if (other == AllSameBlocks::OVERFLOW) { | 
987  |  |             // The fixed-size array overflowed. Slow check for a duplicate block.  | 
988  |  | #ifdef UCPTRIE_DEBUG  | 
989  |  |             if (!overflow) { | 
990  |  |                 puts("UCPTrie AllSameBlocks overflow"); | 
991  |  |                 overflow = true;  | 
992  |  |             }  | 
993  |  | #endif  | 
994  | 0  |             int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;  | 
995  | 0  |             for (int32_t j = 0;; j += jInc) { | 
996  | 0  |                 if (j == i) { | 
997  | 0  |                     allSameBlocks.add(i, inc, value);  | 
998  | 0  |                     break;  | 
999  | 0  |                 }  | 
1000  | 0  |                 if (j == fastILimit) { | 
1001  | 0  |                     jInc = 1;  | 
1002  | 0  |                 }  | 
1003  | 0  |                 if (flags[j] == ALL_SAME && index[j] == value) { | 
1004  | 0  |                     allSameBlocks.add(j, jInc + inc, value);  | 
1005  | 0  |                     other = j;  | 
1006  | 0  |                     break;  | 
1007  |  |                     // We could keep counting blocks with the same value  | 
1008  |  |                     // before we add the first one, which may improve compaction in rare cases,  | 
1009  |  |                     // but it would make it slower.  | 
1010  | 0  |                 }  | 
1011  | 0  |             }  | 
1012  | 0  |         }  | 
1013  | 0  |         if (other >= 0) { | 
1014  | 0  |             flags[i] = SAME_AS;  | 
1015  | 0  |             index[i] = other;  | 
1016  | 0  |         } else { | 
1017  |  |             // New unique same-value block.  | 
1018  | 0  |             newDataCapacity += blockLength;  | 
1019  | 0  |         }  | 
1020  | 0  |     }  | 
1021  | 0  |     return newDataCapacity;  | 
1022  | 0  | }  | 
1023  |  |  | 
1024  |  | #ifdef UCPTRIE_DEBUG  | 
1025  |  | #   define DEBUG_DO(expr) expr  | 
1026  |  | #else  | 
1027  |  | #   define DEBUG_DO(expr)  | 
1028  |  | #endif  | 
1029  |  |  | 
1030  |  | #ifdef UCPTRIE_DEBUG  | 
1031  |  | // Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF  | 
1032  |  | int32_t appendValue(char s[], int32_t length, uint32_t value) { | 
1033  |  |     value ^= value >> 16;  | 
1034  |  |     value ^= value >> 8;  | 
1035  |  |     s[length] = 0xE2;  | 
1036  |  |     s[length + 1] = (char)(0xA0 + ((value >> 6) & 3));  | 
1037  |  |     s[length + 2] = (char)(0x80 + (value & 0x3F));  | 
1038  |  |     return length + 3;  | 
1039  |  | }  | 
1040  |  |  | 
1041  |  | void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value,  | 
1042  |  |                 UChar32 start, int32_t overlap, uint32_t initialValue) { | 
1043  |  |     char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3];  | 
1044  |  |     int32_t length = 0;  | 
1045  |  |     int32_t i;  | 
1046  |  |     for (i = 0; i < overlap; ++i) { | 
1047  |  |         length = appendValue(s, length, 0);  // Braille blank  | 
1048  |  |     }  | 
1049  |  |     s[length++] = '|';  | 
1050  |  |     for (; i < blockLength; ++i) { | 
1051  |  |         if (block != nullptr) { | 
1052  |  |             value = block[i];  | 
1053  |  |         }  | 
1054  |  |         if (value == initialValue) { | 
1055  |  |             value = 0x40;  // Braille lower left dot  | 
1056  |  |         }  | 
1057  |  |         length = appendValue(s, length, value);  | 
1058  |  |     }  | 
1059  |  |     s[length] = 0;  | 
1060  |  |     start += overlap;  | 
1061  |  |     if (start <= 0xffff) { | 
1062  |  |         printf("    %04lX  %s|\n", (long)start, s); | 
1063  |  |     } else if (start <= 0xfffff) { | 
1064  |  |         printf("   %5lX  %s|\n", (long)start, s); | 
1065  |  |     } else { | 
1066  |  |         printf("  %6lX  %s|\n", (long)start, s); | 
1067  |  |     }  | 
1068  |  | }  | 
1069  |  | #endif  | 
1070  |  |  | 
1071  |  | /**  | 
1072  |  |  * Compacts a build-time trie.  | 
1073  |  |  *  | 
1074  |  |  * The compaction  | 
1075  |  |  * - removes blocks that are identical with earlier ones  | 
1076  |  |  * - overlaps each new non-duplicate block as much as possible with the previously-written one  | 
1077  |  |  * - works with fast-range data blocks whose length is a multiple of that of  | 
1078  |  |  *   higher-code-point data blocks  | 
1079  |  |  *  | 
1080  |  |  * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.  | 
1081  |  |  */  | 
1082  |  | int32_t MutableCodePointTrie::compactData(  | 
1083  |  |         int32_t fastILimit, uint32_t *newData, int32_t newDataCapacity,  | 
1084  | 0  |         int32_t dataNullIndex, MixedBlocks &mixedBlocks, UErrorCode &errorCode) { | 
1085  |  | #ifdef UCPTRIE_DEBUG  | 
1086  |  |     int32_t countSame=0, sumOverlaps=0;  | 
1087  |  |     bool printData = dataLength == 29088 /* line.brk */ ||  | 
1088  |  |         // dataLength == 30048 /* CanonIterData */ ||  | 
1089  |  |         dataLength == 50400 /* zh.txt~stroke */;  | 
1090  |  | #endif  | 
1091  |  |  | 
1092  |  |     // The linear ASCII data has been copied into newData already.  | 
1093  | 0  |     int32_t newDataLength = 0;  | 
1094  | 0  |     for (int32_t i = 0; newDataLength < ASCII_LIMIT;  | 
1095  | 0  |             newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) { | 
1096  | 0  |         index[i] = newDataLength;  | 
1097  |  | #ifdef UCPTRIE_DEBUG  | 
1098  |  |         if (printData) { | 
1099  |  |             printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue);  | 
1100  |  |         }  | 
1101  |  | #endif  | 
1102  | 0  |     }  | 
1103  |  | 
  | 
1104  | 0  |     int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;  | 
1105  | 0  |     if (!mixedBlocks.init(newDataCapacity, blockLength)) { | 
1106  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
1107  | 0  |         return 0;  | 
1108  | 0  |     }  | 
1109  | 0  |     mixedBlocks.extend(newData, 0, 0, newDataLength);  | 
1110  |  | 
  | 
1111  | 0  |     int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;  | 
1112  | 0  |     int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;  | 
1113  | 0  |     int32_t fastLength = 0;  | 
1114  | 0  |     for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) { | 
1115  | 0  |         if (i == fastILimit) { | 
1116  | 0  |             blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;  | 
1117  | 0  |             inc = 1;  | 
1118  | 0  |             fastLength = newDataLength;  | 
1119  | 0  |             if (!mixedBlocks.init(newDataCapacity, blockLength)) { | 
1120  | 0  |                 errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
1121  | 0  |                 return 0;  | 
1122  | 0  |             }  | 
1123  | 0  |             mixedBlocks.extend(newData, 0, 0, newDataLength);  | 
1124  | 0  |         }  | 
1125  | 0  |         if (flags[i] == ALL_SAME) { | 
1126  | 0  |             uint32_t value = index[i];  | 
1127  |  |             // Find an earlier part of the data array of length blockLength  | 
1128  |  |             // that is filled with this value.  | 
1129  | 0  |             int32_t n = mixedBlocks.findAllSameBlock(newData, value);  | 
1130  |  |             // If we find a match, and the current block is the data null block,  | 
1131  |  |             // and it is not a fast block but matches the start of a fast block,  | 
1132  |  |             // then we need to continue looking.  | 
1133  |  |             // This is because this small block is shorter than the fast block,  | 
1134  |  |             // and not all of the rest of the fast block is filled with this value.  | 
1135  |  |             // Otherwise trie.getRange() would detect that the fast block starts at  | 
1136  |  |             // dataNullOffset and assume incorrectly that it is filled with the null value.  | 
1137  | 0  |             while (n >= 0 && i == dataNullIndex && i >= fastILimit && n < fastLength &&  | 
1138  | 0  |                     isStartOfSomeFastBlock(n, index, fastILimit)) { | 
1139  | 0  |                 n = findAllSameBlock(newData, n + 1, newDataLength, value, blockLength);  | 
1140  | 0  |             }  | 
1141  | 0  |             if (n >= 0) { | 
1142  | 0  |                 DEBUG_DO(++countSame);  | 
1143  | 0  |                 index[i] = n;  | 
1144  | 0  |             } else { | 
1145  | 0  |                 n = getAllSameOverlap(newData, newDataLength, value, blockLength);  | 
1146  | 0  |                 DEBUG_DO(sumOverlaps += n);  | 
1147  |  | #ifdef UCPTRIE_DEBUG  | 
1148  |  |                 if (printData) { | 
1149  |  |                     printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue);  | 
1150  |  |                 }  | 
1151  |  | #endif  | 
1152  | 0  |                 index[i] = newDataLength - n;  | 
1153  | 0  |                 int32_t prevDataLength = newDataLength;  | 
1154  | 0  |                 while (n < blockLength) { | 
1155  | 0  |                     newData[newDataLength++] = value;  | 
1156  | 0  |                     ++n;  | 
1157  | 0  |                 }  | 
1158  | 0  |                 mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);  | 
1159  | 0  |             }  | 
1160  | 0  |         } else if (flags[i] == MIXED) { | 
1161  | 0  |             const uint32_t *block = data + index[i];  | 
1162  | 0  |             int32_t n = mixedBlocks.findBlock(newData, block, 0);  | 
1163  | 0  |             if (n >= 0) { | 
1164  | 0  |                 DEBUG_DO(++countSame);  | 
1165  | 0  |                 index[i] = n;  | 
1166  | 0  |             } else { | 
1167  | 0  |                 n = getOverlap(newData, newDataLength, block, 0, blockLength);  | 
1168  | 0  |                 DEBUG_DO(sumOverlaps += n);  | 
1169  |  | #ifdef UCPTRIE_DEBUG  | 
1170  |  |                 if (printData) { | 
1171  |  |                     printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue);  | 
1172  |  |                 }  | 
1173  |  | #endif  | 
1174  | 0  |                 index[i] = newDataLength - n;  | 
1175  | 0  |                 int32_t prevDataLength = newDataLength;  | 
1176  | 0  |                 while (n < blockLength) { | 
1177  | 0  |                     newData[newDataLength++] = block[n++];  | 
1178  | 0  |                 }  | 
1179  | 0  |                 mixedBlocks.extend(newData, 0, prevDataLength, newDataLength);  | 
1180  | 0  |             }  | 
1181  | 0  |         } else /* SAME_AS */ { | 
1182  | 0  |             uint32_t j = index[i];  | 
1183  | 0  |             index[i] = index[j];  | 
1184  | 0  |         }  | 
1185  | 0  |     }  | 
1186  |  |  | 
1187  |  | #ifdef UCPTRIE_DEBUG  | 
1188  |  |     /* we saved some space */  | 
1189  |  |     printf("compacting UCPTrie: count of 32-bit data words %lu->%lu  countSame=%ld  sumOverlaps=%ld\n", | 
1190  |  |             (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps);  | 
1191  |  | #endif  | 
1192  | 0  |     return newDataLength;  | 
1193  | 0  | }  | 
1194  |  |  | 
1195  |  | int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, MixedBlocks &mixedBlocks,  | 
1196  | 0  |                                            UErrorCode &errorCode) { | 
1197  | 0  |     int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3);  | 
1198  | 0  |     if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) { | 
1199  |  |         // Only the linear fast index, no multi-stage index tables.  | 
1200  | 0  |         index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;  | 
1201  | 0  |         return fastIndexLength;  | 
1202  | 0  |     }  | 
1203  |  |  | 
1204  |  |     // Condense the fast index table.  | 
1205  |  |     // Also, does it contain an index-3 block with all dataNullOffset?  | 
1206  | 0  |     uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH];  // fastIndexLength  | 
1207  | 0  |     int32_t i3FirstNull = -1;  | 
1208  | 0  |     for (int32_t i = 0, j = 0; i < fastILimit; ++j) { | 
1209  | 0  |         uint32_t i3 = index[i];  | 
1210  | 0  |         fastIndex[j] = (uint16_t)i3;  | 
1211  | 0  |         if (i3 == (uint32_t)dataNullOffset) { | 
1212  | 0  |             if (i3FirstNull < 0) { | 
1213  | 0  |                 i3FirstNull = j;  | 
1214  | 0  |             } else if (index3NullOffset < 0 &&  | 
1215  | 0  |                     (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) { | 
1216  | 0  |                 index3NullOffset = i3FirstNull;  | 
1217  | 0  |             }  | 
1218  | 0  |         } else { | 
1219  | 0  |             i3FirstNull = -1;  | 
1220  | 0  |         }  | 
1221  |  |         // Set the index entries that compactData() skipped.  | 
1222  |  |         // Needed when the multi-stage index covers the fast index range as well.  | 
1223  | 0  |         int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;  | 
1224  | 0  |         while (++i < iNext) { | 
1225  | 0  |             i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;  | 
1226  | 0  |             index[i] = i3;  | 
1227  | 0  |         }  | 
1228  | 0  |     }  | 
1229  |  | 
  | 
1230  | 0  |     if (!mixedBlocks.init(fastIndexLength, UCPTRIE_INDEX_3_BLOCK_LENGTH)) { | 
1231  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
1232  | 0  |         return 0;  | 
1233  | 0  |     }  | 
1234  | 0  |     mixedBlocks.extend(fastIndex, 0, 0, fastIndexLength);  | 
1235  |  |  | 
1236  |  |     // Examine index-3 blocks. For each determine one of:  | 
1237  |  |     // - same as the index-3 null block  | 
1238  |  |     // - same as a fast-index block  | 
1239  |  |     // - 16-bit indexes  | 
1240  |  |     // - 18-bit indexes  | 
1241  |  |     // We store this in the first flags entry for the index-3 block.  | 
1242  |  |     //  | 
1243  |  |     // Also determine an upper limit for the index-3 table length.  | 
1244  | 0  |     int32_t index3Capacity = 0;  | 
1245  | 0  |     i3FirstNull = index3NullOffset;  | 
1246  | 0  |     bool hasLongI3Blocks = false;  | 
1247  |  |     // If the fast index covers the whole BMP, then  | 
1248  |  |     // the multi-stage index is only for supplementary code points.  | 
1249  |  |     // Otherwise, the multi-stage index covers all of Unicode.  | 
1250  | 0  |     int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT;  | 
1251  | 0  |     int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;  | 
1252  | 0  |     for (int32_t i = iStart; i < iLimit;) { | 
1253  | 0  |         int32_t j = i;  | 
1254  | 0  |         int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;  | 
1255  | 0  |         uint32_t oredI3 = 0;  | 
1256  | 0  |         bool isNull = true;  | 
1257  | 0  |         do { | 
1258  | 0  |             uint32_t i3 = index[j];  | 
1259  | 0  |             oredI3 |= i3;  | 
1260  | 0  |             if (i3 != (uint32_t)dataNullOffset) { | 
1261  | 0  |                 isNull = false;  | 
1262  | 0  |             }  | 
1263  | 0  |         } while (++j < jLimit);  | 
1264  | 0  |         if (isNull) { | 
1265  | 0  |             flags[i] = I3_NULL;  | 
1266  | 0  |             if (i3FirstNull < 0) { | 
1267  | 0  |                 if (oredI3 <= 0xffff) { | 
1268  | 0  |                     index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;  | 
1269  | 0  |                 } else { | 
1270  | 0  |                     index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;  | 
1271  | 0  |                     hasLongI3Blocks = true;  | 
1272  | 0  |                 }  | 
1273  | 0  |                 i3FirstNull = 0;  | 
1274  | 0  |             }  | 
1275  | 0  |         } else { | 
1276  | 0  |             if (oredI3 <= 0xffff) { | 
1277  | 0  |                 int32_t n = mixedBlocks.findBlock(fastIndex, index, i);  | 
1278  | 0  |                 if (n >= 0) { | 
1279  | 0  |                     flags[i] = I3_BMP;  | 
1280  | 0  |                     index[i] = n;  | 
1281  | 0  |                 } else { | 
1282  | 0  |                     flags[i] = I3_16;  | 
1283  | 0  |                     index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;  | 
1284  | 0  |                 }  | 
1285  | 0  |             } else { | 
1286  | 0  |                 flags[i] = I3_18;  | 
1287  | 0  |                 index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;  | 
1288  | 0  |                 hasLongI3Blocks = true;  | 
1289  | 0  |             }  | 
1290  | 0  |         }  | 
1291  | 0  |         i = j;  | 
1292  | 0  |     }  | 
1293  |  | 
  | 
1294  | 0  |     int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3;  | 
1295  |  |  | 
1296  |  |     // Length of the index-1 table, rounded up.  | 
1297  | 0  |     int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2;  | 
1298  |  |  | 
1299  |  |     // Index table: Fast index, index-1, index-3, index-2.  | 
1300  |  |     // +1 for possible index table padding.  | 
1301  | 0  |     int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1;  | 
1302  | 0  |     index16 = (uint16_t *)uprv_malloc(index16Capacity * 2);  | 
1303  | 0  |     if (index16 == nullptr) { | 
1304  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
1305  | 0  |         return 0;  | 
1306  | 0  |     }  | 
1307  | 0  |     uprv_memcpy(index16, fastIndex, fastIndexLength * 2);  | 
1308  |  | 
  | 
1309  | 0  |     if (!mixedBlocks.init(index16Capacity, UCPTRIE_INDEX_3_BLOCK_LENGTH)) { | 
1310  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
1311  | 0  |         return 0;  | 
1312  | 0  |     }  | 
1313  | 0  |     MixedBlocks longI3Blocks;  | 
1314  | 0  |     if (hasLongI3Blocks) { | 
1315  | 0  |         if (!longI3Blocks.init(index16Capacity, INDEX_3_18BIT_BLOCK_LENGTH)) { | 
1316  | 0  |             errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
1317  | 0  |             return 0;  | 
1318  | 0  |         }  | 
1319  | 0  |     }  | 
1320  |  |  | 
1321  |  |     // Compact the index-3 table and write an uncompacted version of the index-2 table.  | 
1322  | 0  |     uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2];  // index2Capacity  | 
1323  | 0  |     int32_t i2Length = 0;  | 
1324  | 0  |     i3FirstNull = index3NullOffset;  | 
1325  | 0  |     int32_t index3Start = fastIndexLength + index1Length;  | 
1326  | 0  |     int32_t indexLength = index3Start;  | 
1327  | 0  |     for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) { | 
1328  | 0  |         int32_t i3;  | 
1329  | 0  |         uint8_t f = flags[i];  | 
1330  | 0  |         if (f == I3_NULL && i3FirstNull < 0) { | 
1331  |  |             // First index-3 null block. Write & overlap it like a normal block, then remember it.  | 
1332  | 0  |             f = dataNullOffset <= 0xffff ? I3_16 : I3_18;  | 
1333  | 0  |             i3FirstNull = 0;  | 
1334  | 0  |         }  | 
1335  | 0  |         if (f == I3_NULL) { | 
1336  | 0  |             i3 = index3NullOffset;  | 
1337  | 0  |         } else if (f == I3_BMP) { | 
1338  | 0  |             i3 = index[i];  | 
1339  | 0  |         } else if (f == I3_16) { | 
1340  | 0  |             int32_t n = mixedBlocks.findBlock(index16, index, i);  | 
1341  | 0  |             if (n >= 0) { | 
1342  | 0  |                 i3 = n;  | 
1343  | 0  |             } else { | 
1344  | 0  |                 if (indexLength == index3Start) { | 
1345  |  |                     // No overlap at the boundary between the index-1 and index-3 tables.  | 
1346  | 0  |                     n = 0;  | 
1347  | 0  |                 } else { | 
1348  | 0  |                     n = getOverlap(index16, indexLength,  | 
1349  | 0  |                                    index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);  | 
1350  | 0  |                 }  | 
1351  | 0  |                 i3 = indexLength - n;  | 
1352  | 0  |                 int32_t prevIndexLength = indexLength;  | 
1353  | 0  |                 while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) { | 
1354  | 0  |                     index16[indexLength++] = index[i + n++];  | 
1355  | 0  |                 }  | 
1356  | 0  |                 mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);  | 
1357  | 0  |                 if (hasLongI3Blocks) { | 
1358  | 0  |                     longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);  | 
1359  | 0  |                 }  | 
1360  | 0  |             }  | 
1361  | 0  |         } else { | 
1362  | 0  |             U_ASSERT(f == I3_18);  | 
1363  | 0  |             U_ASSERT(hasLongI3Blocks);  | 
1364  |  |             // Encode an index-3 block that contains one or more data indexes exceeding 16 bits.  | 
1365  | 0  |             int32_t j = i;  | 
1366  | 0  |             int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;  | 
1367  | 0  |             int32_t k = indexLength;  | 
1368  | 0  |             do { | 
1369  | 0  |                 ++k;  | 
1370  | 0  |                 uint32_t v = index[j++];  | 
1371  | 0  |                 uint32_t upperBits = (v & 0x30000) >> 2;  | 
1372  | 0  |                 index16[k++] = v;  | 
1373  | 0  |                 v = index[j++];  | 
1374  | 0  |                 upperBits |= (v & 0x30000) >> 4;  | 
1375  | 0  |                 index16[k++] = v;  | 
1376  | 0  |                 v = index[j++];  | 
1377  | 0  |                 upperBits |= (v & 0x30000) >> 6;  | 
1378  | 0  |                 index16[k++] = v;  | 
1379  | 0  |                 v = index[j++];  | 
1380  | 0  |                 upperBits |= (v & 0x30000) >> 8;  | 
1381  | 0  |                 index16[k++] = v;  | 
1382  | 0  |                 v = index[j++];  | 
1383  | 0  |                 upperBits |= (v & 0x30000) >> 10;  | 
1384  | 0  |                 index16[k++] = v;  | 
1385  | 0  |                 v = index[j++];  | 
1386  | 0  |                 upperBits |= (v & 0x30000) >> 12;  | 
1387  | 0  |                 index16[k++] = v;  | 
1388  | 0  |                 v = index[j++];  | 
1389  | 0  |                 upperBits |= (v & 0x30000) >> 14;  | 
1390  | 0  |                 index16[k++] = v;  | 
1391  | 0  |                 v = index[j++];  | 
1392  | 0  |                 upperBits |= (v & 0x30000) >> 16;  | 
1393  | 0  |                 index16[k++] = v;  | 
1394  | 0  |                 index16[k - 9] = upperBits;  | 
1395  | 0  |             } while (j < jLimit);  | 
1396  | 0  |             int32_t n = longI3Blocks.findBlock(index16, index16, indexLength);  | 
1397  | 0  |             if (n >= 0) { | 
1398  | 0  |                 i3 = n | 0x8000;  | 
1399  | 0  |             } else { | 
1400  | 0  |                 if (indexLength == index3Start) { | 
1401  |  |                     // No overlap at the boundary between the index-1 and index-3 tables.  | 
1402  | 0  |                     n = 0;  | 
1403  | 0  |                 } else { | 
1404  | 0  |                     n = getOverlap(index16, indexLength,  | 
1405  | 0  |                                    index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);  | 
1406  | 0  |                 }  | 
1407  | 0  |                 i3 = (indexLength - n) | 0x8000;  | 
1408  | 0  |                 int32_t prevIndexLength = indexLength;  | 
1409  | 0  |                 if (n > 0) { | 
1410  | 0  |                     int32_t start = indexLength;  | 
1411  | 0  |                     while (n < INDEX_3_18BIT_BLOCK_LENGTH) { | 
1412  | 0  |                         index16[indexLength++] = index16[start + n++];  | 
1413  | 0  |                     }  | 
1414  | 0  |                 } else { | 
1415  | 0  |                     indexLength += INDEX_3_18BIT_BLOCK_LENGTH;  | 
1416  | 0  |                 }  | 
1417  | 0  |                 mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);  | 
1418  | 0  |                 if (hasLongI3Blocks) { | 
1419  | 0  |                     longI3Blocks.extend(index16, index3Start, prevIndexLength, indexLength);  | 
1420  | 0  |                 }  | 
1421  | 0  |             }  | 
1422  | 0  |         }  | 
1423  | 0  |         if (index3NullOffset < 0 && i3FirstNull >= 0) { | 
1424  | 0  |             index3NullOffset = i3;  | 
1425  | 0  |         }  | 
1426  |  |         // Set the index-2 table entry.  | 
1427  | 0  |         index2[i2Length++] = i3;  | 
1428  | 0  |     }  | 
1429  | 0  |     U_ASSERT(i2Length == index2Capacity);  | 
1430  | 0  |     U_ASSERT(indexLength <= index3Start + index3Capacity);  | 
1431  |  | 
  | 
1432  | 0  |     if (index3NullOffset < 0) { | 
1433  | 0  |         index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;  | 
1434  | 0  |     }  | 
1435  | 0  |     if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) { | 
1436  |  |         // The index-3 offsets exceed 15 bits, or  | 
1437  |  |         // the last one cannot be distinguished from the no-null-block value.  | 
1438  | 0  |         errorCode = U_INDEX_OUTOFBOUNDS_ERROR;  | 
1439  | 0  |         return 0;  | 
1440  | 0  |     }  | 
1441  |  |  | 
1442  |  |     // Compact the index-2 table and write the index-1 table.  | 
1443  | 0  |     static_assert(UCPTRIE_INDEX_2_BLOCK_LENGTH == UCPTRIE_INDEX_3_BLOCK_LENGTH,  | 
1444  | 0  |                   "must re-init mixedBlocks");  | 
1445  | 0  |     int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH;  | 
1446  | 0  |     int32_t i1 = fastIndexLength;  | 
1447  | 0  |     for (int32_t i = 0; i < i2Length; i += blockLength) { | 
1448  | 0  |         int32_t n;  | 
1449  | 0  |         if ((i2Length - i) >= blockLength) { | 
1450  |  |             // normal block  | 
1451  | 0  |             U_ASSERT(blockLength == UCPTRIE_INDEX_2_BLOCK_LENGTH);  | 
1452  | 0  |             n = mixedBlocks.findBlock(index16, index2, i);  | 
1453  | 0  |         } else { | 
1454  |  |             // highStart is inside the last index-2 block. Shorten it.  | 
1455  | 0  |             blockLength = i2Length - i;  | 
1456  | 0  |             n = findSameBlock(index16, index3Start, indexLength,  | 
1457  | 0  |                               index2, i, blockLength);  | 
1458  | 0  |         }  | 
1459  | 0  |         int32_t i2;  | 
1460  | 0  |         if (n >= 0) { | 
1461  | 0  |             i2 = n;  | 
1462  | 0  |         } else { | 
1463  | 0  |             if (indexLength == index3Start) { | 
1464  |  |                 // No overlap at the boundary between the index-1 and index-3/2 tables.  | 
1465  | 0  |                 n = 0;  | 
1466  | 0  |             } else { | 
1467  | 0  |                 n = getOverlap(index16, indexLength, index2, i, blockLength);  | 
1468  | 0  |             }  | 
1469  | 0  |             i2 = indexLength - n;  | 
1470  | 0  |             int32_t prevIndexLength = indexLength;  | 
1471  | 0  |             while (n < blockLength) { | 
1472  | 0  |                 index16[indexLength++] = index2[i + n++];  | 
1473  | 0  |             }  | 
1474  | 0  |             mixedBlocks.extend(index16, index3Start, prevIndexLength, indexLength);  | 
1475  | 0  |         }  | 
1476  |  |         // Set the index-1 table entry.  | 
1477  | 0  |         index16[i1++] = i2;  | 
1478  | 0  |     }  | 
1479  | 0  |     U_ASSERT(i1 == index3Start);  | 
1480  | 0  |     U_ASSERT(indexLength <= index16Capacity);  | 
1481  |  | 
  | 
1482  |  | #ifdef UCPTRIE_DEBUG  | 
1483  |  |     /* we saved some space */  | 
1484  |  |     printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n", | 
1485  |  |             (long)iLimit, (long)indexLength);  | 
1486  |  | #endif  | 
1487  |  | 
  | 
1488  | 0  |     return indexLength;  | 
1489  | 0  | }  | 
1490  |  |  | 
1491  | 0  | int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) { | 
1492  |  |     // Find the real highStart and round it up.  | 
1493  | 0  |     U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);  | 
1494  | 0  |     highValue = get(MAX_UNICODE);  | 
1495  | 0  |     int32_t realHighStart = findHighStart();  | 
1496  | 0  |     realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) &  | 
1497  | 0  |         ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);  | 
1498  | 0  |     if (realHighStart == UNICODE_LIMIT) { | 
1499  | 0  |         highValue = initialValue;  | 
1500  | 0  |     }  | 
1501  |  | 
  | 
1502  |  | #ifdef UCPTRIE_DEBUG  | 
1503  |  |     printf("UCPTrie: highStart U+%06lx  highValue 0x%lx  initialValue 0x%lx\n", | 
1504  |  |             (long)realHighStart, (long)highValue, (long)initialValue);  | 
1505  |  | #endif  | 
1506  |  |  | 
1507  |  |     // We always store indexes and data values for the fast range.  | 
1508  |  |     // Pin highStart to the top of that range while building.  | 
1509  | 0  |     UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3;  | 
1510  | 0  |     if (realHighStart < fastLimit) { | 
1511  | 0  |         for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) { | 
1512  | 0  |             flags[i] = ALL_SAME;  | 
1513  | 0  |             index[i] = highValue;  | 
1514  | 0  |         }  | 
1515  | 0  |         highStart = fastLimit;  | 
1516  | 0  |     } else { | 
1517  | 0  |         highStart = realHighStart;  | 
1518  | 0  |     }  | 
1519  |  | 
  | 
1520  | 0  |     uint32_t asciiData[ASCII_LIMIT];  | 
1521  | 0  |     for (int32_t i = 0; i < ASCII_LIMIT; ++i) { | 
1522  | 0  |         asciiData[i] = get(i);  | 
1523  | 0  |     }  | 
1524  |  |  | 
1525  |  |     // First we look for which data blocks have the same value repeated over the whole block,  | 
1526  |  |     // deduplicate such blocks, find a good null data block (for faster enumeration),  | 
1527  |  |     // and get an upper bound for the necessary data array length.  | 
1528  | 0  |     AllSameBlocks allSameBlocks;  | 
1529  | 0  |     int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks);  | 
1530  | 0  |     if (newDataCapacity < 0) { | 
1531  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
1532  | 0  |         return 0;  | 
1533  | 0  |     }  | 
1534  | 0  |     uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4);  | 
1535  | 0  |     if (newData == nullptr) { | 
1536  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
1537  | 0  |         return 0;  | 
1538  | 0  |     }  | 
1539  | 0  |     uprv_memcpy(newData, asciiData, sizeof(asciiData));  | 
1540  |  | 
  | 
1541  | 0  |     int32_t dataNullIndex = allSameBlocks.findMostUsed();  | 
1542  |  | 
  | 
1543  | 0  |     MixedBlocks mixedBlocks;  | 
1544  | 0  |     int32_t newDataLength = compactData(fastILimit, newData, newDataCapacity,  | 
1545  | 0  |                                         dataNullIndex, mixedBlocks, errorCode);  | 
1546  | 0  |     if (U_FAILURE(errorCode)) { return 0; } | 
1547  | 0  |     U_ASSERT(newDataLength <= newDataCapacity);  | 
1548  | 0  |     uprv_free(data);  | 
1549  | 0  |     data = newData;  | 
1550  | 0  |     dataCapacity = newDataCapacity;  | 
1551  | 0  |     dataLength = newDataLength;  | 
1552  | 0  |     if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) { | 
1553  |  |         // The offset of the last data block is too high to be stored in the index table.  | 
1554  | 0  |         errorCode = U_INDEX_OUTOFBOUNDS_ERROR;  | 
1555  | 0  |         return 0;  | 
1556  | 0  |     }  | 
1557  |  |  | 
1558  | 0  |     if (dataNullIndex >= 0) { | 
1559  | 0  |         dataNullOffset = index[dataNullIndex];  | 
1560  |  | #ifdef UCPTRIE_DEBUG  | 
1561  |  |         if (data[dataNullOffset] != initialValue) { | 
1562  |  |             printf("UCPTrie initialValue %lx -> more common nullValue %lx\n", | 
1563  |  |                    (long)initialValue, (long)data[dataNullOffset]);  | 
1564  |  |         }  | 
1565  |  | #endif  | 
1566  | 0  |         initialValue = data[dataNullOffset];  | 
1567  | 0  |     } else { | 
1568  | 0  |         dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET;  | 
1569  | 0  |     }  | 
1570  |  | 
  | 
1571  | 0  |     int32_t indexLength = compactIndex(fastILimit, mixedBlocks, errorCode);  | 
1572  | 0  |     highStart = realHighStart;  | 
1573  | 0  |     return indexLength;  | 
1574  | 0  | }  | 
1575  |  |  | 
1576  | 0  | UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) { | 
1577  | 0  |     if (U_FAILURE(errorCode)) { | 
1578  | 0  |         return nullptr;  | 
1579  | 0  |     }  | 
1580  | 0  |     if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type ||  | 
1581  | 0  |             valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) { | 
1582  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
1583  | 0  |         return nullptr;  | 
1584  | 0  |     }  | 
1585  |  |  | 
1586  |  |     // The mutable trie always stores 32-bit values.  | 
1587  |  |     // When we build a UCPTrie for a smaller value width, we first mask off unused bits  | 
1588  |  |     // before compacting the data.  | 
1589  | 0  |     switch (valueWidth) { | 
1590  | 0  |     case UCPTRIE_VALUE_BITS_32:  | 
1591  | 0  |         break;  | 
1592  | 0  |     case UCPTRIE_VALUE_BITS_16:  | 
1593  | 0  |         maskValues(0xffff);  | 
1594  | 0  |         break;  | 
1595  | 0  |     case UCPTRIE_VALUE_BITS_8:  | 
1596  | 0  |         maskValues(0xff);  | 
1597  | 0  |         break;  | 
1598  | 0  |     default:  | 
1599  | 0  |         break;  | 
1600  | 0  |     }  | 
1601  |  |  | 
1602  | 0  |     UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT;  | 
1603  | 0  |     int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode);  | 
1604  | 0  |     if (U_FAILURE(errorCode)) { | 
1605  | 0  |         clear();  | 
1606  | 0  |         return nullptr;  | 
1607  | 0  |     }  | 
1608  |  |  | 
1609  |  |     // Ensure data table alignment: The index length must be even for uint32_t data.  | 
1610  | 0  |     if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) { | 
1611  | 0  |         index16[indexLength++] = 0xffee;  // arbitrary value  | 
1612  | 0  |     }  | 
1613  |  |  | 
1614  |  |     // Make the total trie structure length a multiple of 4 bytes by padding the data table,  | 
1615  |  |     // and store special values as the last two data values.  | 
1616  | 0  |     int32_t length = indexLength * 2;  | 
1617  | 0  |     if (valueWidth == UCPTRIE_VALUE_BITS_16) { | 
1618  | 0  |         if (((indexLength ^ dataLength) & 1) != 0) { | 
1619  |  |             // padding  | 
1620  | 0  |             data[dataLength++] = errorValue;  | 
1621  | 0  |         }  | 
1622  | 0  |         if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { | 
1623  | 0  |             data[dataLength++] = highValue;  | 
1624  | 0  |             data[dataLength++] = errorValue;  | 
1625  | 0  |         }  | 
1626  | 0  |         length += dataLength * 2;  | 
1627  | 0  |     } else if (valueWidth == UCPTRIE_VALUE_BITS_32) { | 
1628  |  |         // 32-bit data words never need padding to a multiple of 4 bytes.  | 
1629  | 0  |         if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) { | 
1630  | 0  |             if (data[dataLength - 1] != highValue) { | 
1631  | 0  |                 data[dataLength++] = highValue;  | 
1632  | 0  |             }  | 
1633  | 0  |             data[dataLength++] = errorValue;  | 
1634  | 0  |         }  | 
1635  | 0  |         length += dataLength * 4;  | 
1636  | 0  |     } else { | 
1637  | 0  |         int32_t and3 = (length + dataLength) & 3;  | 
1638  | 0  |         if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) { | 
1639  |  |             // all set  | 
1640  | 0  |         } else if(and3 == 3 && data[dataLength - 1] == highValue) { | 
1641  | 0  |             data[dataLength++] = errorValue;  | 
1642  | 0  |         } else { | 
1643  | 0  |             while (and3 != 2) { | 
1644  | 0  |                 data[dataLength++] = highValue;  | 
1645  | 0  |                 and3 = (and3 + 1) & 3;  | 
1646  | 0  |             }  | 
1647  | 0  |             data[dataLength++] = highValue;  | 
1648  | 0  |             data[dataLength++] = errorValue;  | 
1649  | 0  |         }  | 
1650  | 0  |         length += dataLength;  | 
1651  | 0  |     }  | 
1652  |  |  | 
1653  |  |     // Calculate the total length of the UCPTrie as a single memory block.  | 
1654  | 0  |     length += sizeof(UCPTrie);  | 
1655  | 0  |     U_ASSERT((length & 3) == 0);  | 
1656  |  | 
  | 
1657  | 0  |     uint8_t *bytes = (uint8_t *)uprv_malloc(length);  | 
1658  | 0  |     if (bytes == nullptr) { | 
1659  | 0  |         errorCode = U_MEMORY_ALLOCATION_ERROR;  | 
1660  | 0  |         clear();  | 
1661  | 0  |         return nullptr;  | 
1662  | 0  |     }  | 
1663  | 0  |     UCPTrie *trie = reinterpret_cast<UCPTrie *>(bytes);  | 
1664  | 0  |     uprv_memset(trie, 0, sizeof(UCPTrie));  | 
1665  | 0  |     trie->indexLength = indexLength;  | 
1666  | 0  |     trie->dataLength = dataLength;  | 
1667  |  | 
  | 
1668  | 0  |     trie->highStart = highStart;  | 
1669  |  |     // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes.  | 
1670  |  |     // Runtime code needs to then test for the real highStart as well.  | 
1671  | 0  |     trie->shifted12HighStart = (highStart + 0xfff) >> 12;  | 
1672  | 0  |     trie->type = type;  | 
1673  | 0  |     trie->valueWidth = valueWidth;  | 
1674  |  | 
  | 
1675  | 0  |     trie->index3NullOffset = index3NullOffset;  | 
1676  | 0  |     trie->dataNullOffset = dataNullOffset;  | 
1677  | 0  |     trie->nullValue = initialValue;  | 
1678  |  | 
  | 
1679  | 0  |     bytes += sizeof(UCPTrie);  | 
1680  |  |  | 
1681  |  |     // Fill the index and data arrays.  | 
1682  | 0  |     uint16_t *dest16 = (uint16_t *)bytes;  | 
1683  | 0  |     trie->index = dest16;  | 
1684  |  | 
  | 
1685  | 0  |     if (highStart <= fastLimit) { | 
1686  |  |         // Condense only the fast index from the mutable-trie index.  | 
1687  | 0  |         for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) { | 
1688  | 0  |             *dest16++ = (uint16_t)index[i];  // dest16[j]  | 
1689  | 0  |         }  | 
1690  | 0  |     } else { | 
1691  | 0  |         uprv_memcpy(dest16, index16, indexLength * 2);  | 
1692  | 0  |         dest16 += indexLength;  | 
1693  | 0  |     }  | 
1694  | 0  |     bytes += indexLength * 2;  | 
1695  |  |  | 
1696  |  |     // Write the data array.  | 
1697  | 0  |     const uint32_t *p = data;  | 
1698  | 0  |     switch (valueWidth) { | 
1699  | 0  |     case UCPTRIE_VALUE_BITS_16:  | 
1700  |  |         // Write 16-bit data values.  | 
1701  | 0  |         trie->data.ptr16 = dest16;  | 
1702  | 0  |         for (int32_t i = dataLength; i > 0; --i) { | 
1703  | 0  |             *dest16++ = (uint16_t)*p++;  | 
1704  | 0  |         }  | 
1705  | 0  |         break;  | 
1706  | 0  |     case UCPTRIE_VALUE_BITS_32:  | 
1707  |  |         // Write 32-bit data values.  | 
1708  | 0  |         trie->data.ptr32 = (uint32_t *)bytes;  | 
1709  | 0  |         uprv_memcpy(bytes, p, (size_t)dataLength * 4);  | 
1710  | 0  |         break;  | 
1711  | 0  |     case UCPTRIE_VALUE_BITS_8:  | 
1712  |  |         // Write 8-bit data values.  | 
1713  | 0  |         trie->data.ptr8 = bytes;  | 
1714  | 0  |         for (int32_t i = dataLength; i > 0; --i) { | 
1715  | 0  |             *bytes++ = (uint8_t)*p++;  | 
1716  | 0  |         }  | 
1717  | 0  |         break;  | 
1718  | 0  |     default:  | 
1719  |  |         // Will not occur, valueWidth checked at the beginning.  | 
1720  | 0  |         break;  | 
1721  | 0  |     }  | 
1722  |  |  | 
1723  |  | #ifdef UCPTRIE_DEBUG  | 
1724  |  |     trie->name = name;  | 
1725  |  |  | 
1726  |  |     ucptrie_printLengths(trie, "");  | 
1727  |  | #endif  | 
1728  |  |  | 
1729  | 0  |     clear();  | 
1730  | 0  |     return trie;  | 
1731  | 0  | }  | 
1732  |  |  | 
1733  |  | }  // namespace  | 
1734  |  |  | 
1735  |  | U_NAMESPACE_END  | 
1736  |  |  | 
1737  |  | U_NAMESPACE_USE  | 
1738  |  |  | 
1739  |  | U_CAPI UMutableCPTrie * U_EXPORT2  | 
1740  | 0  | umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { | 
1741  | 0  |     if (U_FAILURE(*pErrorCode)) { | 
1742  | 0  |         return nullptr;  | 
1743  | 0  |     }  | 
1744  | 0  |     LocalPointer<MutableCodePointTrie> trie(  | 
1745  | 0  |         new MutableCodePointTrie(initialValue, errorValue, *pErrorCode), *pErrorCode);  | 
1746  | 0  |     if (U_FAILURE(*pErrorCode)) { | 
1747  | 0  |         return nullptr;  | 
1748  | 0  |     }  | 
1749  | 0  |     return reinterpret_cast<UMutableCPTrie *>(trie.orphan());  | 
1750  | 0  | }  | 
1751  |  |  | 
1752  |  | U_CAPI UMutableCPTrie * U_EXPORT2  | 
1753  | 0  | umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) { | 
1754  | 0  |     if (U_FAILURE(*pErrorCode)) { | 
1755  | 0  |         return nullptr;  | 
1756  | 0  |     }  | 
1757  | 0  |     if (other == nullptr) { | 
1758  | 0  |         return nullptr;  | 
1759  | 0  |     }  | 
1760  | 0  |     LocalPointer<MutableCodePointTrie> clone(  | 
1761  | 0  |         new MutableCodePointTrie(*reinterpret_cast<const MutableCodePointTrie *>(other), *pErrorCode), *pErrorCode);  | 
1762  | 0  |     if (U_FAILURE(*pErrorCode)) { | 
1763  | 0  |         return nullptr;  | 
1764  | 0  |     }  | 
1765  | 0  |     return reinterpret_cast<UMutableCPTrie *>(clone.orphan());  | 
1766  | 0  | }  | 
1767  |  |  | 
1768  |  | U_CAPI void U_EXPORT2  | 
1769  | 0  | umutablecptrie_close(UMutableCPTrie *trie) { | 
1770  | 0  |     delete reinterpret_cast<MutableCodePointTrie *>(trie);  | 
1771  | 0  | }  | 
1772  |  |  | 
1773  |  | U_CAPI UMutableCPTrie * U_EXPORT2  | 
1774  | 0  | umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) { | 
1775  | 0  |     if (U_FAILURE(*pErrorCode)) { | 
1776  | 0  |         return nullptr;  | 
1777  | 0  |     }  | 
1778  | 0  |     if (map == nullptr) { | 
1779  | 0  |         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
1780  | 0  |         return nullptr;  | 
1781  | 0  |     }  | 
1782  | 0  |     return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPMap(map, *pErrorCode));  | 
1783  | 0  | }  | 
1784  |  |  | 
1785  |  | U_CAPI UMutableCPTrie * U_EXPORT2  | 
1786  | 0  | umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) { | 
1787  | 0  |     if (U_FAILURE(*pErrorCode)) { | 
1788  | 0  |         return nullptr;  | 
1789  | 0  |     }  | 
1790  | 0  |     if (trie == nullptr) { | 
1791  | 0  |         *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
1792  | 0  |         return nullptr;  | 
1793  | 0  |     }  | 
1794  | 0  |     return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode));  | 
1795  | 0  | }  | 
1796  |  |  | 
1797  |  | U_CAPI uint32_t U_EXPORT2  | 
1798  | 0  | umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) { | 
1799  | 0  |     return reinterpret_cast<const MutableCodePointTrie *>(trie)->get(c);  | 
1800  | 0  | }  | 
1801  |  |  | 
1802  |  | namespace { | 
1803  |  |  | 
1804  |  | UChar32 getRange(const void *trie, UChar32 start,  | 
1805  | 0  |                  UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { | 
1806  | 0  |     return reinterpret_cast<const MutableCodePointTrie *>(trie)->  | 
1807  | 0  |         getRange(start, filter, context, pValue);  | 
1808  | 0  | }  | 
1809  |  |  | 
1810  |  | }  // namespace  | 
1811  |  |  | 
1812  |  | U_CAPI UChar32 U_EXPORT2  | 
1813  |  | umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,  | 
1814  |  |                         UCPMapRangeOption option, uint32_t surrogateValue,  | 
1815  | 0  |                         UCPMapValueFilter *filter, const void *context, uint32_t *pValue) { | 
1816  | 0  |     return ucptrie_internalGetRange(getRange, trie, start,  | 
1817  | 0  |                                     option, surrogateValue,  | 
1818  | 0  |                                     filter, context, pValue);  | 
1819  | 0  | }  | 
1820  |  |  | 
1821  |  | U_CAPI void U_EXPORT2  | 
1822  | 0  | umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { | 
1823  | 0  |     if (U_FAILURE(*pErrorCode)) { | 
1824  | 0  |         return;  | 
1825  | 0  |     }  | 
1826  | 0  |     reinterpret_cast<MutableCodePointTrie *>(trie)->set(c, value, *pErrorCode);  | 
1827  | 0  | }  | 
1828  |  |  | 
1829  |  | U_CAPI void U_EXPORT2  | 
1830  |  | umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end,  | 
1831  | 0  |                    uint32_t value, UErrorCode *pErrorCode) { | 
1832  | 0  |     if (U_FAILURE(*pErrorCode)) { | 
1833  | 0  |         return;  | 
1834  | 0  |     }  | 
1835  | 0  |     reinterpret_cast<MutableCodePointTrie *>(trie)->setRange(start, end, value, *pErrorCode);  | 
1836  | 0  | }  | 
1837  |  |  | 
1838  |  | /* Compact and internally serialize the trie. */  | 
1839  |  | U_CAPI UCPTrie * U_EXPORT2  | 
1840  |  | umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth,  | 
1841  | 0  |                               UErrorCode *pErrorCode) { | 
1842  | 0  |     if (U_FAILURE(*pErrorCode)) { | 
1843  | 0  |         return nullptr;  | 
1844  | 0  |     }  | 
1845  | 0  |     return reinterpret_cast<MutableCodePointTrie *>(trie)->build(type, valueWidth, *pErrorCode);  | 
1846  | 0  | }  | 
1847  |  |  | 
1848  |  | #ifdef UCPTRIE_DEBUG  | 
1849  |  | U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) { | 
1850  |  |     reinterpret_cast<MutableCodePointTrie *>(trie)->name = name;  | 
1851  |  | }  | 
1852  |  | #endif  |