/src/icu/source/common/utrie2_builder.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ****************************************************************************** |
5 | | * |
6 | | * Copyright (C) 2001-2014, International Business Machines |
7 | | * Corporation and others. All Rights Reserved. |
8 | | * |
9 | | ****************************************************************************** |
10 | | * file name: utrie2_builder.cpp |
11 | | * encoding: UTF-8 |
12 | | * tab size: 8 (not used) |
13 | | * indentation:4 |
14 | | * |
15 | | * created on: 2008sep26 (split off from utrie2.c) |
16 | | * created by: Markus W. Scherer |
17 | | * |
18 | | * This is a common implementation of a Unicode trie. |
19 | | * It is a kind of compressed, serializable table of 16- or 32-bit values associated with |
20 | | * Unicode code points (0..0x10ffff). |
21 | | * This is the second common version of a Unicode trie (hence the name UTrie2). |
22 | | * See utrie2.h for a comparison. |
23 | | * |
24 | | * This file contains only the builder code. |
25 | | * See utrie2.c for the runtime and enumeration code. |
26 | | */ |
27 | | // #define UTRIE2_DEBUG |
28 | | #ifdef UTRIE2_DEBUG |
29 | | # include <stdio.h> |
30 | | #endif |
31 | | // #define UCPTRIE_DEBUG |
32 | | |
33 | | #include "unicode/utypes.h" |
34 | | #ifdef UCPTRIE_DEBUG |
35 | | #include "unicode/ucptrie.h" |
36 | | #include "unicode/umutablecptrie.h" |
37 | | #include "ucptrie_impl.h" |
38 | | #endif |
39 | | #include "cmemory.h" |
40 | | #include "utrie2.h" |
41 | | #include "utrie2_impl.h" |
42 | | |
43 | | #include "utrie.h" // for utrie2_fromUTrie() |
44 | | |
45 | | /* Implementation notes ----------------------------------------------------- */ |
46 | | |
47 | | /* |
48 | | * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values |
49 | | * have been chosen to minimize trie sizes overall. |
50 | | * Most of the code is flexible enough to work with a range of values, |
51 | | * within certain limits. |
52 | | * |
53 | | * Exception: Support for separate values for lead surrogate code _units_ |
54 | | * vs. code _points_ was added after the constants were fixed, |
55 | | * and has not been tested nor particularly designed for different constant values. |
56 | | * (Especially the utrie2_enum() code that jumps to the special LSCP index-2 |
57 | | * part and back.) |
58 | | * |
59 | | * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data |
60 | | * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH |
61 | | * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction |
62 | | * remapping) stops working. |
63 | | * |
64 | | * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate() |
65 | | * assumes that a single index-2 block is used for 0x400 code points |
66 | | * corresponding to one lead surrogate. |
67 | | * |
68 | | * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains |
69 | | * more than one Unicode plane, and the split of the index-2 table into a BMP |
70 | | * part and a supplementary part, with a gap in between, would not work. |
71 | | * |
72 | | * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because |
73 | | * there is data with more than 64k distinct values, |
74 | | * for example for Unihan collation with a separate collation weight per |
75 | | * Han character. |
76 | | */ |
77 | | |
78 | | /* Building a trie ----------------------------------------------------------*/ |
79 | | |
80 | | enum { |
81 | | /** The null index-2 block, following the gap in the index-2 table. */ |
82 | | UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH, |
83 | | |
84 | | /** The start of allocated index-2 blocks. */ |
85 | | UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH, |
86 | | |
87 | | /** |
88 | | * The null data block. |
89 | | * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller, |
90 | | * to work with 6-bit trail bytes from 2-byte UTF-8. |
91 | | */ |
92 | | UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET, |
93 | | |
94 | | /** The start of allocated data blocks. */ |
95 | | UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40, |
96 | | |
97 | | /** |
98 | | * The start of data blocks for U+0800 and above. |
99 | | * Below, compaction uses a block length of 64 for 2-byte UTF-8. |
100 | | * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH. |
101 | | * Data values for 0x780 code points beyond ASCII. |
102 | | */ |
103 | | UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780 |
104 | | }; |
105 | | |
106 | | /* Start with allocation of 16k data entries. */ |
107 | 0 | #define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14) |
108 | | |
109 | | /* Grow about 8x each time. */ |
110 | 0 | #define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17) |
111 | | |
112 | | static int32_t |
113 | | allocIndex2Block(UNewTrie2 *trie); |
114 | | |
115 | | U_CAPI UTrie2 * U_EXPORT2 |
116 | 0 | utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { |
117 | 0 | UTrie2 *trie; |
118 | 0 | UNewTrie2 *newTrie; |
119 | 0 | uint32_t *data; |
120 | 0 | int32_t i, j; |
121 | |
|
122 | 0 | if(U_FAILURE(*pErrorCode)) { |
123 | 0 | return NULL; |
124 | 0 | } |
125 | | |
126 | 0 | trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); |
127 | 0 | newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2)); |
128 | 0 | data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4); |
129 | 0 | if(trie==NULL || newTrie==NULL || data==NULL) { |
130 | 0 | uprv_free(trie); |
131 | 0 | uprv_free(newTrie); |
132 | 0 | uprv_free(data); |
133 | 0 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
134 | 0 | return 0; |
135 | 0 | } |
136 | | |
137 | 0 | uprv_memset(trie, 0, sizeof(UTrie2)); |
138 | 0 | trie->initialValue=initialValue; |
139 | 0 | trie->errorValue=errorValue; |
140 | 0 | trie->highStart=0x110000; |
141 | 0 | trie->newTrie=newTrie; |
142 | | #ifdef UTRIE2_DEBUG |
143 | | trie->name="open"; |
144 | | #endif |
145 | |
|
146 | 0 | newTrie->data=data; |
147 | | #ifdef UCPTRIE_DEBUG |
148 | | newTrie->t3=umutablecptrie_open(initialValue, errorValue, pErrorCode); |
149 | | #endif |
150 | 0 | newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH; |
151 | 0 | newTrie->initialValue=initialValue; |
152 | 0 | newTrie->errorValue=errorValue; |
153 | 0 | newTrie->highStart=0x110000; |
154 | 0 | newTrie->firstFreeBlock=0; /* no free block in the list */ |
155 | 0 | newTrie->isCompacted=FALSE; |
156 | | |
157 | | /* |
158 | | * preallocate and reset |
159 | | * - ASCII |
160 | | * - the bad-UTF-8-data block |
161 | | * - the null data block |
162 | | */ |
163 | 0 | for(i=0; i<0x80; ++i) { |
164 | 0 | newTrie->data[i]=initialValue; |
165 | 0 | } |
166 | 0 | for(; i<0xc0; ++i) { |
167 | 0 | newTrie->data[i]=errorValue; |
168 | 0 | } |
169 | 0 | for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) { |
170 | 0 | newTrie->data[i]=initialValue; |
171 | 0 | } |
172 | 0 | newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET; |
173 | 0 | newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET; |
174 | | |
175 | | /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */ |
176 | 0 | for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { |
177 | 0 | newTrie->index2[i]=j; |
178 | 0 | newTrie->map[i]=1; |
179 | 0 | } |
180 | | /* reference counts for the bad-UTF-8-data block */ |
181 | 0 | for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { |
182 | 0 | newTrie->map[i]=0; |
183 | 0 | } |
184 | | /* |
185 | | * Reference counts for the null data block: all blocks except for the ASCII blocks. |
186 | | * Plus 1 so that we don't drop this block during compaction. |
187 | | * Plus as many as needed for lead surrogate code points. |
188 | | */ |
189 | | /* i==newTrie->dataNullOffset */ |
190 | 0 | newTrie->map[i++]= |
191 | 0 | (0x110000>>UTRIE2_SHIFT_2)- |
192 | 0 | (0x80>>UTRIE2_SHIFT_2)+ |
193 | 0 | 1+ |
194 | 0 | UTRIE2_LSCP_INDEX_2_LENGTH; |
195 | 0 | j+=UTRIE2_DATA_BLOCK_LENGTH; |
196 | 0 | for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { |
197 | 0 | newTrie->map[i]=0; |
198 | 0 | } |
199 | | |
200 | | /* |
201 | | * set the remaining indexes in the BMP index-2 block |
202 | | * to the null data block |
203 | | */ |
204 | 0 | for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { |
205 | 0 | newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET; |
206 | 0 | } |
207 | | |
208 | | /* |
209 | | * Fill the index gap with impossible values so that compaction |
210 | | * does not overlap other index-2 blocks with the gap. |
211 | | */ |
212 | 0 | for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) { |
213 | 0 | newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1; |
214 | 0 | } |
215 | | |
216 | | /* set the indexes in the null index-2 block */ |
217 | 0 | for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) { |
218 | 0 | newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET; |
219 | 0 | } |
220 | 0 | newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET; |
221 | 0 | newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET; |
222 | | |
223 | | /* set the index-1 indexes for the linear index-2 block */ |
224 | 0 | for(i=0, j=0; |
225 | 0 | i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; |
226 | 0 | ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH |
227 | 0 | ) { |
228 | 0 | newTrie->index1[i]=j; |
229 | 0 | } |
230 | | |
231 | | /* set the remaining index-1 indexes to the null index-2 block */ |
232 | 0 | for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { |
233 | 0 | newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET; |
234 | 0 | } |
235 | | |
236 | | /* |
237 | | * Preallocate and reset data for U+0080..U+07ff, |
238 | | * for 2-byte UTF-8 which will be compacted in 64-blocks |
239 | | * even if UTRIE2_DATA_BLOCK_LENGTH is smaller. |
240 | | */ |
241 | 0 | for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) { |
242 | 0 | utrie2_set32(trie, i, initialValue, pErrorCode); |
243 | 0 | } |
244 | |
|
245 | 0 | return trie; |
246 | 0 | } |
247 | | |
248 | | static UNewTrie2 * |
249 | 0 | cloneBuilder(const UNewTrie2 *other) { |
250 | 0 | UNewTrie2 *trie; |
251 | |
|
252 | 0 | trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2)); |
253 | 0 | if(trie==NULL) { |
254 | 0 | return NULL; |
255 | 0 | } |
256 | | |
257 | 0 | trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4); |
258 | 0 | if(trie->data==NULL) { |
259 | 0 | uprv_free(trie); |
260 | 0 | return NULL; |
261 | 0 | } |
262 | | #ifdef UCPTRIE_DEBUG |
263 | | if(other->t3==nullptr) { |
264 | | trie->t3=nullptr; |
265 | | } else { |
266 | | UErrorCode errorCode=U_ZERO_ERROR; |
267 | | trie->t3=umutablecptrie_clone(other->t3, &errorCode); |
268 | | } |
269 | | #endif |
270 | 0 | trie->dataCapacity=other->dataCapacity; |
271 | | |
272 | | /* clone data */ |
273 | 0 | uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1)); |
274 | 0 | uprv_memcpy(trie->index2, other->index2, (size_t)other->index2Length*4); |
275 | 0 | trie->index2NullOffset=other->index2NullOffset; |
276 | 0 | trie->index2Length=other->index2Length; |
277 | |
|
278 | 0 | uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4); |
279 | 0 | trie->dataNullOffset=other->dataNullOffset; |
280 | 0 | trie->dataLength=other->dataLength; |
281 | | |
282 | | /* reference counters */ |
283 | 0 | if(other->isCompacted) { |
284 | 0 | trie->firstFreeBlock=0; |
285 | 0 | } else { |
286 | 0 | uprv_memcpy(trie->map, other->map, ((size_t)other->dataLength>>UTRIE2_SHIFT_2)*4); |
287 | 0 | trie->firstFreeBlock=other->firstFreeBlock; |
288 | 0 | } |
289 | |
|
290 | 0 | trie->initialValue=other->initialValue; |
291 | 0 | trie->errorValue=other->errorValue; |
292 | 0 | trie->highStart=other->highStart; |
293 | 0 | trie->isCompacted=other->isCompacted; |
294 | |
|
295 | 0 | return trie; |
296 | 0 | } |
297 | | |
298 | | U_CAPI UTrie2 * U_EXPORT2 |
299 | 0 | utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) { |
300 | 0 | UTrie2 *trie; |
301 | |
|
302 | 0 | if(U_FAILURE(*pErrorCode)) { |
303 | 0 | return NULL; |
304 | 0 | } |
305 | 0 | if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { |
306 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
307 | 0 | return NULL; |
308 | 0 | } |
309 | | |
310 | 0 | trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2)); |
311 | 0 | if(trie==NULL) { |
312 | 0 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
313 | 0 | return NULL; |
314 | 0 | } |
315 | 0 | uprv_memcpy(trie, other, sizeof(UTrie2)); |
316 | |
|
317 | 0 | if(other->memory!=NULL) { |
318 | 0 | trie->memory=uprv_malloc(other->length); |
319 | 0 | if(trie->memory!=NULL) { |
320 | 0 | trie->isMemoryOwned=TRUE; |
321 | 0 | uprv_memcpy(trie->memory, other->memory, other->length); |
322 | | |
323 | | /* make the clone's pointers point to its own memory */ |
324 | 0 | trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory); |
325 | 0 | if(other->data16!=NULL) { |
326 | 0 | trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory); |
327 | 0 | } |
328 | 0 | if(other->data32!=NULL) { |
329 | 0 | trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory); |
330 | 0 | } |
331 | 0 | } |
332 | 0 | } else /* other->newTrie!=NULL */ { |
333 | 0 | trie->newTrie=cloneBuilder(other->newTrie); |
334 | 0 | } |
335 | |
|
336 | 0 | if(trie->memory==NULL && trie->newTrie==NULL) { |
337 | 0 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
338 | 0 | uprv_free(trie); |
339 | 0 | trie=NULL; |
340 | 0 | } |
341 | 0 | return trie; |
342 | 0 | } |
343 | | |
344 | | typedef struct NewTrieAndStatus { |
345 | | UTrie2 *trie; |
346 | | UErrorCode errorCode; |
347 | | UBool exclusiveLimit; /* rather than inclusive range end */ |
348 | | } NewTrieAndStatus; |
349 | | |
350 | | static UBool U_CALLCONV |
351 | 0 | copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { |
352 | 0 | NewTrieAndStatus *nt=(NewTrieAndStatus *)context; |
353 | 0 | if(value!=nt->trie->initialValue) { |
354 | 0 | if(nt->exclusiveLimit) { |
355 | 0 | --end; |
356 | 0 | } |
357 | 0 | if(start==end) { |
358 | 0 | utrie2_set32(nt->trie, start, value, &nt->errorCode); |
359 | 0 | } else { |
360 | 0 | utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode); |
361 | 0 | } |
362 | 0 | return U_SUCCESS(nt->errorCode); |
363 | 0 | } else { |
364 | 0 | return TRUE; |
365 | 0 | } |
366 | 0 | } |
367 | | |
368 | | #ifdef UTRIE2_DEBUG |
369 | | static long countInitial(const UTrie2 *trie) { |
370 | | uint32_t initialValue=trie->initialValue; |
371 | | int32_t length=trie->dataLength; |
372 | | long count=0; |
373 | | if(trie->data16!=nullptr) { |
374 | | for(int32_t i=0; i<length; ++i) { |
375 | | if(trie->data16[i]==initialValue) { ++count; } |
376 | | } |
377 | | } else { |
378 | | for(int32_t i=0; i<length; ++i) { |
379 | | if(trie->data32[i]==initialValue) { ++count; } |
380 | | } |
381 | | } |
382 | | return count; |
383 | | } |
384 | | |
385 | | static void |
386 | | utrie_printLengths(const UTrie *trie) { |
387 | | long indexLength=trie->indexLength; |
388 | | long dataLength=(long)trie->dataLength; |
389 | | long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2); |
390 | | printf("**UTrieLengths** index:%6ld data:%6ld serialized:%6ld\n", |
391 | | indexLength, dataLength, totalLength); |
392 | | } |
393 | | |
394 | | static void |
395 | | utrie2_printLengths(const UTrie2 *trie, const char *which) { |
396 | | long indexLength=trie->indexLength; |
397 | | long dataLength=(long)trie->dataLength; |
398 | | long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2); |
399 | | printf("**UTrie2Lengths(%s %s)** index:%6ld data:%6ld countInitial:%6ld serialized:%6ld\n", |
400 | | which, trie->name, indexLength, dataLength, countInitial(trie), totalLength); |
401 | | } |
402 | | #endif |
403 | | |
404 | | U_CAPI UTrie2 * U_EXPORT2 |
405 | 0 | utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) { |
406 | 0 | NewTrieAndStatus context; |
407 | 0 | UChar lead; |
408 | |
|
409 | 0 | if(U_FAILURE(*pErrorCode)) { |
410 | 0 | return NULL; |
411 | 0 | } |
412 | 0 | if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { |
413 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
414 | 0 | return NULL; |
415 | 0 | } |
416 | 0 | if(other->newTrie!=NULL && !other->newTrie->isCompacted) { |
417 | 0 | return utrie2_clone(other, pErrorCode); /* clone an unfrozen trie */ |
418 | 0 | } |
419 | | |
420 | | /* Clone the frozen trie by enumerating it and building a new one. */ |
421 | 0 | context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode); |
422 | 0 | if(U_FAILURE(*pErrorCode)) { |
423 | 0 | return NULL; |
424 | 0 | } |
425 | 0 | context.exclusiveLimit=FALSE; |
426 | 0 | context.errorCode=*pErrorCode; |
427 | 0 | utrie2_enum(other, NULL, copyEnumRange, &context); |
428 | 0 | *pErrorCode=context.errorCode; |
429 | 0 | for(lead=0xd800; lead<0xdc00; ++lead) { |
430 | 0 | uint32_t value; |
431 | 0 | if(other->data32==NULL) { |
432 | 0 | value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead); |
433 | 0 | } else { |
434 | 0 | value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead); |
435 | 0 | } |
436 | 0 | if(value!=other->initialValue) { |
437 | 0 | utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode); |
438 | 0 | } |
439 | 0 | } |
440 | 0 | if(U_FAILURE(*pErrorCode)) { |
441 | 0 | utrie2_close(context.trie); |
442 | 0 | context.trie=NULL; |
443 | 0 | } |
444 | 0 | return context.trie; |
445 | 0 | } |
446 | | |
447 | | /* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */ |
448 | | U_CAPI UTrie2 * U_EXPORT2 |
449 | 0 | utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) { |
450 | 0 | NewTrieAndStatus context; |
451 | 0 | UChar lead; |
452 | |
|
453 | 0 | if(U_FAILURE(*pErrorCode)) { |
454 | 0 | return NULL; |
455 | 0 | } |
456 | 0 | if(trie1==NULL) { |
457 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
458 | 0 | return NULL; |
459 | 0 | } |
460 | 0 | context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode); |
461 | 0 | if(U_FAILURE(*pErrorCode)) { |
462 | 0 | return NULL; |
463 | 0 | } |
464 | 0 | context.exclusiveLimit=TRUE; |
465 | 0 | context.errorCode=*pErrorCode; |
466 | 0 | utrie_enum(trie1, NULL, copyEnumRange, &context); |
467 | 0 | *pErrorCode=context.errorCode; |
468 | 0 | for(lead=0xd800; lead<0xdc00; ++lead) { |
469 | 0 | uint32_t value; |
470 | 0 | if(trie1->data32==NULL) { |
471 | 0 | value=UTRIE_GET16_FROM_LEAD(trie1, lead); |
472 | 0 | } else { |
473 | 0 | value=UTRIE_GET32_FROM_LEAD(trie1, lead); |
474 | 0 | } |
475 | 0 | if(value!=trie1->initialValue) { |
476 | 0 | utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode); |
477 | 0 | } |
478 | 0 | } |
479 | 0 | if(U_SUCCESS(*pErrorCode)) { |
480 | 0 | utrie2_freeze(context.trie, |
481 | 0 | trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS, |
482 | 0 | pErrorCode); |
483 | 0 | } |
484 | | #ifdef UTRIE2_DEBUG |
485 | | if(U_SUCCESS(*pErrorCode)) { |
486 | | utrie_printLengths(trie1); |
487 | | utrie2_printLengths(context.trie, "fromUTrie"); |
488 | | } |
489 | | #endif |
490 | 0 | if(U_FAILURE(*pErrorCode)) { |
491 | 0 | utrie2_close(context.trie); |
492 | 0 | context.trie=NULL; |
493 | 0 | } |
494 | 0 | return context.trie; |
495 | 0 | } |
496 | | |
497 | | static inline UBool |
498 | 0 | isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { |
499 | 0 | int32_t i2, block; |
500 | |
|
501 | 0 | if(U_IS_LEAD(c) && forLSCP) { |
502 | 0 | i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+ |
503 | 0 | (c>>UTRIE2_SHIFT_2); |
504 | 0 | } else { |
505 | 0 | i2=trie->index1[c>>UTRIE2_SHIFT_1]+ |
506 | 0 | ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK); |
507 | 0 | } |
508 | 0 | block=trie->index2[i2]; |
509 | 0 | return (UBool)(block==trie->dataNullOffset); |
510 | 0 | } |
511 | | |
512 | | static int32_t |
513 | 0 | allocIndex2Block(UNewTrie2 *trie) { |
514 | 0 | int32_t newBlock, newTop; |
515 | |
|
516 | 0 | newBlock=trie->index2Length; |
517 | 0 | newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH; |
518 | 0 | if(newTop>UPRV_LENGTHOF(trie->index2)) { |
519 | | /* |
520 | | * Should never occur. |
521 | | * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect, |
522 | | * or the code writes more values than should be possible. |
523 | | */ |
524 | 0 | return -1; |
525 | 0 | } |
526 | 0 | trie->index2Length=newTop; |
527 | 0 | uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4); |
528 | 0 | return newBlock; |
529 | 0 | } |
530 | | |
531 | | static int32_t |
532 | 0 | getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { |
533 | 0 | int32_t i1, i2; |
534 | |
|
535 | 0 | if(U_IS_LEAD(c) && forLSCP) { |
536 | 0 | return UTRIE2_LSCP_INDEX_2_OFFSET; |
537 | 0 | } |
538 | | |
539 | 0 | i1=c>>UTRIE2_SHIFT_1; |
540 | 0 | i2=trie->index1[i1]; |
541 | 0 | if(i2==trie->index2NullOffset) { |
542 | 0 | i2=allocIndex2Block(trie); |
543 | 0 | if(i2<0) { |
544 | 0 | return -1; /* program error */ |
545 | 0 | } |
546 | 0 | trie->index1[i1]=i2; |
547 | 0 | } |
548 | 0 | return i2; |
549 | 0 | } |
550 | | |
551 | | static int32_t |
552 | 0 | allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) { |
553 | 0 | int32_t newBlock, newTop; |
554 | |
|
555 | 0 | if(trie->firstFreeBlock!=0) { |
556 | | /* get the first free block */ |
557 | 0 | newBlock=trie->firstFreeBlock; |
558 | 0 | trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2]; |
559 | 0 | } else { |
560 | | /* get a new block from the high end */ |
561 | 0 | newBlock=trie->dataLength; |
562 | 0 | newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH; |
563 | 0 | if(newTop>trie->dataCapacity) { |
564 | | /* out of memory in the data array */ |
565 | 0 | int32_t capacity; |
566 | 0 | uint32_t *data; |
567 | |
|
568 | 0 | if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) { |
569 | 0 | capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH; |
570 | 0 | } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) { |
571 | 0 | capacity=UNEWTRIE2_MAX_DATA_LENGTH; |
572 | 0 | } else { |
573 | | /* |
574 | | * Should never occur. |
575 | | * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect, |
576 | | * or the code writes more values than should be possible. |
577 | | */ |
578 | 0 | return -1; |
579 | 0 | } |
580 | 0 | data=(uint32_t *)uprv_malloc(capacity*4); |
581 | 0 | if(data==NULL) { |
582 | 0 | return -1; |
583 | 0 | } |
584 | 0 | uprv_memcpy(data, trie->data, (size_t)trie->dataLength*4); |
585 | 0 | uprv_free(trie->data); |
586 | 0 | trie->data=data; |
587 | 0 | trie->dataCapacity=capacity; |
588 | 0 | } |
589 | 0 | trie->dataLength=newTop; |
590 | 0 | } |
591 | 0 | uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4); |
592 | 0 | trie->map[newBlock>>UTRIE2_SHIFT_2]=0; |
593 | 0 | return newBlock; |
594 | 0 | } |
595 | | |
596 | | /* call when the block's reference counter reaches 0 */ |
597 | | static void |
598 | 0 | releaseDataBlock(UNewTrie2 *trie, int32_t block) { |
599 | | /* put this block at the front of the free-block chain */ |
600 | 0 | trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock; |
601 | 0 | trie->firstFreeBlock=block; |
602 | 0 | } |
603 | | |
604 | | static inline UBool |
605 | 0 | isWritableBlock(UNewTrie2 *trie, int32_t block) { |
606 | 0 | return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]); |
607 | 0 | } |
608 | | |
609 | | static inline void |
610 | 0 | setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) { |
611 | 0 | int32_t oldBlock; |
612 | 0 | ++trie->map[block>>UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */ |
613 | 0 | oldBlock=trie->index2[i2]; |
614 | 0 | if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) { |
615 | 0 | releaseDataBlock(trie, oldBlock); |
616 | 0 | } |
617 | 0 | trie->index2[i2]=block; |
618 | 0 | } |
619 | | |
620 | | /** |
621 | | * No error checking for illegal arguments. |
622 | | * |
623 | | * @return -1 if no new data block available (out of memory in data array) |
624 | | * @internal |
625 | | */ |
626 | | static int32_t |
627 | 0 | getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { |
628 | 0 | int32_t i2, oldBlock, newBlock; |
629 | |
|
630 | 0 | i2=getIndex2Block(trie, c, forLSCP); |
631 | 0 | if(i2<0) { |
632 | 0 | return -1; /* program error */ |
633 | 0 | } |
634 | | |
635 | 0 | i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; |
636 | 0 | oldBlock=trie->index2[i2]; |
637 | 0 | if(isWritableBlock(trie, oldBlock)) { |
638 | 0 | return oldBlock; |
639 | 0 | } |
640 | | |
641 | | /* allocate a new data block */ |
642 | 0 | newBlock=allocDataBlock(trie, oldBlock); |
643 | 0 | if(newBlock<0) { |
644 | | /* out of memory in the data array */ |
645 | 0 | return -1; |
646 | 0 | } |
647 | 0 | setIndex2Entry(trie, i2, newBlock); |
648 | 0 | return newBlock; |
649 | 0 | } |
650 | | |
651 | | /** |
652 | | * @return TRUE if the value was successfully set |
653 | | */ |
654 | | static void |
655 | | set32(UNewTrie2 *trie, |
656 | | UChar32 c, UBool forLSCP, uint32_t value, |
657 | 0 | UErrorCode *pErrorCode) { |
658 | 0 | int32_t block; |
659 | |
|
660 | 0 | if(trie==NULL || trie->isCompacted) { |
661 | 0 | *pErrorCode=U_NO_WRITE_PERMISSION; |
662 | 0 | return; |
663 | 0 | } |
664 | | #ifdef UCPTRIE_DEBUG |
665 | | umutablecptrie_set(trie->t3, c, value, pErrorCode); |
666 | | #endif |
667 | | |
668 | 0 | block=getDataBlock(trie, c, forLSCP); |
669 | 0 | if(block<0) { |
670 | 0 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
671 | 0 | return; |
672 | 0 | } |
673 | | |
674 | 0 | trie->data[block+(c&UTRIE2_DATA_MASK)]=value; |
675 | 0 | } |
676 | | |
677 | | U_CAPI void U_EXPORT2 |
678 | 0 | utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { |
679 | 0 | if(U_FAILURE(*pErrorCode)) { |
680 | 0 | return; |
681 | 0 | } |
682 | 0 | if((uint32_t)c>0x10ffff) { |
683 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
684 | 0 | return; |
685 | 0 | } |
686 | 0 | set32(trie->newTrie, c, TRUE, value, pErrorCode); |
687 | 0 | } |
688 | | |
689 | | U_CAPI void U_EXPORT2 |
690 | | utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie, |
691 | | UChar32 c, uint32_t value, |
692 | 0 | UErrorCode *pErrorCode) { |
693 | 0 | if(U_FAILURE(*pErrorCode)) { |
694 | 0 | return; |
695 | 0 | } |
696 | 0 | if(!U_IS_LEAD(c)) { |
697 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
698 | 0 | return; |
699 | 0 | } |
700 | 0 | set32(trie->newTrie, c, FALSE, value, pErrorCode); |
701 | 0 | } |
702 | | |
703 | | static void |
704 | 0 | writeBlock(uint32_t *block, uint32_t value) { |
705 | 0 | uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH; |
706 | 0 | while(block<limit) { |
707 | 0 | *block++=value; |
708 | 0 | } |
709 | 0 | } |
710 | | |
711 | | /** |
712 | | * initialValue is ignored if overwrite=TRUE |
713 | | * @internal |
714 | | */ |
715 | | static void |
716 | | fillBlock(uint32_t *block, UChar32 start, UChar32 limit, |
717 | 0 | uint32_t value, uint32_t initialValue, UBool overwrite) { |
718 | 0 | uint32_t *pLimit; |
719 | |
|
720 | 0 | pLimit=block+limit; |
721 | 0 | block+=start; |
722 | 0 | if(overwrite) { |
723 | 0 | while(block<pLimit) { |
724 | 0 | *block++=value; |
725 | 0 | } |
726 | 0 | } else { |
727 | 0 | while(block<pLimit) { |
728 | 0 | if(*block==initialValue) { |
729 | 0 | *block=value; |
730 | 0 | } |
731 | 0 | ++block; |
732 | 0 | } |
733 | 0 | } |
734 | 0 | } |
735 | | |
736 | | U_CAPI void U_EXPORT2 |
737 | | utrie2_setRange32(UTrie2 *trie, |
738 | | UChar32 start, UChar32 end, |
739 | | uint32_t value, UBool overwrite, |
740 | 0 | UErrorCode *pErrorCode) { |
741 | | /* |
742 | | * repeat value in [start..end] |
743 | | * mark index values for repeat-data blocks by setting bit 31 of the index values |
744 | | * fill around existing values if any, if(overwrite) |
745 | | */ |
746 | 0 | UNewTrie2 *newTrie; |
747 | 0 | int32_t block, rest, repeatBlock; |
748 | 0 | UChar32 limit; |
749 | |
|
750 | 0 | if(U_FAILURE(*pErrorCode)) { |
751 | 0 | return; |
752 | 0 | } |
753 | 0 | if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) { |
754 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
755 | 0 | return; |
756 | 0 | } |
757 | 0 | newTrie=trie->newTrie; |
758 | 0 | if(newTrie==NULL || newTrie->isCompacted) { |
759 | 0 | *pErrorCode=U_NO_WRITE_PERMISSION; |
760 | 0 | return; |
761 | 0 | } |
762 | | #ifdef UCPTRIE_DEBUG |
763 | | umutablecptrie_setRange(newTrie->t3, start, end, value, pErrorCode); |
764 | | #endif |
765 | 0 | if(!overwrite && value==newTrie->initialValue) { |
766 | 0 | return; /* nothing to do */ |
767 | 0 | } |
768 | | |
769 | 0 | limit=end+1; |
770 | 0 | if(start&UTRIE2_DATA_MASK) { |
771 | 0 | UChar32 nextStart; |
772 | | |
773 | | /* set partial block at [start..following block boundary[ */ |
774 | 0 | block=getDataBlock(newTrie, start, TRUE); |
775 | 0 | if(block<0) { |
776 | 0 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
777 | 0 | return; |
778 | 0 | } |
779 | | |
780 | 0 | nextStart=(start+UTRIE2_DATA_MASK)&~UTRIE2_DATA_MASK; |
781 | 0 | if(nextStart<=limit) { |
782 | 0 | fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH, |
783 | 0 | value, newTrie->initialValue, overwrite); |
784 | 0 | start=nextStart; |
785 | 0 | } else { |
786 | 0 | fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK, |
787 | 0 | value, newTrie->initialValue, overwrite); |
788 | 0 | return; |
789 | 0 | } |
790 | 0 | } |
791 | | |
792 | | /* number of positions in the last, partial block */ |
793 | 0 | rest=limit&UTRIE2_DATA_MASK; |
794 | | |
795 | | /* round down limit to a block boundary */ |
796 | 0 | limit&=~UTRIE2_DATA_MASK; |
797 | | |
798 | | /* iterate over all-value blocks */ |
799 | 0 | if(value==newTrie->initialValue) { |
800 | 0 | repeatBlock=newTrie->dataNullOffset; |
801 | 0 | } else { |
802 | 0 | repeatBlock=-1; |
803 | 0 | } |
804 | |
|
805 | 0 | while(start<limit) { |
806 | 0 | int32_t i2; |
807 | 0 | UBool setRepeatBlock=FALSE; |
808 | |
|
809 | 0 | if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) { |
810 | 0 | start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */ |
811 | 0 | continue; |
812 | 0 | } |
813 | | |
814 | | /* get index value */ |
815 | 0 | i2=getIndex2Block(newTrie, start, TRUE); |
816 | 0 | if(i2<0) { |
817 | 0 | *pErrorCode=U_INTERNAL_PROGRAM_ERROR; |
818 | 0 | return; |
819 | 0 | } |
820 | 0 | i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK; |
821 | 0 | block=newTrie->index2[i2]; |
822 | 0 | if(isWritableBlock(newTrie, block)) { |
823 | | /* already allocated */ |
824 | 0 | if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) { |
825 | | /* |
826 | | * We overwrite all values, and it's not a |
827 | | * protected (ASCII-linear or 2-byte UTF-8) block: |
828 | | * replace with the repeatBlock. |
829 | | */ |
830 | 0 | setRepeatBlock=TRUE; |
831 | 0 | } else { |
832 | | /* !overwrite, or protected block: just write the values into this block */ |
833 | 0 | fillBlock(newTrie->data+block, |
834 | 0 | 0, UTRIE2_DATA_BLOCK_LENGTH, |
835 | 0 | value, newTrie->initialValue, overwrite); |
836 | 0 | } |
837 | 0 | } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) { |
838 | | /* |
839 | | * Set the repeatBlock instead of the null block or previous repeat block: |
840 | | * |
841 | | * If !isWritableBlock() then all entries in the block have the same value |
842 | | * because it's the null block or a range block (the repeatBlock from a previous |
843 | | * call to utrie2_setRange32()). |
844 | | * No other blocks are used multiple times before compacting. |
845 | | * |
846 | | * The null block is the only non-writable block with the initialValue because |
847 | | * of the repeatBlock initialization above. (If value==initialValue, then |
848 | | * the repeatBlock will be the null data block.) |
849 | | * |
850 | | * We set our repeatBlock if the desired value differs from the block's value, |
851 | | * and if we overwrite any data or if the data is all initial values |
852 | | * (which is the same as the block being the null block, see above). |
853 | | */ |
854 | 0 | setRepeatBlock=TRUE; |
855 | 0 | } |
856 | 0 | if(setRepeatBlock) { |
857 | 0 | if(repeatBlock>=0) { |
858 | 0 | setIndex2Entry(newTrie, i2, repeatBlock); |
859 | 0 | } else { |
860 | | /* create and set and fill the repeatBlock */ |
861 | 0 | repeatBlock=getDataBlock(newTrie, start, TRUE); |
862 | 0 | if(repeatBlock<0) { |
863 | 0 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
864 | 0 | return; |
865 | 0 | } |
866 | 0 | writeBlock(newTrie->data+repeatBlock, value); |
867 | 0 | } |
868 | 0 | } |
869 | | |
870 | 0 | start+=UTRIE2_DATA_BLOCK_LENGTH; |
871 | 0 | } |
872 | | |
873 | 0 | if(rest>0) { |
874 | | /* set partial block at [last block boundary..limit[ */ |
875 | 0 | block=getDataBlock(newTrie, start, TRUE); |
876 | 0 | if(block<0) { |
877 | 0 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
878 | 0 | return; |
879 | 0 | } |
880 | | |
881 | 0 | fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite); |
882 | 0 | } |
883 | | |
884 | 0 | return; |
885 | 0 | } |
886 | | |
887 | | /* compaction --------------------------------------------------------------- */ |
888 | | |
889 | | static inline UBool |
890 | 0 | equal_int32(const int32_t *s, const int32_t *t, int32_t length) { |
891 | 0 | while(length>0 && *s==*t) { |
892 | 0 | ++s; |
893 | 0 | ++t; |
894 | 0 | --length; |
895 | 0 | } |
896 | 0 | return (UBool)(length==0); |
897 | 0 | } |
898 | | |
899 | | static inline UBool |
900 | 0 | equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) { |
901 | 0 | while(length>0 && *s==*t) { |
902 | 0 | ++s; |
903 | 0 | ++t; |
904 | 0 | --length; |
905 | 0 | } |
906 | 0 | return (UBool)(length==0); |
907 | 0 | } |
908 | | |
909 | | static int32_t |
910 | 0 | findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) { |
911 | 0 | int32_t block; |
912 | | |
913 | | /* ensure that we do not even partially get past index2Length */ |
914 | 0 | index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH; |
915 | |
|
916 | 0 | for(block=0; block<=index2Length; ++block) { |
917 | 0 | if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) { |
918 | 0 | return block; |
919 | 0 | } |
920 | 0 | } |
921 | 0 | return -1; |
922 | 0 | } |
923 | | |
924 | | static int32_t |
925 | 0 | findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) { |
926 | 0 | int32_t block; |
927 | | |
928 | | /* ensure that we do not even partially get past dataLength */ |
929 | 0 | dataLength-=blockLength; |
930 | |
|
931 | 0 | for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) { |
932 | 0 | if(equal_uint32(data+block, data+otherBlock, blockLength)) { |
933 | 0 | return block; |
934 | 0 | } |
935 | 0 | } |
936 | 0 | return -1; |
937 | 0 | } |
938 | | |
939 | | /* |
940 | | * Find the start of the last range in the trie by enumerating backward. |
941 | | * Indexes for supplementary code points higher than this will be omitted. |
942 | | */ |
943 | | static UChar32 |
944 | 0 | findHighStart(UNewTrie2 *trie, uint32_t highValue) { |
945 | 0 | const uint32_t *data32; |
946 | |
|
947 | 0 | uint32_t value, initialValue; |
948 | 0 | UChar32 c, prev; |
949 | 0 | int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock; |
950 | |
|
951 | 0 | data32=trie->data; |
952 | 0 | initialValue=trie->initialValue; |
953 | |
|
954 | 0 | index2NullOffset=trie->index2NullOffset; |
955 | 0 | nullBlock=trie->dataNullOffset; |
956 | | |
957 | | /* set variables for previous range */ |
958 | 0 | if(highValue==initialValue) { |
959 | 0 | prevI2Block=index2NullOffset; |
960 | 0 | prevBlock=nullBlock; |
961 | 0 | } else { |
962 | 0 | prevI2Block=-1; |
963 | 0 | prevBlock=-1; |
964 | 0 | } |
965 | 0 | prev=0x110000; |
966 | | |
967 | | /* enumerate index-2 blocks */ |
968 | 0 | i1=UNEWTRIE2_INDEX_1_LENGTH; |
969 | 0 | c=prev; |
970 | 0 | while(c>0) { |
971 | 0 | i2Block=trie->index1[--i1]; |
972 | 0 | if(i2Block==prevI2Block) { |
973 | | /* the index-2 block is the same as the previous one, and filled with highValue */ |
974 | 0 | c-=UTRIE2_CP_PER_INDEX_1_ENTRY; |
975 | 0 | continue; |
976 | 0 | } |
977 | 0 | prevI2Block=i2Block; |
978 | 0 | if(i2Block==index2NullOffset) { |
979 | | /* this is the null index-2 block */ |
980 | 0 | if(highValue!=initialValue) { |
981 | 0 | return c; |
982 | 0 | } |
983 | 0 | c-=UTRIE2_CP_PER_INDEX_1_ENTRY; |
984 | 0 | } else { |
985 | | /* enumerate data blocks for one index-2 block */ |
986 | 0 | for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) { |
987 | 0 | block=trie->index2[i2Block+ --i2]; |
988 | 0 | if(block==prevBlock) { |
989 | | /* the block is the same as the previous one, and filled with highValue */ |
990 | 0 | c-=UTRIE2_DATA_BLOCK_LENGTH; |
991 | 0 | continue; |
992 | 0 | } |
993 | 0 | prevBlock=block; |
994 | 0 | if(block==nullBlock) { |
995 | | /* this is the null data block */ |
996 | 0 | if(highValue!=initialValue) { |
997 | 0 | return c; |
998 | 0 | } |
999 | 0 | c-=UTRIE2_DATA_BLOCK_LENGTH; |
1000 | 0 | } else { |
1001 | 0 | for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) { |
1002 | 0 | value=data32[block+ --j]; |
1003 | 0 | if(value!=highValue) { |
1004 | 0 | return c; |
1005 | 0 | } |
1006 | 0 | --c; |
1007 | 0 | } |
1008 | 0 | } |
1009 | 0 | } |
1010 | 0 | } |
1011 | 0 | } |
1012 | | |
1013 | | /* deliver last range */ |
1014 | 0 | return 0; |
1015 | 0 | } |
1016 | | |
1017 | | /* |
1018 | | * Compact a build-time trie. |
1019 | | * |
1020 | | * The compaction |
1021 | | * - removes blocks that are identical with earlier ones |
1022 | | * - overlaps adjacent blocks as much as possible (if overlap==TRUE) |
1023 | | * - moves blocks in steps of the data granularity |
1024 | | * - moves and overlaps blocks that overlap with multiple values in the overlap region |
1025 | | * |
1026 | | * It does not |
1027 | | * - try to move and overlap blocks that are not already adjacent |
1028 | | */ |
1029 | | static void |
1030 | 0 | compactData(UNewTrie2 *trie) { |
1031 | | #ifdef UTRIE2_DEBUG |
1032 | | int32_t countSame=0, sumOverlaps=0; |
1033 | | #endif |
1034 | |
|
1035 | 0 | int32_t start, newStart, movedStart; |
1036 | 0 | int32_t blockLength, overlap; |
1037 | 0 | int32_t i, mapIndex, blockCount; |
1038 | | |
1039 | | /* do not compact linear-ASCII data */ |
1040 | 0 | newStart=UTRIE2_DATA_START_OFFSET; |
1041 | 0 | for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) { |
1042 | 0 | trie->map[i]=start; |
1043 | 0 | } |
1044 | | |
1045 | | /* |
1046 | | * Start with a block length of 64 for 2-byte UTF-8, |
1047 | | * then switch to UTRIE2_DATA_BLOCK_LENGTH. |
1048 | | */ |
1049 | 0 | blockLength=64; |
1050 | 0 | blockCount=blockLength>>UTRIE2_SHIFT_2; |
1051 | 0 | for(start=newStart; start<trie->dataLength;) { |
1052 | | /* |
1053 | | * start: index of first entry of current block |
1054 | | * newStart: index where the current block is to be moved |
1055 | | * (right after current end of already-compacted data) |
1056 | | */ |
1057 | 0 | if(start==UNEWTRIE2_DATA_0800_OFFSET) { |
1058 | 0 | blockLength=UTRIE2_DATA_BLOCK_LENGTH; |
1059 | 0 | blockCount=1; |
1060 | 0 | } |
1061 | | |
1062 | | /* skip blocks that are not used */ |
1063 | 0 | if(trie->map[start>>UTRIE2_SHIFT_2]<=0) { |
1064 | | /* advance start to the next block */ |
1065 | 0 | start+=blockLength; |
1066 | | |
1067 | | /* leave newStart with the previous block! */ |
1068 | 0 | continue; |
1069 | 0 | } |
1070 | | |
1071 | | /* search for an identical block */ |
1072 | 0 | if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength)) |
1073 | 0 | >=0 |
1074 | 0 | ) { |
1075 | | #ifdef UTRIE2_DEBUG |
1076 | | ++countSame; |
1077 | | #endif |
1078 | | /* found an identical block, set the other block's index value for the current block */ |
1079 | 0 | for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { |
1080 | 0 | trie->map[mapIndex++]=movedStart; |
1081 | 0 | movedStart+=UTRIE2_DATA_BLOCK_LENGTH; |
1082 | 0 | } |
1083 | | |
1084 | | /* advance start to the next block */ |
1085 | 0 | start+=blockLength; |
1086 | | |
1087 | | /* leave newStart with the previous block! */ |
1088 | 0 | continue; |
1089 | 0 | } |
1090 | | |
1091 | | /* see if the beginning of this block can be overlapped with the end of the previous block */ |
1092 | | /* look for maximum overlap (modulo granularity) with the previous, adjacent block */ |
1093 | 0 | for(overlap=blockLength-UTRIE2_DATA_GRANULARITY; |
1094 | 0 | overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap); |
1095 | 0 | overlap-=UTRIE2_DATA_GRANULARITY) {} |
1096 | |
|
1097 | | #ifdef UTRIE2_DEBUG |
1098 | | sumOverlaps+=overlap; |
1099 | | #endif |
1100 | 0 | if(overlap>0 || newStart<start) { |
1101 | | /* some overlap, or just move the whole block */ |
1102 | 0 | movedStart=newStart-overlap; |
1103 | 0 | for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { |
1104 | 0 | trie->map[mapIndex++]=movedStart; |
1105 | 0 | movedStart+=UTRIE2_DATA_BLOCK_LENGTH; |
1106 | 0 | } |
1107 | | |
1108 | | /* move the non-overlapping indexes to their new positions */ |
1109 | 0 | start+=overlap; |
1110 | 0 | for(i=blockLength-overlap; i>0; --i) { |
1111 | 0 | trie->data[newStart++]=trie->data[start++]; |
1112 | 0 | } |
1113 | 0 | } else /* no overlap && newStart==start */ { |
1114 | 0 | for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { |
1115 | 0 | trie->map[mapIndex++]=start; |
1116 | 0 | start+=UTRIE2_DATA_BLOCK_LENGTH; |
1117 | 0 | } |
1118 | 0 | newStart=start; |
1119 | 0 | } |
1120 | 0 | } |
1121 | | |
1122 | | /* now adjust the index-2 table */ |
1123 | 0 | for(i=0; i<trie->index2Length; ++i) { |
1124 | 0 | if(i==UNEWTRIE2_INDEX_GAP_OFFSET) { |
1125 | | /* Gap indexes are invalid (-1). Skip over the gap. */ |
1126 | 0 | i+=UNEWTRIE2_INDEX_GAP_LENGTH; |
1127 | 0 | } |
1128 | 0 | trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2]; |
1129 | 0 | } |
1130 | 0 | trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2]; |
1131 | | |
1132 | | /* ensure dataLength alignment */ |
1133 | 0 | while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) { |
1134 | 0 | trie->data[newStart++]=trie->initialValue; |
1135 | 0 | } |
1136 | |
|
1137 | | #ifdef UTRIE2_DEBUG |
1138 | | /* we saved some space */ |
1139 | | printf("compacting UTrie2: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n", |
1140 | | (long)trie->dataLength, (long)newStart, (long)countSame, (long)sumOverlaps); |
1141 | | #endif |
1142 | |
|
1143 | 0 | trie->dataLength=newStart; |
1144 | 0 | } |
1145 | | |
1146 | | static void |
1147 | 0 | compactIndex2(UNewTrie2 *trie) { |
1148 | 0 | int32_t i, start, newStart, movedStart, overlap; |
1149 | | |
1150 | | /* do not compact linear-BMP index-2 blocks */ |
1151 | 0 | newStart=UTRIE2_INDEX_2_BMP_LENGTH; |
1152 | 0 | for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) { |
1153 | 0 | trie->map[i]=start; |
1154 | 0 | } |
1155 | | |
1156 | | /* Reduce the index table gap to what will be needed at runtime. */ |
1157 | 0 | newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1); |
1158 | |
|
1159 | 0 | for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) { |
1160 | | /* |
1161 | | * start: index of first entry of current block |
1162 | | * newStart: index where the current block is to be moved |
1163 | | * (right after current end of already-compacted data) |
1164 | | */ |
1165 | | |
1166 | | /* search for an identical block */ |
1167 | 0 | if( (movedStart=findSameIndex2Block(trie->index2, newStart, start)) |
1168 | 0 | >=0 |
1169 | 0 | ) { |
1170 | | /* found an identical block, set the other block's index value for the current block */ |
1171 | 0 | trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart; |
1172 | | |
1173 | | /* advance start to the next block */ |
1174 | 0 | start+=UTRIE2_INDEX_2_BLOCK_LENGTH; |
1175 | | |
1176 | | /* leave newStart with the previous block! */ |
1177 | 0 | continue; |
1178 | 0 | } |
1179 | | |
1180 | | /* see if the beginning of this block can be overlapped with the end of the previous block */ |
1181 | | /* look for maximum overlap with the previous, adjacent block */ |
1182 | 0 | for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1; |
1183 | 0 | overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap); |
1184 | 0 | --overlap) {} |
1185 | |
|
1186 | 0 | if(overlap>0 || newStart<start) { |
1187 | | /* some overlap, or just move the whole block */ |
1188 | 0 | trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap; |
1189 | | |
1190 | | /* move the non-overlapping indexes to their new positions */ |
1191 | 0 | start+=overlap; |
1192 | 0 | for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) { |
1193 | 0 | trie->index2[newStart++]=trie->index2[start++]; |
1194 | 0 | } |
1195 | 0 | } else /* no overlap && newStart==start */ { |
1196 | 0 | trie->map[start>>UTRIE2_SHIFT_1_2]=start; |
1197 | 0 | start+=UTRIE2_INDEX_2_BLOCK_LENGTH; |
1198 | 0 | newStart=start; |
1199 | 0 | } |
1200 | 0 | } |
1201 | | |
1202 | | /* now adjust the index-1 table */ |
1203 | 0 | for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { |
1204 | 0 | trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2]; |
1205 | 0 | } |
1206 | 0 | trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2]; |
1207 | | |
1208 | | /* |
1209 | | * Ensure data table alignment: |
1210 | | * Needs to be granularity-aligned for 16-bit trie |
1211 | | * (so that dataMove will be down-shiftable), |
1212 | | * and 2-aligned for uint32_t data. |
1213 | | */ |
1214 | 0 | while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) { |
1215 | | /* Arbitrary value: 0x3fffc not possible for real data. */ |
1216 | 0 | trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT; |
1217 | 0 | } |
1218 | |
|
1219 | | #ifdef UTRIE2_DEBUG |
1220 | | /* we saved some space */ |
1221 | | printf("compacting UTrie2: count of 16-bit index words %lu->%lu\n", |
1222 | | (long)trie->index2Length, (long)newStart); |
1223 | | #endif |
1224 | |
|
1225 | 0 | trie->index2Length=newStart; |
1226 | 0 | } |
1227 | | |
1228 | | static void |
1229 | 0 | compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) { |
1230 | 0 | UNewTrie2 *newTrie; |
1231 | 0 | UChar32 highStart, suppHighStart; |
1232 | 0 | uint32_t highValue; |
1233 | |
|
1234 | 0 | newTrie=trie->newTrie; |
1235 | | |
1236 | | /* find highStart and round it up */ |
1237 | 0 | highValue=utrie2_get32(trie, 0x10ffff); |
1238 | 0 | highStart=findHighStart(newTrie, highValue); |
1239 | 0 | highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1); |
1240 | 0 | if(highStart==0x110000) { |
1241 | 0 | highValue=trie->errorValue; |
1242 | 0 | } |
1243 | | |
1244 | | /* |
1245 | | * Set trie->highStart only after utrie2_get32(trie, highStart). |
1246 | | * Otherwise utrie2_get32(trie, highStart) would try to read the highValue. |
1247 | | */ |
1248 | 0 | trie->highStart=newTrie->highStart=highStart; |
1249 | |
|
1250 | | #ifdef UTRIE2_DEBUG |
1251 | | printf("UTrie2: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n", |
1252 | | (long)highStart, (long)highValue, (long)trie->initialValue); |
1253 | | #endif |
1254 | |
|
1255 | 0 | if(highStart<0x110000) { |
1256 | | /* Blank out [highStart..10ffff] to release associated data blocks. */ |
1257 | 0 | suppHighStart= highStart<=0x10000 ? 0x10000 : highStart; |
1258 | 0 | utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode); |
1259 | 0 | if(U_FAILURE(*pErrorCode)) { |
1260 | 0 | return; |
1261 | 0 | } |
1262 | 0 | } |
1263 | | |
1264 | 0 | compactData(newTrie); |
1265 | 0 | if(highStart>0x10000) { |
1266 | 0 | compactIndex2(newTrie); |
1267 | | #ifdef UTRIE2_DEBUG |
1268 | | } else { |
1269 | | printf("UTrie2: highStart U+%04lx count of 16-bit index words %lu->%lu\n", |
1270 | | (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET); |
1271 | | #endif |
1272 | 0 | } |
1273 | | |
1274 | | /* |
1275 | | * Store the highValue in the data array and round up the dataLength. |
1276 | | * Must be done after compactData() because that assumes that dataLength |
1277 | | * is a multiple of UTRIE2_DATA_BLOCK_LENGTH. |
1278 | | */ |
1279 | 0 | newTrie->data[newTrie->dataLength++]=highValue; |
1280 | 0 | while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) { |
1281 | 0 | newTrie->data[newTrie->dataLength++]=trie->initialValue; |
1282 | 0 | } |
1283 | |
|
1284 | 0 | newTrie->isCompacted=TRUE; |
1285 | 0 | } |
1286 | | |
1287 | | /* serialization ------------------------------------------------------------ */ |
1288 | | |
1289 | | /** |
1290 | | * Maximum length of the runtime index array. |
1291 | | * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength. |
1292 | | * (The actual maximum length is lower, |
1293 | | * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.) |
1294 | | */ |
1295 | 0 | #define UTRIE2_MAX_INDEX_LENGTH 0xffff |
1296 | | |
1297 | | /** |
1298 | | * Maximum length of the runtime data array. |
1299 | | * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT, |
1300 | | * and by uint16_t UTrie2Header.shiftedDataLength. |
1301 | | */ |
1302 | 0 | #define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT) |
1303 | | |
1304 | | /* Compact and internally serialize the trie. */ |
1305 | | U_CAPI void U_EXPORT2 |
1306 | 0 | utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) { |
1307 | 0 | UNewTrie2 *newTrie; |
1308 | 0 | UTrie2Header *header; |
1309 | 0 | uint32_t *p; |
1310 | 0 | uint16_t *dest16; |
1311 | 0 | int32_t i, length; |
1312 | 0 | int32_t allIndexesLength; |
1313 | 0 | int32_t dataMove; /* >0 if the data is moved to the end of the index array */ |
1314 | 0 | UChar32 highStart; |
1315 | | |
1316 | | /* argument check */ |
1317 | 0 | if(U_FAILURE(*pErrorCode)) { |
1318 | 0 | return; |
1319 | 0 | } |
1320 | 0 | if( trie==NULL || |
1321 | 0 | valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits |
1322 | 0 | ) { |
1323 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
1324 | 0 | return; |
1325 | 0 | } |
1326 | 0 | newTrie=trie->newTrie; |
1327 | 0 | if(newTrie==NULL) { |
1328 | | /* already frozen */ |
1329 | 0 | UTrie2ValueBits frozenValueBits= |
1330 | 0 | trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS; |
1331 | 0 | if(valueBits!=frozenValueBits) { |
1332 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
1333 | 0 | } |
1334 | 0 | return; |
1335 | 0 | } |
1336 | | |
1337 | | /* compact if necessary */ |
1338 | 0 | if(!newTrie->isCompacted) { |
1339 | 0 | compactTrie(trie, pErrorCode); |
1340 | 0 | if(U_FAILURE(*pErrorCode)) { |
1341 | 0 | return; |
1342 | 0 | } |
1343 | 0 | } |
1344 | 0 | highStart=trie->highStart; |
1345 | |
|
1346 | 0 | if(highStart<=0x10000) { |
1347 | 0 | allIndexesLength=UTRIE2_INDEX_1_OFFSET; |
1348 | 0 | } else { |
1349 | 0 | allIndexesLength=newTrie->index2Length; |
1350 | 0 | } |
1351 | 0 | if(valueBits==UTRIE2_16_VALUE_BITS) { |
1352 | 0 | dataMove=allIndexesLength; |
1353 | 0 | } else { |
1354 | 0 | dataMove=0; |
1355 | 0 | } |
1356 | | |
1357 | | /* are indexLength and dataLength within limits? */ |
1358 | 0 | if( /* for unshifted indexLength */ |
1359 | 0 | allIndexesLength>UTRIE2_MAX_INDEX_LENGTH || |
1360 | | /* for unshifted dataNullOffset */ |
1361 | 0 | (dataMove+newTrie->dataNullOffset)>0xffff || |
1362 | | /* for unshifted 2-byte UTF-8 index-2 values */ |
1363 | 0 | (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff || |
1364 | | /* for shiftedDataLength */ |
1365 | 0 | (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH |
1366 | 0 | ) { |
1367 | 0 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
1368 | 0 | return; |
1369 | 0 | } |
1370 | | |
1371 | | /* calculate the total serialized length */ |
1372 | 0 | length=sizeof(UTrie2Header)+allIndexesLength*2; |
1373 | 0 | if(valueBits==UTRIE2_16_VALUE_BITS) { |
1374 | 0 | length+=newTrie->dataLength*2; |
1375 | 0 | } else { |
1376 | 0 | length+=newTrie->dataLength*4; |
1377 | 0 | } |
1378 | |
|
1379 | 0 | trie->memory=uprv_malloc(length); |
1380 | 0 | if(trie->memory==NULL) { |
1381 | 0 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
1382 | 0 | return; |
1383 | 0 | } |
1384 | 0 | trie->length=length; |
1385 | 0 | trie->isMemoryOwned=TRUE; |
1386 | |
|
1387 | 0 | trie->indexLength=allIndexesLength; |
1388 | 0 | trie->dataLength=newTrie->dataLength; |
1389 | 0 | if(highStart<=0x10000) { |
1390 | 0 | trie->index2NullOffset=0xffff; |
1391 | 0 | } else { |
1392 | 0 | trie->index2NullOffset=static_cast<uint16_t>(UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset); |
1393 | 0 | } |
1394 | 0 | trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset); |
1395 | 0 | trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY; |
1396 | | |
1397 | | /* set the header fields */ |
1398 | 0 | header=(UTrie2Header *)trie->memory; |
1399 | |
|
1400 | 0 | header->signature=UTRIE2_SIG; /* "Tri2" */ |
1401 | 0 | header->options=(uint16_t)valueBits; |
1402 | |
|
1403 | 0 | header->indexLength=(uint16_t)trie->indexLength; |
1404 | 0 | header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT); |
1405 | 0 | header->index2NullOffset=trie->index2NullOffset; |
1406 | 0 | header->dataNullOffset=trie->dataNullOffset; |
1407 | 0 | header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1); |
1408 | | |
1409 | | /* fill the index and data arrays */ |
1410 | 0 | dest16=(uint16_t *)(header+1); |
1411 | 0 | trie->index=dest16; |
1412 | | |
1413 | | /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */ |
1414 | 0 | p=(uint32_t *)newTrie->index2; |
1415 | 0 | for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) { |
1416 | 0 | *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT); |
1417 | 0 | } |
1418 | | |
1419 | | /* write UTF-8 2-byte index-2 values, not right-shifted */ |
1420 | 0 | for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */ |
1421 | 0 | *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET); |
1422 | 0 | } |
1423 | 0 | for(; i<(0xe0-0xc0); ++i) { /* C2..DF */ |
1424 | 0 | *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]); |
1425 | 0 | } |
1426 | |
|
1427 | 0 | if(highStart>0x10000) { |
1428 | 0 | int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1; |
1429 | 0 | int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length; |
1430 | | |
1431 | | /* write 16-bit index-1 values for supplementary code points */ |
1432 | 0 | p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH; |
1433 | 0 | for(i=index1Length; i>0; --i) { |
1434 | 0 | *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++); |
1435 | 0 | } |
1436 | | |
1437 | | /* |
1438 | | * write the index-2 array values for supplementary code points, |
1439 | | * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove |
1440 | | */ |
1441 | 0 | p=(uint32_t *)newTrie->index2+index2Offset; |
1442 | 0 | for(i=newTrie->index2Length-index2Offset; i>0; --i) { |
1443 | 0 | *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT); |
1444 | 0 | } |
1445 | 0 | } |
1446 | | |
1447 | | /* write the 16/32-bit data array */ |
1448 | 0 | switch(valueBits) { |
1449 | 0 | case UTRIE2_16_VALUE_BITS: |
1450 | | /* write 16-bit data values */ |
1451 | 0 | trie->data16=dest16; |
1452 | 0 | trie->data32=NULL; |
1453 | 0 | p=newTrie->data; |
1454 | 0 | for(i=newTrie->dataLength; i>0; --i) { |
1455 | 0 | *dest16++=(uint16_t)*p++; |
1456 | 0 | } |
1457 | 0 | break; |
1458 | 0 | case UTRIE2_32_VALUE_BITS: |
1459 | | /* write 32-bit data values */ |
1460 | 0 | trie->data16=NULL; |
1461 | 0 | trie->data32=(uint32_t *)dest16; |
1462 | 0 | uprv_memcpy(dest16, newTrie->data, (size_t)newTrie->dataLength*4); |
1463 | 0 | break; |
1464 | 0 | default: |
1465 | 0 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
1466 | 0 | return; |
1467 | 0 | } |
1468 | | |
1469 | | #ifdef UTRIE2_DEBUG |
1470 | | utrie2_printLengths(trie, ""); |
1471 | | #endif |
1472 | | |
1473 | | #ifdef UCPTRIE_DEBUG |
1474 | | umutablecptrie_setName(newTrie->t3, trie->name); |
1475 | | ucptrie_close( |
1476 | | umutablecptrie_buildImmutable( |
1477 | | newTrie->t3, UCPTRIE_TYPE_FAST, (UCPTrieValueWidth)valueBits, pErrorCode)); |
1478 | | #endif |
1479 | | /* Delete the UNewTrie2. */ |
1480 | 0 | uprv_free(newTrie->data); |
1481 | 0 | uprv_free(newTrie); |
1482 | 0 | trie->newTrie=NULL; |
1483 | 0 | } |