/src/icu/source/common/utrie2_builder.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | ******************************************************************************  | 
5  |  | *  | 
6  |  | *   Copyright (C) 2001-2014, International Business Machines  | 
7  |  | *   Corporation and others.  All Rights Reserved.  | 
8  |  | *  | 
9  |  | ******************************************************************************  | 
10  |  | *   file name:  utrie2_builder.cpp  | 
11  |  | *   encoding:   UTF-8  | 
12  |  | *   tab size:   8 (not used)  | 
13  |  | *   indentation:4  | 
14  |  | *  | 
15  |  | *   created on: 2008sep26 (split off from utrie2.c)  | 
16  |  | *   created by: Markus W. Scherer  | 
17  |  | *  | 
18  |  | *   This is a common implementation of a Unicode trie.  | 
19  |  | *   It is a kind of compressed, serializable table of 16- or 32-bit values associated with  | 
20  |  | *   Unicode code points (0..0x10ffff).  | 
21  |  | *   This is the second common version of a Unicode trie (hence the name UTrie2).  | 
22  |  | *   See utrie2.h for a comparison.  | 
23  |  | *  | 
24  |  | *   This file contains only the builder code.  | 
25  |  | *   See utrie2.c for the runtime and enumeration code.  | 
26  |  | */  | 
27  |  | // #define UTRIE2_DEBUG  | 
28  |  | #ifdef UTRIE2_DEBUG  | 
29  |  | #   include <stdio.h>  | 
30  |  | #endif  | 
31  |  | // #define UCPTRIE_DEBUG  | 
32  |  |  | 
33  |  | #include "unicode/utypes.h"  | 
34  |  | #ifdef UCPTRIE_DEBUG  | 
35  |  | #include "unicode/ucptrie.h"  | 
36  |  | #include "unicode/umutablecptrie.h"  | 
37  |  | #include "ucptrie_impl.h"  | 
38  |  | #endif  | 
39  |  | #include "cmemory.h"  | 
40  |  | #include "utrie2.h"  | 
41  |  | #include "utrie2_impl.h"  | 
42  |  |  | 
43  |  | #include "utrie.h"  // for utrie2_fromUTrie()  | 
44  |  |  | 
45  |  | /* Implementation notes ----------------------------------------------------- */  | 
46  |  |  | 
47  |  | /*  | 
48  |  |  * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values  | 
49  |  |  * have been chosen to minimize trie sizes overall.  | 
50  |  |  * Most of the code is flexible enough to work with a range of values,  | 
51  |  |  * within certain limits.  | 
52  |  |  *  | 
53  |  |  * Exception: Support for separate values for lead surrogate code _units_  | 
54  |  |  * vs. code _points_ was added after the constants were fixed,  | 
55  |  |  * and has not been tested nor particularly designed for different constant values.  | 
56  |  |  * (Especially the utrie2_enum() code that jumps to the special LSCP index-2  | 
57  |  |  * part and back.)  | 
58  |  |  *  | 
59  |  |  * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data  | 
60  |  |  * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH  | 
61  |  |  * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction  | 
62  |  |  * remapping) stops working.  | 
63  |  |  *  | 
64  |  |  * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate()  | 
65  |  |  * assumes that a single index-2 block is used for 0x400 code points  | 
66  |  |  * corresponding to one lead surrogate.  | 
67  |  |  *  | 
68  |  |  * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains  | 
69  |  |  * more than one Unicode plane, and the split of the index-2 table into a BMP  | 
70  |  |  * part and a supplementary part, with a gap in between, would not work.  | 
71  |  |  *  | 
72  |  |  * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because  | 
73  |  |  * there is data with more than 64k distinct values,  | 
74  |  |  * for example for Unihan collation with a separate collation weight per  | 
75  |  |  * Han character.  | 
76  |  |  */  | 
77  |  |  | 
78  |  | /* Building a trie ----------------------------------------------------------*/  | 
79  |  |  | 
80  |  | enum { | 
81  |  |     /** The null index-2 block, following the gap in the index-2 table. */  | 
82  |  |     UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH,  | 
83  |  |  | 
84  |  |     /** The start of allocated index-2 blocks. */  | 
85  |  |     UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH,  | 
86  |  |  | 
87  |  |     /**  | 
88  |  |      * The null data block.  | 
89  |  |      * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,  | 
90  |  |      * to work with 6-bit trail bytes from 2-byte UTF-8.  | 
91  |  |      */  | 
92  |  |     UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET,  | 
93  |  |  | 
94  |  |     /** The start of allocated data blocks. */  | 
95  |  |     UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40,  | 
96  |  |  | 
97  |  |     /**  | 
98  |  |      * The start of data blocks for U+0800 and above.  | 
99  |  |      * Below, compaction uses a block length of 64 for 2-byte UTF-8.  | 
100  |  |      * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.  | 
101  |  |      * Data values for 0x780 code points beyond ASCII.  | 
102  |  |      */  | 
103  |  |     UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780  | 
104  |  | };  | 
105  |  |  | 
106  |  | /* Start with allocation of 16k data entries. */  | 
107  | 0  | #define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)  | 
108  |  |  | 
109  |  | /* Grow about 8x each time. */  | 
110  | 0  | #define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)  | 
111  |  |  | 
112  |  | static int32_t  | 
113  |  | allocIndex2Block(UNewTrie2 *trie);  | 
114  |  |  | 
115  |  | U_CAPI UTrie2 * U_EXPORT2  | 
116  | 0  | utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) { | 
117  | 0  |     UTrie2 *trie;  | 
118  | 0  |     UNewTrie2 *newTrie;  | 
119  | 0  |     uint32_t *data;  | 
120  | 0  |     int32_t i, j;  | 
121  |  | 
  | 
122  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
123  | 0  |         return NULL;  | 
124  | 0  |     }  | 
125  |  |  | 
126  | 0  |     trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));  | 
127  | 0  |     newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));  | 
128  | 0  |     data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4);  | 
129  | 0  |     if(trie==NULL || newTrie==NULL || data==NULL) { | 
130  | 0  |         uprv_free(trie);  | 
131  | 0  |         uprv_free(newTrie);  | 
132  | 0  |         uprv_free(data);  | 
133  | 0  |         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;  | 
134  | 0  |         return 0;  | 
135  | 0  |     }  | 
136  |  |  | 
137  | 0  |     uprv_memset(trie, 0, sizeof(UTrie2));  | 
138  | 0  |     trie->initialValue=initialValue;  | 
139  | 0  |     trie->errorValue=errorValue;  | 
140  | 0  |     trie->highStart=0x110000;  | 
141  | 0  |     trie->newTrie=newTrie;  | 
142  |  | #ifdef UTRIE2_DEBUG  | 
143  |  |     trie->name="open";  | 
144  |  | #endif  | 
145  |  | 
  | 
146  | 0  |     newTrie->data=data;  | 
147  |  | #ifdef UCPTRIE_DEBUG  | 
148  |  |     newTrie->t3=umutablecptrie_open(initialValue, errorValue, pErrorCode);  | 
149  |  | #endif  | 
150  | 0  |     newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH;  | 
151  | 0  |     newTrie->initialValue=initialValue;  | 
152  | 0  |     newTrie->errorValue=errorValue;  | 
153  | 0  |     newTrie->highStart=0x110000;  | 
154  | 0  |     newTrie->firstFreeBlock=0;  /* no free block in the list */  | 
155  | 0  |     newTrie->isCompacted=FALSE;  | 
156  |  |  | 
157  |  |     /*  | 
158  |  |      * preallocate and reset  | 
159  |  |      * - ASCII  | 
160  |  |      * - the bad-UTF-8-data block  | 
161  |  |      * - the null data block  | 
162  |  |      */  | 
163  | 0  |     for(i=0; i<0x80; ++i) { | 
164  | 0  |         newTrie->data[i]=initialValue;  | 
165  | 0  |     }  | 
166  | 0  |     for(; i<0xc0; ++i) { | 
167  | 0  |         newTrie->data[i]=errorValue;  | 
168  | 0  |     }  | 
169  | 0  |     for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) { | 
170  | 0  |         newTrie->data[i]=initialValue;  | 
171  | 0  |     }  | 
172  | 0  |     newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET;  | 
173  | 0  |     newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET;  | 
174  |  |  | 
175  |  |     /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */  | 
176  | 0  |     for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { | 
177  | 0  |         newTrie->index2[i]=j;  | 
178  | 0  |         newTrie->map[i]=1;  | 
179  | 0  |     }  | 
180  |  |     /* reference counts for the bad-UTF-8-data block */  | 
181  | 0  |     for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { | 
182  | 0  |         newTrie->map[i]=0;  | 
183  | 0  |     }  | 
184  |  |     /*  | 
185  |  |      * Reference counts for the null data block: all blocks except for the ASCII blocks.  | 
186  |  |      * Plus 1 so that we don't drop this block during compaction.  | 
187  |  |      * Plus as many as needed for lead surrogate code points.  | 
188  |  |      */  | 
189  |  |     /* i==newTrie->dataNullOffset */  | 
190  | 0  |     newTrie->map[i++]=  | 
191  | 0  |         (0x110000>>UTRIE2_SHIFT_2)-  | 
192  | 0  |         (0x80>>UTRIE2_SHIFT_2)+  | 
193  | 0  |         1+  | 
194  | 0  |         UTRIE2_LSCP_INDEX_2_LENGTH;  | 
195  | 0  |     j+=UTRIE2_DATA_BLOCK_LENGTH;  | 
196  | 0  |     for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) { | 
197  | 0  |         newTrie->map[i]=0;  | 
198  | 0  |     }  | 
199  |  |  | 
200  |  |     /*  | 
201  |  |      * set the remaining indexes in the BMP index-2 block  | 
202  |  |      * to the null data block  | 
203  |  |      */  | 
204  | 0  |     for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) { | 
205  | 0  |         newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;  | 
206  | 0  |     }  | 
207  |  |  | 
208  |  |     /*  | 
209  |  |      * Fill the index gap with impossible values so that compaction  | 
210  |  |      * does not overlap other index-2 blocks with the gap.  | 
211  |  |      */  | 
212  | 0  |     for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) { | 
213  | 0  |         newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;  | 
214  | 0  |     }  | 
215  |  |  | 
216  |  |     /* set the indexes in the null index-2 block */  | 
217  | 0  |     for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) { | 
218  | 0  |         newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;  | 
219  | 0  |     }  | 
220  | 0  |     newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;  | 
221  | 0  |     newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;  | 
222  |  |  | 
223  |  |     /* set the index-1 indexes for the linear index-2 block */  | 
224  | 0  |     for(i=0, j=0;  | 
225  | 0  |         i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;  | 
226  | 0  |         ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH  | 
227  | 0  |     ) { | 
228  | 0  |         newTrie->index1[i]=j;  | 
229  | 0  |     }  | 
230  |  |  | 
231  |  |     /* set the remaining index-1 indexes to the null index-2 block */  | 
232  | 0  |     for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { | 
233  | 0  |         newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;  | 
234  | 0  |     }  | 
235  |  |  | 
236  |  |     /*  | 
237  |  |      * Preallocate and reset data for U+0080..U+07ff,  | 
238  |  |      * for 2-byte UTF-8 which will be compacted in 64-blocks  | 
239  |  |      * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.  | 
240  |  |      */  | 
241  | 0  |     for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) { | 
242  | 0  |         utrie2_set32(trie, i, initialValue, pErrorCode);  | 
243  | 0  |     }  | 
244  |  | 
  | 
245  | 0  |     return trie;  | 
246  | 0  | }  | 
247  |  |  | 
248  |  | static UNewTrie2 *  | 
249  | 0  | cloneBuilder(const UNewTrie2 *other) { | 
250  | 0  |     UNewTrie2 *trie;  | 
251  |  | 
  | 
252  | 0  |     trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));  | 
253  | 0  |     if(trie==NULL) { | 
254  | 0  |         return NULL;  | 
255  | 0  |     }  | 
256  |  |  | 
257  | 0  |     trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4);  | 
258  | 0  |     if(trie->data==NULL) { | 
259  | 0  |         uprv_free(trie);  | 
260  | 0  |         return NULL;  | 
261  | 0  |     }  | 
262  |  | #ifdef UCPTRIE_DEBUG  | 
263  |  |     if(other->t3==nullptr) { | 
264  |  |         trie->t3=nullptr;  | 
265  |  |     } else { | 
266  |  |         UErrorCode errorCode=U_ZERO_ERROR;  | 
267  |  |         trie->t3=umutablecptrie_clone(other->t3, &errorCode);  | 
268  |  |     }  | 
269  |  | #endif  | 
270  | 0  |     trie->dataCapacity=other->dataCapacity;  | 
271  |  |  | 
272  |  |     /* clone data */  | 
273  | 0  |     uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1));  | 
274  | 0  |     uprv_memcpy(trie->index2, other->index2, (size_t)other->index2Length*4);  | 
275  | 0  |     trie->index2NullOffset=other->index2NullOffset;  | 
276  | 0  |     trie->index2Length=other->index2Length;  | 
277  |  | 
  | 
278  | 0  |     uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4);  | 
279  | 0  |     trie->dataNullOffset=other->dataNullOffset;  | 
280  | 0  |     trie->dataLength=other->dataLength;  | 
281  |  |  | 
282  |  |     /* reference counters */  | 
283  | 0  |     if(other->isCompacted) { | 
284  | 0  |         trie->firstFreeBlock=0;  | 
285  | 0  |     } else { | 
286  | 0  |         uprv_memcpy(trie->map, other->map, ((size_t)other->dataLength>>UTRIE2_SHIFT_2)*4);  | 
287  | 0  |         trie->firstFreeBlock=other->firstFreeBlock;  | 
288  | 0  |     }  | 
289  |  | 
  | 
290  | 0  |     trie->initialValue=other->initialValue;  | 
291  | 0  |     trie->errorValue=other->errorValue;  | 
292  | 0  |     trie->highStart=other->highStart;  | 
293  | 0  |     trie->isCompacted=other->isCompacted;  | 
294  |  | 
  | 
295  | 0  |     return trie;  | 
296  | 0  | }  | 
297  |  |  | 
298  |  | U_CAPI UTrie2 * U_EXPORT2  | 
299  | 0  | utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) { | 
300  | 0  |     UTrie2 *trie;  | 
301  |  | 
  | 
302  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
303  | 0  |         return NULL;  | 
304  | 0  |     }  | 
305  | 0  |     if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { | 
306  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
307  | 0  |         return NULL;  | 
308  | 0  |     }  | 
309  |  |  | 
310  | 0  |     trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));  | 
311  | 0  |     if(trie==NULL) { | 
312  | 0  |         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;  | 
313  | 0  |         return NULL;  | 
314  | 0  |     }  | 
315  | 0  |     uprv_memcpy(trie, other, sizeof(UTrie2));  | 
316  |  | 
  | 
317  | 0  |     if(other->memory!=NULL) { | 
318  | 0  |         trie->memory=uprv_malloc(other->length);  | 
319  | 0  |         if(trie->memory!=NULL) { | 
320  | 0  |             trie->isMemoryOwned=TRUE;  | 
321  | 0  |             uprv_memcpy(trie->memory, other->memory, other->length);  | 
322  |  |  | 
323  |  |             /* make the clone's pointers point to its own memory */  | 
324  | 0  |             trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory);  | 
325  | 0  |             if(other->data16!=NULL) { | 
326  | 0  |                 trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory);  | 
327  | 0  |             }  | 
328  | 0  |             if(other->data32!=NULL) { | 
329  | 0  |                 trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory);  | 
330  | 0  |             }  | 
331  | 0  |         }  | 
332  | 0  |     } else /* other->newTrie!=NULL */ { | 
333  | 0  |         trie->newTrie=cloneBuilder(other->newTrie);  | 
334  | 0  |     }  | 
335  |  | 
  | 
336  | 0  |     if(trie->memory==NULL && trie->newTrie==NULL) { | 
337  | 0  |         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;  | 
338  | 0  |         uprv_free(trie);  | 
339  | 0  |         trie=NULL;  | 
340  | 0  |     }  | 
341  | 0  |     return trie;  | 
342  | 0  | }  | 
343  |  |  | 
344  |  | typedef struct NewTrieAndStatus { | 
345  |  |     UTrie2 *trie;  | 
346  |  |     UErrorCode errorCode;  | 
347  |  |     UBool exclusiveLimit;  /* rather than inclusive range end */  | 
348  |  | } NewTrieAndStatus;  | 
349  |  |  | 
350  |  | static UBool U_CALLCONV  | 
351  | 0  | copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) { | 
352  | 0  |     NewTrieAndStatus *nt=(NewTrieAndStatus *)context;  | 
353  | 0  |     if(value!=nt->trie->initialValue) { | 
354  | 0  |         if(nt->exclusiveLimit) { | 
355  | 0  |             --end;  | 
356  | 0  |         }  | 
357  | 0  |         if(start==end) { | 
358  | 0  |             utrie2_set32(nt->trie, start, value, &nt->errorCode);  | 
359  | 0  |         } else { | 
360  | 0  |             utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode);  | 
361  | 0  |         }  | 
362  | 0  |         return U_SUCCESS(nt->errorCode);  | 
363  | 0  |     } else { | 
364  | 0  |         return TRUE;  | 
365  | 0  |     }  | 
366  | 0  | }  | 
367  |  |  | 
368  |  | #ifdef UTRIE2_DEBUG  | 
369  |  | static long countInitial(const UTrie2 *trie) { | 
370  |  |     uint32_t initialValue=trie->initialValue;  | 
371  |  |     int32_t length=trie->dataLength;  | 
372  |  |     long count=0;  | 
373  |  |     if(trie->data16!=nullptr) { | 
374  |  |         for(int32_t i=0; i<length; ++i) { | 
375  |  |             if(trie->data16[i]==initialValue) { ++count; } | 
376  |  |         }  | 
377  |  |     } else { | 
378  |  |         for(int32_t i=0; i<length; ++i) { | 
379  |  |             if(trie->data32[i]==initialValue) { ++count; } | 
380  |  |         }  | 
381  |  |     }  | 
382  |  |     return count;  | 
383  |  | }  | 
384  |  |  | 
385  |  | static void  | 
386  |  | utrie_printLengths(const UTrie *trie) { | 
387  |  |     long indexLength=trie->indexLength;  | 
388  |  |     long dataLength=(long)trie->dataLength;  | 
389  |  |     long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);  | 
390  |  |     printf("**UTrieLengths** index:%6ld  data:%6ld  serialized:%6ld\n", | 
391  |  |            indexLength, dataLength, totalLength);  | 
392  |  | }  | 
393  |  |  | 
394  |  | static void  | 
395  |  | utrie2_printLengths(const UTrie2 *trie, const char *which) { | 
396  |  |     long indexLength=trie->indexLength;  | 
397  |  |     long dataLength=(long)trie->dataLength;  | 
398  |  |     long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);  | 
399  |  |     printf("**UTrie2Lengths(%s %s)** index:%6ld  data:%6ld  countInitial:%6ld  serialized:%6ld\n", | 
400  |  |            which, trie->name, indexLength, dataLength, countInitial(trie), totalLength);  | 
401  |  | }  | 
402  |  | #endif  | 
403  |  |  | 
404  |  | U_CAPI UTrie2 * U_EXPORT2  | 
405  | 0  | utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) { | 
406  | 0  |     NewTrieAndStatus context;  | 
407  | 0  |     UChar lead;  | 
408  |  | 
  | 
409  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
410  | 0  |         return NULL;  | 
411  | 0  |     }  | 
412  | 0  |     if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) { | 
413  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
414  | 0  |         return NULL;  | 
415  | 0  |     }  | 
416  | 0  |     if(other->newTrie!=NULL && !other->newTrie->isCompacted) { | 
417  | 0  |         return utrie2_clone(other, pErrorCode);  /* clone an unfrozen trie */  | 
418  | 0  |     }  | 
419  |  |  | 
420  |  |     /* Clone the frozen trie by enumerating it and building a new one. */  | 
421  | 0  |     context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);  | 
422  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
423  | 0  |         return NULL;  | 
424  | 0  |     }  | 
425  | 0  |     context.exclusiveLimit=FALSE;  | 
426  | 0  |     context.errorCode=*pErrorCode;  | 
427  | 0  |     utrie2_enum(other, NULL, copyEnumRange, &context);  | 
428  | 0  |     *pErrorCode=context.errorCode;  | 
429  | 0  |     for(lead=0xd800; lead<0xdc00; ++lead) { | 
430  | 0  |         uint32_t value;  | 
431  | 0  |         if(other->data32==NULL) { | 
432  | 0  |             value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);  | 
433  | 0  |         } else { | 
434  | 0  |             value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);  | 
435  | 0  |         }  | 
436  | 0  |         if(value!=other->initialValue) { | 
437  | 0  |             utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);  | 
438  | 0  |         }  | 
439  | 0  |     }  | 
440  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
441  | 0  |         utrie2_close(context.trie);  | 
442  | 0  |         context.trie=NULL;  | 
443  | 0  |     }  | 
444  | 0  |     return context.trie;  | 
445  | 0  | }  | 
446  |  |  | 
447  |  | /* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */  | 
448  |  | U_CAPI UTrie2 * U_EXPORT2  | 
449  | 0  | utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) { | 
450  | 0  |     NewTrieAndStatus context;  | 
451  | 0  |     UChar lead;  | 
452  |  | 
  | 
453  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
454  | 0  |         return NULL;  | 
455  | 0  |     }  | 
456  | 0  |     if(trie1==NULL) { | 
457  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
458  | 0  |         return NULL;  | 
459  | 0  |     }  | 
460  | 0  |     context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);  | 
461  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
462  | 0  |         return NULL;  | 
463  | 0  |     }  | 
464  | 0  |     context.exclusiveLimit=TRUE;  | 
465  | 0  |     context.errorCode=*pErrorCode;  | 
466  | 0  |     utrie_enum(trie1, NULL, copyEnumRange, &context);  | 
467  | 0  |     *pErrorCode=context.errorCode;  | 
468  | 0  |     for(lead=0xd800; lead<0xdc00; ++lead) { | 
469  | 0  |         uint32_t value;  | 
470  | 0  |         if(trie1->data32==NULL) { | 
471  | 0  |             value=UTRIE_GET16_FROM_LEAD(trie1, lead);  | 
472  | 0  |         } else { | 
473  | 0  |             value=UTRIE_GET32_FROM_LEAD(trie1, lead);  | 
474  | 0  |         }  | 
475  | 0  |         if(value!=trie1->initialValue) { | 
476  | 0  |             utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);  | 
477  | 0  |         }  | 
478  | 0  |     }  | 
479  | 0  |     if(U_SUCCESS(*pErrorCode)) { | 
480  | 0  |         utrie2_freeze(context.trie,  | 
481  | 0  |                       trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,  | 
482  | 0  |                       pErrorCode);  | 
483  | 0  |     }  | 
484  |  | #ifdef UTRIE2_DEBUG  | 
485  |  |     if(U_SUCCESS(*pErrorCode)) { | 
486  |  |         utrie_printLengths(trie1);  | 
487  |  |         utrie2_printLengths(context.trie, "fromUTrie");  | 
488  |  |     }  | 
489  |  | #endif  | 
490  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
491  | 0  |         utrie2_close(context.trie);  | 
492  | 0  |         context.trie=NULL;  | 
493  | 0  |     }  | 
494  | 0  |     return context.trie;  | 
495  | 0  | }  | 
496  |  |  | 
497  |  | static inline UBool  | 
498  | 0  | isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { | 
499  | 0  |     int32_t i2, block;  | 
500  |  | 
  | 
501  | 0  |     if(U_IS_LEAD(c) && forLSCP) { | 
502  | 0  |         i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+  | 
503  | 0  |             (c>>UTRIE2_SHIFT_2);  | 
504  | 0  |     } else { | 
505  | 0  |         i2=trie->index1[c>>UTRIE2_SHIFT_1]+  | 
506  | 0  |             ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);  | 
507  | 0  |     }  | 
508  | 0  |     block=trie->index2[i2];  | 
509  | 0  |     return (UBool)(block==trie->dataNullOffset);  | 
510  | 0  | }  | 
511  |  |  | 
512  |  | static int32_t  | 
513  | 0  | allocIndex2Block(UNewTrie2 *trie) { | 
514  | 0  |     int32_t newBlock, newTop;  | 
515  |  | 
  | 
516  | 0  |     newBlock=trie->index2Length;  | 
517  | 0  |     newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH;  | 
518  | 0  |     if(newTop>UPRV_LENGTHOF(trie->index2)) { | 
519  |  |         /*  | 
520  |  |          * Should never occur.  | 
521  |  |          * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,  | 
522  |  |          * or the code writes more values than should be possible.  | 
523  |  |          */  | 
524  | 0  |         return -1;  | 
525  | 0  |     }  | 
526  | 0  |     trie->index2Length=newTop;  | 
527  | 0  |     uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4);  | 
528  | 0  |     return newBlock;  | 
529  | 0  | }  | 
530  |  |  | 
531  |  | static int32_t  | 
532  | 0  | getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { | 
533  | 0  |     int32_t i1, i2;  | 
534  |  | 
  | 
535  | 0  |     if(U_IS_LEAD(c) && forLSCP) { | 
536  | 0  |         return UTRIE2_LSCP_INDEX_2_OFFSET;  | 
537  | 0  |     }  | 
538  |  |  | 
539  | 0  |     i1=c>>UTRIE2_SHIFT_1;  | 
540  | 0  |     i2=trie->index1[i1];  | 
541  | 0  |     if(i2==trie->index2NullOffset) { | 
542  | 0  |         i2=allocIndex2Block(trie);  | 
543  | 0  |         if(i2<0) { | 
544  | 0  |             return -1;  /* program error */  | 
545  | 0  |         }  | 
546  | 0  |         trie->index1[i1]=i2;  | 
547  | 0  |     }  | 
548  | 0  |     return i2;  | 
549  | 0  | }  | 
550  |  |  | 
551  |  | static int32_t  | 
552  | 0  | allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) { | 
553  | 0  |     int32_t newBlock, newTop;  | 
554  |  | 
  | 
555  | 0  |     if(trie->firstFreeBlock!=0) { | 
556  |  |         /* get the first free block */  | 
557  | 0  |         newBlock=trie->firstFreeBlock;  | 
558  | 0  |         trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2];  | 
559  | 0  |     } else { | 
560  |  |         /* get a new block from the high end */  | 
561  | 0  |         newBlock=trie->dataLength;  | 
562  | 0  |         newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH;  | 
563  | 0  |         if(newTop>trie->dataCapacity) { | 
564  |  |             /* out of memory in the data array */  | 
565  | 0  |             int32_t capacity;  | 
566  | 0  |             uint32_t *data;  | 
567  |  | 
  | 
568  | 0  |             if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) { | 
569  | 0  |                 capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;  | 
570  | 0  |             } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) { | 
571  | 0  |                 capacity=UNEWTRIE2_MAX_DATA_LENGTH;  | 
572  | 0  |             } else { | 
573  |  |                 /*  | 
574  |  |                  * Should never occur.  | 
575  |  |                  * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,  | 
576  |  |                  * or the code writes more values than should be possible.  | 
577  |  |                  */  | 
578  | 0  |                 return -1;  | 
579  | 0  |             }  | 
580  | 0  |             data=(uint32_t *)uprv_malloc(capacity*4);  | 
581  | 0  |             if(data==NULL) { | 
582  | 0  |                 return -1;  | 
583  | 0  |             }  | 
584  | 0  |             uprv_memcpy(data, trie->data, (size_t)trie->dataLength*4);  | 
585  | 0  |             uprv_free(trie->data);  | 
586  | 0  |             trie->data=data;  | 
587  | 0  |             trie->dataCapacity=capacity;  | 
588  | 0  |         }  | 
589  | 0  |         trie->dataLength=newTop;  | 
590  | 0  |     }  | 
591  | 0  |     uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4);  | 
592  | 0  |     trie->map[newBlock>>UTRIE2_SHIFT_2]=0;  | 
593  | 0  |     return newBlock;  | 
594  | 0  | }  | 
595  |  |  | 
596  |  | /* call when the block's reference counter reaches 0 */  | 
597  |  | static void  | 
598  | 0  | releaseDataBlock(UNewTrie2 *trie, int32_t block) { | 
599  |  |     /* put this block at the front of the free-block chain */  | 
600  | 0  |     trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock;  | 
601  | 0  |     trie->firstFreeBlock=block;  | 
602  | 0  | }  | 
603  |  |  | 
604  |  | static inline UBool  | 
605  | 0  | isWritableBlock(UNewTrie2 *trie, int32_t block) { | 
606  | 0  |     return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]);  | 
607  | 0  | }  | 
608  |  |  | 
609  |  | static inline void  | 
610  | 0  | setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) { | 
611  | 0  |     int32_t oldBlock;  | 
612  | 0  |     ++trie->map[block>>UTRIE2_SHIFT_2];  /* increment first, in case block==oldBlock! */  | 
613  | 0  |     oldBlock=trie->index2[i2];  | 
614  | 0  |     if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) { | 
615  | 0  |         releaseDataBlock(trie, oldBlock);  | 
616  | 0  |     }  | 
617  | 0  |     trie->index2[i2]=block;  | 
618  | 0  | }  | 
619  |  |  | 
620  |  | /**  | 
621  |  |  * No error checking for illegal arguments.  | 
622  |  |  *  | 
623  |  |  * @return -1 if no new data block available (out of memory in data array)  | 
624  |  |  * @internal  | 
625  |  |  */  | 
626  |  | static int32_t  | 
627  | 0  | getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) { | 
628  | 0  |     int32_t i2, oldBlock, newBlock;  | 
629  |  | 
  | 
630  | 0  |     i2=getIndex2Block(trie, c, forLSCP);  | 
631  | 0  |     if(i2<0) { | 
632  | 0  |         return -1;  /* program error */  | 
633  | 0  |     }  | 
634  |  |  | 
635  | 0  |     i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;  | 
636  | 0  |     oldBlock=trie->index2[i2];  | 
637  | 0  |     if(isWritableBlock(trie, oldBlock)) { | 
638  | 0  |         return oldBlock;  | 
639  | 0  |     }  | 
640  |  |  | 
641  |  |     /* allocate a new data block */  | 
642  | 0  |     newBlock=allocDataBlock(trie, oldBlock);  | 
643  | 0  |     if(newBlock<0) { | 
644  |  |         /* out of memory in the data array */  | 
645  | 0  |         return -1;  | 
646  | 0  |     }  | 
647  | 0  |     setIndex2Entry(trie, i2, newBlock);  | 
648  | 0  |     return newBlock;  | 
649  | 0  | }  | 
650  |  |  | 
651  |  | /**  | 
652  |  |  * @return TRUE if the value was successfully set  | 
653  |  |  */  | 
654  |  | static void  | 
655  |  | set32(UNewTrie2 *trie,  | 
656  |  |       UChar32 c, UBool forLSCP, uint32_t value,  | 
657  | 0  |       UErrorCode *pErrorCode) { | 
658  | 0  |     int32_t block;  | 
659  |  | 
  | 
660  | 0  |     if(trie==NULL || trie->isCompacted) { | 
661  | 0  |         *pErrorCode=U_NO_WRITE_PERMISSION;  | 
662  | 0  |         return;  | 
663  | 0  |     }  | 
664  |  | #ifdef UCPTRIE_DEBUG  | 
665  |  |     umutablecptrie_set(trie->t3, c, value, pErrorCode);  | 
666  |  | #endif  | 
667  |  |  | 
668  | 0  |     block=getDataBlock(trie, c, forLSCP);  | 
669  | 0  |     if(block<0) { | 
670  | 0  |         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;  | 
671  | 0  |         return;  | 
672  | 0  |     }  | 
673  |  |  | 
674  | 0  |     trie->data[block+(c&UTRIE2_DATA_MASK)]=value;  | 
675  | 0  | }  | 
676  |  |  | 
677  |  | U_CAPI void U_EXPORT2  | 
678  | 0  | utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) { | 
679  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
680  | 0  |         return;  | 
681  | 0  |     }  | 
682  | 0  |     if((uint32_t)c>0x10ffff) { | 
683  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
684  | 0  |         return;  | 
685  | 0  |     }  | 
686  | 0  |     set32(trie->newTrie, c, TRUE, value, pErrorCode);  | 
687  | 0  | }  | 
688  |  |  | 
689  |  | U_CAPI void U_EXPORT2  | 
690  |  | utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,  | 
691  |  |                                      UChar32 c, uint32_t value,  | 
692  | 0  |                                      UErrorCode *pErrorCode) { | 
693  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
694  | 0  |         return;  | 
695  | 0  |     }  | 
696  | 0  |     if(!U_IS_LEAD(c)) { | 
697  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
698  | 0  |         return;  | 
699  | 0  |     }  | 
700  | 0  |     set32(trie->newTrie, c, FALSE, value, pErrorCode);  | 
701  | 0  | }  | 
702  |  |  | 
703  |  | static void  | 
704  | 0  | writeBlock(uint32_t *block, uint32_t value) { | 
705  | 0  |     uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH;  | 
706  | 0  |     while(block<limit) { | 
707  | 0  |         *block++=value;  | 
708  | 0  |     }  | 
709  | 0  | }  | 
710  |  |  | 
711  |  | /**  | 
712  |  |  * initialValue is ignored if overwrite=TRUE  | 
713  |  |  * @internal  | 
714  |  |  */  | 
715  |  | static void  | 
716  |  | fillBlock(uint32_t *block, UChar32 start, UChar32 limit,  | 
717  | 0  |           uint32_t value, uint32_t initialValue, UBool overwrite) { | 
718  | 0  |     uint32_t *pLimit;  | 
719  |  | 
  | 
720  | 0  |     pLimit=block+limit;  | 
721  | 0  |     block+=start;  | 
722  | 0  |     if(overwrite) { | 
723  | 0  |         while(block<pLimit) { | 
724  | 0  |             *block++=value;  | 
725  | 0  |         }  | 
726  | 0  |     } else { | 
727  | 0  |         while(block<pLimit) { | 
728  | 0  |             if(*block==initialValue) { | 
729  | 0  |                 *block=value;  | 
730  | 0  |             }  | 
731  | 0  |             ++block;  | 
732  | 0  |         }  | 
733  | 0  |     }  | 
734  | 0  | }  | 
735  |  |  | 
736  |  | U_CAPI void U_EXPORT2  | 
737  |  | utrie2_setRange32(UTrie2 *trie,  | 
738  |  |                   UChar32 start, UChar32 end,  | 
739  |  |                   uint32_t value, UBool overwrite,  | 
740  | 0  |                   UErrorCode *pErrorCode) { | 
741  |  |     /*  | 
742  |  |      * repeat value in [start..end]  | 
743  |  |      * mark index values for repeat-data blocks by setting bit 31 of the index values  | 
744  |  |      * fill around existing values if any, if(overwrite)  | 
745  |  |      */  | 
746  | 0  |     UNewTrie2 *newTrie;  | 
747  | 0  |     int32_t block, rest, repeatBlock;  | 
748  | 0  |     UChar32 limit;  | 
749  |  | 
  | 
750  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
751  | 0  |         return;  | 
752  | 0  |     }  | 
753  | 0  |     if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) { | 
754  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
755  | 0  |         return;  | 
756  | 0  |     }  | 
757  | 0  |     newTrie=trie->newTrie;  | 
758  | 0  |     if(newTrie==NULL || newTrie->isCompacted) { | 
759  | 0  |         *pErrorCode=U_NO_WRITE_PERMISSION;  | 
760  | 0  |         return;  | 
761  | 0  |     }  | 
762  |  | #ifdef UCPTRIE_DEBUG  | 
763  |  |     umutablecptrie_setRange(newTrie->t3, start, end, value, pErrorCode);  | 
764  |  | #endif  | 
765  | 0  |     if(!overwrite && value==newTrie->initialValue) { | 
766  | 0  |         return; /* nothing to do */  | 
767  | 0  |     }  | 
768  |  |  | 
769  | 0  |     limit=end+1;  | 
770  | 0  |     if(start&UTRIE2_DATA_MASK) { | 
771  | 0  |         UChar32 nextStart;  | 
772  |  |  | 
773  |  |         /* set partial block at [start..following block boundary[ */  | 
774  | 0  |         block=getDataBlock(newTrie, start, TRUE);  | 
775  | 0  |         if(block<0) { | 
776  | 0  |             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;  | 
777  | 0  |             return;  | 
778  | 0  |         }  | 
779  |  |  | 
780  | 0  |         nextStart=(start+UTRIE2_DATA_MASK)&~UTRIE2_DATA_MASK;  | 
781  | 0  |         if(nextStart<=limit) { | 
782  | 0  |             fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,  | 
783  | 0  |                       value, newTrie->initialValue, overwrite);  | 
784  | 0  |             start=nextStart;  | 
785  | 0  |         } else { | 
786  | 0  |             fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,  | 
787  | 0  |                       value, newTrie->initialValue, overwrite);  | 
788  | 0  |             return;  | 
789  | 0  |         }  | 
790  | 0  |     }  | 
791  |  |  | 
792  |  |     /* number of positions in the last, partial block */  | 
793  | 0  |     rest=limit&UTRIE2_DATA_MASK;  | 
794  |  |  | 
795  |  |     /* round down limit to a block boundary */  | 
796  | 0  |     limit&=~UTRIE2_DATA_MASK;  | 
797  |  |  | 
798  |  |     /* iterate over all-value blocks */  | 
799  | 0  |     if(value==newTrie->initialValue) { | 
800  | 0  |         repeatBlock=newTrie->dataNullOffset;  | 
801  | 0  |     } else { | 
802  | 0  |         repeatBlock=-1;  | 
803  | 0  |     }  | 
804  |  | 
  | 
805  | 0  |     while(start<limit) { | 
806  | 0  |         int32_t i2;  | 
807  | 0  |         UBool setRepeatBlock=FALSE;  | 
808  |  | 
  | 
809  | 0  |         if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) { | 
810  | 0  |             start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */  | 
811  | 0  |             continue;  | 
812  | 0  |         }  | 
813  |  |  | 
814  |  |         /* get index value */  | 
815  | 0  |         i2=getIndex2Block(newTrie, start, TRUE);  | 
816  | 0  |         if(i2<0) { | 
817  | 0  |             *pErrorCode=U_INTERNAL_PROGRAM_ERROR;  | 
818  | 0  |             return;  | 
819  | 0  |         }  | 
820  | 0  |         i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;  | 
821  | 0  |         block=newTrie->index2[i2];  | 
822  | 0  |         if(isWritableBlock(newTrie, block)) { | 
823  |  |             /* already allocated */  | 
824  | 0  |             if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) { | 
825  |  |                 /*  | 
826  |  |                  * We overwrite all values, and it's not a  | 
827  |  |                  * protected (ASCII-linear or 2-byte UTF-8) block:  | 
828  |  |                  * replace with the repeatBlock.  | 
829  |  |                  */  | 
830  | 0  |                 setRepeatBlock=TRUE;  | 
831  | 0  |             } else { | 
832  |  |                 /* !overwrite, or protected block: just write the values into this block */  | 
833  | 0  |                 fillBlock(newTrie->data+block,  | 
834  | 0  |                           0, UTRIE2_DATA_BLOCK_LENGTH,  | 
835  | 0  |                           value, newTrie->initialValue, overwrite);  | 
836  | 0  |             }  | 
837  | 0  |         } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) { | 
838  |  |             /*  | 
839  |  |              * Set the repeatBlock instead of the null block or previous repeat block:  | 
840  |  |              *  | 
841  |  |              * If !isWritableBlock() then all entries in the block have the same value  | 
842  |  |              * because it's the null block or a range block (the repeatBlock from a previous  | 
843  |  |              * call to utrie2_setRange32()).  | 
844  |  |              * No other blocks are used multiple times before compacting.  | 
845  |  |              *  | 
846  |  |              * The null block is the only non-writable block with the initialValue because  | 
847  |  |              * of the repeatBlock initialization above. (If value==initialValue, then  | 
848  |  |              * the repeatBlock will be the null data block.)  | 
849  |  |              *  | 
850  |  |              * We set our repeatBlock if the desired value differs from the block's value,  | 
851  |  |              * and if we overwrite any data or if the data is all initial values  | 
852  |  |              * (which is the same as the block being the null block, see above).  | 
853  |  |              */  | 
854  | 0  |             setRepeatBlock=TRUE;  | 
855  | 0  |         }  | 
856  | 0  |         if(setRepeatBlock) { | 
857  | 0  |             if(repeatBlock>=0) { | 
858  | 0  |                 setIndex2Entry(newTrie, i2, repeatBlock);  | 
859  | 0  |             } else { | 
860  |  |                 /* create and set and fill the repeatBlock */  | 
861  | 0  |                 repeatBlock=getDataBlock(newTrie, start, TRUE);  | 
862  | 0  |                 if(repeatBlock<0) { | 
863  | 0  |                     *pErrorCode=U_MEMORY_ALLOCATION_ERROR;  | 
864  | 0  |                     return;  | 
865  | 0  |                 }  | 
866  | 0  |                 writeBlock(newTrie->data+repeatBlock, value);  | 
867  | 0  |             }  | 
868  | 0  |         }  | 
869  |  |  | 
870  | 0  |         start+=UTRIE2_DATA_BLOCK_LENGTH;  | 
871  | 0  |     }  | 
872  |  |  | 
873  | 0  |     if(rest>0) { | 
874  |  |         /* set partial block at [last block boundary..limit[ */  | 
875  | 0  |         block=getDataBlock(newTrie, start, TRUE);  | 
876  | 0  |         if(block<0) { | 
877  | 0  |             *pErrorCode=U_MEMORY_ALLOCATION_ERROR;  | 
878  | 0  |             return;  | 
879  | 0  |         }  | 
880  |  |  | 
881  | 0  |         fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite);  | 
882  | 0  |     }  | 
883  |  |  | 
884  | 0  |     return;  | 
885  | 0  | }  | 
886  |  |  | 
887  |  | /* compaction --------------------------------------------------------------- */  | 
888  |  |  | 
889  |  | static inline UBool  | 
890  | 0  | equal_int32(const int32_t *s, const int32_t *t, int32_t length) { | 
891  | 0  |     while(length>0 && *s==*t) { | 
892  | 0  |         ++s;  | 
893  | 0  |         ++t;  | 
894  | 0  |         --length;  | 
895  | 0  |     }  | 
896  | 0  |     return (UBool)(length==0);  | 
897  | 0  | }  | 
898  |  |  | 
899  |  | static inline UBool  | 
900  | 0  | equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) { | 
901  | 0  |     while(length>0 && *s==*t) { | 
902  | 0  |         ++s;  | 
903  | 0  |         ++t;  | 
904  | 0  |         --length;  | 
905  | 0  |     }  | 
906  | 0  |     return (UBool)(length==0);  | 
907  | 0  | }  | 
908  |  |  | 
909  |  | static int32_t  | 
910  | 0  | findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) { | 
911  | 0  |     int32_t block;  | 
912  |  |  | 
913  |  |     /* ensure that we do not even partially get past index2Length */  | 
914  | 0  |     index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH;  | 
915  |  | 
  | 
916  | 0  |     for(block=0; block<=index2Length; ++block) { | 
917  | 0  |         if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) { | 
918  | 0  |             return block;  | 
919  | 0  |         }  | 
920  | 0  |     }  | 
921  | 0  |     return -1;  | 
922  | 0  | }  | 
923  |  |  | 
924  |  | static int32_t  | 
925  | 0  | findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) { | 
926  | 0  |     int32_t block;  | 
927  |  |  | 
928  |  |     /* ensure that we do not even partially get past dataLength */  | 
929  | 0  |     dataLength-=blockLength;  | 
930  |  | 
  | 
931  | 0  |     for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) { | 
932  | 0  |         if(equal_uint32(data+block, data+otherBlock, blockLength)) { | 
933  | 0  |             return block;  | 
934  | 0  |         }  | 
935  | 0  |     }  | 
936  | 0  |     return -1;  | 
937  | 0  | }  | 
938  |  |  | 
939  |  | /*  | 
940  |  |  * Find the start of the last range in the trie by enumerating backward.  | 
941  |  |  * Indexes for supplementary code points higher than this will be omitted.  | 
942  |  |  */  | 
943  |  | static UChar32  | 
944  | 0  | findHighStart(UNewTrie2 *trie, uint32_t highValue) { | 
945  | 0  |     const uint32_t *data32;  | 
946  |  | 
  | 
947  | 0  |     uint32_t value, initialValue;  | 
948  | 0  |     UChar32 c, prev;  | 
949  | 0  |     int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;  | 
950  |  | 
  | 
951  | 0  |     data32=trie->data;  | 
952  | 0  |     initialValue=trie->initialValue;  | 
953  |  | 
  | 
954  | 0  |     index2NullOffset=trie->index2NullOffset;  | 
955  | 0  |     nullBlock=trie->dataNullOffset;  | 
956  |  |  | 
957  |  |     /* set variables for previous range */  | 
958  | 0  |     if(highValue==initialValue) { | 
959  | 0  |         prevI2Block=index2NullOffset;  | 
960  | 0  |         prevBlock=nullBlock;  | 
961  | 0  |     } else { | 
962  | 0  |         prevI2Block=-1;  | 
963  | 0  |         prevBlock=-1;  | 
964  | 0  |     }  | 
965  | 0  |     prev=0x110000;  | 
966  |  |  | 
967  |  |     /* enumerate index-2 blocks */  | 
968  | 0  |     i1=UNEWTRIE2_INDEX_1_LENGTH;  | 
969  | 0  |     c=prev;  | 
970  | 0  |     while(c>0) { | 
971  | 0  |         i2Block=trie->index1[--i1];  | 
972  | 0  |         if(i2Block==prevI2Block) { | 
973  |  |             /* the index-2 block is the same as the previous one, and filled with highValue */  | 
974  | 0  |             c-=UTRIE2_CP_PER_INDEX_1_ENTRY;  | 
975  | 0  |             continue;  | 
976  | 0  |         }  | 
977  | 0  |         prevI2Block=i2Block;  | 
978  | 0  |         if(i2Block==index2NullOffset) { | 
979  |  |             /* this is the null index-2 block */  | 
980  | 0  |             if(highValue!=initialValue) { | 
981  | 0  |                 return c;  | 
982  | 0  |             }  | 
983  | 0  |             c-=UTRIE2_CP_PER_INDEX_1_ENTRY;  | 
984  | 0  |         } else { | 
985  |  |             /* enumerate data blocks for one index-2 block */  | 
986  | 0  |             for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) { | 
987  | 0  |                 block=trie->index2[i2Block+ --i2];  | 
988  | 0  |                 if(block==prevBlock) { | 
989  |  |                     /* the block is the same as the previous one, and filled with highValue */  | 
990  | 0  |                     c-=UTRIE2_DATA_BLOCK_LENGTH;  | 
991  | 0  |                     continue;  | 
992  | 0  |                 }  | 
993  | 0  |                 prevBlock=block;  | 
994  | 0  |                 if(block==nullBlock) { | 
995  |  |                     /* this is the null data block */  | 
996  | 0  |                     if(highValue!=initialValue) { | 
997  | 0  |                         return c;  | 
998  | 0  |                     }  | 
999  | 0  |                     c-=UTRIE2_DATA_BLOCK_LENGTH;  | 
1000  | 0  |                 } else { | 
1001  | 0  |                     for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) { | 
1002  | 0  |                         value=data32[block+ --j];  | 
1003  | 0  |                         if(value!=highValue) { | 
1004  | 0  |                             return c;  | 
1005  | 0  |                         }  | 
1006  | 0  |                         --c;  | 
1007  | 0  |                     }  | 
1008  | 0  |                 }  | 
1009  | 0  |             }  | 
1010  | 0  |         }  | 
1011  | 0  |     }  | 
1012  |  |  | 
1013  |  |     /* deliver last range */  | 
1014  | 0  |     return 0;  | 
1015  | 0  | }  | 
1016  |  |  | 
1017  |  | /*  | 
1018  |  |  * Compact a build-time trie.  | 
1019  |  |  *  | 
1020  |  |  * The compaction  | 
1021  |  |  * - removes blocks that are identical with earlier ones  | 
1022  |  |  * - overlaps adjacent blocks as much as possible (if overlap==TRUE)  | 
1023  |  |  * - moves blocks in steps of the data granularity  | 
1024  |  |  * - moves and overlaps blocks that overlap with multiple values in the overlap region  | 
1025  |  |  *  | 
1026  |  |  * It does not  | 
1027  |  |  * - try to move and overlap blocks that are not already adjacent  | 
1028  |  |  */  | 
1029  |  | static void  | 
1030  | 0  | compactData(UNewTrie2 *trie) { | 
1031  |  | #ifdef UTRIE2_DEBUG  | 
1032  |  |     int32_t countSame=0, sumOverlaps=0;  | 
1033  |  | #endif  | 
1034  |  | 
  | 
1035  | 0  |     int32_t start, newStart, movedStart;  | 
1036  | 0  |     int32_t blockLength, overlap;  | 
1037  | 0  |     int32_t i, mapIndex, blockCount;  | 
1038  |  |  | 
1039  |  |     /* do not compact linear-ASCII data */  | 
1040  | 0  |     newStart=UTRIE2_DATA_START_OFFSET;  | 
1041  | 0  |     for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) { | 
1042  | 0  |         trie->map[i]=start;  | 
1043  | 0  |     }  | 
1044  |  |  | 
1045  |  |     /*  | 
1046  |  |      * Start with a block length of 64 for 2-byte UTF-8,  | 
1047  |  |      * then switch to UTRIE2_DATA_BLOCK_LENGTH.  | 
1048  |  |      */  | 
1049  | 0  |     blockLength=64;  | 
1050  | 0  |     blockCount=blockLength>>UTRIE2_SHIFT_2;  | 
1051  | 0  |     for(start=newStart; start<trie->dataLength;) { | 
1052  |  |         /*  | 
1053  |  |          * start: index of first entry of current block  | 
1054  |  |          * newStart: index where the current block is to be moved  | 
1055  |  |          *           (right after current end of already-compacted data)  | 
1056  |  |          */  | 
1057  | 0  |         if(start==UNEWTRIE2_DATA_0800_OFFSET) { | 
1058  | 0  |             blockLength=UTRIE2_DATA_BLOCK_LENGTH;  | 
1059  | 0  |             blockCount=1;  | 
1060  | 0  |         }  | 
1061  |  |  | 
1062  |  |         /* skip blocks that are not used */  | 
1063  | 0  |         if(trie->map[start>>UTRIE2_SHIFT_2]<=0) { | 
1064  |  |             /* advance start to the next block */  | 
1065  | 0  |             start+=blockLength;  | 
1066  |  |  | 
1067  |  |             /* leave newStart with the previous block! */  | 
1068  | 0  |             continue;  | 
1069  | 0  |         }  | 
1070  |  |  | 
1071  |  |         /* search for an identical block */  | 
1072  | 0  |         if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength))  | 
1073  | 0  |              >=0  | 
1074  | 0  |         ) { | 
1075  |  | #ifdef UTRIE2_DEBUG  | 
1076  |  |             ++countSame;  | 
1077  |  | #endif  | 
1078  |  |             /* found an identical block, set the other block's index value for the current block */  | 
1079  | 0  |             for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { | 
1080  | 0  |                 trie->map[mapIndex++]=movedStart;  | 
1081  | 0  |                 movedStart+=UTRIE2_DATA_BLOCK_LENGTH;  | 
1082  | 0  |             }  | 
1083  |  |  | 
1084  |  |             /* advance start to the next block */  | 
1085  | 0  |             start+=blockLength;  | 
1086  |  |  | 
1087  |  |             /* leave newStart with the previous block! */  | 
1088  | 0  |             continue;  | 
1089  | 0  |         }  | 
1090  |  |  | 
1091  |  |         /* see if the beginning of this block can be overlapped with the end of the previous block */  | 
1092  |  |         /* look for maximum overlap (modulo granularity) with the previous, adjacent block */  | 
1093  | 0  |         for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;  | 
1094  | 0  |             overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap);  | 
1095  | 0  |             overlap-=UTRIE2_DATA_GRANULARITY) {} | 
1096  |  | 
  | 
1097  |  | #ifdef UTRIE2_DEBUG  | 
1098  |  |             sumOverlaps+=overlap;  | 
1099  |  | #endif  | 
1100  | 0  |         if(overlap>0 || newStart<start) { | 
1101  |  |             /* some overlap, or just move the whole block */  | 
1102  | 0  |             movedStart=newStart-overlap;  | 
1103  | 0  |             for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { | 
1104  | 0  |                 trie->map[mapIndex++]=movedStart;  | 
1105  | 0  |                 movedStart+=UTRIE2_DATA_BLOCK_LENGTH;  | 
1106  | 0  |             }  | 
1107  |  |  | 
1108  |  |             /* move the non-overlapping indexes to their new positions */  | 
1109  | 0  |             start+=overlap;  | 
1110  | 0  |             for(i=blockLength-overlap; i>0; --i) { | 
1111  | 0  |                 trie->data[newStart++]=trie->data[start++];  | 
1112  | 0  |             }  | 
1113  | 0  |         } else /* no overlap && newStart==start */ { | 
1114  | 0  |             for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) { | 
1115  | 0  |                 trie->map[mapIndex++]=start;  | 
1116  | 0  |                 start+=UTRIE2_DATA_BLOCK_LENGTH;  | 
1117  | 0  |             }  | 
1118  | 0  |             newStart=start;  | 
1119  | 0  |         }  | 
1120  | 0  |     }  | 
1121  |  |  | 
1122  |  |     /* now adjust the index-2 table */  | 
1123  | 0  |     for(i=0; i<trie->index2Length; ++i) { | 
1124  | 0  |         if(i==UNEWTRIE2_INDEX_GAP_OFFSET) { | 
1125  |  |             /* Gap indexes are invalid (-1). Skip over the gap. */  | 
1126  | 0  |             i+=UNEWTRIE2_INDEX_GAP_LENGTH;  | 
1127  | 0  |         }  | 
1128  | 0  |         trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2];  | 
1129  | 0  |     }  | 
1130  | 0  |     trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2];  | 
1131  |  |  | 
1132  |  |     /* ensure dataLength alignment */  | 
1133  | 0  |     while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) { | 
1134  | 0  |         trie->data[newStart++]=trie->initialValue;  | 
1135  | 0  |     }  | 
1136  |  | 
  | 
1137  |  | #ifdef UTRIE2_DEBUG  | 
1138  |  |     /* we saved some space */  | 
1139  |  |     printf("compacting UTrie2: count of 32-bit data words %lu->%lu  countSame=%ld  sumOverlaps=%ld\n", | 
1140  |  |             (long)trie->dataLength, (long)newStart, (long)countSame, (long)sumOverlaps);  | 
1141  |  | #endif  | 
1142  |  | 
  | 
1143  | 0  |     trie->dataLength=newStart;  | 
1144  | 0  | }  | 
1145  |  |  | 
1146  |  | static void  | 
1147  | 0  | compactIndex2(UNewTrie2 *trie) { | 
1148  | 0  |     int32_t i, start, newStart, movedStart, overlap;  | 
1149  |  |  | 
1150  |  |     /* do not compact linear-BMP index-2 blocks */  | 
1151  | 0  |     newStart=UTRIE2_INDEX_2_BMP_LENGTH;  | 
1152  | 0  |     for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) { | 
1153  | 0  |         trie->map[i]=start;  | 
1154  | 0  |     }  | 
1155  |  |  | 
1156  |  |     /* Reduce the index table gap to what will be needed at runtime. */  | 
1157  | 0  |     newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1);  | 
1158  |  | 
  | 
1159  | 0  |     for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) { | 
1160  |  |         /*  | 
1161  |  |          * start: index of first entry of current block  | 
1162  |  |          * newStart: index where the current block is to be moved  | 
1163  |  |          *           (right after current end of already-compacted data)  | 
1164  |  |          */  | 
1165  |  |  | 
1166  |  |         /* search for an identical block */  | 
1167  | 0  |         if( (movedStart=findSameIndex2Block(trie->index2, newStart, start))  | 
1168  | 0  |              >=0  | 
1169  | 0  |         ) { | 
1170  |  |             /* found an identical block, set the other block's index value for the current block */  | 
1171  | 0  |             trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart;  | 
1172  |  |  | 
1173  |  |             /* advance start to the next block */  | 
1174  | 0  |             start+=UTRIE2_INDEX_2_BLOCK_LENGTH;  | 
1175  |  |  | 
1176  |  |             /* leave newStart with the previous block! */  | 
1177  | 0  |             continue;  | 
1178  | 0  |         }  | 
1179  |  |  | 
1180  |  |         /* see if the beginning of this block can be overlapped with the end of the previous block */  | 
1181  |  |         /* look for maximum overlap with the previous, adjacent block */  | 
1182  | 0  |         for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;  | 
1183  | 0  |             overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap);  | 
1184  | 0  |             --overlap) {} | 
1185  |  | 
  | 
1186  | 0  |         if(overlap>0 || newStart<start) { | 
1187  |  |             /* some overlap, or just move the whole block */  | 
1188  | 0  |             trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;  | 
1189  |  |  | 
1190  |  |             /* move the non-overlapping indexes to their new positions */  | 
1191  | 0  |             start+=overlap;  | 
1192  | 0  |             for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) { | 
1193  | 0  |                 trie->index2[newStart++]=trie->index2[start++];  | 
1194  | 0  |             }  | 
1195  | 0  |         } else /* no overlap && newStart==start */ { | 
1196  | 0  |             trie->map[start>>UTRIE2_SHIFT_1_2]=start;  | 
1197  | 0  |             start+=UTRIE2_INDEX_2_BLOCK_LENGTH;  | 
1198  | 0  |             newStart=start;  | 
1199  | 0  |         }  | 
1200  | 0  |     }  | 
1201  |  |  | 
1202  |  |     /* now adjust the index-1 table */  | 
1203  | 0  |     for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) { | 
1204  | 0  |         trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2];  | 
1205  | 0  |     }  | 
1206  | 0  |     trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2];  | 
1207  |  |  | 
1208  |  |     /*  | 
1209  |  |      * Ensure data table alignment:  | 
1210  |  |      * Needs to be granularity-aligned for 16-bit trie  | 
1211  |  |      * (so that dataMove will be down-shiftable),  | 
1212  |  |      * and 2-aligned for uint32_t data.  | 
1213  |  |      */  | 
1214  | 0  |     while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) { | 
1215  |  |         /* Arbitrary value: 0x3fffc not possible for real data. */  | 
1216  | 0  |         trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT;  | 
1217  | 0  |     }  | 
1218  |  | 
  | 
1219  |  | #ifdef UTRIE2_DEBUG  | 
1220  |  |     /* we saved some space */  | 
1221  |  |     printf("compacting UTrie2: count of 16-bit index words %lu->%lu\n", | 
1222  |  |             (long)trie->index2Length, (long)newStart);  | 
1223  |  | #endif  | 
1224  |  | 
  | 
1225  | 0  |     trie->index2Length=newStart;  | 
1226  | 0  | }  | 
1227  |  |  | 
1228  |  | static void  | 
1229  | 0  | compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) { | 
1230  | 0  |     UNewTrie2 *newTrie;  | 
1231  | 0  |     UChar32 highStart, suppHighStart;  | 
1232  | 0  |     uint32_t highValue;  | 
1233  |  | 
  | 
1234  | 0  |     newTrie=trie->newTrie;  | 
1235  |  |  | 
1236  |  |     /* find highStart and round it up */  | 
1237  | 0  |     highValue=utrie2_get32(trie, 0x10ffff);  | 
1238  | 0  |     highStart=findHighStart(newTrie, highValue);  | 
1239  | 0  |     highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);  | 
1240  | 0  |     if(highStart==0x110000) { | 
1241  | 0  |         highValue=trie->errorValue;  | 
1242  | 0  |     }  | 
1243  |  |  | 
1244  |  |     /*  | 
1245  |  |      * Set trie->highStart only after utrie2_get32(trie, highStart).  | 
1246  |  |      * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.  | 
1247  |  |      */  | 
1248  | 0  |     trie->highStart=newTrie->highStart=highStart;  | 
1249  |  | 
  | 
1250  |  | #ifdef UTRIE2_DEBUG  | 
1251  |  |     printf("UTrie2: highStart U+%06lx  highValue 0x%lx  initialValue 0x%lx\n", | 
1252  |  |             (long)highStart, (long)highValue, (long)trie->initialValue);  | 
1253  |  | #endif  | 
1254  |  | 
  | 
1255  | 0  |     if(highStart<0x110000) { | 
1256  |  |         /* Blank out [highStart..10ffff] to release associated data blocks. */  | 
1257  | 0  |         suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;  | 
1258  | 0  |         utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode);  | 
1259  | 0  |         if(U_FAILURE(*pErrorCode)) { | 
1260  | 0  |             return;  | 
1261  | 0  |         }  | 
1262  | 0  |     }  | 
1263  |  |  | 
1264  | 0  |     compactData(newTrie);  | 
1265  | 0  |     if(highStart>0x10000) { | 
1266  | 0  |         compactIndex2(newTrie);  | 
1267  |  | #ifdef UTRIE2_DEBUG  | 
1268  |  |     } else { | 
1269  |  |         printf("UTrie2: highStart U+%04lx  count of 16-bit index words %lu->%lu\n", | 
1270  |  |                 (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET);  | 
1271  |  | #endif  | 
1272  | 0  |     }  | 
1273  |  |  | 
1274  |  |     /*  | 
1275  |  |      * Store the highValue in the data array and round up the dataLength.  | 
1276  |  |      * Must be done after compactData() because that assumes that dataLength  | 
1277  |  |      * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.  | 
1278  |  |      */  | 
1279  | 0  |     newTrie->data[newTrie->dataLength++]=highValue;  | 
1280  | 0  |     while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) { | 
1281  | 0  |         newTrie->data[newTrie->dataLength++]=trie->initialValue;  | 
1282  | 0  |     }  | 
1283  |  | 
  | 
1284  | 0  |     newTrie->isCompacted=TRUE;  | 
1285  | 0  | }  | 
1286  |  |  | 
1287  |  | /* serialization ------------------------------------------------------------ */  | 
1288  |  |  | 
1289  |  | /**  | 
1290  |  |  * Maximum length of the runtime index array.  | 
1291  |  |  * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.  | 
1292  |  |  * (The actual maximum length is lower,  | 
1293  |  |  * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)  | 
1294  |  |  */  | 
1295  | 0  | #define UTRIE2_MAX_INDEX_LENGTH 0xffff  | 
1296  |  |  | 
1297  |  | /**  | 
1298  |  |  * Maximum length of the runtime data array.  | 
1299  |  |  * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,  | 
1300  |  |  * and by uint16_t UTrie2Header.shiftedDataLength.  | 
1301  |  |  */  | 
1302  | 0  | #define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)  | 
1303  |  |  | 
1304  |  | /* Compact and internally serialize the trie. */  | 
1305  |  | U_CAPI void U_EXPORT2  | 
1306  | 0  | utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) { | 
1307  | 0  |     UNewTrie2 *newTrie;  | 
1308  | 0  |     UTrie2Header *header;  | 
1309  | 0  |     uint32_t *p;  | 
1310  | 0  |     uint16_t *dest16;  | 
1311  | 0  |     int32_t i, length;  | 
1312  | 0  |     int32_t allIndexesLength;  | 
1313  | 0  |     int32_t dataMove;  /* >0 if the data is moved to the end of the index array */  | 
1314  | 0  |     UChar32 highStart;  | 
1315  |  |  | 
1316  |  |     /* argument check */  | 
1317  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
1318  | 0  |         return;  | 
1319  | 0  |     }  | 
1320  | 0  |     if( trie==NULL ||  | 
1321  | 0  |         valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits  | 
1322  | 0  |     ) { | 
1323  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
1324  | 0  |         return;  | 
1325  | 0  |     }  | 
1326  | 0  |     newTrie=trie->newTrie;  | 
1327  | 0  |     if(newTrie==NULL) { | 
1328  |  |         /* already frozen */  | 
1329  | 0  |         UTrie2ValueBits frozenValueBits=  | 
1330  | 0  |             trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS;  | 
1331  | 0  |         if(valueBits!=frozenValueBits) { | 
1332  | 0  |             *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
1333  | 0  |         }  | 
1334  | 0  |         return;  | 
1335  | 0  |     }  | 
1336  |  |  | 
1337  |  |     /* compact if necessary */  | 
1338  | 0  |     if(!newTrie->isCompacted) { | 
1339  | 0  |         compactTrie(trie, pErrorCode);  | 
1340  | 0  |         if(U_FAILURE(*pErrorCode)) { | 
1341  | 0  |             return;  | 
1342  | 0  |         }  | 
1343  | 0  |     }  | 
1344  | 0  |     highStart=trie->highStart;  | 
1345  |  | 
  | 
1346  | 0  |     if(highStart<=0x10000) { | 
1347  | 0  |         allIndexesLength=UTRIE2_INDEX_1_OFFSET;  | 
1348  | 0  |     } else { | 
1349  | 0  |         allIndexesLength=newTrie->index2Length;  | 
1350  | 0  |     }  | 
1351  | 0  |     if(valueBits==UTRIE2_16_VALUE_BITS) { | 
1352  | 0  |         dataMove=allIndexesLength;  | 
1353  | 0  |     } else { | 
1354  | 0  |         dataMove=0;  | 
1355  | 0  |     }  | 
1356  |  |  | 
1357  |  |     /* are indexLength and dataLength within limits? */  | 
1358  | 0  |     if( /* for unshifted indexLength */  | 
1359  | 0  |         allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||  | 
1360  |  |         /* for unshifted dataNullOffset */  | 
1361  | 0  |         (dataMove+newTrie->dataNullOffset)>0xffff ||  | 
1362  |  |         /* for unshifted 2-byte UTF-8 index-2 values */  | 
1363  | 0  |         (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||  | 
1364  |  |         /* for shiftedDataLength */  | 
1365  | 0  |         (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH  | 
1366  | 0  |     ) { | 
1367  | 0  |         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
1368  | 0  |         return;  | 
1369  | 0  |     }  | 
1370  |  |  | 
1371  |  |     /* calculate the total serialized length */  | 
1372  | 0  |     length=sizeof(UTrie2Header)+allIndexesLength*2;  | 
1373  | 0  |     if(valueBits==UTRIE2_16_VALUE_BITS) { | 
1374  | 0  |         length+=newTrie->dataLength*2;  | 
1375  | 0  |     } else { | 
1376  | 0  |         length+=newTrie->dataLength*4;  | 
1377  | 0  |     }  | 
1378  |  | 
  | 
1379  | 0  |     trie->memory=uprv_malloc(length);  | 
1380  | 0  |     if(trie->memory==NULL) { | 
1381  | 0  |         *pErrorCode=U_MEMORY_ALLOCATION_ERROR;  | 
1382  | 0  |         return;  | 
1383  | 0  |     }  | 
1384  | 0  |     trie->length=length;  | 
1385  | 0  |     trie->isMemoryOwned=TRUE;  | 
1386  |  | 
  | 
1387  | 0  |     trie->indexLength=allIndexesLength;  | 
1388  | 0  |     trie->dataLength=newTrie->dataLength;  | 
1389  | 0  |     if(highStart<=0x10000) { | 
1390  | 0  |         trie->index2NullOffset=0xffff;  | 
1391  | 0  |     } else { | 
1392  | 0  |         trie->index2NullOffset=static_cast<uint16_t>(UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset);  | 
1393  | 0  |     }  | 
1394  | 0  |     trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset);  | 
1395  | 0  |     trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY;  | 
1396  |  |  | 
1397  |  |     /* set the header fields */  | 
1398  | 0  |     header=(UTrie2Header *)trie->memory;  | 
1399  |  | 
  | 
1400  | 0  |     header->signature=UTRIE2_SIG; /* "Tri2" */  | 
1401  | 0  |     header->options=(uint16_t)valueBits;  | 
1402  |  | 
  | 
1403  | 0  |     header->indexLength=(uint16_t)trie->indexLength;  | 
1404  | 0  |     header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT);  | 
1405  | 0  |     header->index2NullOffset=trie->index2NullOffset;  | 
1406  | 0  |     header->dataNullOffset=trie->dataNullOffset;  | 
1407  | 0  |     header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1);  | 
1408  |  |  | 
1409  |  |     /* fill the index and data arrays */  | 
1410  | 0  |     dest16=(uint16_t *)(header+1);  | 
1411  | 0  |     trie->index=dest16;  | 
1412  |  |  | 
1413  |  |     /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */  | 
1414  | 0  |     p=(uint32_t *)newTrie->index2;  | 
1415  | 0  |     for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) { | 
1416  | 0  |         *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);  | 
1417  | 0  |     }  | 
1418  |  |  | 
1419  |  |     /* write UTF-8 2-byte index-2 values, not right-shifted */  | 
1420  | 0  |     for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */ | 
1421  | 0  |         *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);  | 
1422  | 0  |     }  | 
1423  | 0  |     for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */ | 
1424  | 0  |         *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]);  | 
1425  | 0  |     }  | 
1426  |  | 
  | 
1427  | 0  |     if(highStart>0x10000) { | 
1428  | 0  |         int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1;  | 
1429  | 0  |         int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length;  | 
1430  |  |  | 
1431  |  |         /* write 16-bit index-1 values for supplementary code points */  | 
1432  | 0  |         p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;  | 
1433  | 0  |         for(i=index1Length; i>0; --i) { | 
1434  | 0  |             *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);  | 
1435  | 0  |         }  | 
1436  |  |  | 
1437  |  |         /*  | 
1438  |  |          * write the index-2 array values for supplementary code points,  | 
1439  |  |          * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove  | 
1440  |  |          */  | 
1441  | 0  |         p=(uint32_t *)newTrie->index2+index2Offset;  | 
1442  | 0  |         for(i=newTrie->index2Length-index2Offset; i>0; --i) { | 
1443  | 0  |             *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);  | 
1444  | 0  |         }  | 
1445  | 0  |     }  | 
1446  |  |  | 
1447  |  |     /* write the 16/32-bit data array */  | 
1448  | 0  |     switch(valueBits) { | 
1449  | 0  |     case UTRIE2_16_VALUE_BITS:  | 
1450  |  |         /* write 16-bit data values */  | 
1451  | 0  |         trie->data16=dest16;  | 
1452  | 0  |         trie->data32=NULL;  | 
1453  | 0  |         p=newTrie->data;  | 
1454  | 0  |         for(i=newTrie->dataLength; i>0; --i) { | 
1455  | 0  |             *dest16++=(uint16_t)*p++;  | 
1456  | 0  |         }  | 
1457  | 0  |         break;  | 
1458  | 0  |     case UTRIE2_32_VALUE_BITS:  | 
1459  |  |         /* write 32-bit data values */  | 
1460  | 0  |         trie->data16=NULL;  | 
1461  | 0  |         trie->data32=(uint32_t *)dest16;  | 
1462  | 0  |         uprv_memcpy(dest16, newTrie->data, (size_t)newTrie->dataLength*4);  | 
1463  | 0  |         break;  | 
1464  | 0  |     default:  | 
1465  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
1466  | 0  |         return;  | 
1467  | 0  |     }  | 
1468  |  |  | 
1469  |  | #ifdef UTRIE2_DEBUG  | 
1470  |  |     utrie2_printLengths(trie, "");  | 
1471  |  | #endif  | 
1472  |  |  | 
1473  |  | #ifdef UCPTRIE_DEBUG  | 
1474  |  |     umutablecptrie_setName(newTrie->t3, trie->name);  | 
1475  |  |     ucptrie_close(  | 
1476  |  |         umutablecptrie_buildImmutable(  | 
1477  |  |             newTrie->t3, UCPTRIE_TYPE_FAST, (UCPTrieValueWidth)valueBits, pErrorCode));  | 
1478  |  | #endif  | 
1479  |  |     /* Delete the UNewTrie2. */  | 
1480  | 0  |     uprv_free(newTrie->data);  | 
1481  | 0  |     uprv_free(newTrie);  | 
1482  | 0  |     trie->newTrie=NULL;  | 
1483  | 0  | }  |