Coverage Report

Created: 2025-07-11 06:23

/src/icu/source/common/utrie2_builder.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*
6
*   Copyright (C) 2001-2014, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
******************************************************************************
10
*   file name:  utrie2_builder.cpp
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2008sep26 (split off from utrie2.c)
16
*   created by: Markus W. Scherer
17
*
18
*   This is a common implementation of a Unicode trie.
19
*   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
20
*   Unicode code points (0..0x10ffff).
21
*   This is the second common version of a Unicode trie (hence the name UTrie2).
22
*   See utrie2.h for a comparison.
23
*
24
*   This file contains only the builder code.
25
*   See utrie2.c for the runtime and enumeration code.
26
*/
27
#ifdef UTRIE2_DEBUG
28
#   include <stdio.h>
29
#endif
30
31
#include "unicode/utypes.h"
32
#include "cmemory.h"
33
#include "utrie2.h"
34
#include "utrie2_impl.h"
35
36
#include "utrie.h" /* for utrie2_fromUTrie() and utrie_swap() */
37
38
/* Implementation notes ----------------------------------------------------- */
39
40
/*
41
 * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values
42
 * have been chosen to minimize trie sizes overall.
43
 * Most of the code is flexible enough to work with a range of values,
44
 * within certain limits.
45
 *
46
 * Exception: Support for separate values for lead surrogate code _units_
47
 * vs. code _points_ was added after the constants were fixed,
48
 * and has not been tested nor particularly designed for different constant values.
49
 * (Especially the utrie2_enum() code that jumps to the special LSCP index-2
50
 * part and back.)
51
 *
52
 * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data
53
 * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH
54
 * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction
55
 * remapping) stops working.
56
 *
57
 * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate()
58
 * assumes that a single index-2 block is used for 0x400 code points
59
 * corresponding to one lead surrogate.
60
 *
61
 * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains
62
 * more than one Unicode plane, and the split of the index-2 table into a BMP
63
 * part and a supplementary part, with a gap in between, would not work.
64
 *
65
 * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because
66
 * there is data with more than 64k distinct values,
67
 * for example for Unihan collation with a separate collation weight per
68
 * Han character.
69
 */
70
71
/* Building a trie ----------------------------------------------------------*/
72
73
enum {
74
    /** The null index-2 block, following the gap in the index-2 table. */
75
    UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH,
76
77
    /** The start of allocated index-2 blocks. */
78
    UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH,
79
80
    /**
81
     * The null data block.
82
     * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
83
     * to work with 6-bit trail bytes from 2-byte UTF-8.
84
     */
85
    UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET,
86
87
    /** The start of allocated data blocks. */
88
    UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40,
89
90
    /**
91
     * The start of data blocks for U+0800 and above.
92
     * Below, compaction uses a block length of 64 for 2-byte UTF-8.
93
     * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
94
     * Data values for 0x780 code points beyond ASCII.
95
     */
96
    UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780
97
};
98
99
/* Start with allocation of 16k data entries. */
100
0
#define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)
101
102
/* Grow about 8x each time. */
103
0
#define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)
104
105
static int32_t
106
allocIndex2Block(UNewTrie2 *trie);
107
108
U_CAPI UTrie2 * U_EXPORT2
109
0
utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
110
0
    UTrie2 *trie;
111
0
    UNewTrie2 *newTrie;
112
0
    uint32_t *data;
113
0
    int32_t i, j;
114
115
0
    if(U_FAILURE(*pErrorCode)) {
116
0
        return NULL;
117
0
    }
118
119
0
    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
120
0
    newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
121
0
    data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4);
122
0
    if(trie==NULL || newTrie==NULL || data==NULL) {
123
0
        uprv_free(trie);
124
0
        uprv_free(newTrie);
125
0
        uprv_free(data);
126
0
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
127
0
        return 0;
128
0
    }
129
130
0
    uprv_memset(trie, 0, sizeof(UTrie2));
131
0
    trie->initialValue=initialValue;
132
0
    trie->errorValue=errorValue;
133
0
    trie->highStart=0x110000;
134
0
    trie->newTrie=newTrie;
135
136
0
    newTrie->data=data;
137
0
    newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH;
138
0
    newTrie->initialValue=initialValue;
139
0
    newTrie->errorValue=errorValue;
140
0
    newTrie->highStart=0x110000;
141
0
    newTrie->firstFreeBlock=0;  /* no free block in the list */
142
0
    newTrie->isCompacted=FALSE;
143
144
    /*
145
     * preallocate and reset
146
     * - ASCII
147
     * - the bad-UTF-8-data block
148
     * - the null data block
149
     */
150
0
    for(i=0; i<0x80; ++i) {
151
0
        newTrie->data[i]=initialValue;
152
0
    }
153
0
    for(; i<0xc0; ++i) {
154
0
        newTrie->data[i]=errorValue;
155
0
    }
156
0
    for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) {
157
0
        newTrie->data[i]=initialValue;
158
0
    }
159
0
    newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET;
160
0
    newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET;
161
162
    /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
163
0
    for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
164
0
        newTrie->index2[i]=j;
165
0
        newTrie->map[i]=1;
166
0
    }
167
    /* reference counts for the bad-UTF-8-data block */
168
0
    for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
169
0
        newTrie->map[i]=0;
170
0
    }
171
    /*
172
     * Reference counts for the null data block: all blocks except for the ASCII blocks.
173
     * Plus 1 so that we don't drop this block during compaction.
174
     * Plus as many as needed for lead surrogate code points.
175
     */
176
    /* i==newTrie->dataNullOffset */
177
0
    newTrie->map[i++]=
178
0
        (0x110000>>UTRIE2_SHIFT_2)-
179
0
        (0x80>>UTRIE2_SHIFT_2)+
180
0
        1+
181
0
        UTRIE2_LSCP_INDEX_2_LENGTH;
182
0
    j+=UTRIE2_DATA_BLOCK_LENGTH;
183
0
    for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
184
0
        newTrie->map[i]=0;
185
0
    }
186
187
    /*
188
     * set the remaining indexes in the BMP index-2 block
189
     * to the null data block
190
     */
191
0
    for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
192
0
        newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;
193
0
    }
194
195
    /*
196
     * Fill the index gap with impossible values so that compaction
197
     * does not overlap other index-2 blocks with the gap.
198
     */
199
0
    for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
200
0
        newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;
201
0
    }
202
203
    /* set the indexes in the null index-2 block */
204
0
    for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
205
0
        newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;
206
0
    }
207
0
    newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;
208
0
    newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;
209
210
    /* set the index-1 indexes for the linear index-2 block */
211
0
    for(i=0, j=0;
212
0
        i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
213
0
        ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH
214
0
    ) {
215
0
        newTrie->index1[i]=j;
216
0
    }
217
218
    /* set the remaining index-1 indexes to the null index-2 block */
219
0
    for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
220
0
        newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;
221
0
    }
222
223
    /*
224
     * Preallocate and reset data for U+0080..U+07ff,
225
     * for 2-byte UTF-8 which will be compacted in 64-blocks
226
     * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
227
     */
228
0
    for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) {
229
0
        utrie2_set32(trie, i, initialValue, pErrorCode);
230
0
    }
231
232
0
    return trie;
233
0
}
234
235
static UNewTrie2 *
236
0
cloneBuilder(const UNewTrie2 *other) {
237
0
    UNewTrie2 *trie;
238
239
0
    trie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
240
0
    if(trie==NULL) {
241
0
        return NULL;
242
0
    }
243
244
0
    trie->data=(uint32_t *)uprv_malloc(other->dataCapacity*4);
245
0
    if(trie->data==NULL) {
246
0
        uprv_free(trie);
247
0
        return NULL;
248
0
    }
249
0
    trie->dataCapacity=other->dataCapacity;
250
251
    /* clone data */
252
0
    uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1));
253
0
    uprv_memcpy(trie->index2, other->index2, (size_t)other->index2Length*4);
254
0
    trie->index2NullOffset=other->index2NullOffset;
255
0
    trie->index2Length=other->index2Length;
256
257
0
    uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4);
258
0
    trie->dataNullOffset=other->dataNullOffset;
259
0
    trie->dataLength=other->dataLength;
260
261
    /* reference counters */
262
0
    if(other->isCompacted) {
263
0
        trie->firstFreeBlock=0;
264
0
    } else {
265
0
        uprv_memcpy(trie->map, other->map, ((size_t)other->dataLength>>UTRIE2_SHIFT_2)*4);
266
0
        trie->firstFreeBlock=other->firstFreeBlock;
267
0
    }
268
269
0
    trie->initialValue=other->initialValue;
270
0
    trie->errorValue=other->errorValue;
271
0
    trie->highStart=other->highStart;
272
0
    trie->isCompacted=other->isCompacted;
273
274
0
    return trie;
275
0
}
276
277
U_CAPI UTrie2 * U_EXPORT2
278
0
utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) {
279
0
    UTrie2 *trie;
280
281
0
    if(U_FAILURE(*pErrorCode)) {
282
0
        return NULL;
283
0
    }
284
0
    if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
285
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
286
0
        return NULL;
287
0
    }
288
289
0
    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
290
0
    if(trie==NULL) {
291
0
        return NULL;
292
0
    }
293
0
    uprv_memcpy(trie, other, sizeof(UTrie2));
294
295
0
    if(other->memory!=NULL) {
296
0
        trie->memory=uprv_malloc(other->length);
297
0
        if(trie->memory!=NULL) {
298
0
            trie->isMemoryOwned=TRUE;
299
0
            uprv_memcpy(trie->memory, other->memory, other->length);
300
301
            /* make the clone's pointers point to its own memory */
302
0
            trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory);
303
0
            if(other->data16!=NULL) {
304
0
                trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory);
305
0
            }
306
0
            if(other->data32!=NULL) {
307
0
                trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory);
308
0
            }
309
0
        }
310
0
    } else /* other->newTrie!=NULL */ {
311
0
        trie->newTrie=cloneBuilder(other->newTrie);
312
0
    }
313
314
0
    if(trie->memory==NULL && trie->newTrie==NULL) {
315
0
        uprv_free(trie);
316
0
        trie=NULL;
317
0
    }
318
0
    return trie;
319
0
}
320
321
typedef struct NewTrieAndStatus {
322
    UTrie2 *trie;
323
    UErrorCode errorCode;
324
    UBool exclusiveLimit;  /* rather than inclusive range end */
325
} NewTrieAndStatus;
326
327
static UBool U_CALLCONV
328
0
copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
329
0
    NewTrieAndStatus *nt=(NewTrieAndStatus *)context;
330
0
    if(value!=nt->trie->initialValue) {
331
0
        if(nt->exclusiveLimit) {
332
0
            --end;
333
0
        }
334
0
        if(start==end) {
335
0
            utrie2_set32(nt->trie, start, value, &nt->errorCode);
336
0
        } else {
337
0
            utrie2_setRange32(nt->trie, start, end, value, TRUE, &nt->errorCode);
338
0
        }
339
0
        return U_SUCCESS(nt->errorCode);
340
0
    } else {
341
0
        return TRUE;
342
0
    }
343
0
}
344
345
#ifdef UTRIE2_DEBUG
346
static void
347
utrie_printLengths(const UTrie *trie) {
348
    long indexLength=trie->indexLength;
349
    long dataLength=(long)trie->dataLength;
350
    long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
351
    printf("**UTrieLengths** index:%6ld  data:%6ld  serialized:%6ld\n",
352
           indexLength, dataLength, totalLength);
353
}
354
355
static void
356
utrie2_printLengths(const UTrie2 *trie, const char *which) {
357
    long indexLength=trie->indexLength;
358
    long dataLength=(long)trie->dataLength;
359
    long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=NULL ? 4 : 2);
360
    printf("**UTrie2Lengths(%s)** index:%6ld  data:%6ld  serialized:%6ld\n",
361
           which, indexLength, dataLength, totalLength);
362
}
363
#endif
364
365
U_CAPI UTrie2 * U_EXPORT2
366
0
utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) {
367
0
    NewTrieAndStatus context;
368
0
    UChar lead;
369
370
0
    if(U_FAILURE(*pErrorCode)) {
371
0
        return NULL;
372
0
    }
373
0
    if(other==NULL || (other->memory==NULL && other->newTrie==NULL)) {
374
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
375
0
        return NULL;
376
0
    }
377
0
    if(other->newTrie!=NULL && !other->newTrie->isCompacted) {
378
0
        return utrie2_clone(other, pErrorCode);  /* clone an unfrozen trie */
379
0
    }
380
381
    /* Clone the frozen trie by enumerating it and building a new one. */
382
0
    context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);
383
0
    if(U_FAILURE(*pErrorCode)) {
384
0
        return NULL;
385
0
    }
386
0
    context.exclusiveLimit=FALSE;
387
0
    context.errorCode=*pErrorCode;
388
0
    utrie2_enum(other, NULL, copyEnumRange, &context);
389
0
    *pErrorCode=context.errorCode;
390
0
    for(lead=0xd800; lead<0xdc00; ++lead) {
391
0
        uint32_t value;
392
0
        if(other->data32==NULL) {
393
0
            value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);
394
0
        } else {
395
0
            value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);
396
0
        }
397
0
        if(value!=other->initialValue) {
398
0
            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
399
0
        }
400
0
    }
401
0
    if(U_FAILURE(*pErrorCode)) {
402
0
        utrie2_close(context.trie);
403
0
        context.trie=NULL;
404
0
    }
405
0
    return context.trie;
406
0
}
407
408
/* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */
409
U_CAPI UTrie2 * U_EXPORT2
410
0
utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
411
0
    NewTrieAndStatus context;
412
0
    UChar lead;
413
414
0
    if(U_FAILURE(*pErrorCode)) {
415
0
        return NULL;
416
0
    }
417
0
    if(trie1==NULL) {
418
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
419
0
        return NULL;
420
0
    }
421
0
    context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
422
0
    if(U_FAILURE(*pErrorCode)) {
423
0
        return NULL;
424
0
    }
425
0
    context.exclusiveLimit=TRUE;
426
0
    context.errorCode=*pErrorCode;
427
0
    utrie_enum(trie1, NULL, copyEnumRange, &context);
428
0
    *pErrorCode=context.errorCode;
429
0
    for(lead=0xd800; lead<0xdc00; ++lead) {
430
0
        uint32_t value;
431
0
        if(trie1->data32==NULL) {
432
0
            value=UTRIE_GET16_FROM_LEAD(trie1, lead);
433
0
        } else {
434
0
            value=UTRIE_GET32_FROM_LEAD(trie1, lead);
435
0
        }
436
0
        if(value!=trie1->initialValue) {
437
0
            utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
438
0
        }
439
0
    }
440
0
    if(U_SUCCESS(*pErrorCode)) {
441
0
        utrie2_freeze(context.trie,
442
0
                      trie1->data32!=NULL ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
443
0
                      pErrorCode);
444
0
    }
445
#ifdef UTRIE2_DEBUG
446
    if(U_SUCCESS(*pErrorCode)) {
447
        utrie_printLengths(trie1);
448
        utrie2_printLengths(context.trie, "fromUTrie");
449
    }
450
#endif
451
0
    if(U_FAILURE(*pErrorCode)) {
452
0
        utrie2_close(context.trie);
453
0
        context.trie=NULL;
454
0
    }
455
0
    return context.trie;
456
0
}
457
458
static inline UBool
459
0
isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
460
0
    int32_t i2, block;
461
462
0
    if(U_IS_LEAD(c) && forLSCP) {
463
0
        i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
464
0
            (c>>UTRIE2_SHIFT_2);
465
0
    } else {
466
0
        i2=trie->index1[c>>UTRIE2_SHIFT_1]+
467
0
            ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
468
0
    }
469
0
    block=trie->index2[i2];
470
0
    return (UBool)(block==trie->dataNullOffset);
471
0
}
472
473
static int32_t
474
0
allocIndex2Block(UNewTrie2 *trie) {
475
0
    int32_t newBlock, newTop;
476
477
0
    newBlock=trie->index2Length;
478
0
    newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH;
479
0
    if(newTop>UPRV_LENGTHOF(trie->index2)) {
480
        /*
481
         * Should never occur.
482
         * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
483
         * or the code writes more values than should be possible.
484
         */
485
0
        return -1;
486
0
    }
487
0
    trie->index2Length=newTop;
488
0
    uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4);
489
0
    return newBlock;
490
0
}
491
492
static int32_t
493
0
getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
494
0
    int32_t i1, i2;
495
496
0
    if(U_IS_LEAD(c) && forLSCP) {
497
0
        return UTRIE2_LSCP_INDEX_2_OFFSET;
498
0
    }
499
500
0
    i1=c>>UTRIE2_SHIFT_1;
501
0
    i2=trie->index1[i1];
502
0
    if(i2==trie->index2NullOffset) {
503
0
        i2=allocIndex2Block(trie);
504
0
        if(i2<0) {
505
0
            return -1;  /* program error */
506
0
        }
507
0
        trie->index1[i1]=i2;
508
0
    }
509
0
    return i2;
510
0
}
511
512
static int32_t
513
0
allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) {
514
0
    int32_t newBlock, newTop;
515
516
0
    if(trie->firstFreeBlock!=0) {
517
        /* get the first free block */
518
0
        newBlock=trie->firstFreeBlock;
519
0
        trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2];
520
0
    } else {
521
        /* get a new block from the high end */
522
0
        newBlock=trie->dataLength;
523
0
        newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH;
524
0
        if(newTop>trie->dataCapacity) {
525
            /* out of memory in the data array */
526
0
            int32_t capacity;
527
0
            uint32_t *data;
528
529
0
            if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) {
530
0
                capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;
531
0
            } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) {
532
0
                capacity=UNEWTRIE2_MAX_DATA_LENGTH;
533
0
            } else {
534
                /*
535
                 * Should never occur.
536
                 * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
537
                 * or the code writes more values than should be possible.
538
                 */
539
0
                return -1;
540
0
            }
541
0
            data=(uint32_t *)uprv_malloc(capacity*4);
542
0
            if(data==NULL) {
543
0
                return -1;
544
0
            }
545
0
            uprv_memcpy(data, trie->data, (size_t)trie->dataLength*4);
546
0
            uprv_free(trie->data);
547
0
            trie->data=data;
548
0
            trie->dataCapacity=capacity;
549
0
        }
550
0
        trie->dataLength=newTop;
551
0
    }
552
0
    uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4);
553
0
    trie->map[newBlock>>UTRIE2_SHIFT_2]=0;
554
0
    return newBlock;
555
0
}
556
557
/* call when the block's reference counter reaches 0 */
558
static void
559
0
releaseDataBlock(UNewTrie2 *trie, int32_t block) {
560
    /* put this block at the front of the free-block chain */
561
0
    trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock;
562
0
    trie->firstFreeBlock=block;
563
0
}
564
565
static inline UBool
566
0
isWritableBlock(UNewTrie2 *trie, int32_t block) {
567
0
    return (UBool)(block!=trie->dataNullOffset && 1==trie->map[block>>UTRIE2_SHIFT_2]);
568
0
}
569
570
static inline void
571
0
setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) {
572
0
    int32_t oldBlock;
573
0
    ++trie->map[block>>UTRIE2_SHIFT_2];  /* increment first, in case block==oldBlock! */
574
0
    oldBlock=trie->index2[i2];
575
0
    if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) {
576
0
        releaseDataBlock(trie, oldBlock);
577
0
    }
578
0
    trie->index2[i2]=block;
579
0
}
580
581
/**
582
 * No error checking for illegal arguments.
583
 *
584
 * @return -1 if no new data block available (out of memory in data array)
585
 * @internal
586
 */
587
static int32_t
588
0
getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
589
0
    int32_t i2, oldBlock, newBlock;
590
591
0
    i2=getIndex2Block(trie, c, forLSCP);
592
0
    if(i2<0) {
593
0
        return -1;  /* program error */
594
0
    }
595
596
0
    i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
597
0
    oldBlock=trie->index2[i2];
598
0
    if(isWritableBlock(trie, oldBlock)) {
599
0
        return oldBlock;
600
0
    }
601
602
    /* allocate a new data block */
603
0
    newBlock=allocDataBlock(trie, oldBlock);
604
0
    if(newBlock<0) {
605
        /* out of memory in the data array */
606
0
        return -1;
607
0
    }
608
0
    setIndex2Entry(trie, i2, newBlock);
609
0
    return newBlock;
610
0
}
611
612
/**
613
 * @return TRUE if the value was successfully set
614
 */
615
static void
616
set32(UNewTrie2 *trie,
617
      UChar32 c, UBool forLSCP, uint32_t value,
618
0
      UErrorCode *pErrorCode) {
619
0
    int32_t block;
620
621
0
    if(trie==NULL || trie->isCompacted) {
622
0
        *pErrorCode=U_NO_WRITE_PERMISSION;
623
0
        return;
624
0
    }
625
626
0
    block=getDataBlock(trie, c, forLSCP);
627
0
    if(block<0) {
628
0
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
629
0
        return;
630
0
    }
631
632
0
    trie->data[block+(c&UTRIE2_DATA_MASK)]=value;
633
0
}
634
635
U_CAPI void U_EXPORT2
636
0
utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
637
0
    if(U_FAILURE(*pErrorCode)) {
638
0
        return;
639
0
    }
640
0
    if((uint32_t)c>0x10ffff) {
641
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
642
0
        return;
643
0
    }
644
0
    set32(trie->newTrie, c, TRUE, value, pErrorCode);
645
0
}
646
647
U_CAPI void U_EXPORT2
648
utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
649
                                     UChar32 c, uint32_t value,
650
0
                                     UErrorCode *pErrorCode) {
651
0
    if(U_FAILURE(*pErrorCode)) {
652
0
        return;
653
0
    }
654
0
    if(!U_IS_LEAD(c)) {
655
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
656
0
        return;
657
0
    }
658
0
    set32(trie->newTrie, c, FALSE, value, pErrorCode);
659
0
}
660
661
static void
662
0
writeBlock(uint32_t *block, uint32_t value) {
663
0
    uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH;
664
0
    while(block<limit) {
665
0
        *block++=value;
666
0
    }
667
0
}
668
669
/**
670
 * initialValue is ignored if overwrite=TRUE
671
 * @internal
672
 */
673
static void
674
fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
675
0
          uint32_t value, uint32_t initialValue, UBool overwrite) {
676
0
    uint32_t *pLimit;
677
678
0
    pLimit=block+limit;
679
0
    block+=start;
680
0
    if(overwrite) {
681
0
        while(block<pLimit) {
682
0
            *block++=value;
683
0
        }
684
0
    } else {
685
0
        while(block<pLimit) {
686
0
            if(*block==initialValue) {
687
0
                *block=value;
688
0
            }
689
0
            ++block;
690
0
        }
691
0
    }
692
0
}
693
694
U_CAPI void U_EXPORT2
695
utrie2_setRange32(UTrie2 *trie,
696
                  UChar32 start, UChar32 end,
697
                  uint32_t value, UBool overwrite,
698
0
                  UErrorCode *pErrorCode) {
699
    /*
700
     * repeat value in [start..end]
701
     * mark index values for repeat-data blocks by setting bit 31 of the index values
702
     * fill around existing values if any, if(overwrite)
703
     */
704
0
    UNewTrie2 *newTrie;
705
0
    int32_t block, rest, repeatBlock;
706
0
    UChar32 limit;
707
708
0
    if(U_FAILURE(*pErrorCode)) {
709
0
        return;
710
0
    }
711
0
    if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) {
712
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
713
0
        return;
714
0
    }
715
0
    newTrie=trie->newTrie;
716
0
    if(newTrie==NULL || newTrie->isCompacted) {
717
0
        *pErrorCode=U_NO_WRITE_PERMISSION;
718
0
        return;
719
0
    }
720
0
    if(!overwrite && value==newTrie->initialValue) {
721
0
        return; /* nothing to do */
722
0
    }
723
724
0
    limit=end+1;
725
0
    if(start&UTRIE2_DATA_MASK) {
726
0
        UChar32 nextStart;
727
728
        /* set partial block at [start..following block boundary[ */
729
0
        block=getDataBlock(newTrie, start, TRUE);
730
0
        if(block<0) {
731
0
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
732
0
            return;
733
0
        }
734
735
0
        nextStart=(start+UTRIE2_DATA_BLOCK_LENGTH)&~UTRIE2_DATA_MASK;
736
0
        if(nextStart<=limit) {
737
0
            fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,
738
0
                      value, newTrie->initialValue, overwrite);
739
0
            start=nextStart;
740
0
        } else {
741
0
            fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,
742
0
                      value, newTrie->initialValue, overwrite);
743
0
            return;
744
0
        }
745
0
    }
746
747
    /* number of positions in the last, partial block */
748
0
    rest=limit&UTRIE2_DATA_MASK;
749
750
    /* round down limit to a block boundary */
751
0
    limit&=~UTRIE2_DATA_MASK;
752
753
    /* iterate over all-value blocks */
754
0
    if(value==newTrie->initialValue) {
755
0
        repeatBlock=newTrie->dataNullOffset;
756
0
    } else {
757
0
        repeatBlock=-1;
758
0
    }
759
760
0
    while(start<limit) {
761
0
        int32_t i2;
762
0
        UBool setRepeatBlock=FALSE;
763
764
0
        if(value==newTrie->initialValue && isInNullBlock(newTrie, start, TRUE)) {
765
0
            start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
766
0
            continue;
767
0
        }
768
769
        /* get index value */
770
0
        i2=getIndex2Block(newTrie, start, TRUE);
771
0
        if(i2<0) {
772
0
            *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
773
0
            return;
774
0
        }
775
0
        i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
776
0
        block=newTrie->index2[i2];
777
0
        if(isWritableBlock(newTrie, block)) {
778
            /* already allocated */
779
0
            if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) {
780
                /*
781
                 * We overwrite all values, and it's not a
782
                 * protected (ASCII-linear or 2-byte UTF-8) block:
783
                 * replace with the repeatBlock.
784
                 */
785
0
                setRepeatBlock=TRUE;
786
0
            } else {
787
                /* !overwrite, or protected block: just write the values into this block */
788
0
                fillBlock(newTrie->data+block,
789
0
                          0, UTRIE2_DATA_BLOCK_LENGTH,
790
0
                          value, newTrie->initialValue, overwrite);
791
0
            }
792
0
        } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) {
793
            /*
794
             * Set the repeatBlock instead of the null block or previous repeat block:
795
             *
796
             * If !isWritableBlock() then all entries in the block have the same value
797
             * because it's the null block or a range block (the repeatBlock from a previous
798
             * call to utrie2_setRange32()).
799
             * No other blocks are used multiple times before compacting.
800
             *
801
             * The null block is the only non-writable block with the initialValue because
802
             * of the repeatBlock initialization above. (If value==initialValue, then
803
             * the repeatBlock will be the null data block.)
804
             *
805
             * We set our repeatBlock if the desired value differs from the block's value,
806
             * and if we overwrite any data or if the data is all initial values
807
             * (which is the same as the block being the null block, see above).
808
             */
809
0
            setRepeatBlock=TRUE;
810
0
        }
811
0
        if(setRepeatBlock) {
812
0
            if(repeatBlock>=0) {
813
0
                setIndex2Entry(newTrie, i2, repeatBlock);
814
0
            } else {
815
                /* create and set and fill the repeatBlock */
816
0
                repeatBlock=getDataBlock(newTrie, start, TRUE);
817
0
                if(repeatBlock<0) {
818
0
                    *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
819
0
                    return;
820
0
                }
821
0
                writeBlock(newTrie->data+repeatBlock, value);
822
0
            }
823
0
        }
824
825
0
        start+=UTRIE2_DATA_BLOCK_LENGTH;
826
0
    }
827
828
0
    if(rest>0) {
829
        /* set partial block at [last block boundary..limit[ */
830
0
        block=getDataBlock(newTrie, start, TRUE);
831
0
        if(block<0) {
832
0
            *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
833
0
            return;
834
0
        }
835
836
0
        fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite);
837
0
    }
838
839
0
    return;
840
0
}
841
842
/* compaction --------------------------------------------------------------- */
843
844
static inline UBool
845
0
equal_int32(const int32_t *s, const int32_t *t, int32_t length) {
846
0
    while(length>0 && *s==*t) {
847
0
        ++s;
848
0
        ++t;
849
0
        --length;
850
0
    }
851
0
    return (UBool)(length==0);
852
0
}
853
854
static inline UBool
855
0
equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
856
0
    while(length>0 && *s==*t) {
857
0
        ++s;
858
0
        ++t;
859
0
        --length;
860
0
    }
861
0
    return (UBool)(length==0);
862
0
}
863
864
static int32_t
865
0
findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) {
866
0
    int32_t block;
867
868
    /* ensure that we do not even partially get past index2Length */
869
0
    index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH;
870
871
0
    for(block=0; block<=index2Length; ++block) {
872
0
        if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
873
0
            return block;
874
0
        }
875
0
    }
876
0
    return -1;
877
0
}
878
879
static int32_t
880
0
findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) {
881
0
    int32_t block;
882
883
    /* ensure that we do not even partially get past dataLength */
884
0
    dataLength-=blockLength;
885
886
0
    for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) {
887
0
        if(equal_uint32(data+block, data+otherBlock, blockLength)) {
888
0
            return block;
889
0
        }
890
0
    }
891
0
    return -1;
892
0
}
893
894
/*
895
 * Find the start of the last range in the trie by enumerating backward.
896
 * Indexes for supplementary code points higher than this will be omitted.
897
 */
898
static UChar32
899
0
findHighStart(UNewTrie2 *trie, uint32_t highValue) {
900
0
    const uint32_t *data32;
901
902
0
    uint32_t value, initialValue;
903
0
    UChar32 c, prev;
904
0
    int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
905
906
0
    data32=trie->data;
907
0
    initialValue=trie->initialValue;
908
909
0
    index2NullOffset=trie->index2NullOffset;
910
0
    nullBlock=trie->dataNullOffset;
911
912
    /* set variables for previous range */
913
0
    if(highValue==initialValue) {
914
0
        prevI2Block=index2NullOffset;
915
0
        prevBlock=nullBlock;
916
0
    } else {
917
0
        prevI2Block=-1;
918
0
        prevBlock=-1;
919
0
    }
920
0
    prev=0x110000;
921
922
    /* enumerate index-2 blocks */
923
0
    i1=UNEWTRIE2_INDEX_1_LENGTH;
924
0
    c=prev;
925
0
    while(c>0) {
926
0
        i2Block=trie->index1[--i1];
927
0
        if(i2Block==prevI2Block) {
928
            /* the index-2 block is the same as the previous one, and filled with highValue */
929
0
            c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
930
0
            continue;
931
0
        }
932
0
        prevI2Block=i2Block;
933
0
        if(i2Block==index2NullOffset) {
934
            /* this is the null index-2 block */
935
0
            if(highValue!=initialValue) {
936
0
                return c;
937
0
            }
938
0
            c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
939
0
        } else {
940
            /* enumerate data blocks for one index-2 block */
941
0
            for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) {
942
0
                block=trie->index2[i2Block+ --i2];
943
0
                if(block==prevBlock) {
944
                    /* the block is the same as the previous one, and filled with highValue */
945
0
                    c-=UTRIE2_DATA_BLOCK_LENGTH;
946
0
                    continue;
947
0
                }
948
0
                prevBlock=block;
949
0
                if(block==nullBlock) {
950
                    /* this is the null data block */
951
0
                    if(highValue!=initialValue) {
952
0
                        return c;
953
0
                    }
954
0
                    c-=UTRIE2_DATA_BLOCK_LENGTH;
955
0
                } else {
956
0
                    for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) {
957
0
                        value=data32[block+ --j];
958
0
                        if(value!=highValue) {
959
0
                            return c;
960
0
                        }
961
0
                        --c;
962
0
                    }
963
0
                }
964
0
            }
965
0
        }
966
0
    }
967
968
    /* deliver last range */
969
0
    return 0;
970
0
}
971
972
/*
973
 * Compact a build-time trie.
974
 *
975
 * The compaction
976
 * - removes blocks that are identical with earlier ones
977
 * - overlaps adjacent blocks as much as possible (if overlap==TRUE)
978
 * - moves blocks in steps of the data granularity
979
 * - moves and overlaps blocks that overlap with multiple values in the overlap region
980
 *
981
 * It does not
982
 * - try to move and overlap blocks that are not already adjacent
983
 */
984
static void
985
0
compactData(UNewTrie2 *trie) {
986
0
    int32_t start, newStart, movedStart;
987
0
    int32_t blockLength, overlap;
988
0
    int32_t i, mapIndex, blockCount;
989
990
    /* do not compact linear-ASCII data */
991
0
    newStart=UTRIE2_DATA_START_OFFSET;
992
0
    for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) {
993
0
        trie->map[i]=start;
994
0
    }
995
996
    /*
997
     * Start with a block length of 64 for 2-byte UTF-8,
998
     * then switch to UTRIE2_DATA_BLOCK_LENGTH.
999
     */
1000
0
    blockLength=64;
1001
0
    blockCount=blockLength>>UTRIE2_SHIFT_2;
1002
0
    for(start=newStart; start<trie->dataLength;) {
1003
        /*
1004
         * start: index of first entry of current block
1005
         * newStart: index where the current block is to be moved
1006
         *           (right after current end of already-compacted data)
1007
         */
1008
0
        if(start==UNEWTRIE2_DATA_0800_OFFSET) {
1009
0
            blockLength=UTRIE2_DATA_BLOCK_LENGTH;
1010
0
            blockCount=1;
1011
0
        }
1012
1013
        /* skip blocks that are not used */
1014
0
        if(trie->map[start>>UTRIE2_SHIFT_2]<=0) {
1015
            /* advance start to the next block */
1016
0
            start+=blockLength;
1017
1018
            /* leave newStart with the previous block! */
1019
0
            continue;
1020
0
        }
1021
1022
        /* search for an identical block */
1023
0
        if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength))
1024
0
             >=0
1025
0
        ) {
1026
            /* found an identical block, set the other block's index value for the current block */
1027
0
            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
1028
0
                trie->map[mapIndex++]=movedStart;
1029
0
                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
1030
0
            }
1031
1032
            /* advance start to the next block */
1033
0
            start+=blockLength;
1034
1035
            /* leave newStart with the previous block! */
1036
0
            continue;
1037
0
        }
1038
1039
        /* see if the beginning of this block can be overlapped with the end of the previous block */
1040
        /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
1041
0
        for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;
1042
0
            overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap);
1043
0
            overlap-=UTRIE2_DATA_GRANULARITY) {}
1044
1045
0
        if(overlap>0 || newStart<start) {
1046
            /* some overlap, or just move the whole block */
1047
0
            movedStart=newStart-overlap;
1048
0
            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
1049
0
                trie->map[mapIndex++]=movedStart;
1050
0
                movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
1051
0
            }
1052
1053
            /* move the non-overlapping indexes to their new positions */
1054
0
            start+=overlap;
1055
0
            for(i=blockLength-overlap; i>0; --i) {
1056
0
                trie->data[newStart++]=trie->data[start++];
1057
0
            }
1058
0
        } else /* no overlap && newStart==start */ {
1059
0
            for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
1060
0
                trie->map[mapIndex++]=start;
1061
0
                start+=UTRIE2_DATA_BLOCK_LENGTH;
1062
0
            }
1063
0
            newStart=start;
1064
0
        }
1065
0
    }
1066
1067
    /* now adjust the index-2 table */
1068
0
    for(i=0; i<trie->index2Length; ++i) {
1069
0
        if(i==UNEWTRIE2_INDEX_GAP_OFFSET) {
1070
            /* Gap indexes are invalid (-1). Skip over the gap. */
1071
0
            i+=UNEWTRIE2_INDEX_GAP_LENGTH;
1072
0
        }
1073
0
        trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2];
1074
0
    }
1075
0
    trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2];
1076
1077
    /* ensure dataLength alignment */
1078
0
    while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) {
1079
0
        trie->data[newStart++]=trie->initialValue;
1080
0
    }
1081
1082
#ifdef UTRIE2_DEBUG
1083
    /* we saved some space */
1084
    printf("compacting UTrie2: count of 32-bit data words %lu->%lu\n",
1085
            (long)trie->dataLength, (long)newStart);
1086
#endif
1087
1088
0
    trie->dataLength=newStart;
1089
0
}
1090
1091
static void
1092
0
compactIndex2(UNewTrie2 *trie) {
1093
0
    int32_t i, start, newStart, movedStart, overlap;
1094
1095
    /* do not compact linear-BMP index-2 blocks */
1096
0
    newStart=UTRIE2_INDEX_2_BMP_LENGTH;
1097
0
    for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
1098
0
        trie->map[i]=start;
1099
0
    }
1100
1101
    /* Reduce the index table gap to what will be needed at runtime. */
1102
0
    newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1);
1103
1104
0
    for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) {
1105
        /*
1106
         * start: index of first entry of current block
1107
         * newStart: index where the current block is to be moved
1108
         *           (right after current end of already-compacted data)
1109
         */
1110
1111
        /* search for an identical block */
1112
0
        if( (movedStart=findSameIndex2Block(trie->index2, newStart, start))
1113
0
             >=0
1114
0
        ) {
1115
            /* found an identical block, set the other block's index value for the current block */
1116
0
            trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart;
1117
1118
            /* advance start to the next block */
1119
0
            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
1120
1121
            /* leave newStart with the previous block! */
1122
0
            continue;
1123
0
        }
1124
1125
        /* see if the beginning of this block can be overlapped with the end of the previous block */
1126
        /* look for maximum overlap with the previous, adjacent block */
1127
0
        for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
1128
0
            overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap);
1129
0
            --overlap) {}
1130
1131
0
        if(overlap>0 || newStart<start) {
1132
            /* some overlap, or just move the whole block */
1133
0
            trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;
1134
1135
            /* move the non-overlapping indexes to their new positions */
1136
0
            start+=overlap;
1137
0
            for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) {
1138
0
                trie->index2[newStart++]=trie->index2[start++];
1139
0
            }
1140
0
        } else /* no overlap && newStart==start */ {
1141
0
            trie->map[start>>UTRIE2_SHIFT_1_2]=start;
1142
0
            start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
1143
0
            newStart=start;
1144
0
        }
1145
0
    }
1146
1147
    /* now adjust the index-1 table */
1148
0
    for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
1149
0
        trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2];
1150
0
    }
1151
0
    trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2];
1152
1153
    /*
1154
     * Ensure data table alignment:
1155
     * Needs to be granularity-aligned for 16-bit trie
1156
     * (so that dataMove will be down-shiftable),
1157
     * and 2-aligned for uint32_t data.
1158
     */
1159
0
    while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) {
1160
        /* Arbitrary value: 0x3fffc not possible for real data. */
1161
0
        trie->index2[newStart++]=(int32_t)0xffff<<UTRIE2_INDEX_SHIFT;
1162
0
    }
1163
1164
#ifdef UTRIE2_DEBUG
1165
    /* we saved some space */
1166
    printf("compacting UTrie2: count of 16-bit index-2 words %lu->%lu\n",
1167
            (long)trie->index2Length, (long)newStart);
1168
#endif
1169
1170
0
    trie->index2Length=newStart;
1171
0
}
1172
1173
static void
1174
0
compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) {
1175
0
    UNewTrie2 *newTrie;
1176
0
    UChar32 highStart, suppHighStart;
1177
0
    uint32_t highValue;
1178
1179
0
    newTrie=trie->newTrie;
1180
1181
    /* find highStart and round it up */
1182
0
    highValue=utrie2_get32(trie, 0x10ffff);
1183
0
    highStart=findHighStart(newTrie, highValue);
1184
0
    highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);
1185
0
    if(highStart==0x110000) {
1186
0
        highValue=trie->errorValue;
1187
0
    }
1188
1189
    /*
1190
     * Set trie->highStart only after utrie2_get32(trie, highStart).
1191
     * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
1192
     */
1193
0
    trie->highStart=newTrie->highStart=highStart;
1194
1195
#ifdef UTRIE2_DEBUG
1196
    printf("UTrie2: highStart U+%04lx  highValue 0x%lx  initialValue 0x%lx\n",
1197
            (long)highStart, (long)highValue, (long)trie->initialValue);
1198
#endif
1199
1200
0
    if(highStart<0x110000) {
1201
        /* Blank out [highStart..10ffff] to release associated data blocks. */
1202
0
        suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;
1203
0
        utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, TRUE, pErrorCode);
1204
0
        if(U_FAILURE(*pErrorCode)) {
1205
0
            return;
1206
0
        }
1207
0
    }
1208
1209
0
    compactData(newTrie);
1210
0
    if(highStart>0x10000) {
1211
0
        compactIndex2(newTrie);
1212
#ifdef UTRIE2_DEBUG
1213
    } else {
1214
        printf("UTrie2: highStart U+%04lx  count of 16-bit index-2 words %lu->%lu\n",
1215
                (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET);
1216
#endif
1217
0
    }
1218
1219
    /*
1220
     * Store the highValue in the data array and round up the dataLength.
1221
     * Must be done after compactData() because that assumes that dataLength
1222
     * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
1223
     */
1224
0
    newTrie->data[newTrie->dataLength++]=highValue;
1225
0
    while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) {
1226
0
        newTrie->data[newTrie->dataLength++]=trie->initialValue;
1227
0
    }
1228
1229
0
    newTrie->isCompacted=TRUE;
1230
0
}
1231
1232
/* serialization ------------------------------------------------------------ */
1233
1234
/**
1235
 * Maximum length of the runtime index array.
1236
 * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
1237
 * (The actual maximum length is lower,
1238
 * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
1239
 */
1240
0
#define UTRIE2_MAX_INDEX_LENGTH 0xffff
1241
1242
/**
1243
 * Maximum length of the runtime data array.
1244
 * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
1245
 * and by uint16_t UTrie2Header.shiftedDataLength.
1246
 */
1247
0
#define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)
1248
1249
/* Compact and internally serialize the trie. */
1250
U_CAPI void U_EXPORT2
1251
0
utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) {
1252
0
    UNewTrie2 *newTrie;
1253
0
    UTrie2Header *header;
1254
0
    uint32_t *p;
1255
0
    uint16_t *dest16;
1256
0
    int32_t i, length;
1257
0
    int32_t allIndexesLength;
1258
0
    int32_t dataMove;  /* >0 if the data is moved to the end of the index array */
1259
0
    UChar32 highStart;
1260
1261
    /* argument check */
1262
0
    if(U_FAILURE(*pErrorCode)) {
1263
0
        return;
1264
0
    }
1265
0
    if( trie==NULL ||
1266
0
        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
1267
0
    ) {
1268
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1269
0
        return;
1270
0
    }
1271
0
    newTrie=trie->newTrie;
1272
0
    if(newTrie==NULL) {
1273
        /* already frozen */
1274
0
        UTrie2ValueBits frozenValueBits=
1275
0
            trie->data16!=NULL ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS;
1276
0
        if(valueBits!=frozenValueBits) {
1277
0
            *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1278
0
        }
1279
0
        return;
1280
0
    }
1281
1282
    /* compact if necessary */
1283
0
    if(!newTrie->isCompacted) {
1284
0
        compactTrie(trie, pErrorCode);
1285
0
        if(U_FAILURE(*pErrorCode)) {
1286
0
            return;
1287
0
        }
1288
0
    }
1289
0
    highStart=trie->highStart;
1290
1291
0
    if(highStart<=0x10000) {
1292
0
        allIndexesLength=UTRIE2_INDEX_1_OFFSET;
1293
0
    } else {
1294
0
        allIndexesLength=newTrie->index2Length;
1295
0
    }
1296
0
    if(valueBits==UTRIE2_16_VALUE_BITS) {
1297
0
        dataMove=allIndexesLength;
1298
0
    } else {
1299
0
        dataMove=0;
1300
0
    }
1301
1302
    /* are indexLength and dataLength within limits? */
1303
0
    if( /* for unshifted indexLength */
1304
0
        allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||
1305
        /* for unshifted dataNullOffset */
1306
0
        (dataMove+newTrie->dataNullOffset)>0xffff ||
1307
        /* for unshifted 2-byte UTF-8 index-2 values */
1308
0
        (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||
1309
        /* for shiftedDataLength */
1310
0
        (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH
1311
0
    ) {
1312
0
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1313
0
        return;
1314
0
    }
1315
1316
    /* calculate the total serialized length */
1317
0
    length=sizeof(UTrie2Header)+allIndexesLength*2;
1318
0
    if(valueBits==UTRIE2_16_VALUE_BITS) {
1319
0
        length+=newTrie->dataLength*2;
1320
0
    } else {
1321
0
        length+=newTrie->dataLength*4;
1322
0
    }
1323
1324
0
    trie->memory=uprv_malloc(length);
1325
0
    if(trie->memory==NULL) {
1326
0
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1327
0
        return;
1328
0
    }
1329
0
    trie->length=length;
1330
0
    trie->isMemoryOwned=TRUE;
1331
1332
0
    trie->indexLength=allIndexesLength;
1333
0
    trie->dataLength=newTrie->dataLength;
1334
0
    if(highStart<=0x10000) {
1335
0
        trie->index2NullOffset=0xffff;
1336
0
    } else {
1337
0
        trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset;
1338
0
    }
1339
0
    trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset);
1340
0
    trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY;
1341
1342
    /* set the header fields */
1343
0
    header=(UTrie2Header *)trie->memory;
1344
1345
0
    header->signature=UTRIE2_SIG; /* "Tri2" */
1346
0
    header->options=(uint16_t)valueBits;
1347
1348
0
    header->indexLength=(uint16_t)trie->indexLength;
1349
0
    header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT);
1350
0
    header->index2NullOffset=trie->index2NullOffset;
1351
0
    header->dataNullOffset=trie->dataNullOffset;
1352
0
    header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1);
1353
1354
    /* fill the index and data arrays */
1355
0
    dest16=(uint16_t *)(header+1);
1356
0
    trie->index=dest16;
1357
1358
    /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
1359
0
    p=(uint32_t *)newTrie->index2;
1360
0
    for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) {
1361
0
        *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
1362
0
    }
1363
1364
    /* write UTF-8 2-byte index-2 values, not right-shifted */
1365
0
    for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
1366
0
        *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
1367
0
    }
1368
0
    for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
1369
0
        *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]);
1370
0
    }
1371
1372
0
    if(highStart>0x10000) {
1373
0
        int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1;
1374
0
        int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length;
1375
1376
        /* write 16-bit index-1 values for supplementary code points */
1377
0
        p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
1378
0
        for(i=index1Length; i>0; --i) {
1379
0
            *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);
1380
0
        }
1381
1382
        /*
1383
         * write the index-2 array values for supplementary code points,
1384
         * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
1385
         */
1386
0
        p=(uint32_t *)newTrie->index2+index2Offset;
1387
0
        for(i=newTrie->index2Length-index2Offset; i>0; --i) {
1388
0
            *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
1389
0
        }
1390
0
    }
1391
1392
    /* write the 16/32-bit data array */
1393
0
    switch(valueBits) {
1394
0
    case UTRIE2_16_VALUE_BITS:
1395
        /* write 16-bit data values */
1396
0
        trie->data16=dest16;
1397
0
        trie->data32=NULL;
1398
0
        p=newTrie->data;
1399
0
        for(i=newTrie->dataLength; i>0; --i) {
1400
0
            *dest16++=(uint16_t)*p++;
1401
0
        }
1402
0
        break;
1403
0
    case UTRIE2_32_VALUE_BITS:
1404
        /* write 32-bit data values */
1405
0
        trie->data16=NULL;
1406
0
        trie->data32=(uint32_t *)dest16;
1407
0
        uprv_memcpy(dest16, newTrie->data, (size_t)newTrie->dataLength*4);
1408
0
        break;
1409
0
    default:
1410
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1411
0
        return;
1412
0
    }
1413
1414
    /* Delete the UNewTrie2. */
1415
0
    uprv_free(newTrie->data);
1416
0
    uprv_free(newTrie);
1417
0
    trie->newTrie=NULL;
1418
0
}
1419
1420
/*
1421
 * This is here to avoid a dependency from utrie2.cpp on utrie.c.
1422
 * This file already depends on utrie.c.
1423
 * Otherwise, this should be in utrie2.cpp right after utrie2_swap().
1424
 */
1425
U_CAPI int32_t U_EXPORT2
1426
utrie2_swapAnyVersion(const UDataSwapper *ds,
1427
                      const void *inData, int32_t length, void *outData,
1428
0
                      UErrorCode *pErrorCode) {
1429
0
    if(U_SUCCESS(*pErrorCode)) {
1430
0
        switch(utrie2_getVersion(inData, length, TRUE)) {
1431
0
        case 1:
1432
0
            return utrie_swap(ds, inData, length, outData, pErrorCode);
1433
0
        case 2:
1434
0
            return utrie2_swap(ds, inData, length, outData, pErrorCode);
1435
0
        default:
1436
0
            *pErrorCode=U_INVALID_FORMAT_ERROR;
1437
0
            return 0;
1438
0
        }
1439
0
    }
1440
0
    return 0;
1441
0
}