Coverage Report

Created: 2023-06-07 07:17

/src/icu/source/common/utrie2.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*
6
*   Copyright (C) 2001-2014, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
******************************************************************************
10
*   file name:  utrie2.cpp
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2008aug16 (starting from a copy of utrie.c)
16
*   created by: Markus W. Scherer
17
*
18
*   This is a common implementation of a Unicode trie.
19
*   It is a kind of compressed, serializable table of 16- or 32-bit values associated with
20
*   Unicode code points (0..0x10ffff).
21
*   This is the second common version of a Unicode trie (hence the name UTrie2).
22
*   See utrie2.h for a comparison.
23
*
24
*   This file contains only the runtime and enumeration code, for read-only access.
25
*   See utrie2_builder.c for the builder code.
26
*/
27
#ifdef UTRIE2_DEBUG
28
#   include <stdio.h>
29
#endif
30
31
#include "unicode/utypes.h"
32
#include "unicode/utf.h"
33
#include "unicode/utf8.h"
34
#include "unicode/utf16.h"
35
#include "cmemory.h"
36
#include "utrie2.h"
37
#include "utrie2_impl.h"
38
#include "uassert.h"
39
40
/* Public UTrie2 API implementation ----------------------------------------- */
41
42
static uint32_t
43
0
get32(const UNewTrie2 *trie, UChar32 c, UBool fromLSCP) {
44
0
    int32_t i2, block;
45
46
0
    if(c>=trie->highStart && (!U_IS_LEAD(c) || fromLSCP)) {
47
0
        return trie->data[trie->dataLength-UTRIE2_DATA_GRANULARITY];
48
0
    }
49
50
0
    if(U_IS_LEAD(c) && fromLSCP) {
51
0
        i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
52
0
            (c>>UTRIE2_SHIFT_2);
53
0
    } else {
54
0
        i2=trie->index1[c>>UTRIE2_SHIFT_1]+
55
0
            ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
56
0
    }
57
0
    block=trie->index2[i2];
58
0
    return trie->data[block+(c&UTRIE2_DATA_MASK)];
59
0
}
60
61
U_CAPI uint32_t U_EXPORT2
62
0
utrie2_get32(const UTrie2 *trie, UChar32 c) {
63
0
    if(trie->data16!=NULL) {
64
0
        return UTRIE2_GET16(trie, c);
65
0
    } else if(trie->data32!=NULL) {
66
0
        return UTRIE2_GET32(trie, c);
67
0
    } else if((uint32_t)c>0x10ffff) {
68
0
        return trie->errorValue;
69
0
    } else {
70
0
        return get32(trie->newTrie, c, TRUE);
71
0
    }
72
0
}
73
74
U_CAPI uint32_t U_EXPORT2
75
0
utrie2_get32FromLeadSurrogateCodeUnit(const UTrie2 *trie, UChar32 c) {
76
0
    if(!U_IS_LEAD(c)) {
77
0
        return trie->errorValue;
78
0
    }
79
0
    if(trie->data16!=NULL) {
80
0
        return UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, c);
81
0
    } else if(trie->data32!=NULL) {
82
0
        return UTRIE2_GET32_FROM_U16_SINGLE_LEAD(trie, c);
83
0
    } else {
84
0
        return get32(trie->newTrie, c, FALSE);
85
0
    }
86
0
}
87
88
static inline int32_t
89
0
u8Index(const UTrie2 *trie, UChar32 c, int32_t i) {
90
0
    int32_t idx=
91
0
        _UTRIE2_INDEX_FROM_CP(
92
0
            trie,
93
0
            trie->data32==NULL ? trie->indexLength : 0,
94
0
            c);
95
0
    return (idx<<3)|i;
96
0
}
97
98
U_CAPI int32_t U_EXPORT2
99
utrie2_internalU8NextIndex(const UTrie2 *trie, UChar32 c,
100
0
                           const uint8_t *src, const uint8_t *limit) {
101
0
    int32_t i, length;
102
0
    i=0;
103
    /* support 64-bit pointers by avoiding cast of arbitrary difference */
104
0
    if((limit-src)<=7) {
105
0
        length=(int32_t)(limit-src);
106
0
    } else {
107
0
        length=7;
108
0
    }
109
0
    c=utf8_nextCharSafeBody(src, &i, length, c, -1);
110
0
    return u8Index(trie, c, i);
111
0
}
112
113
U_CAPI int32_t U_EXPORT2
114
utrie2_internalU8PrevIndex(const UTrie2 *trie, UChar32 c,
115
0
                           const uint8_t *start, const uint8_t *src) {
116
0
    int32_t i, length;
117
    /* support 64-bit pointers by avoiding cast of arbitrary difference */
118
0
    if((src-start)<=7) {
119
0
        i=length=(int32_t)(src-start);
120
0
    } else {
121
0
        i=length=7;
122
0
        start=src-7;
123
0
    }
124
0
    c=utf8_prevCharSafeBody(start, 0, &i, c, -1);
125
0
    i=length-i;  /* number of bytes read backward from src */
126
0
    return u8Index(trie, c, i);
127
0
}
128
129
U_CAPI UTrie2 * U_EXPORT2
130
utrie2_openFromSerialized(UTrie2ValueBits valueBits,
131
                          const void *data, int32_t length, int32_t *pActualLength,
132
0
                          UErrorCode *pErrorCode) {
133
0
    const UTrie2Header *header;
134
0
    const uint16_t *p16;
135
0
    int32_t actualLength;
136
137
0
    UTrie2 tempTrie;
138
0
    UTrie2 *trie;
139
140
0
    if(U_FAILURE(*pErrorCode)) {
141
0
        return 0;
142
0
    }
143
144
0
    if( length<=0 || (U_POINTER_MASK_LSB(data, 3)!=0) ||
145
0
        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
146
0
    ) {
147
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
148
0
        return 0;
149
0
    }
150
151
    /* enough data for a trie header? */
152
0
    if(length<(int32_t)sizeof(UTrie2Header)) {
153
0
        *pErrorCode=U_INVALID_FORMAT_ERROR;
154
0
        return 0;
155
0
    }
156
157
    /* check the signature */
158
0
    header=(const UTrie2Header *)data;
159
0
    if(header->signature!=UTRIE2_SIG) {
160
0
        *pErrorCode=U_INVALID_FORMAT_ERROR;
161
0
        return 0;
162
0
    }
163
164
    /* get the options */
165
0
    if(valueBits!=(UTrie2ValueBits)(header->options&UTRIE2_OPTIONS_VALUE_BITS_MASK)) {
166
0
        *pErrorCode=U_INVALID_FORMAT_ERROR;
167
0
        return 0;
168
0
    }
169
170
    /* get the length values and offsets */
171
0
    uprv_memset(&tempTrie, 0, sizeof(tempTrie));
172
0
    tempTrie.indexLength=header->indexLength;
173
0
    tempTrie.dataLength=header->shiftedDataLength<<UTRIE2_INDEX_SHIFT;
174
0
    tempTrie.index2NullOffset=header->index2NullOffset;
175
0
    tempTrie.dataNullOffset=header->dataNullOffset;
176
177
0
    tempTrie.highStart=header->shiftedHighStart<<UTRIE2_SHIFT_1;
178
0
    tempTrie.highValueIndex=tempTrie.dataLength-UTRIE2_DATA_GRANULARITY;
179
0
    if(valueBits==UTRIE2_16_VALUE_BITS) {
180
0
        tempTrie.highValueIndex+=tempTrie.indexLength;
181
0
    }
182
183
    /* calculate the actual length */
184
0
    actualLength=(int32_t)sizeof(UTrie2Header)+tempTrie.indexLength*2;
185
0
    if(valueBits==UTRIE2_16_VALUE_BITS) {
186
0
        actualLength+=tempTrie.dataLength*2;
187
0
    } else {
188
0
        actualLength+=tempTrie.dataLength*4;
189
0
    }
190
0
    if(length<actualLength) {
191
0
        *pErrorCode=U_INVALID_FORMAT_ERROR;  /* not enough bytes */
192
0
        return 0;
193
0
    }
194
195
    /* allocate the trie */
196
0
    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
197
0
    if(trie==NULL) {
198
0
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
199
0
        return 0;
200
0
    }
201
0
    uprv_memcpy(trie, &tempTrie, sizeof(tempTrie));
202
0
    trie->memory=(uint32_t *)data;
203
0
    trie->length=actualLength;
204
0
    trie->isMemoryOwned=FALSE;
205
206
    /* set the pointers to its index and data arrays */
207
0
    p16=(const uint16_t *)(header+1);
208
0
    trie->index=p16;
209
0
    p16+=trie->indexLength;
210
211
    /* get the data */
212
0
    switch(valueBits) {
213
0
    case UTRIE2_16_VALUE_BITS:
214
0
        trie->data16=p16;
215
0
        trie->data32=NULL;
216
0
        trie->initialValue=trie->index[trie->dataNullOffset];
217
0
        trie->errorValue=trie->data16[UTRIE2_BAD_UTF8_DATA_OFFSET];
218
0
        break;
219
0
    case UTRIE2_32_VALUE_BITS:
220
0
        trie->data16=NULL;
221
0
        trie->data32=(const uint32_t *)p16;
222
0
        trie->initialValue=trie->data32[trie->dataNullOffset];
223
0
        trie->errorValue=trie->data32[UTRIE2_BAD_UTF8_DATA_OFFSET];
224
0
        break;
225
0
    default:
226
0
        *pErrorCode=U_INVALID_FORMAT_ERROR;
227
0
        return 0;
228
0
    }
229
230
0
    if(pActualLength!=NULL) {
231
0
        *pActualLength=actualLength;
232
0
    }
233
0
    return trie;
234
0
}
235
236
U_CAPI UTrie2 * U_EXPORT2
237
utrie2_openDummy(UTrie2ValueBits valueBits,
238
                 uint32_t initialValue, uint32_t errorValue,
239
0
                 UErrorCode *pErrorCode) {
240
0
    UTrie2 *trie;
241
0
    UTrie2Header *header;
242
0
    uint32_t *p;
243
0
    uint16_t *dest16;
244
0
    int32_t indexLength, dataLength, length, i;
245
0
    int32_t dataMove;  /* >0 if the data is moved to the end of the index array */
246
247
0
    if(U_FAILURE(*pErrorCode)) {
248
0
        return 0;
249
0
    }
250
251
0
    if(valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits) {
252
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
253
0
        return 0;
254
0
    }
255
256
    /* calculate the total length of the dummy trie data */
257
0
    indexLength=UTRIE2_INDEX_1_OFFSET;
258
0
    dataLength=UTRIE2_DATA_START_OFFSET+UTRIE2_DATA_GRANULARITY;
259
0
    length=(int32_t)sizeof(UTrie2Header)+indexLength*2;
260
0
    if(valueBits==UTRIE2_16_VALUE_BITS) {
261
0
        length+=dataLength*2;
262
0
    } else {
263
0
        length+=dataLength*4;
264
0
    }
265
266
    /* allocate the trie */
267
0
    trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
268
0
    if(trie==NULL) {
269
0
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
270
0
        return 0;
271
0
    }
272
0
    uprv_memset(trie, 0, sizeof(UTrie2));
273
0
    trie->memory=uprv_malloc(length);
274
0
    if(trie->memory==NULL) {
275
0
        uprv_free(trie);
276
0
        *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
277
0
        return 0;
278
0
    }
279
0
    trie->length=length;
280
0
    trie->isMemoryOwned=TRUE;
281
282
    /* set the UTrie2 fields */
283
0
    if(valueBits==UTRIE2_16_VALUE_BITS) {
284
0
        dataMove=indexLength;
285
0
    } else {
286
0
        dataMove=0;
287
0
    }
288
289
0
    trie->indexLength=indexLength;
290
0
    trie->dataLength=dataLength;
291
0
    trie->index2NullOffset=UTRIE2_INDEX_2_OFFSET;
292
0
    trie->dataNullOffset=(uint16_t)dataMove;
293
0
    trie->initialValue=initialValue;
294
0
    trie->errorValue=errorValue;
295
0
    trie->highStart=0;
296
0
    trie->highValueIndex=dataMove+UTRIE2_DATA_START_OFFSET;
297
298
    /* set the header fields */
299
0
    header=(UTrie2Header *)trie->memory;
300
301
0
    header->signature=UTRIE2_SIG; /* "Tri2" */
302
0
    header->options=(uint16_t)valueBits;
303
304
0
    header->indexLength=(uint16_t)indexLength;
305
0
    header->shiftedDataLength=(uint16_t)(dataLength>>UTRIE2_INDEX_SHIFT);
306
0
    header->index2NullOffset=(uint16_t)UTRIE2_INDEX_2_OFFSET;
307
0
    header->dataNullOffset=(uint16_t)dataMove;
308
0
    header->shiftedHighStart=0;
309
310
    /* fill the index and data arrays */
311
0
    dest16=(uint16_t *)(header+1);
312
0
    trie->index=dest16;
313
314
    /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT */
315
0
    for(i=0; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
316
0
        *dest16++=(uint16_t)(dataMove>>UTRIE2_INDEX_SHIFT);  /* null data block */
317
0
    }
318
319
    /* write UTF-8 2-byte index-2 values, not right-shifted */
320
0
    for(i=0; i<(0xc2-0xc0); ++i) {                                  /* C0..C1 */
321
0
        *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
322
0
    }
323
0
    for(; i<(0xe0-0xc0); ++i) {                                     /* C2..DF */
324
0
        *dest16++=(uint16_t)dataMove;
325
0
    }
326
327
    /* write the 16/32-bit data array */
328
0
    switch(valueBits) {
329
0
    case UTRIE2_16_VALUE_BITS:
330
        /* write 16-bit data values */
331
0
        trie->data16=dest16;
332
0
        trie->data32=NULL;
333
0
        for(i=0; i<0x80; ++i) {
334
0
            *dest16++=(uint16_t)initialValue;
335
0
        }
336
0
        for(; i<0xc0; ++i) {
337
0
            *dest16++=(uint16_t)errorValue;
338
0
        }
339
        /* highValue and reserved values */
340
0
        for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
341
0
            *dest16++=(uint16_t)initialValue;
342
0
        }
343
0
        break;
344
0
    case UTRIE2_32_VALUE_BITS:
345
        /* write 32-bit data values */
346
0
        p=(uint32_t *)dest16;
347
0
        trie->data16=NULL;
348
0
        trie->data32=p;
349
0
        for(i=0; i<0x80; ++i) {
350
0
            *p++=initialValue;
351
0
        }
352
0
        for(; i<0xc0; ++i) {
353
0
            *p++=errorValue;
354
0
        }
355
        /* highValue and reserved values */
356
0
        for(i=0; i<UTRIE2_DATA_GRANULARITY; ++i) {
357
0
            *p++=initialValue;
358
0
        }
359
0
        break;
360
0
    default:
361
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
362
0
        return 0;
363
0
    }
364
365
0
    return trie;
366
0
}
367
368
U_CAPI void U_EXPORT2
369
0
utrie2_close(UTrie2 *trie) {
370
0
    if(trie!=NULL) {
371
0
        if(trie->isMemoryOwned) {
372
0
            uprv_free(trie->memory);
373
0
        }
374
0
        if(trie->newTrie!=NULL) {
375
0
            uprv_free(trie->newTrie->data);
376
0
            uprv_free(trie->newTrie);
377
0
        }
378
0
        uprv_free(trie);
379
0
    }
380
0
}
381
382
U_CAPI int32_t U_EXPORT2
383
0
utrie2_getVersion(const void *data, int32_t length, UBool anyEndianOk) {
384
0
    uint32_t signature;
385
0
    if(length<16 || data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)) {
386
0
        return 0;
387
0
    }
388
0
    signature=*(const uint32_t *)data;
389
0
    if(signature==UTRIE2_SIG) {
390
0
        return 2;
391
0
    }
392
0
    if(anyEndianOk && signature==UTRIE2_OE_SIG) {
393
0
        return 2;
394
0
    }
395
0
    if(signature==UTRIE_SIG) {
396
0
        return 1;
397
0
    }
398
0
    if(anyEndianOk && signature==UTRIE_OE_SIG) {
399
0
        return 1;
400
0
    }
401
0
    return 0;
402
0
}
403
404
U_CAPI UBool U_EXPORT2
405
0
utrie2_isFrozen(const UTrie2 *trie) {
406
0
    return (UBool)(trie->newTrie==NULL);
407
0
}
408
409
U_CAPI int32_t U_EXPORT2
410
utrie2_serialize(const UTrie2 *trie,
411
                 void *data, int32_t capacity,
412
0
                 UErrorCode *pErrorCode) {
413
    /* argument check */
414
0
    if(U_FAILURE(*pErrorCode)) {
415
0
        return 0;
416
0
    }
417
418
0
    if( trie==NULL || trie->memory==NULL || trie->newTrie!=NULL ||
419
0
        capacity<0 || (capacity>0 && (data==NULL || (U_POINTER_MASK_LSB(data, 3)!=0)))
420
0
    ) {
421
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
422
0
        return 0;
423
0
    }
424
425
0
    if(capacity>=trie->length) {
426
0
        uprv_memcpy(data, trie->memory, trie->length);
427
0
    } else {
428
0
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
429
0
    }
430
0
    return trie->length;
431
0
}
432
433
U_CAPI int32_t U_EXPORT2
434
utrie2_swap(const UDataSwapper *ds,
435
            const void *inData, int32_t length, void *outData,
436
0
            UErrorCode *pErrorCode) {
437
0
    const UTrie2Header *inTrie;
438
0
    UTrie2Header trie;
439
0
    int32_t dataLength, size;
440
0
    UTrie2ValueBits valueBits;
441
442
0
    if(U_FAILURE(*pErrorCode)) {
443
0
        return 0;
444
0
    }
445
0
    if(ds==NULL || inData==NULL || (length>=0 && outData==NULL)) {
446
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
447
0
        return 0;
448
0
    }
449
450
    /* setup and swapping */
451
0
    if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
452
0
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
453
0
        return 0;
454
0
    }
455
456
0
    inTrie=(const UTrie2Header *)inData;
457
0
    trie.signature=ds->readUInt32(inTrie->signature);
458
0
    trie.options=ds->readUInt16(inTrie->options);
459
0
    trie.indexLength=ds->readUInt16(inTrie->indexLength);
460
0
    trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
461
462
0
    valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
463
0
    dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
464
465
0
    if( trie.signature!=UTRIE2_SIG ||
466
0
        valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
467
0
        trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
468
0
        dataLength<UTRIE2_DATA_START_OFFSET
469
0
    ) {
470
0
        *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
471
0
        return 0;
472
0
    }
473
474
0
    size=sizeof(UTrie2Header)+trie.indexLength*2;
475
0
    switch(valueBits) {
476
0
    case UTRIE2_16_VALUE_BITS:
477
0
        size+=dataLength*2;
478
0
        break;
479
0
    case UTRIE2_32_VALUE_BITS:
480
0
        size+=dataLength*4;
481
0
        break;
482
0
    default:
483
0
        *pErrorCode=U_INVALID_FORMAT_ERROR;
484
0
        return 0;
485
0
    }
486
487
0
    if(length>=0) {
488
0
        UTrie2Header *outTrie;
489
490
0
        if(length<size) {
491
0
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
492
0
            return 0;
493
0
        }
494
495
0
        outTrie=(UTrie2Header *)outData;
496
497
        /* swap the header */
498
0
        ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
499
0
        ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
500
501
        /* swap the index and the data */
502
0
        switch(valueBits) {
503
0
        case UTRIE2_16_VALUE_BITS:
504
0
            ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
505
0
            break;
506
0
        case UTRIE2_32_VALUE_BITS:
507
0
            ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
508
0
            ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
509
0
                                     (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
510
0
            break;
511
0
        default:
512
0
            *pErrorCode=U_INVALID_FORMAT_ERROR;
513
0
            return 0;
514
0
        }
515
0
    }
516
517
0
    return size;
518
0
}
519
520
// utrie2_swapAnyVersion() should be defined here but lives in utrie2_builder.c
521
// to avoid a dependency from utrie2.cpp on utrie.c.
522
523
/* enumeration -------------------------------------------------------------- */
524
525
0
#define MIN_VALUE(a, b) ((a)<(b) ? (a) : (b))
526
527
/* default UTrie2EnumValue() returns the input value itself */
528
static uint32_t U_CALLCONV
529
0
enumSameValue(const void * /*context*/, uint32_t value) {
530
0
    return value;
531
0
}
532
533
/**
534
 * Enumerate all ranges of code points with the same relevant values.
535
 * The values are transformed from the raw trie entries by the enumValue function.
536
 *
537
 * Currently requires start<limit and both start and limit must be multiples
538
 * of UTRIE2_DATA_BLOCK_LENGTH.
539
 *
540
 * Optimizations:
541
 * - Skip a whole block if we know that it is filled with a single value,
542
 *   and it is the same as we visited just before.
543
 * - Handle the null block specially because we know a priori that it is filled
544
 *   with a single value.
545
 */
546
static void
547
enumEitherTrie(const UTrie2 *trie,
548
               UChar32 start, UChar32 limit,
549
0
               UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
550
0
    const uint32_t *data32;
551
0
    const uint16_t *idx;
552
553
0
    uint32_t value, prevValue, initialValue;
554
0
    UChar32 c, prev, highStart;
555
0
    int32_t j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
556
557
0
    if(enumRange==NULL) {
558
0
        return;
559
0
    }
560
0
    if(enumValue==NULL) {
561
0
        enumValue=enumSameValue;
562
0
    }
563
564
0
    if(trie->newTrie==NULL) {
565
        /* frozen trie */
566
0
        idx=trie->index;
567
0
        U_ASSERT(idx!=NULL); /* the following code assumes trie->newTrie is not NULL when idx is NULL */
568
0
        data32=trie->data32;
569
570
0
        index2NullOffset=trie->index2NullOffset;
571
0
        nullBlock=trie->dataNullOffset;
572
0
    } else {
573
        /* unfrozen, mutable trie */
574
0
        idx=NULL;
575
0
        data32=trie->newTrie->data;
576
0
        U_ASSERT(data32!=NULL); /* the following code assumes idx is not NULL when data32 is NULL */
577
578
0
        index2NullOffset=trie->newTrie->index2NullOffset;
579
0
        nullBlock=trie->newTrie->dataNullOffset;
580
0
    }
581
582
0
    highStart=trie->highStart;
583
584
    /* get the enumeration value that corresponds to an initial-value trie data entry */
585
0
    initialValue=enumValue(context, trie->initialValue);
586
587
    /* set variables for previous range */
588
0
    prevI2Block=-1;
589
0
    prevBlock=-1;
590
0
    prev=start;
591
0
    prevValue=0;
592
593
    /* enumerate index-2 blocks */
594
0
    for(c=start; c<limit && c<highStart;) {
595
        /* Code point limit for iterating inside this i2Block. */
596
0
        UChar32 tempLimit=c+UTRIE2_CP_PER_INDEX_1_ENTRY;
597
0
        if(limit<tempLimit) {
598
0
            tempLimit=limit;
599
0
        }
600
0
        if(c<=0xffff) {
601
0
            if(!U_IS_SURROGATE(c)) {
602
0
                i2Block=c>>UTRIE2_SHIFT_2;
603
0
            } else if(U_IS_SURROGATE_LEAD(c)) {
604
                /*
605
                 * Enumerate values for lead surrogate code points, not code units:
606
                 * This special block has half the normal length.
607
                 */
608
0
                i2Block=UTRIE2_LSCP_INDEX_2_OFFSET;
609
0
                tempLimit=MIN_VALUE(0xdc00, limit);
610
0
            } else {
611
                /*
612
                 * Switch back to the normal part of the index-2 table.
613
                 * Enumerate the second half of the surrogates block.
614
                 */
615
0
                i2Block=0xd800>>UTRIE2_SHIFT_2;
616
0
                tempLimit=MIN_VALUE(0xe000, limit);
617
0
            }
618
0
        } else {
619
            /* supplementary code points */
620
0
            if(idx!=NULL) {
621
0
                i2Block=idx[(UTRIE2_INDEX_1_OFFSET-UTRIE2_OMITTED_BMP_INDEX_1_LENGTH)+
622
0
                              (c>>UTRIE2_SHIFT_1)];
623
0
            } else {
624
0
                i2Block=trie->newTrie->index1[c>>UTRIE2_SHIFT_1];
625
0
            }
626
0
            if(i2Block==prevI2Block && (c-prev)>=UTRIE2_CP_PER_INDEX_1_ENTRY) {
627
                /*
628
                 * The index-2 block is the same as the previous one, and filled with prevValue.
629
                 * Only possible for supplementary code points because the linear-BMP index-2
630
                 * table creates unique i2Block values.
631
                 */
632
0
                c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
633
0
                continue;
634
0
            }
635
0
        }
636
0
        prevI2Block=i2Block;
637
0
        if(i2Block==index2NullOffset) {
638
            /* this is the null index-2 block */
639
0
            if(prevValue!=initialValue) {
640
0
                if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
641
0
                    return;
642
0
                }
643
0
                prevBlock=nullBlock;
644
0
                prev=c;
645
0
                prevValue=initialValue;
646
0
            }
647
0
            c+=UTRIE2_CP_PER_INDEX_1_ENTRY;
648
0
        } else {
649
            /* enumerate data blocks for one index-2 block */
650
0
            int32_t i2, i2Limit;
651
0
            i2=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
652
0
            if((c>>UTRIE2_SHIFT_1)==(tempLimit>>UTRIE2_SHIFT_1)) {
653
0
                i2Limit=(tempLimit>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
654
0
            } else {
655
0
                i2Limit=UTRIE2_INDEX_2_BLOCK_LENGTH;
656
0
            }
657
0
            for(; i2<i2Limit; ++i2) {
658
0
                if(idx!=NULL) {
659
0
                    block=(int32_t)idx[i2Block+i2]<<UTRIE2_INDEX_SHIFT;
660
0
                } else {
661
0
                    block=trie->newTrie->index2[i2Block+i2];
662
0
                }
663
0
                if(block==prevBlock && (c-prev)>=UTRIE2_DATA_BLOCK_LENGTH) {
664
                    /* the block is the same as the previous one, and filled with prevValue */
665
0
                    c+=UTRIE2_DATA_BLOCK_LENGTH;
666
0
                    continue;
667
0
                }
668
0
                prevBlock=block;
669
0
                if(block==nullBlock) {
670
                    /* this is the null data block */
671
0
                    if(prevValue!=initialValue) {
672
0
                        if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
673
0
                            return;
674
0
                        }
675
0
                        prev=c;
676
0
                        prevValue=initialValue;
677
0
                    }
678
0
                    c+=UTRIE2_DATA_BLOCK_LENGTH;
679
0
                } else {
680
0
                    for(j=0; j<UTRIE2_DATA_BLOCK_LENGTH; ++j) {
681
0
                        value=enumValue(context, data32!=NULL ? data32[block+j] : idx[block+j]);
682
0
                        if(value!=prevValue) {
683
0
                            if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
684
0
                                return;
685
0
                            }
686
0
                            prev=c;
687
0
                            prevValue=value;
688
0
                        }
689
0
                        ++c;
690
0
                    }
691
0
                }
692
0
            }
693
0
        }
694
0
    }
695
696
0
    if(c>limit) {
697
0
        c=limit;  /* could be higher if in the index2NullOffset */
698
0
    } else if(c<limit) {
699
        /* c==highStart<limit */
700
0
        uint32_t highValue;
701
0
        if(idx!=NULL) {
702
0
            highValue=
703
0
                data32!=NULL ?
704
0
                    data32[trie->highValueIndex] :
705
0
                    idx[trie->highValueIndex];
706
0
        } else {
707
0
            highValue=trie->newTrie->data[trie->newTrie->dataLength-UTRIE2_DATA_GRANULARITY];
708
0
        }
709
0
        value=enumValue(context, highValue);
710
0
        if(value!=prevValue) {
711
0
            if(prev<c && !enumRange(context, prev, c-1, prevValue)) {
712
0
                return;
713
0
            }
714
0
            prev=c;
715
0
            prevValue=value;
716
0
        }
717
0
        c=limit;
718
0
    }
719
720
    /* deliver last range */
721
0
    enumRange(context, prev, c-1, prevValue);
722
0
}
723
724
U_CAPI void U_EXPORT2
725
utrie2_enum(const UTrie2 *trie,
726
0
            UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange, const void *context) {
727
0
    enumEitherTrie(trie, 0, 0x110000, enumValue, enumRange, context);
728
0
}
729
730
U_CAPI void U_EXPORT2
731
utrie2_enumForLeadSurrogate(const UTrie2 *trie, UChar32 lead,
732
                            UTrie2EnumValue *enumValue, UTrie2EnumRange *enumRange,
733
0
                            const void *context) {
734
0
    if(!U16_IS_LEAD(lead)) {
735
0
        return;
736
0
    }
737
0
    lead=(lead-0xd7c0)<<10;   /* start code point */
738
0
    enumEitherTrie(trie, lead, lead+0x400, enumValue, enumRange, context);
739
0
}
740
741
/* C++ convenience wrappers ------------------------------------------------- */
742
743
U_NAMESPACE_BEGIN
744
745
0
uint16_t BackwardUTrie2StringIterator::previous16() {
746
0
    codePointLimit=codePointStart;
747
0
    if(start>=codePointStart) {
748
0
        codePoint=U_SENTINEL;
749
0
        return 0;
750
0
    }
751
0
    uint16_t result;
752
0
    UTRIE2_U16_PREV16(trie, start, codePointStart, codePoint, result);
753
0
    return result;
754
0
}
755
756
0
uint16_t ForwardUTrie2StringIterator::next16() {
757
0
    codePointStart=codePointLimit;
758
0
    if(codePointLimit==limit) {
759
0
        codePoint=U_SENTINEL;
760
0
        return 0;
761
0
    }
762
0
    uint16_t result;
763
0
    UTRIE2_U16_NEXT16(trie, codePointLimit, limit, codePoint, result);
764
0
    return result;
765
0
}
766
767
U_NAMESPACE_END