Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/i18n/collationsets.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2013-2014, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* collationsets.cpp
9
*
10
* created on: 2013feb09
11
* created by: Markus W. Scherer
12
*/
13
14
#include "unicode/utypes.h"
15
16
#if !UCONFIG_NO_COLLATION
17
18
#include "unicode/ucharstrie.h"
19
#include "unicode/uniset.h"
20
#include "unicode/unistr.h"
21
#include "unicode/ustringtrie.h"
22
#include "collation.h"
23
#include "collationdata.h"
24
#include "collationsets.h"
25
#include "normalizer2impl.h"
26
#include "uassert.h"
27
#include "utf16collationiterator.h"
28
#include "utrie2.h"
29
30
U_NAMESPACE_BEGIN
31
32
U_CDECL_BEGIN
33
34
// Range callback handed to utrie2_enum() by TailoredSet::forData().
// context is the TailoredSet; [start, end] is a range of code points
// that all map to ce32 in the tailoring trie.
// Returns TRUE to keep enumerating; otherwise whatever
// TailoredSet::handleCE32() returns.
static UBool U_CALLCONV
enumTailoredRange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
    if(ce32 != Collation::FALLBACK_CE32) {
        TailoredSet *tailoredSet = (TailoredSet *)context;
        return tailoredSet->handleCE32(start, end, ce32);
    }
    // The range falls back to the base data: it is not tailored.
    return TRUE;
}
42
43
U_CDECL_END
44
45
void
46
0
TailoredSet::forData(const CollationData *d, UErrorCode &ec) {
47
0
    if(U_FAILURE(ec)) { return; }
48
0
    errorCode = ec;  // Preserve info & warning codes.
49
0
    data = d;
50
0
    baseData = d->base;
51
0
    U_ASSERT(baseData != NULL);
52
0
    utrie2_enum(data->trie, NULL, enumTailoredRange, this);
53
0
    ec = errorCode;
54
0
}
55
56
// Handles one trie range [start, end] whose tailoring value is ce32.
// Each code point of the range is compared with its base mapping and
// added to the tailored set if the mappings differ.
// Returns U_SUCCESS(errorCode), so that a failure stops the enumeration.
UBool
TailoredSet::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
    U_ASSERT(ce32 != Collation::FALLBACK_CE32);
    if(Collation::isSpecialCE32(ce32)) {
        ce32 = data->getIndirectCE32(ce32);
        if(ce32 == Collation::FALLBACK_CE32) {
            return U_SUCCESS(errorCode);
        }
    }
    // The body always runs for start itself, then for each following
    // code point up to and including end.
    for(UChar32 c = start;; ++c) {
        const uint32_t baseCE32 = baseData->getFinalCE32(baseData->getCE32(c));
        // Equal CE32 values are not enough to skip the comparison:
        // contractions and expansions in different data objects
        // normally differ even if they have the same data offsets.
        if(!Collation::isSelfContainedCE32(ce32) || !Collation::isSelfContainedCE32(baseCE32)) {
            compare(c, ce32, baseCE32);
        } else if(ce32 != baseCE32) {
            // fastpath: both values are self-contained and they differ.
            tailored->add(c);
        }
        if(c >= end) { break; }
    }
    return U_SUCCESS(errorCode);
}
81
82
void
83
0
TailoredSet::compare(UChar32 c, uint32_t ce32, uint32_t baseCE32) {
84
0
    if(Collation::isPrefixCE32(ce32)) {
85
0
        const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
86
0
        ce32 = data->getFinalCE32(CollationData::readCE32(p));
87
0
        if(Collation::isPrefixCE32(baseCE32)) {
88
0
            const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
89
0
            baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
90
0
            comparePrefixes(c, p + 2, q + 2);
91
0
        } else {
92
0
            addPrefixes(data, c, p + 2);
93
0
        }
94
0
    } else if(Collation::isPrefixCE32(baseCE32)) {
95
0
        const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
96
0
        baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
97
0
        addPrefixes(baseData, c, q + 2);
98
0
    }
99
100
0
    if(Collation::isContractionCE32(ce32)) {
101
0
        const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
102
0
        if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
103
0
            ce32 = Collation::NO_CE32;
104
0
        } else {
105
0
            ce32 = data->getFinalCE32(CollationData::readCE32(p));
106
0
        }
107
0
        if(Collation::isContractionCE32(baseCE32)) {
108
0
            const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
109
0
            if((baseCE32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
110
0
                baseCE32 = Collation::NO_CE32;
111
0
            } else {
112
0
                baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
113
0
            }
114
0
            compareContractions(c, p + 2, q + 2);
115
0
        } else {
116
0
            addContractions(c, p + 2);
117
0
        }
118
0
    } else if(Collation::isContractionCE32(baseCE32)) {
119
0
        const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
120
0
        baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
121
0
        addContractions(c, q + 2);
122
0
    }
123
124
0
    int32_t tag;
125
0
    if(Collation::isSpecialCE32(ce32)) {
126
0
        tag = Collation::tagFromCE32(ce32);
127
0
        U_ASSERT(tag != Collation::PREFIX_TAG);
128
0
        U_ASSERT(tag != Collation::CONTRACTION_TAG);
129
        // Currently, the tailoring data builder does not write offset tags.
130
        // They might be useful for saving space,
131
        // but they would complicate the builder,
132
        // and in tailorings we assume that performance of tailored characters is more important.
133
0
        U_ASSERT(tag != Collation::OFFSET_TAG);
134
0
    } else {
135
0
        tag = -1;
136
0
    }
137
0
    int32_t baseTag;
138
0
    if(Collation::isSpecialCE32(baseCE32)) {
139
0
        baseTag = Collation::tagFromCE32(baseCE32);
140
0
        U_ASSERT(baseTag != Collation::PREFIX_TAG);
141
0
        U_ASSERT(baseTag != Collation::CONTRACTION_TAG);
142
0
    } else {
143
0
        baseTag = -1;
144
0
    }
145
146
    // Non-contextual mappings, expansions, etc.
147
0
    if(baseTag == Collation::OFFSET_TAG) {
148
        // We might be comparing a tailoring CE which is a copy of
149
        // a base offset-tag CE, via the [optimize [set]] syntax
150
        // or when a single-character mapping was copied for tailored contractions.
151
        // Offset tags always result in long-primary CEs,
152
        // with common secondary/tertiary weights.
153
0
        if(!Collation::isLongPrimaryCE32(ce32)) {
154
0
            add(c);
155
0
            return;
156
0
        }
157
0
        int64_t dataCE = baseData->ces[Collation::indexFromCE32(baseCE32)];
158
0
        uint32_t p = Collation::getThreeBytePrimaryForOffsetData(c, dataCE);
159
0
        if(Collation::primaryFromLongPrimaryCE32(ce32) != p) {
160
0
            add(c);
161
0
            return;
162
0
        }
163
0
    }
164
165
0
    if(tag != baseTag) {
166
0
        add(c);
167
0
        return;
168
0
    }
169
170
0
    if(tag == Collation::EXPANSION32_TAG) {
171
0
        const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);
172
0
        int32_t length = Collation::lengthFromCE32(ce32);
173
174
0
        const uint32_t *baseCE32s = baseData->ce32s + Collation::indexFromCE32(baseCE32);
175
0
        int32_t baseLength = Collation::lengthFromCE32(baseCE32);
176
177
0
        if(length != baseLength) {
178
0
            add(c);
179
0
            return;
180
0
        }
181
0
        for(int32_t i = 0; i < length; ++i) {
182
0
            if(ce32s[i] != baseCE32s[i]) {
183
0
                add(c);
184
0
                break;
185
0
            }
186
0
        }
187
0
    } else if(tag == Collation::EXPANSION_TAG) {
188
0
        const int64_t *ces = data->ces + Collation::indexFromCE32(ce32);
189
0
        int32_t length = Collation::lengthFromCE32(ce32);
190
191
0
        const int64_t *baseCEs = baseData->ces + Collation::indexFromCE32(baseCE32);
192
0
        int32_t baseLength = Collation::lengthFromCE32(baseCE32);
193
194
0
        if(length != baseLength) {
195
0
            add(c);
196
0
            return;
197
0
        }
198
0
        for(int32_t i = 0; i < length; ++i) {
199
0
            if(ces[i] != baseCEs[i]) {
200
0
                add(c);
201
0
                break;
202
0
            }
203
0
        }
204
0
    } else if(tag == Collation::HANGUL_TAG) {
205
0
        UChar jamos[3];
206
0
        int32_t length = Hangul::decompose(c, jamos);
207
0
        if(tailored->contains(jamos[0]) || tailored->contains(jamos[1]) ||
208
0
                (length == 3 && tailored->contains(jamos[2]))) {
209
0
            add(c);
210
0
        }
211
0
    } else if(ce32 != baseCE32) {
212
0
        add(c);
213
0
    }
214
0
}
215
216
void
217
0
TailoredSet::comparePrefixes(UChar32 c, const UChar *p, const UChar *q) {
218
    // Parallel iteration over prefixes of both tables.
219
0
    UCharsTrie::Iterator prefixes(p, 0, errorCode);
220
0
    UCharsTrie::Iterator basePrefixes(q, 0, errorCode);
221
0
    const UnicodeString *tp = NULL;  // Tailoring prefix.
222
0
    const UnicodeString *bp = NULL;  // Base prefix.
223
    // Use a string with a U+FFFF as the limit sentinel.
224
    // U+FFFF is untailorable and will not occur in prefixes.
225
0
    UnicodeString none((UChar)0xffff);
226
0
    for(;;) {
227
0
        if(tp == NULL) {
228
0
            if(prefixes.next(errorCode)) {
229
0
                tp = &prefixes.getString();
230
0
            } else {
231
0
                tp = &none;
232
0
            }
233
0
        }
234
0
        if(bp == NULL) {
235
0
            if(basePrefixes.next(errorCode)) {
236
0
                bp = &basePrefixes.getString();
237
0
            } else {
238
0
                bp = &none;
239
0
            }
240
0
        }
241
0
        if(tp == &none && bp == &none) { break; }
242
0
        int32_t cmp = tp->compare(*bp);
243
0
        if(cmp < 0) {
244
            // tp occurs in the tailoring but not in the base.
245
0
            addPrefix(data, *tp, c, (uint32_t)prefixes.getValue());
246
0
            tp = NULL;
247
0
        } else if(cmp > 0) {
248
            // bp occurs in the base but not in the tailoring.
249
0
            addPrefix(baseData, *bp, c, (uint32_t)basePrefixes.getValue());
250
0
            bp = NULL;
251
0
        } else {
252
0
            setPrefix(*tp);
253
0
            compare(c, (uint32_t)prefixes.getValue(), (uint32_t)basePrefixes.getValue());
254
0
            resetPrefix();
255
0
            tp = NULL;
256
0
            bp = NULL;
257
0
        }
258
0
    }
259
0
}
260
261
void
262
0
TailoredSet::compareContractions(UChar32 c, const UChar *p, const UChar *q) {
263
    // Parallel iteration over suffixes of both tables.
264
0
    UCharsTrie::Iterator suffixes(p, 0, errorCode);
265
0
    UCharsTrie::Iterator baseSuffixes(q, 0, errorCode);
266
0
    const UnicodeString *ts = NULL;  // Tailoring suffix.
267
0
    const UnicodeString *bs = NULL;  // Base suffix.
268
    // Use a string with two U+FFFF as the limit sentinel.
269
    // U+FFFF is untailorable and will not occur in contractions except maybe
270
    // as a single suffix character for a root-collator boundary contraction.
271
0
    UnicodeString none((UChar)0xffff);
272
0
    none.append((UChar)0xffff);
273
0
    for(;;) {
274
0
        if(ts == NULL) {
275
0
            if(suffixes.next(errorCode)) {
276
0
                ts = &suffixes.getString();
277
0
            } else {
278
0
                ts = &none;
279
0
            }
280
0
        }
281
0
        if(bs == NULL) {
282
0
            if(baseSuffixes.next(errorCode)) {
283
0
                bs = &baseSuffixes.getString();
284
0
            } else {
285
0
                bs = &none;
286
0
            }
287
0
        }
288
0
        if(ts == &none && bs == &none) { break; }
289
0
        int32_t cmp = ts->compare(*bs);
290
0
        if(cmp < 0) {
291
            // ts occurs in the tailoring but not in the base.
292
0
            addSuffix(c, *ts);
293
0
            ts = NULL;
294
0
        } else if(cmp > 0) {
295
            // bs occurs in the base but not in the tailoring.
296
0
            addSuffix(c, *bs);
297
0
            bs = NULL;
298
0
        } else {
299
0
            suffix = ts;
300
0
            compare(c, (uint32_t)suffixes.getValue(), (uint32_t)baseSuffixes.getValue());
301
0
            suffix = NULL;
302
0
            ts = NULL;
303
0
            bs = NULL;
304
0
        }
305
0
    }
306
0
}
307
308
void
309
0
TailoredSet::addPrefixes(const CollationData *d, UChar32 c, const UChar *p) {
310
0
    UCharsTrie::Iterator prefixes(p, 0, errorCode);
311
0
    while(prefixes.next(errorCode)) {
312
0
        addPrefix(d, prefixes.getString(), c, (uint32_t)prefixes.getValue());
313
0
    }
314
0
}
315
316
void
317
0
TailoredSet::addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32) {
318
0
    setPrefix(pfx);
319
0
    ce32 = d->getFinalCE32(ce32);
320
0
    if(Collation::isContractionCE32(ce32)) {
321
0
        const UChar *p = d->contexts + Collation::indexFromCE32(ce32);
322
0
        addContractions(c, p + 2);
323
0
    }
324
0
    tailored->add(UnicodeString(unreversedPrefix).append(c));
325
0
    resetPrefix();
326
0
}
327
328
void
329
0
TailoredSet::addContractions(UChar32 c, const UChar *p) {
330
0
    UCharsTrie::Iterator suffixes(p, 0, errorCode);
331
0
    while(suffixes.next(errorCode)) {
332
0
        addSuffix(c, suffixes.getString());
333
0
    }
334
0
}
335
336
void
337
0
TailoredSet::addSuffix(UChar32 c, const UnicodeString &sfx) {
338
0
    tailored->add(UnicodeString(unreversedPrefix).append(c).append(sfx));
339
0
}
340
341
void
342
0
TailoredSet::add(UChar32 c) {
343
0
    if(unreversedPrefix.isEmpty() && suffix == NULL) {
344
0
        tailored->add(c);
345
0
    } else {
346
0
        UnicodeString s(unreversedPrefix);
347
0
        s.append(c);
348
0
        if(suffix != NULL) {
349
0
            s.append(*suffix);
350
0
        }
351
0
        tailored->add(s);
352
0
    }
353
0
}
354
355
0
// Out-of-line definition of the CESink destructor; it has nothing to do.
ContractionsAndExpansions::CESink::~CESink() {
}
356
357
U_CDECL_BEGIN
358
359
// Range callback handed to utrie2_enum() by ContractionsAndExpansions::forData().
// context is the ContractionsAndExpansions object; its checkTailored field
// selects the mode:
//   0: There is no tailoring; every range is handled as is.
//  <0: Tailoring pass; ranges with mappings are recorded in cne->tailored
//      and handled, fallback ranges are skipped.
//  >0: Base pass; code points recorded in cne->tailored are excluded.
// Returns TRUE for skipped ranges, otherwise U_SUCCESS(cne->errorCode).
static UBool U_CALLCONV
enumCnERange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
    ContractionsAndExpansions *cne = (ContractionsAndExpansions *)context;
    if(cne->checkTailored < 0) {
        // Collect the set of code points with mappings in the tailoring data.
        if(ce32 == Collation::FALLBACK_CE32) {
            return TRUE;  // fallback to base, not tailored
        }
        cne->tailored.add(start, end);
    } else if(cne->checkTailored > 0) {
        // Exclude tailored ranges from the base data enumeration.
        if(start == end) {
            if(cne->tailored.contains(start)) {
                return TRUE;
            }
        } else if(cne->tailored.containsSome(start, end)) {
            // Partially tailored: handle only the untailored sub-ranges.
            cne->ranges.set(start, end).removeAll(cne->tailored);
            const int32_t rangeCount = cne->ranges.getRangeCount();
            for(int32_t r = 0; r < rangeCount; ++r) {
                cne->handleCE32(cne->ranges.getRangeStart(r), cne->ranges.getRangeEnd(r), ce32);
            }
            return U_SUCCESS(cne->errorCode);
        }
    }
    // Otherwise (checkTailored == 0): There is no tailoring.
    // No need to collect nor check the tailored set.
    cne->handleCE32(start, end, ce32);
    return U_SUCCESS(cne->errorCode);
}
388
389
U_CDECL_END
390
391
void
392
0
ContractionsAndExpansions::forData(const CollationData *d, UErrorCode &ec) {
393
0
    if(U_FAILURE(ec)) { return; }
394
0
    errorCode = ec;  // Preserve info & warning codes.
395
    // Add all from the data, can be tailoring or base.
396
0
    if(d->base != NULL) {
397
0
        checkTailored = -1;
398
0
    }
399
0
    data = d;
400
0
    utrie2_enum(data->trie, NULL, enumCnERange, this);
401
0
    if(d->base == NULL || U_FAILURE(errorCode)) {
402
0
        ec = errorCode;
403
0
        return;
404
0
    }
405
    // Add all from the base data but only for un-tailored code points.
406
0
    tailored.freeze();
407
0
    checkTailored = 1;
408
0
    data = d->base;
409
0
    utrie2_enum(data->trie, NULL, enumCnERange, this);
410
0
    ec = errorCode;
411
0
}
412
413
void
414
0
ContractionsAndExpansions::forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec) {
415
0
    if(U_FAILURE(ec)) { return; }
416
0
    errorCode = ec;  // Preserve info & warning codes.
417
0
    uint32_t ce32 = d->getCE32(c);
418
0
    if(ce32 == Collation::FALLBACK_CE32) {
419
0
        d = d->base;
420
0
        ce32 = d->getCE32(c);
421
0
    }
422
0
    data = d;
423
0
    handleCE32(c, c, ce32);
424
0
    ec = errorCode;
425
0
}
426
427
void
428
0
ContractionsAndExpansions::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
429
0
    for(;;) {
430
0
        if((ce32 & 0xff) < Collation::SPECIAL_CE32_LOW_BYTE) {
431
            // !isSpecialCE32()
432
0
            if(sink != NULL) {
433
0
                sink->handleCE(Collation::ceFromSimpleCE32(ce32));
434
0
            }
435
0
            return;
436
0
        }
437
0
        switch(Collation::tagFromCE32(ce32)) {
438
0
        case Collation::FALLBACK_TAG:
439
0
            return;
440
0
        case Collation::RESERVED_TAG_3:
441
0
        case Collation::BUILDER_DATA_TAG:
442
0
        case Collation::LEAD_SURROGATE_TAG:
443
0
            if(U_SUCCESS(errorCode)) { errorCode = U_INTERNAL_PROGRAM_ERROR; }
444
0
            return;
445
0
        case Collation::LONG_PRIMARY_TAG:
446
0
            if(sink != NULL) {
447
0
                sink->handleCE(Collation::ceFromLongPrimaryCE32(ce32));
448
0
            }
449
0
            return;
450
0
        case Collation::LONG_SECONDARY_TAG:
451
0
            if(sink != NULL) {
452
0
                sink->handleCE(Collation::ceFromLongSecondaryCE32(ce32));
453
0
            }
454
0
            return;
455
0
        case Collation::LATIN_EXPANSION_TAG:
456
0
            if(sink != NULL) {
457
0
                ces[0] = Collation::latinCE0FromCE32(ce32);
458
0
                ces[1] = Collation::latinCE1FromCE32(ce32);
459
0
                sink->handleExpansion(ces, 2);
460
0
            }
461
            // Optimization: If we have a prefix,
462
            // then the relevant strings have been added already.
463
0
            if(unreversedPrefix.isEmpty()) {
464
0
                addExpansions(start, end);
465
0
            }
466
0
            return;
467
0
        case Collation::EXPANSION32_TAG:
468
0
            if(sink != NULL) {
469
0
                const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);
470
0
                int32_t length = Collation::lengthFromCE32(ce32);
471
0
                for(int32_t i = 0; i < length; ++i) {
472
0
                    ces[i] = Collation::ceFromCE32(*ce32s++);
473
0
                }
474
0
                sink->handleExpansion(ces, length);
475
0
            }
476
            // Optimization: If we have a prefix,
477
            // then the relevant strings have been added already.
478
0
            if(unreversedPrefix.isEmpty()) {
479
0
                addExpansions(start, end);
480
0
            }
481
0
            return;
482
0
        case Collation::EXPANSION_TAG:
483
0
            if(sink != NULL) {
484
0
                int32_t length = Collation::lengthFromCE32(ce32);
485
0
                sink->handleExpansion(data->ces + Collation::indexFromCE32(ce32), length);
486
0
            }
487
            // Optimization: If we have a prefix,
488
            // then the relevant strings have been added already.
489
0
            if(unreversedPrefix.isEmpty()) {
490
0
                addExpansions(start, end);
491
0
            }
492
0
            return;
493
0
        case Collation::PREFIX_TAG:
494
0
            handlePrefixes(start, end, ce32);
495
0
            return;
496
0
        case Collation::CONTRACTION_TAG:
497
0
            handleContractions(start, end, ce32);
498
0
            return;
499
0
        case Collation::DIGIT_TAG:
500
            // Fetch the non-numeric-collation CE32 and continue.
501
0
            ce32 = data->ce32s[Collation::indexFromCE32(ce32)];
502
0
            break;
503
0
        case Collation::U0000_TAG:
504
0
            U_ASSERT(start == 0 && end == 0);
505
            // Fetch the normal ce32 for U+0000 and continue.
506
0
            ce32 = data->ce32s[0];
507
0
            break;
508
0
        case Collation::HANGUL_TAG:
509
0
            if(sink != NULL) {
510
                // TODO: This should be optimized,
511
                // especially if [start..end] is the complete Hangul range. (assert that)
512
0
                UTF16CollationIterator iter(data, FALSE, NULL, NULL, NULL);
513
0
                UChar hangul[1] = { 0 };
514
0
                for(UChar32 c = start; c <= end; ++c) {
515
0
                    hangul[0] = (UChar)c;
516
0
                    iter.setText(hangul, hangul + 1);
517
0
                    int32_t length = iter.fetchCEs(errorCode);
518
0
                    if(U_FAILURE(errorCode)) { return; }
519
                    // Ignore the terminating non-CE.
520
0
                    U_ASSERT(length >= 2 && iter.getCE(length - 1) == Collation::NO_CE);
521
0
                    sink->handleExpansion(iter.getCEs(), length - 1);
522
0
                }
523
0
            }
524
            // Optimization: If we have a prefix,
525
            // then the relevant strings have been added already.
526
0
            if(unreversedPrefix.isEmpty()) {
527
0
                addExpansions(start, end);
528
0
            }
529
0
            return;
530
0
        case Collation::OFFSET_TAG:
531
            // Currently no need to send offset CEs to the sink.
532
0
            return;
533
0
        case Collation::IMPLICIT_TAG:
534
            // Currently no need to send implicit CEs to the sink.
535
0
            return;
536
0
        }
537
0
    }
538
0
}
539
540
void
541
ContractionsAndExpansions::handlePrefixes(
542
0
        UChar32 start, UChar32 end, uint32_t ce32) {
543
0
    const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
544
0
    ce32 = CollationData::readCE32(p);  // Default if no prefix match.
545
0
    handleCE32(start, end, ce32);
546
0
    if(!addPrefixes) { return; }
547
0
    UCharsTrie::Iterator prefixes(p + 2, 0, errorCode);
548
0
    while(prefixes.next(errorCode)) {
549
0
        setPrefix(prefixes.getString());
550
        // Prefix/pre-context mappings are special kinds of contractions
551
        // that always yield expansions.
552
0
        addStrings(start, end, contractions);
553
0
        addStrings(start, end, expansions);
554
0
        handleCE32(start, end, (uint32_t)prefixes.getValue());
555
0
    }
556
0
    resetPrefix();
557
0
}
558
559
void
560
ContractionsAndExpansions::handleContractions(
561
0
        UChar32 start, UChar32 end, uint32_t ce32) {
562
0
    const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
563
0
    if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
564
        // No match on the single code point.
565
        // We are underneath a prefix, and the default mapping is just
566
        // a fallback to the mappings for a shorter prefix.
567
0
        U_ASSERT(!unreversedPrefix.isEmpty());
568
0
    } else {
569
0
        ce32 = CollationData::readCE32(p);  // Default if no suffix match.
570
0
        U_ASSERT(!Collation::isContractionCE32(ce32));
571
0
        handleCE32(start, end, ce32);
572
0
    }
573
0
    UCharsTrie::Iterator suffixes(p + 2, 0, errorCode);
574
0
    while(suffixes.next(errorCode)) {
575
0
        suffix = &suffixes.getString();
576
0
        addStrings(start, end, contractions);
577
0
        if(!unreversedPrefix.isEmpty()) {
578
0
            addStrings(start, end, expansions);
579
0
        }
580
0
        handleCE32(start, end, (uint32_t)suffixes.getValue());
581
0
    }
582
0
    suffix = NULL;
583
0
}
584
585
void
586
0
ContractionsAndExpansions::addExpansions(UChar32 start, UChar32 end) {
587
0
    if(unreversedPrefix.isEmpty() && suffix == NULL) {
588
0
        if(expansions != NULL) {
589
0
            expansions->add(start, end);
590
0
        }
591
0
    } else {
592
0
        addStrings(start, end, expansions);
593
0
    }
594
0
}
595
596
void
597
0
ContractionsAndExpansions::addStrings(UChar32 start, UChar32 end, UnicodeSet *set) {
598
0
    if(set == NULL) { return; }
599
0
    UnicodeString s(unreversedPrefix);
600
0
    do {
601
0
        s.append(start);
602
0
        if(suffix != NULL) {
603
0
            s.append(*suffix);
604
0
        }
605
0
        set->add(s);
606
0
        s.truncate(unreversedPrefix.length());
607
0
    } while(++start <= end);
608
0
}
609
610
U_NAMESPACE_END
611
612
#endif  // !UCONFIG_NO_COLLATION