Coverage Report

Created: 2026-01-22 06:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/ucasemap.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 2005-2016, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  ucasemap.cpp
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2005may06
16
*   created by: Markus W. Scherer
17
*
18
*   Case mapping service object and functions using it.
19
*/
20
21
#include "unicode/utypes.h"
22
#include "unicode/brkiter.h"
23
#include "unicode/bytestream.h"
24
#include "unicode/casemap.h"
25
#include "unicode/edits.h"
26
#include "unicode/stringoptions.h"
27
#include "unicode/stringpiece.h"
28
#include "unicode/ubrk.h"
29
#include "unicode/uloc.h"
30
#include "unicode/ustring.h"
31
#include "unicode/ucasemap.h"
32
#if !UCONFIG_NO_BREAK_ITERATION
33
#include "unicode/utext.h"
34
#endif
35
#include "unicode/utf.h"
36
#include "unicode/utf8.h"
37
#include "unicode/utf16.h"
38
#include "bytesinkutil.h"
39
#include "cmemory.h"
40
#include "cstring.h"
41
#include "uassert.h"
42
#include "ucase.h"
43
#include "ucasemap_imp.h"
44
45
U_NAMESPACE_USE
46
47
/* UCaseMap service object -------------------------------------------------- */
48
49
/**
 * Constructs a case-map service object.
 *
 * Stores opts, starts with an unknown case locale (and, when break iteration
 * is compiled in, no break iterator), then delegates the locale setup to
 * ucasemap_setLocale().
 *
 * @param localeID   Locale ID passed on to ucasemap_setLocale().
 * @param opts       Option bits stored in `options`.
 * @param pErrorCode In/out ICU error code, passed on to ucasemap_setLocale().
 */
UCaseMap::UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode) :
#if !UCONFIG_NO_BREAK_ITERATION
        iter(nullptr),
#endif
        caseLocale(UCASE_LOC_UNKNOWN), options(opts) {
    // ucasemap_setLocale() returns immediately, without writing to the object,
    // when *pErrorCode already indicates a failure. Terminate the locale ID up
    // front so that it is an empty string rather than unset bytes in that case.
    locale[0] = 0;
    ucasemap_setLocale(this, localeID, pErrorCode);
}
56
57
7.58k
/** Deletes the break iterator stored in `iter` (only when break iteration is compiled in). */
UCaseMap::~UCaseMap()
{
#if !UCONFIG_NO_BREAK_ITERATION
    delete iter;
#endif
}
62
63
/**
 * Allocates a UCaseMap for the given locale and options.
 *
 * @return the new object, or nullptr if *pErrorCode indicated a failure on
 *         entry, the allocation failed (U_MEMORY_ALLOCATION_ERROR is set),
 *         or the constructor reported a failure.
 */
U_CAPI UCaseMap * U_EXPORT2
ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return nullptr;
    }

    UCaseMap *result = new UCaseMap(locale, options, pErrorCode);
    if(result==nullptr) {
        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    if(U_SUCCESS(*pErrorCode)) {
        return result;
    }

    // Construction failed: do not hand out a half-initialized object.
    delete result;
    return nullptr;
}
78
79
/** Destroys a UCaseMap; passing nullptr is harmless because `delete nullptr` does nothing. */
U_CAPI void U_EXPORT2
ucasemap_close(UCaseMap *csm)
{
    delete csm;
}
83
84
/** Returns the locale ID buffer stored inside csm; csm must not be nullptr. */
U_CAPI const char * U_EXPORT2
ucasemap_getLocale(const UCaseMap *csm)
{
    const char *storedLocale = csm->locale;
    return storedLocale;
}
88
89
/** Returns the option bits stored inside csm; csm must not be nullptr. */
U_CAPI uint32_t U_EXPORT2
ucasemap_getOptions(const UCaseMap *csm)
{
    const uint32_t storedOptions = csm->options;
    return storedOptions;
}
93
94
/**
 * Sets the locale of a UCaseMap and derives its case locale.
 *
 * Does nothing if *pErrorCode indicates a failure on entry. An empty (but
 * non-null) locale string selects the root case locale directly. Otherwise
 * the result of uloc_getName() is stored in csm->locale; if that does not fit,
 * only the result of uloc_getLanguage() is stored. On any failure the stored
 * locale is reset to the empty string with the root case locale.
 */
U_CAPI void U_EXPORT2
ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
    if(U_FAILURE(*pErrorCode)) {
        return;
    }
    if(locale!=nullptr && locale[0]==0) {
        csm->locale[0]=0;
        csm->caseLocale=UCASE_LOC_ROOT;
        return;
    }

    const int32_t capacity=static_cast<int32_t>(sizeof(csm->locale));
    UErrorCode status=U_ZERO_ERROR;
    int32_t idLength=uloc_getName(locale, csm->locale, capacity, &status);

    // A result exactly as long as the buffer is treated like an overflow
    // (presumably because no room is left for the terminator).
    const bool nameDoesNotFit=
        status==U_BUFFER_OVERFLOW_ERROR || (U_SUCCESS(status) && idLength==capacity);
    if(nameDoesNotFit) {
        status=U_ZERO_ERROR;
        /* we only really need the language code for case mappings */
        idLength=uloc_getLanguage(locale, csm->locale, capacity, &status);
    }

    if(U_FAILURE(status)) {
        *pErrorCode=status;
    } else if(idLength==capacity) {
        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
    }

    if(U_FAILURE(*pErrorCode)) {
        csm->locale[0]=0;
        csm->caseLocale=UCASE_LOC_ROOT;
        return;
    }
    csm->caseLocale=ucase_getCaseLocale(csm->locale);
}
124
125
/** Stores new option bits in csm, unless *pErrorCode already indicates a failure. */
U_CAPI void U_EXPORT2
ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
    if(U_SUCCESS(*pErrorCode)) {
        csm->options=options;
    }
}
132
133
/* UTF-8 string case mappings ----------------------------------------------- */
134
135
/* TODO(markus): Move to a new, separate utf8case.cpp file. */
136
137
namespace {
138
139
/* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
140
inline UBool
141
appendResult(int32_t cpLength, int32_t result, const char16_t *s,
142
30.3M
             ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
143
30.3M
    U_ASSERT(U_SUCCESS(errorCode));
144
145
    /* decode the result */
146
30.3M
    if(result<0) {
147
        /* (not) original code point */
148
27.4M
        if(edits!=nullptr) {
149
0
            edits->addUnchanged(cpLength);
150
0
        }
151
27.4M
        if((options & U_OMIT_UNCHANGED_TEXT) == 0) {
152
10.1M
            ByteSinkUtil::appendCodePoint(cpLength, ~result, sink);
153
10.1M
        }
154
27.4M
    } else {
155
2.88M
        if(result<=UCASE_MAX_STRING_LENGTH) {
156
            // string: "result" is the UTF-16 length
157
474k
            return ByteSinkUtil::appendChange(cpLength, s, result, sink, edits, errorCode);
158
2.41M
        } else {
159
2.41M
            ByteSinkUtil::appendCodePoint(cpLength, result, sink, edits);
160
2.41M
        }
161
2.88M
    }
162
29.9M
    return true;
163
30.3M
}
164
165
// See unicode/utf8.h U8_APPEND_UNSAFE().
166
297k
inline uint8_t getTwoByteLead(UChar32 c) { return static_cast<uint8_t>((c >> 6) | 0xc0); }
167
259k
// Trail byte of a two-byte UTF-8 sequence: 0x80 combined with the low six bits of c.
inline uint8_t getTwoByteTrail(UChar32 c) {
    return static_cast<uint8_t>(0x80 | (c & 0x3f));
}
168
169
/**
 * Context iterator callback over the UTF-8 text described by a UCaseContext.
 *
 * dir < 0:  restart backward iteration from cpStart.
 * dir > 0:  restart forward iteration from cpLimit.
 * dir == 0: continue in the direction stored by the previous call.
 *
 * @return the next code point in that direction (as decoded by
 *         U8_PREV/U8_NEXT), or U_SENTINEL when the start/limit is reached.
 */
UChar32 U_CALLCONV
utf8_caseContextIterator(void *context, int8_t dir) {
    UCaseContext *ctx = static_cast<UCaseContext *>(context);

    if(dir==0) {
        /* continue current iteration direction */
        dir=ctx->dir;
    } else {
        /* reset for backward (dir<0) or forward (dir>0) iteration */
        ctx->index = dir<0 ? ctx->cpStart : ctx->cpLimit;
        ctx->dir=dir;
    }

    UChar32 cp;
    if(dir<0) {
        if(ctx->start<ctx->index) {
            U8_PREV((const uint8_t *)ctx->p, ctx->start, ctx->index, cp);
            return cp;
        }
    } else if(ctx->index<ctx->limit) {
        U8_NEXT((const uint8_t *)ctx->p, ctx->index, ctx->limit, cp);
        return cp;
    }
    return U_SENTINEL;
}
200
201
/**
202
 * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
203
 * caseLocale < 0: Case-folds [srcStart..srcLimit[.
204
 */
205
void toLower(int32_t caseLocale, uint32_t options,
206
             const uint8_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
207
1.29M
             icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
208
1.29M
    const int8_t *latinToLower;
209
1.29M
    if (caseLocale == UCASE_LOC_ROOT ||
210
314k
            (caseLocale >= 0 ?
211
314k
                !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
212
1.29M
                (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
213
1.29M
        latinToLower = LatinCase::TO_LOWER_NORMAL;
214
1.29M
    } else {
215
8.85k
        latinToLower = LatinCase::TO_LOWER_TR_LT;
216
8.85k
    }
217
1.29M
    const UTrie2 *trie = ucase_getTrie();
218
1.29M
    int32_t prev = srcStart;
219
1.29M
    int32_t srcIndex = srcStart;
220
2.03M
    for (;;) {
221
        // fast path for simple cases
222
2.03M
        int32_t cpStart;
223
2.03M
        UChar32 c;
224
46.2M
        for (;;) {
225
46.2M
            if (U_FAILURE(errorCode) || srcIndex >= srcLimit) {
226
1.29M
                c = U_SENTINEL;
227
1.29M
                break;
228
1.29M
            }
229
44.9M
            uint8_t lead = src[srcIndex++];
230
44.9M
            if (lead <= 0x7f) {
231
27.1M
                int8_t d = latinToLower[lead];
232
27.1M
                if (d == LatinCase::EXC) {
233
58.3k
                    cpStart = srcIndex - 1;
234
58.3k
                    c = lead;
235
58.3k
                    break;
236
58.3k
                }
237
27.1M
                if (d == 0) { continue; }
238
3.50M
                ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
239
3.50M
                                              sink, options, edits, errorCode);
240
3.50M
                char ascii = static_cast<char>(lead + d);
241
3.50M
                sink.Append(&ascii, 1);
242
3.50M
                if (edits != nullptr) {
243
0
                    edits->addReplace(1, 1);
244
0
                }
245
3.50M
                prev = srcIndex;
246
3.50M
                continue;
247
27.1M
            } else if (lead < 0xe3) {
248
10.6M
                uint8_t t;
249
10.6M
                if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLimit &&
250
711k
                        (t = src[srcIndex] - 0x80) <= 0x3f) {
251
                    // U+0080..U+017F
252
507k
                    ++srcIndex;
253
507k
                    c = ((lead - 0xc0) << 6) | t;
254
507k
                    int8_t d = latinToLower[c];
255
507k
                    if (d == LatinCase::EXC) {
256
418k
                        cpStart = srcIndex - 2;
257
418k
                        break;
258
418k
                    }
259
88.8k
                    if (d == 0) { continue; }
260
47.6k
                    ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
261
47.6k
                                                  sink, options, edits, errorCode);
262
47.6k
                    ByteSinkUtil::appendTwoBytes(c + d, sink);
263
47.6k
                    if (edits != nullptr) {
264
0
                        edits->addReplace(2, 2);
265
0
                    }
266
47.6k
                    prev = srcIndex;
267
47.6k
                    continue;
268
88.8k
                }
269
10.6M
            } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
270
1.41M
                    (srcIndex + 2) <= srcLimit &&
271
1.40M
                    U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
272
                // most of CJK: no case mappings
273
776k
                srcIndex += 2;
274
776k
                continue;
275
776k
            }
276
16.5M
            cpStart = --srcIndex;
277
16.5M
            U8_NEXT(src, srcIndex, srcLimit, c);
278
16.5M
            if (c < 0) {
279
                // ill-formed UTF-8
280
10.7M
                continue;
281
10.7M
            }
282
5.74M
            uint16_t props = UTRIE2_GET16(trie, c);
283
5.74M
            if (UCASE_HAS_EXCEPTION(props)) { break; }
284
5.49M
            int32_t delta;
285
5.49M
            if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
286
5.38M
                continue;
287
5.38M
            }
288
108k
            ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
289
108k
                                          sink, options, edits, errorCode);
290
108k
            ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
291
108k
            prev = srcIndex;
292
108k
        }
293
2.03M
        if (c < 0) {
294
1.29M
            break;
295
1.29M
        }
296
        // slow path
297
733k
        const char16_t *s;
298
733k
        if (caseLocale >= 0) {
299
720k
            csc->cpStart = cpStart;
300
720k
            csc->cpLimit = srcIndex;
301
720k
            c = ucase_toFullLower(c, utf8_caseContextIterator, csc, &s, caseLocale);
302
720k
        } else {
303
13.3k
            c = ucase_toFullFolding(c, &s, options);
304
13.3k
        }
305
733k
        if (c >= 0) {
306
570k
            ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
307
570k
                                          sink, options, edits, errorCode);
308
570k
            appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
309
570k
            prev = srcIndex;
310
570k
        }
311
733k
    }
312
1.29M
    ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
313
1.29M
                                  sink, options, edits, errorCode);
314
1.29M
}
315
316
void toUpper(int32_t caseLocale, uint32_t options,
317
             const uint8_t *src, UCaseContext *csc, int32_t srcLength,
318
677
             icu::ByteSink &sink, icu::Edits *edits, UErrorCode &errorCode) {
319
677
    const int8_t *latinToUpper;
320
677
    if (caseLocale == UCASE_LOC_TURKISH) {
321
27
        latinToUpper = LatinCase::TO_UPPER_TR;
322
650
    } else {
323
650
        latinToUpper = LatinCase::TO_UPPER_NORMAL;
324
650
    }
325
677
    const UTrie2 *trie = ucase_getTrie();
326
677
    int32_t prev = 0;
327
677
    int32_t srcIndex = 0;
328
62.4k
    for (;;) {
329
        // fast path for simple cases
330
62.4k
        int32_t cpStart;
331
62.4k
        UChar32 c;
332
13.6M
        for (;;) {
333
13.6M
            if (U_FAILURE(errorCode) || srcIndex >= srcLength) {
334
677
                c = U_SENTINEL;
335
677
                break;
336
677
            }
337
13.6M
            uint8_t lead = src[srcIndex++];
338
13.6M
            if (lead <= 0x7f) {
339
8.38M
                int8_t d = latinToUpper[lead];
340
8.38M
                if (d == LatinCase::EXC) {
341
440
                    cpStart = srcIndex - 1;
342
440
                    c = lead;
343
440
                    break;
344
440
                }
345
8.38M
                if (d == 0) { continue; }
346
911k
                ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 1 - prev,
347
911k
                                              sink, options, edits, errorCode);
348
911k
                char ascii = static_cast<char>(lead + d);
349
911k
                sink.Append(&ascii, 1);
350
911k
                if (edits != nullptr) {
351
0
                    edits->addReplace(1, 1);
352
0
                }
353
911k
                prev = srcIndex;
354
911k
                continue;
355
8.38M
            } else if (lead < 0xe3) {
356
3.37M
                uint8_t t;
357
3.37M
                if (0xc2 <= lead && lead <= 0xc5 && srcIndex < srcLength &&
358
126k
                        (t = src[srcIndex] - 0x80) <= 0x3f) {
359
                    // U+0080..U+017F
360
32.7k
                    ++srcIndex;
361
32.7k
                    c = ((lead - 0xc0) << 6) | t;
362
32.7k
                    int8_t d = latinToUpper[c];
363
32.7k
                    if (d == LatinCase::EXC) {
364
854
                        cpStart = srcIndex - 2;
365
854
                        break;
366
854
                    }
367
31.9k
                    if (d == 0) { continue; }
368
11.2k
                    ByteSinkUtil::appendUnchanged(src + prev, srcIndex - 2 - prev,
369
11.2k
                                                  sink, options, edits, errorCode);
370
11.2k
                    ByteSinkUtil::appendTwoBytes(c + d, sink);
371
11.2k
                    if (edits != nullptr) {
372
0
                        edits->addReplace(2, 2);
373
0
                    }
374
11.2k
                    prev = srcIndex;
375
11.2k
                    continue;
376
31.9k
                }
377
3.37M
            } else if ((lead <= 0xe9 || lead == 0xeb || lead == 0xec) &&
378
732k
                    (srcIndex + 2) <= srcLength &&
379
732k
                    U8_IS_TRAIL(src[srcIndex]) && U8_IS_TRAIL(src[srcIndex + 1])) {
380
                // most of CJK: no case mappings
381
293k
                srcIndex += 2;
382
293k
                continue;
383
293k
            }
384
4.92M
            cpStart = --srcIndex;
385
4.92M
            U8_NEXT(src, srcIndex, srcLength, c);
386
4.92M
            if (c < 0) {
387
                // ill-formed UTF-8
388
4.08M
                continue;
389
4.08M
            }
390
842k
            uint16_t props = UTRIE2_GET16(trie, c);
391
842k
            if (UCASE_HAS_EXCEPTION(props)) { break; }
392
781k
            int32_t delta;
393
781k
            if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
394
709k
                continue;
395
709k
            }
396
72.8k
            ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
397
72.8k
                                          sink, options, edits, errorCode);
398
72.8k
            ByteSinkUtil::appendCodePoint(srcIndex - cpStart, c + delta, sink, edits);
399
72.8k
            prev = srcIndex;
400
72.8k
        }
401
62.4k
        if (c < 0) {
402
677
            break;
403
677
        }
404
        // slow path
405
61.8k
        csc->cpStart = cpStart;
406
61.8k
        csc->cpLimit = srcIndex;
407
61.8k
        const char16_t *s;
408
61.8k
        c = ucase_toFullUpper(c, utf8_caseContextIterator, csc, &s, caseLocale);
409
61.8k
        if (c >= 0) {
410
34.1k
            ByteSinkUtil::appendUnchanged(src + prev, cpStart - prev,
411
34.1k
                                          sink, options, edits, errorCode);
412
34.1k
            appendResult(srcIndex - cpStart, c, s, sink, options, edits, errorCode);
413
34.1k
            prev = srcIndex;
414
34.1k
        }
415
61.8k
    }
416
677
    ByteSinkUtil::appendUnchanged(src + prev, srcIndex - prev,
417
677
                                  sink, options, edits, errorCode);
418
677
}
419
420
}  // namespace
421
422
#if !UCONFIG_NO_BREAK_ITERATION
423
424
namespace {
425
426
// The two bytes of U+0301 as encoded in a u8 string literal.
constexpr uint8_t ACUTE_BYTE0 = static_cast<uint8_t>(u8"\u0301"[0]);
constexpr uint8_t ACUTE_BYTE1 = static_cast<uint8_t>(u8"\u0301"[1]);
429
430
/**
431
 * Input: c is a letter I with or without acute accent.
432
 * start is the index in src after c, and is less than segmentLimit.
433
 * If a plain i/I is followed by a plain j/J,
434
 * or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute,
435
 * then we output accordingly.
436
 *
437
 * @return the src index after the titlecased sequence, or the start index if no Dutch IJ
438
 */
439
int32_t maybeTitleDutchIJ(const uint8_t *src, UChar32 c, int32_t start, int32_t segmentLimit,
440
57.0k
                          ByteSink &sink, uint32_t options, icu::Edits *edits, UErrorCode &errorCode) {
441
57.0k
    U_ASSERT(start < segmentLimit);
442
443
57.0k
    int32_t index = start;
444
57.0k
    bool withAcute = false;
445
446
    // If the conditions are met, then the following variables tell us what to output.
447
57.0k
    int32_t unchanged1 = 0;  // code units before the j, or the whole sequence (0..3)
448
57.0k
    bool doTitleJ = false;  // true if the j needs to be titlecased
449
57.0k
    int32_t unchanged2 = 0;  // after the j (0 or 1)
450
451
    // next character after the first letter
452
57.0k
    UChar32 c2;
453
57.0k
    c2 = src[index++];
454
455
    // Is the first letter an i/I with accent?
456
57.0k
    if (c == u'I') {
457
56.7k
        if (c2 == ACUTE_BYTE0 && index < segmentLimit && src[index++] == ACUTE_BYTE1) {
458
11.3k
            withAcute = true;
459
11.3k
            unchanged1 = 2;  // ACUTE is 2 code units in UTF-8
460
11.3k
            if (index == segmentLimit) { return start; }
461
11.0k
            c2 = src[index++];
462
11.0k
        }
463
56.7k
    } else {  // Í
464
294
        withAcute = true;
465
294
    }
466
467
    // Is the next character a j/J?
468
56.6k
    if (c2 == u'j') {
469
19.7k
        doTitleJ = true;
470
36.9k
    } else if (c2 == u'J') {
471
23.6k
        ++unchanged1;
472
23.6k
    } else {
473
13.2k
        return start;
474
13.2k
    }
475
476
    // A plain i/I must be followed by a plain j/J.
477
    // An i/I with acute must be followed by a j/J with acute.
478
43.4k
    if (withAcute) {
479
9.61k
        if ((index + 1) >= segmentLimit || src[index++] != ACUTE_BYTE0 || src[index++] != ACUTE_BYTE1) {
480
6.17k
            return start;
481
6.17k
        }
482
3.44k
        if (doTitleJ) {
483
318
            unchanged2 = 2;  // ACUTE is 2 code units in UTF-8
484
3.12k
        } else {
485
3.12k
            unchanged1 = unchanged1 + 2;    // ACUTE is 2 code units in UTF-8
486
3.12k
        }
487
3.44k
    }
488
489
    // There must not be another combining mark.
490
37.2k
    if (index < segmentLimit) {
491
34.5k
        int32_t cp;
492
34.5k
        int32_t i = index;
493
34.5k
        U8_NEXT(src, i, segmentLimit, cp);
494
34.5k
        uint32_t typeMask = U_GET_GC_MASK(cp);
495
34.5k
        if ((typeMask & U_GC_M_MASK) != 0) {
496
542
            return start;
497
542
        }
498
34.5k
    }
499
500
    // Output the rest of the Dutch IJ.
501
36.7k
    ByteSinkUtil::appendUnchanged(src + start, unchanged1, sink, options, edits, errorCode);
502
36.7k
    start += unchanged1;
503
36.7k
    if (doTitleJ) {
504
18.3k
        ByteSinkUtil::appendCodePoint(1, u'J', sink, edits);
505
18.3k
        ++start;
506
18.3k
    }
507
36.7k
    ByteSinkUtil::appendUnchanged(src + start, unchanged2, sink, options, edits, errorCode);
508
509
36.7k
    U_ASSERT(start + unchanged2 == index);
510
36.7k
    return index;
511
37.2k
}
512
513
}  // namespace
514
515
/**
 * Titlecases UTF-8 text segment by segment, using iter for the segment boundaries.
 *
 * Each segment [segStart..segLimit[ is split into three parts:
 * a) skipped characters (copied as-is)   [segStart..titleStart[
 * b) first letter (titlecased)           [titleStart..titleLimit[
 * c) subsequent characters (lowercased,
 *    or copied with U_TITLECASE_NO_LOWERCASE)  [titleLimit..segLimit[
 *
 * Returns early, leaving errorCode as set by the callee, when the options
 * check or any output step fails.
 */
U_CFUNC void U_CALLCONV
ucasemap_internalUTF8ToTitle(
        int32_t caseLocale, uint32_t options, BreakIterator *iter,
        const uint8_t *src, int32_t srcLength,
        ByteSink &sink, icu::Edits *edits,
        UErrorCode &errorCode) {
    if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
        return;
    }

    /* set up local variables */
    UCaseContext csc=UCASECONTEXT_INITIALIZER;
    csc.p=const_cast<uint8_t *>(src);
    csc.limit=srcLength;

    const bool adjustStart=(options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0;
    const bool adjustToCased=(options&U_TITLECASE_ADJUST_TO_CASED)!=0;
    const bool lowercaseRest=(options&U_TITLECASE_NO_LOWERCASE)==0;

    /* titlecasing loop */
    bool firstSegment=true;
    int32_t segStart=0;
    int32_t segLimit=0;
    for(; segStart<srcLength; segStart=segLimit) {
        /* find next index where to titlecase */
        segLimit=firstSegment ? iter->first() : iter->next();
        firstSegment=false;
        if(segLimit==UBRK_DONE || segLimit>srcLength) {
            segLimit=srcLength;
        }
        if(segStart>=segLimit) {
            continue;
        }

        /* find and copy skipped characters [segStart..titleStart[ */
        int32_t titleStart=segStart;
        int32_t titleLimit=segStart;
        UChar32 c;
        U8_NEXT(src, titleLimit, segLimit, c);
        if(adjustStart) {
            // Adjust the titlecasing index to the next cased character,
            // or to the next letter/number/symbol/private use.
            // Stop with titleStart<titleLimit<=segLimit
            // if there is a character to be titlecased,
            // or else stop with titleStart==titleLimit==segLimit.
            while(adjustToCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
                titleStart=titleLimit;
                if(titleLimit==segLimit) {
                    break;
                }
                U8_NEXT(src, titleLimit, segLimit, c);
            }
            if(segStart<titleStart &&
                    !ByteSinkUtil::appendUnchanged(src+segStart, titleStart-segStart,
                                                   sink, options, edits, errorCode)) {
                return;
            }
        }
        if(titleStart>=titleLimit) {
            // Nothing to titlecase in this segment.
            continue;
        }

        /* titlecase c which is from [titleStart..titleLimit[ */
        if(c>=0) {
            csc.cpStart=titleStart;
            csc.cpLimit=titleLimit;
            const char16_t *s;
            c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale);
            if(!appendResult(titleLimit-titleStart, c, s, sink, options, edits, errorCode)) {
                return;
            }
        } else if(!ByteSinkUtil::appendUnchanged(src+titleStart, titleLimit-titleStart,
                                                 sink, options, edits, errorCode)) {
            // Malformed UTF-8 is copied as-is; bail out if that fails.
            return;
        }

        /* Special case Dutch IJ titlecasing */
        if(titleLimit<segLimit && caseLocale==UCASE_LOC_DUTCH) {
            if(c<0) {
                c=~c;
            }
            // U+00CD is the precomposed capital I with acute.
            if(c==u'I' || c==u'\u00CD') {
                titleLimit=maybeTitleDutchIJ(src, c, titleLimit, segLimit,
                                             sink, options, edits, errorCode);
            }
        }

        /* lowercase [titleLimit..segLimit[ */
        if(titleLimit>=segLimit) {
            continue;
        }
        if(lowercaseRest) {
            /* Normal operation: Lowercase the rest of the word. */
            toLower(caseLocale, options,
                    src, &csc, titleLimit, segLimit,
                    sink, edits, errorCode);
            if(U_FAILURE(errorCode)) {
                return;
            }
        } else if(!ByteSinkUtil::appendUnchanged(src+titleLimit, segLimit-titleLimit,
                                                 sink, options, edits, errorCode)) {
            /* Optionally just copy the rest of the word unchanged. */
            return;
        }
    }
}
634
635
#endif
636
637
U_NAMESPACE_BEGIN
638
namespace GreekUpper {
639
640
187k
/**
 * Looks ahead in s[i..length[, skipping case-ignorable code points.
 *
 * @return true if the first code point that is not case-ignorable is cased;
 *         false if it is uncased or the end of the text is reached first.
 */
UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) {
    for (int32_t pos = i; pos < length;) {
        UChar32 cp;
        U8_NEXT(s, pos, length, cp);
        const int32_t type = ucase_getTypeOrIgnorable(cp);
        if ((type & UCASE_IGNORABLE) == 0) {
            // Not case-ignorable: the answer depends on whether it is cased.
            return type != UCASE_NONE;
        }
        // Case-ignorable, continue with the loop.
    }
    return false;  // Not followed by cased letter.
}
655
656
// Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
//
// Uppercases the UTF-8 text src[0..srcLength[ and appends the result to sink.
// The text is walked one code point at a time:
// - A code point for which getLetterData() returns nonzero data is handled
//   together with the following code points that getDiacriticData() recognizes.
// - Any other well-formed code point goes through ucase_toFullUpper() with
//   UCASE_LOC_GREEK and appendResult().
// - Ill-formed UTF-8 is handed to ByteSinkUtil::appendUnchanged().
// If edits is not nullptr, each Greek-letter span is recorded there as either
// replaced or unchanged. An unchanged Greek-letter span is written to sink
// only when U_OMIT_UNCHANGED_TEXT is not set in options.
// Returns early when appendResult() or appendUnchanged() returns false.
657
void toUpper(uint32_t options,
658
             const uint8_t *src, int32_t srcLength,
659
             ByteSink &sink, Edits *edits,
660
879
             UErrorCode &errorCode) {
661
879
    // Flags describing what preceded the current code point
    // (AFTER_CASED, AFTER_VOWEL_WITH_*_ACCENT); replaced by nextState each iteration.
    uint32_t state = 0;
662
9.04M
    for (int32_t i = 0; i < srcLength;) {
663
9.04M
        // nextIndex ends up just past everything consumed in this iteration.
        int32_t nextIndex = i;
664
9.04M
        UChar32 c;
665
9.04M
        U8_NEXT(src, nextIndex, srcLength, c);
666
9.04M
        uint32_t nextState = 0;
667
9.04M
        int32_t type = ucase_getTypeOrIgnorable(c);
668
9.04M
        if ((type & UCASE_IGNORABLE) != 0) {
669
            // c is case-ignorable: carry the AFTER_CASED flag over as is.
670
408k
            nextState |= (state & AFTER_CASED);
671
8.63M
        } else if (type != UCASE_NONE) {
672
            // c is cased
673
1.77M
            nextState |= AFTER_CASED;
674
1.77M
        }
675
9.04M
        uint32_t data = getLetterData(c);
676
9.04M
        if (data > 0) {
677
320k
            uint32_t upper = data & UPPER_MASK;
678
            // Add a dialytika to this iota or ypsilon vowel
679
            // if we removed a tonos from the previous vowel,
680
            // and that previous vowel did not also have (or gain) a dialytika.
681
            // Adding one only to the final vowel in a longer sequence
682
            // (which does not occur in normal writing) would require lookahead.
683
            // Set the same flag as for preserving an existing dialytika.
684
320k
            if ((data & HAS_VOWEL) != 0 &&
685
252k
                (state & (AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT | AFTER_VOWEL_WITH_COMBINING_ACCENT)) !=
686
252k
                    0 &&
687
17.0k
                (upper == 0x399 || upper == 0x3A5)) {
688
2.51k
                data |= (state & AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT) != 0 ? HAS_DIALYTIKA
689
2.51k
                                                                           : HAS_COMBINING_DIALYTIKA;
690
2.51k
            }
691
320k
            int32_t numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
692
320k
            if ((data & HAS_YPOGEGRAMMENI) != 0) {
693
14.3k
                numYpogegrammeni = 1;
694
14.3k
            }
695
320k
            // Captured before the diacritic loop below merges more flags into data.
            const UBool hasPrecomposedAccent = (data & HAS_ACCENT) != 0;
696
            // Skip combining diacritics after this Greek letter.
697
320k
            // nextNextIndex reads ahead; nextIndex only advances once a diacritic is accepted.
            int32_t nextNextIndex = nextIndex;
698
387k
            while (nextIndex < srcLength) {
699
387k
                UChar32 c2;
700
387k
                U8_NEXT(src, nextNextIndex, srcLength, c2);
701
387k
                uint32_t diacriticData = getDiacriticData(c2);
702
387k
                if (diacriticData != 0) {
703
67.5k
                    data |= diacriticData;
704
67.5k
                    if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
705
843
                        ++numYpogegrammeni;
706
843
                    }
707
67.5k
                    nextIndex = nextNextIndex;
708
319k
                } else {
709
319k
                    break;  // not a Greek diacritic
710
319k
                }
711
387k
            }
712
320k
            // Vowel with an accent but without a dialytika: remember it for the next letter.
            if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
713
227k
                nextState |= hasPrecomposedAccent ? AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT
714
227k
                                                  : AFTER_VOWEL_WITH_COMBINING_ACCENT;
715
227k
            }
716
            // Map according to Greek rules.
717
320k
            UBool addTonos = false;
718
320k
            if (upper == 0x397 &&
719
213k
                    (data & HAS_ACCENT) != 0 &&
720
210k
                    numYpogegrammeni == 0 &&
721
209k
                    (state & AFTER_CASED) == 0 &&
722
187k
                    !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
723
                // Keep disjunctive "or" with (only) a tonos.
724
                // We use the same "word boundary" conditions as for the Final_Sigma test.
725
169k
                if (hasPrecomposedAccent) {
726
165k
                    upper = 0x389;  // Preserve the precomposed form.
727
165k
                } else {
728
4.61k
                    addTonos = true;
729
4.61k
                }
730
169k
            } else if ((data & HAS_DIALYTIKA) != 0) {
731
                // Preserve a vowel with dialytika in precomposed form if it exists.
732
11.5k
                if (upper == 0x399) {
733
2.72k
                    upper = 0x3AA;
734
2.72k
                    data &= ~HAS_EITHER_DIALYTIKA;
735
8.80k
                } else if (upper == 0x3A5) {
736
7.29k
                    upper = 0x3AB;
737
7.29k
                    data &= ~HAS_EITHER_DIALYTIKA;
738
7.29k
                }
739
11.5k
            }
740
741
320k
            // Whether output is written below; with edits or U_OMIT_UNCHANGED_TEXT
            // this first requires comparing the source span with the expected output.
            UBool change;
742
320k
            if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
743
23.1k
                change = true;  // common, simple usage
744
297k
            } else {
745
                // Find out first whether we are changing the text.
746
297k
                U_ASSERT(0x370 <= upper && upper <= 0x3ff);  // 2-byte UTF-8, main Greek block
747
297k
                // Changed if the source span is shorter than two bytes, does not start
                // with the two UTF-8 bytes of upper, or has an ypogegrammeni to expand.
                change = (i + 2) > nextIndex ||
748
297k
                        src[i] != getTwoByteLead(upper) || src[i + 1] != getTwoByteTrail(upper) ||
749
67.1k
                        numYpogegrammeni > 0;
750
297k
                // i2 steps through the source span in parallel with the bytes to be output.
                int32_t i2 = i + 2;
751
297k
                if ((data & HAS_EITHER_DIALYTIKA) != 0) {
752
8.39k
                    change |= (i2 + 2) > nextIndex ||
753
7.82k
                            src[i2] != static_cast<uint8_t>(u8"\u0308"[0]) ||
754
492
                            src[i2 + 1] != static_cast<uint8_t>(u8"\u0308"[1]);
755
8.39k
                    i2 += 2;
756
8.39k
                }
757
297k
                if (addTonos) {
758
4.61k
                    change |= (i2 + 2) > nextIndex ||
759
771
                            src[i2] != static_cast<uint8_t>(u8"\u0301"[0]) ||
760
449
                            src[i2 + 1] != static_cast<uint8_t>(u8"\u0301"[1]);
761
4.61k
                    i2 += 2;
762
4.61k
                }
763
297k
                int32_t oldLength = nextIndex - i;
764
297k
                int32_t newLength = (i2 - i) + numYpogegrammeni * 2;  // 2 bytes per U+0399
765
297k
                change |= oldLength != newLength;
766
297k
                if (change) {
767
232k
                    if (edits != nullptr) {
768
0
                        edits->addReplace(oldLength, newLength);
769
0
                    }
770
232k
                } else {
771
64.1k
                    if (edits != nullptr) {
772
0
                        edits->addUnchanged(oldLength);
773
0
                    }
774
                    // Write unchanged text?
775
64.1k
                    change = (options & U_OMIT_UNCHANGED_TEXT) == 0;
776
64.1k
                }
777
297k
            }
778
779
320k
            // Output order: base letter, dialytika, tonos, then one U+0399 per ypogegrammeni.
            if (change) {
780
256k
                ByteSinkUtil::appendTwoBytes(upper, sink);
781
256k
                if ((data & HAS_EITHER_DIALYTIKA) != 0) {
782
9.37k
                    sink.AppendU8(u8"\u0308", 2);  // restore or add a dialytika
783
9.37k
                }
784
256k
                if (addTonos) {
785
4.45k
                    sink.AppendU8(u8"\u0301", 2);
786
4.45k
                }
787
271k
                while (numYpogegrammeni > 0) {
788
15.2k
                    sink.AppendU8(u8"\u0399", 2);
789
15.2k
                    --numYpogegrammeni;
790
15.2k
                }
791
256k
            }
792
8.72M
        } else if(c>=0) {
793
5.88M
            // Well-formed code point without letter data: regular full uppercase mapping.
            const char16_t *s;
794
5.88M
            c=ucase_toFullUpper(c, nullptr, nullptr, &s, UCASE_LOC_GREEK);
795
5.88M
            if (!appendResult(nextIndex - i, c, s, sink, options, edits, errorCode)) {
796
0
                return;
797
0
            }
798
5.88M
        } else {
799
            // Malformed UTF-8.
800
2.84M
            if (!ByteSinkUtil::appendUnchanged(src+i, nextIndex-i,
801
2.84M
                                               sink, options, edits, errorCode)) {
802
0
                return;
803
0
            }
804
2.84M
        }
805
9.04M
        // Advance past everything consumed and roll the context flags forward.
        i = nextIndex;
806
9.04M
        state = nextState;
807
9.04M
    }
808
879
}
809
810
}  // namespace GreekUpper
811
U_NAMESPACE_END
812
813
static void U_CALLCONV
814
ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
815
                             const uint8_t *src, int32_t srcLength,
816
                             icu::ByteSink &sink, icu::Edits *edits,
817
1.24k
                             UErrorCode &errorCode) {
818
1.24k
    UCaseContext csc=UCASECONTEXT_INITIALIZER;
819
1.24k
    csc.p=(void *)src;
820
1.24k
    csc.limit=srcLength;
821
1.24k
    toLower(
822
1.24k
        caseLocale, options,
823
1.24k
        src, &csc, 0, srcLength,
824
1.24k
        sink, edits, errorCode);
825
1.24k
}
826
827
static void U_CALLCONV
828
ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
829
                             const uint8_t *src, int32_t srcLength,
830
                             icu::ByteSink &sink, icu::Edits *edits,
831
1.55k
                             UErrorCode &errorCode) {
832
1.55k
    if (caseLocale == UCASE_LOC_GREEK) {
833
879
        GreekUpper::toUpper(options, src, srcLength, sink, edits, errorCode);
834
879
    } else {
835
677
        UCaseContext csc=UCASECONTEXT_INITIALIZER;
836
677
        csc.p=(void *)src;
837
677
        csc.limit=srcLength;
838
677
        toUpper(
839
677
            caseLocale, options,
840
677
            src, &csc, srcLength,
841
677
            sink, edits, errorCode);
842
677
    }
843
1.55k
}
844
845
static void U_CALLCONV
846
ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
847
                          const uint8_t *src, int32_t srcLength,
848
                          icu::ByteSink &sink, icu::Edits *edits,
849
240
                          UErrorCode &errorCode) {
850
240
    toLower(
851
240
        -1, options,
852
240
        src, nullptr, 0, srcLength,
853
240
        sink, edits, errorCode);
854
240
}
855
856
void
857
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
858
                 const char *src, int32_t srcLength,
859
                 UTF8CaseMapper *stringCaseMapper,
860
                 icu::ByteSink &sink, icu::Edits *edits,
861
0
                 UErrorCode &errorCode) {
862
    /* check argument values */
863
0
    if (U_FAILURE(errorCode)) {
864
0
        return;
865
0
    }
866
0
    if ((src == nullptr && srcLength != 0) || srcLength < -1) {
867
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
868
0
        return;
869
0
    }
870
871
    // Get the string length.
872
0
    if (srcLength == -1) {
873
0
        srcLength = static_cast<int32_t>(uprv_strlen(src));
874
0
    }
875
876
0
    if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
877
0
        edits->reset();
878
0
    }
879
0
    stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
880
0
                     reinterpret_cast<const uint8_t*>(src), srcLength, sink, edits, errorCode);
881
0
    sink.Flush();
882
0
    if (U_SUCCESS(errorCode)) {
883
0
        if (edits != nullptr) {
884
0
            edits->copyErrorTo(errorCode);
885
0
        }
886
0
    }
887
0
}
888
889
int32_t
890
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
891
                 char *dest, int32_t destCapacity,
892
                 const char *src, int32_t srcLength,
893
                 UTF8CaseMapper *stringCaseMapper,
894
                 icu::Edits *edits,
895
7.57k
                 UErrorCode &errorCode) {
896
    /* check argument values */
897
7.57k
    if(U_FAILURE(errorCode)) {
898
0
        return 0;
899
0
    }
900
7.57k
    if( destCapacity<0 ||
901
7.57k
        (dest==nullptr && destCapacity>0) ||
902
7.57k
        (src==nullptr && srcLength!=0) || srcLength<-1
903
7.57k
    ) {
904
0
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
905
0
        return 0;
906
0
    }
907
908
    /* get the string length */
909
7.57k
    if(srcLength==-1) {
910
0
        srcLength = static_cast<int32_t>(uprv_strlen(src));
911
0
    }
912
913
    /* check for overlapping source and destination */
914
7.57k
    if( dest!=nullptr &&
915
7.57k
        ((src>=dest && src<(dest+destCapacity)) ||
916
7.57k
         (dest>=src && dest<(src+srcLength)))
917
7.57k
    ) {
918
0
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
919
0
        return 0;
920
0
    }
921
922
7.57k
    if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
923
0
        edits->reset();
924
0
    }
925
7.57k
    int32_t reslen = ByteSinkUtil::viaByteSinkToTerminatedChars(
926
7.57k
        dest, destCapacity,
927
7.57k
        [&](ByteSink& sink, UErrorCode& status) {
928
7.57k
            stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
929
7.57k
                             reinterpret_cast<const uint8_t*>(src), srcLength, sink, edits, status);
930
7.57k
        },
931
7.57k
        errorCode);
932
7.57k
    if (U_SUCCESS(errorCode) && edits != nullptr) {
933
0
        edits->copyErrorTo(errorCode);
934
0
    }
935
7.57k
    return reslen;
936
7.57k
}
937
938
/* public API functions */
939
940
// C API: lowercases UTF-8 src into dest using the case locale and options
// stored in csm. No Edits are recorded.
U_CAPI int32_t U_EXPORT2
ucasemap_utf8ToLower(const UCaseMap *csm,
                     char *dest, int32_t destCapacity,
                     const char *src, int32_t srcLength,
                     UErrorCode *pErrorCode) {
    icu::Edits *const noEdits = nullptr;
    UErrorCode &status = *pErrorCode;
    return ucasemap_mapUTF8(csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
                            dest, destCapacity, src, srcLength,
                            ucasemap_internalUTF8ToLower, noEdits, status);
}
951
952
// C API: uppercases UTF-8 src into dest using the case locale and options
// stored in csm. No Edits are recorded.
U_CAPI int32_t U_EXPORT2
ucasemap_utf8ToUpper(const UCaseMap *csm,
                     char *dest, int32_t destCapacity,
                     const char *src, int32_t srcLength,
                     UErrorCode *pErrorCode) {
    icu::Edits *const noEdits = nullptr;
    UErrorCode &status = *pErrorCode;
    return ucasemap_mapUTF8(csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
                            dest, destCapacity, src, srcLength,
                            ucasemap_internalUTF8ToUpper, noEdits, status);
}
963
964
// C API: case-folds UTF-8 src into dest using the options stored in csm.
// The case locale passed down is always UCASE_LOC_ROOT; no Edits are recorded.
U_CAPI int32_t U_EXPORT2
ucasemap_utf8FoldCase(const UCaseMap *csm,
                      char *dest, int32_t destCapacity,
                      const char *src, int32_t srcLength,
                      UErrorCode *pErrorCode) {
    icu::Edits *const noEdits = nullptr;
    UErrorCode &status = *pErrorCode;
    return ucasemap_mapUTF8(UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
                            dest, destCapacity, src, srcLength,
                            ucasemap_internalUTF8Fold, noEdits, status);
}
975
976
U_NAMESPACE_BEGIN
977
978
void CaseMap::utf8ToLower(
979
        const char *locale, uint32_t options,
980
        StringPiece src, ByteSink &sink, Edits *edits,
981
0
        UErrorCode &errorCode) {
982
0
    ucasemap_mapUTF8(
983
0
        ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
984
0
        src.data(), src.length(),
985
0
        ucasemap_internalUTF8ToLower, sink, edits, errorCode);
986
0
}
987
988
void CaseMap::utf8ToUpper(
989
        const char *locale, uint32_t options,
990
        StringPiece src, ByteSink &sink, Edits *edits,
991
0
        UErrorCode &errorCode) {
992
0
    ucasemap_mapUTF8(
993
0
        ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
994
0
        src.data(), src.length(),
995
0
        ucasemap_internalUTF8ToUpper, sink, edits, errorCode);
996
0
}
997
998
void CaseMap::utf8Fold(
999
        uint32_t options,
1000
        StringPiece src, ByteSink &sink, Edits *edits,
1001
0
        UErrorCode &errorCode) {
1002
0
    ucasemap_mapUTF8(
1003
0
        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
1004
0
        src.data(), src.length(),
1005
0
        ucasemap_internalUTF8Fold, sink, edits, errorCode);
1006
0
}
1007
1008
int32_t CaseMap::utf8ToLower(
1009
        const char *locale, uint32_t options,
1010
        const char *src, int32_t srcLength,
1011
        char *dest, int32_t destCapacity, Edits *edits,
1012
0
        UErrorCode &errorCode) {
1013
0
    return ucasemap_mapUTF8(
1014
0
        ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
1015
0
        dest, destCapacity,
1016
0
        src, srcLength,
1017
0
        ucasemap_internalUTF8ToLower, edits, errorCode);
1018
0
}
1019
1020
int32_t CaseMap::utf8ToUpper(
1021
        const char *locale, uint32_t options,
1022
        const char *src, int32_t srcLength,
1023
        char *dest, int32_t destCapacity, Edits *edits,
1024
0
        UErrorCode &errorCode) {
1025
0
    return ucasemap_mapUTF8(
1026
0
        ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
1027
0
        dest, destCapacity,
1028
0
        src, srcLength,
1029
0
        ucasemap_internalUTF8ToUpper, edits, errorCode);
1030
0
}
1031
1032
int32_t CaseMap::utf8Fold(
1033
        uint32_t options,
1034
        const char *src, int32_t srcLength,
1035
        char *dest, int32_t destCapacity, Edits *edits,
1036
0
        UErrorCode &errorCode) {
1037
0
    return ucasemap_mapUTF8(
1038
        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
1039
0
        dest, destCapacity,
1040
0
        src, srcLength,
1041
0
        ucasemap_internalUTF8Fold, edits, errorCode);
1042
0
}
1043
1044
U_NAMESPACE_END