Coverage Report

Created: 2026-06-23 06:26

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/ustrcase.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 2001-2015, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  ustrcase.cpp
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2002feb20
16
*   created by: Markus W. Scherer
17
*
18
*   Implementation file for string casing C API functions.
19
*   Uses functions from uchar.c for basic functionality that requires access
20
*   to the Unicode Character Database (uprops.dat).
21
*/
22
23
#include "unicode/utypes.h"
24
#include "unicode/brkiter.h"
25
#include "unicode/casemap.h"
26
#include "unicode/edits.h"
27
#include "unicode/stringoptions.h"
28
#include "unicode/ustring.h"
29
#include "unicode/ucasemap.h"
30
#include "unicode/ubrk.h"
31
#include "unicode/utf.h"
32
#include "unicode/utf16.h"
33
#include "cmemory.h"
34
#include "ucase.h"
35
#include "ucasemap_imp.h"
36
#include "ustr_imp.h"
37
#include "uassert.h"
38
39
/**
40
 * Code point for COMBINING ACUTE ACCENT
41
 * @internal
42
 */
43
0
// U+0301 as a single UTF-16 code unit; the Dutch IJ titlecasing helper compares source code units against it.
#define ACUTE u'\u0301'
44
45
U_NAMESPACE_BEGIN
46
47
namespace {
48
49
int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
50
2.14M
                                   Edits *edits, UErrorCode &errorCode) {
51
2.14M
    if (U_SUCCESS(errorCode)) {
52
2.14M
        if (destIndex > destCapacity) {
53
7.23k
            errorCode = U_BUFFER_OVERFLOW_ERROR;
54
2.13M
        } else if (edits != nullptr) {
55
25.6k
            edits->copyErrorTo(errorCode);
56
25.6k
        }
57
2.14M
    }
58
2.14M
    return destIndex;
59
2.14M
}
60
61
/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
62
inline int32_t
63
appendResult(char16_t *dest, int32_t destIndex, int32_t destCapacity,
64
             int32_t result, const char16_t *s,
65
21.3M
             int32_t cpLength, uint32_t options, icu::Edits *edits) {
66
21.3M
    UChar32 c;
67
21.3M
    int32_t length;
68
69
    /* decode the result */
70
21.3M
    if(result<0) {
71
        /* (not) original code point */
72
0
        if(edits!=nullptr) {
73
0
            edits->addUnchanged(cpLength);
74
0
        }
75
0
        if(options & U_OMIT_UNCHANGED_TEXT) {
76
0
            return destIndex;
77
0
        }
78
0
        c=~result;
79
0
        if(destIndex<destCapacity && c<=0xffff) {  // BMP slightly-fastpath
80
0
            dest[destIndex++] = static_cast<char16_t>(c);
81
0
            return destIndex;
82
0
        }
83
0
        length=cpLength;
84
21.3M
    } else {
85
21.3M
        if(result<=UCASE_MAX_STRING_LENGTH) {
86
20.5M
            c=U_SENTINEL;
87
20.5M
            length=result;
88
20.5M
        } else if(destIndex<destCapacity && result<=0xffff) {  // BMP slightly-fastpath
89
429k
            dest[destIndex++] = static_cast<char16_t>(result);
90
429k
            if(edits!=nullptr) {
91
57.2k
                edits->addReplace(cpLength, 1);
92
57.2k
            }
93
429k
            return destIndex;
94
429k
        } else {
95
343k
            c=result;
96
343k
            length=U16_LENGTH(c);
97
343k
        }
98
20.8M
        if(edits!=nullptr) {
99
10.5M
            edits->addReplace(cpLength, length);
100
10.5M
        }
101
20.8M
    }
102
20.8M
    if(length>(INT32_MAX-destIndex)) {
103
0
        return -1;  // integer overflow
104
0
    }
105
106
20.8M
    if(destIndex<destCapacity) {
107
        /* append the result */
108
10.7M
        if(c>=0) {
109
            /* code point */
110
11.7k
            UBool isError=false;
111
11.7k
            U16_APPEND(dest, destIndex, destCapacity, c, isError);
112
11.7k
            if(isError) {
113
                /* overflow, nothing written */
114
517
                destIndex+=length;
115
517
            }
116
10.7M
        } else {
117
            /* string */
118
10.7M
            if((destIndex+length)<=destCapacity) {
119
41.4M
                while(length>0) {
120
30.7M
                    dest[destIndex++]=*s++;
121
30.7M
                    --length;
122
30.7M
                }
123
10.7M
            } else {
124
                /* overflow */
125
3.69k
                destIndex+=length;
126
3.69k
            }
127
10.7M
        }
128
10.7M
    } else {
129
        /* preflight */
130
10.1M
        destIndex+=length;
131
10.1M
    }
132
20.8M
    return destIndex;
133
20.8M
}
134
135
/**
 * Appends one UTF-16 code unit.
 *
 * The unit is stored only if destIndex < destCapacity; otherwise the index
 * is merely advanced so the caller can preflight the required length.
 *
 * A negative destIndex is the overflow sentinel returned by the other append
 * helpers. It is passed through as -1 without touching dest, so that an
 * unchecked sentinel can never turn into a write at dest[-1].
 *
 * @return destIndex + 1, or -1 on a negative destIndex or if the index would
 *         exceed INT32_MAX
 */
inline int32_t
appendUChar(char16_t *dest, int32_t destIndex, int32_t destCapacity, char16_t c) {
    if (destIndex < 0) {
        return -1;  // propagate an earlier overflow; never index dest with it
    }
    if (destIndex < destCapacity) {
        dest[destIndex] = c;
    } else if (destIndex == INT32_MAX) {
        return -1;  // integer overflow
    }
    return destIndex + 1;
}
144
145
int32_t
146
appendNonEmptyUnchanged(char16_t *dest, int32_t destIndex, int32_t destCapacity,
147
5.29M
                        const char16_t *s, int32_t length, uint32_t options, icu::Edits *edits) {
148
5.29M
    if(edits!=nullptr) {
149
1.68M
        edits->addUnchanged(length);
150
1.68M
    }
151
5.29M
    if(options & U_OMIT_UNCHANGED_TEXT) {
152
1.68M
        return destIndex;
153
1.68M
    }
154
3.60M
    if(length>(INT32_MAX-destIndex)) {
155
0
        return -1;  // integer overflow
156
0
    }
157
3.60M
    if((destIndex+length)<=destCapacity) {
158
3.60M
        u_memcpy(dest+destIndex, s, length);
159
3.60M
    }
160
3.60M
    return destIndex + length;
161
3.60M
}
162
163
inline int32_t
164
appendUnchanged(char16_t *dest, int32_t destIndex, int32_t destCapacity,
165
28.3M
                const char16_t *s, int32_t length, uint32_t options, icu::Edits *edits) {
166
28.3M
    if (length <= 0) {
167
23.0M
        return destIndex;
168
23.0M
    }
169
5.29M
    return appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits);
170
28.3M
}
171
172
/**
 * Context iterator over UTF-16 text described by a UCaseContext.
 *
 * @param context pointer to the UCaseContext
 * @param dir <0: restart backward iteration from cpStart;
 *            >0: restart forward iteration from cpLimit;
 *             0: continue in the direction stored in the context
 * @return the next code point in that direction, or U_SENTINEL when the
 *         start (backward) or limit (forward) of the text has been reached
 */
UChar32 U_CALLCONV
utf16_caseContextIterator(void *context, int8_t dir) {
    UCaseContext *ctx = static_cast<UCaseContext *>(context);
    const char16_t *text = static_cast<const char16_t *>(ctx->p);

    if (dir != 0) {
        // Reset: start right before or right after the current code point.
        ctx->index = (dir < 0) ? ctx->cpStart : ctx->cpLimit;
        ctx->dir = dir;
    } else {
        // Continue in the current iteration direction.
        dir = ctx->dir;
    }

    UChar32 c;
    if (dir < 0) {
        if (ctx->start >= ctx->index) {
            return U_SENTINEL;
        }
        U16_PREV(text, ctx->start, ctx->index, c);
        return c;
    }
    if (ctx->index >= ctx->limit) {
        return U_SENTINEL;
    }
    U16_NEXT(text, ctx->index, ctx->limit, c);
    return c;
}
203
204
/**
205
 * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
206
 * caseLocale < 0: Case-folds [srcStart..srcLimit[.
207
 */
208
int32_t toLower(int32_t caseLocale, uint32_t options,
209
                char16_t *dest, int32_t destCapacity,
210
                const char16_t *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
211
2.14M
                icu::Edits *edits, UErrorCode &errorCode) {
212
2.14M
    const int8_t *latinToLower;
213
2.14M
    if (caseLocale == UCASE_LOC_ROOT ||
214
2.03M
            (caseLocale >= 0 ?
215
0
                !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
216
2.14M
                (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
217
2.14M
        latinToLower = LatinCase::TO_LOWER_NORMAL;
218
2.14M
    } else {
219
0
        latinToLower = LatinCase::TO_LOWER_TR_LT;
220
0
    }
221
2.14M
    const UTrie2 *trie = ucase_getTrie();
222
2.14M
    int32_t destIndex = 0;
223
2.14M
    int32_t prev = srcStart;
224
2.14M
    int32_t srcIndex = srcStart;
225
31.6M
    for (;;) {
226
        // fast path for simple cases
227
31.6M
        char16_t lead = 0;
228
85.0M
        while (srcIndex < srcLimit) {
229
82.9M
            lead = src[srcIndex];
230
82.9M
            int32_t delta;
231
82.9M
            if (lead < LatinCase::LONG_S) {
232
12.3M
                int8_t d = latinToLower[lead];
233
12.3M
                if (d == LatinCase::EXC) { break; }
234
10.3M
                ++srcIndex;
235
10.3M
                if (d == 0) { continue; }
236
1.92M
                delta = d;
237
70.5M
            } else if (lead >= 0xd800) {
238
8.49M
                break;  // surrogate or higher
239
62.0M
            } else {
240
62.0M
                uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
241
62.0M
                if (UCASE_HAS_EXCEPTION(props)) { break; }
242
43.0M
                ++srcIndex;
243
43.0M
                if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
244
40.1M
                    continue;
245
40.1M
                }
246
43.0M
            }
247
4.83M
            lead += static_cast<char16_t>(delta);
248
4.83M
            destIndex = appendUnchanged(dest, destIndex, destCapacity,
249
4.83M
                                        src + prev, srcIndex - 1 - prev, options, edits);
250
4.83M
            if (destIndex >= 0) {
251
4.83M
                destIndex = appendUChar(dest, destIndex, destCapacity, lead);
252
4.83M
                if (edits != nullptr) {
253
2.34M
                    edits->addReplace(1, 1);
254
2.34M
                }
255
4.83M
            }
256
4.83M
            if (destIndex < 0) {
257
0
                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
258
0
                return 0;
259
0
            }
260
4.83M
            prev = srcIndex;
261
4.83M
        }
262
31.6M
        if (srcIndex >= srcLimit) {
263
2.14M
            break;
264
2.14M
        }
265
        // slow path
266
29.5M
        int32_t cpStart = srcIndex++;
267
29.5M
        char16_t trail;
268
29.5M
        UChar32 c;
269
29.5M
        if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) {
270
37.0k
            c = U16_GET_SUPPLEMENTARY(lead, trail);
271
37.0k
            ++srcIndex;
272
29.4M
        } else {
273
29.4M
            c = lead;
274
29.4M
        }
275
29.5M
        const char16_t *s = nullptr;
276
29.5M
        if (caseLocale >= 0) {
277
3.18M
            csc->cpStart = cpStart;
278
3.18M
            csc->cpLimit = srcIndex;
279
3.18M
            c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale);
280
26.3M
        } else {
281
26.3M
            c = ucase_toFullFolding(c, &s, options);
282
26.3M
        }
283
29.5M
        if (c >= 0) {
284
21.3M
            destIndex = appendUnchanged(dest, destIndex, destCapacity,
285
21.3M
                                        src + prev, cpStart - prev, options, edits);
286
21.3M
            if (destIndex >= 0) {
287
21.3M
                destIndex = appendResult(dest, destIndex, destCapacity, c, s,
288
21.3M
                                         srcIndex - cpStart, options, edits);
289
21.3M
            }
290
21.3M
            if (destIndex < 0) {
291
0
                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
292
0
                return 0;
293
0
            }
294
21.3M
            prev = srcIndex;
295
21.3M
        }
296
29.5M
    }
297
2.14M
    destIndex = appendUnchanged(dest, destIndex, destCapacity,
298
2.14M
                                src + prev, srcIndex - prev, options, edits);
299
2.14M
    if (destIndex < 0) {
300
0
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
301
0
        return 0;
302
0
    }
303
2.14M
    return destIndex;
304
2.14M
}
305
306
int32_t toUpper(int32_t caseLocale, uint32_t options,
307
                char16_t *dest, int32_t destCapacity,
308
                const char16_t *src, UCaseContext *csc, int32_t srcLength,
309
0
                icu::Edits *edits, UErrorCode &errorCode) {
310
0
    const int8_t *latinToUpper;
311
0
    if (caseLocale == UCASE_LOC_TURKISH) {
312
0
        latinToUpper = LatinCase::TO_UPPER_TR;
313
0
    } else {
314
0
        latinToUpper = LatinCase::TO_UPPER_NORMAL;
315
0
    }
316
0
    const UTrie2 *trie = ucase_getTrie();
317
0
    int32_t destIndex = 0;
318
0
    int32_t prev = 0;
319
0
    int32_t srcIndex = 0;
320
0
    for (;;) {
321
        // fast path for simple cases
322
0
        char16_t lead = 0;
323
0
        while (srcIndex < srcLength) {
324
0
            lead = src[srcIndex];
325
0
            int32_t delta;
326
0
            if (lead < LatinCase::LONG_S) {
327
0
                int8_t d = latinToUpper[lead];
328
0
                if (d == LatinCase::EXC) { break; }
329
0
                ++srcIndex;
330
0
                if (d == 0) { continue; }
331
0
                delta = d;
332
0
            } else if (lead >= 0xd800) {
333
0
                break;  // surrogate or higher
334
0
            } else {
335
0
                uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
336
0
                if (UCASE_HAS_EXCEPTION(props)) { break; }
337
0
                ++srcIndex;
338
0
                if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
339
0
                    continue;
340
0
                }
341
0
            }
342
0
            lead += static_cast<char16_t>(delta);
343
0
            destIndex = appendUnchanged(dest, destIndex, destCapacity,
344
0
                                        src + prev, srcIndex - 1 - prev, options, edits);
345
0
            if (destIndex >= 0) {
346
0
                destIndex = appendUChar(dest, destIndex, destCapacity, lead);
347
0
                if (edits != nullptr) {
348
0
                    edits->addReplace(1, 1);
349
0
                }
350
0
            }
351
0
            if (destIndex < 0) {
352
0
                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
353
0
                return 0;
354
0
            }
355
0
            prev = srcIndex;
356
0
        }
357
0
        if (srcIndex >= srcLength) {
358
0
            break;
359
0
        }
360
        // slow path
361
0
        int32_t cpStart;
362
0
        csc->cpStart = cpStart = srcIndex++;
363
0
        char16_t trail;
364
0
        UChar32 c;
365
0
        if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) {
366
0
            c = U16_GET_SUPPLEMENTARY(lead, trail);
367
0
            ++srcIndex;
368
0
        } else {
369
0
            c = lead;
370
0
        }
371
0
        csc->cpLimit = srcIndex;
372
0
        const char16_t *s = nullptr;
373
0
        c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale);
374
0
        if (c >= 0) {
375
0
            destIndex = appendUnchanged(dest, destIndex, destCapacity,
376
0
                                        src + prev, cpStart - prev, options, edits);
377
0
            if (destIndex >= 0) {
378
0
                destIndex = appendResult(dest, destIndex, destCapacity, c, s,
379
0
                                         srcIndex - cpStart, options, edits);
380
0
            }
381
0
            if (destIndex < 0) {
382
0
                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
383
0
                return 0;
384
0
            }
385
0
            prev = srcIndex;
386
0
        }
387
0
    }
388
0
    destIndex = appendUnchanged(dest, destIndex, destCapacity,
389
0
                                src + prev, srcIndex - prev, options, edits);
390
0
    if (destIndex < 0) {
391
0
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
392
0
        return 0;
393
0
    }
394
0
    return destIndex;
395
0
}
396
397
}  // namespace
398
399
U_NAMESPACE_END
400
401
U_NAMESPACE_USE
402
403
#if !UCONFIG_NO_BREAK_ITERATION
404
405
namespace {
406
407
/**
408
 * Input: c is a letter I with or without acute accent.
409
 * start is the index in src after c, and is less than segmentLimit.
410
 * If a plain i/I is followed by a plain j/J,
411
 * or an i/I with acute (precomposed or decomposed) is followed by a j/J with acute,
412
 * then we output accordingly.
413
 *
414
 * @return the src index after the titlecased sequence, or the start index if no Dutch IJ
415
 */
416
int32_t maybeTitleDutchIJ(const char16_t *src, UChar32 c, int32_t start, int32_t segmentLimit,
417
                          char16_t *dest, int32_t &destIndex, int32_t destCapacity, uint32_t options,
418
0
                          icu::Edits *edits) {
419
0
    U_ASSERT(start < segmentLimit);
420
421
0
    int32_t index = start;
422
0
    bool withAcute = false;
423
424
    // If the conditions are met, then the following variables tell us what to output.
425
0
    int32_t unchanged1 = 0;  // code units before the j, or the whole sequence (0..3)
426
0
    bool doTitleJ = false;  // true if the j needs to be titlecased
427
0
    int32_t unchanged2 = 0;  // after the j (0 or 1)
428
429
    // next character after the first letter
430
0
    char16_t c2 = src[index++];
431
432
    // Is the first letter an i/I with accent?
433
0
    if (c == u'I') {
434
0
        if (c2 == ACUTE) {
435
0
            withAcute = true;
436
0
            unchanged1 = 1;
437
0
            if (index == segmentLimit) { return start; }
438
0
            c2 = src[index++];
439
0
        }
440
0
    } else {  // Í
441
0
        withAcute = true;
442
0
    }
443
444
    // Is the next character a j/J?
445
0
    if (c2 == u'j') {
446
0
        doTitleJ = true;
447
0
    } else if (c2 == u'J') {
448
0
        ++unchanged1;
449
0
    } else {
450
0
        return start;
451
0
    }
452
453
    // A plain i/I must be followed by a plain j/J.
454
    // An i/I with acute must be followed by a j/J with acute.
455
0
    if (withAcute) {
456
0
        if (index == segmentLimit || src[index++] != ACUTE) { return start; }
457
0
        if (doTitleJ) {
458
0
            unchanged2 = 1;
459
0
        } else {
460
0
            ++unchanged1;
461
0
        }
462
0
    }
463
464
    // There must not be another combining mark.
465
0
    if (index < segmentLimit) {
466
0
        int32_t cp;
467
0
        int32_t i = index;
468
0
        U16_NEXT(src, i, segmentLimit, cp);
469
0
        uint32_t typeMask = U_GET_GC_MASK(cp);
470
0
        if ((typeMask & U_GC_M_MASK) != 0) {
471
0
            return start;
472
0
        }
473
0
    }
474
475
    // Output the rest of the Dutch IJ.
476
0
    destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged1, options, edits);
477
0
    start += unchanged1;
478
0
    if (doTitleJ) {
479
0
        destIndex = appendUChar(dest, destIndex, destCapacity, u'J');
480
0
        if (edits != nullptr) {
481
0
            edits->addReplace(1, 1);
482
0
        }
483
0
        ++start;
484
0
    }
485
0
    destIndex = appendUnchanged(dest, destIndex, destCapacity, src + start, unchanged2, options, edits);
486
487
0
    U_ASSERT(start + unchanged2 == index);
488
0
    return index;
489
0
}
490
491
}  // namespace
492
493
/**
 * Titlecases src[0..srcLength[ into dest, one break-iterator segment at a time.
 *
 * For each segment delivered by iter: optionally skips leading characters
 * (copied as-is), titlecases the first remaining code point, applies the
 * Dutch IJ special case for UCASE_LOC_DUTCH, then lowercases the rest of the
 * segment (or copies it unchanged with U_TITLECASE_NO_LOWERCASE).
 *
 * Output that does not fit into destCapacity is counted but not written.
 *
 * @param iter break iterator that yields the segment boundaries
 * @param edits optional; receives the record of changes
 * @param errorCode set to U_INDEX_OUTOFBOUNDS_ERROR if the output length
 *                  would exceed INT32_MAX; the final overflow/edits status
 *                  comes from checkOverflowAndEditsError()
 * @return the output length, or 0 on option errors and index overflow
 */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,
                         char16_t *dest, int32_t destCapacity,
                         const char16_t *src, int32_t srcLength,
                         icu::Edits *edits,
                         UErrorCode &errorCode) {
    if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
        return 0;
    }

    /* set up local variables */
    UCaseContext csc=UCASECONTEXT_INITIALIZER;
    csc.p=(void *)src;
    csc.limit=srcLength;
    int32_t destIndex=0;
    int32_t prev=0;
    bool isFirstIndex=true;

    /* titlecasing loop */
    while(prev<srcLength) {
        /* find next index where to titlecase */
        int32_t index;
        if(isFirstIndex) {
            isFirstIndex=false;
            index=iter->first();
        } else {
            index=iter->next();
        }
        if(index==UBRK_DONE || index>srcLength) {
            index=srcLength;
        }

        /*
         * Segment [prev..index[ into 3 parts:
         * a) skipped characters (copy as-is) [prev..titleStart[
         * b) first letter (titlecase)              [titleStart..titleLimit[
         * c) subsequent characters (lowercase)                 [titleLimit..index[
         */
        if(prev<index) {
            // Find and copy skipped characters [prev..titleStart[
            int32_t titleStart=prev;
            int32_t titleLimit=prev;
            UChar32 c;
            U16_NEXT(src, titleLimit, index, c);
            if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
                // Adjust the titlecasing index to the next cased character,
                // or to the next letter/number/symbol/private use.
                // Stop with titleStart<titleLimit<=index
                // if there is a character to be titlecased,
                // or else stop with titleStart==titleLimit==index.
                bool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
                while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
                    titleStart=titleLimit;
                    if(titleLimit==index) {
                        break;
                    }
                    U16_NEXT(src, titleLimit, index, c);
                }
                if (prev < titleStart) {
                    destIndex=appendUnchanged(dest, destIndex, destCapacity,
                                              src+prev, titleStart-prev, options, edits);
                    if(destIndex<0) {
                        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                        return 0;
                    }
                }
            }

            if(titleStart<titleLimit) {
                /* titlecase c which is from [titleStart..titleLimit[ */
                csc.cpStart=titleStart;
                csc.cpLimit=titleLimit;
                // Initialized like in toLower()/toUpper() so that appendResult()
                // never receives an indeterminate pointer.
                const char16_t *s=nullptr;
                c=ucase_toFullTitle(c, utf16_caseContextIterator, &csc, &s, caseLocale);
                destIndex=appendResult(dest, destIndex, destCapacity, c, s,
                                       titleLimit-titleStart, options, edits);
                if(destIndex<0) {
                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                    return 0;
                }

                /* Special case Dutch IJ titlecasing */
                if (titleStart+1 < index &&
                        caseLocale == UCASE_LOC_DUTCH) {
                    if (c < 0) {
                        c = ~c;
                    }

                    if (c == u'I' || c == u'Í') {
                        titleLimit = maybeTitleDutchIJ(src, c, titleStart + 1, index,
                                                       dest, destIndex, destCapacity, options,
                                                       edits);
                        // The helper appends through destIndex; a negative value
                        // means the output length no longer fits into int32_t.
                        if(destIndex<0) {
                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                            return 0;
                        }
                    }
                }

                /* lowercase [titleLimit..index[ */
                if(titleLimit<index) {
                    if((options&U_TITLECASE_NO_LOWERCASE)==0) {
                        /* Normal operation: Lowercase the rest of the word. */
                        // While preflighting, destIndex may already be beyond
                        // destCapacity. Do not form a pointer past the end of
                        // dest in that case; toLower() writes nothing when its
                        // capacity is <= 0.
                        const int32_t restCapacity=destCapacity-destIndex;
                        char16_t *restDest=
                            (dest==nullptr || restCapacity<=0) ? nullptr : dest+destIndex;
                        const int32_t lowered=
                            toLower(
                                caseLocale, options,
                                restDest, restCapacity,
                                src, &csc, titleLimit, index,
                                edits, errorCode);
                        // Guard the sum: both terms are non-negative but may
                        // together exceed INT32_MAX.
                        if(lowered>(INT32_MAX-destIndex)) {
                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                            return 0;
                        }
                        destIndex+=lowered;
                        if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
                            errorCode=U_ZERO_ERROR;
                        }
                        if(U_FAILURE(errorCode)) {
                            return destIndex;
                        }
                    } else {
                        /* Optionally just copy the rest of the word unchanged. */
                        destIndex=appendUnchanged(dest, destIndex, destCapacity,
                                                  src+titleLimit, index-titleLimit, options, edits);
                        if(destIndex<0) {
                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                            return 0;
                        }
                    }
                }
            }
        }

        prev=index;
    }

    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}
622
623
#endif  // !UCONFIG_NO_BREAK_ITERATION
624
625
U_NAMESPACE_BEGIN
626
namespace GreekUpper {
627
628
// Data generated by prototype code, see
629
// https://icu.unicode.org/design/case/greek-upper
630
// TODO: Move this data into ucase.icu.
631
static const uint16_t data0370[] = {
632
    // U+0370..03FF
633
    0x0370,
634
    0x0370,
635
    0x0372,
636
    0x0372,
637
    0,
638
    0,
639
    0x0376,
640
    0x0376,
641
    0,
642
    0,
643
    0x037A,
644
    0x03FD,
645
    0x03FE,
646
    0x03FF,
647
    0,
648
    0x037F,
649
    0,
650
    0,
651
    0,
652
    0,
653
    0,
654
    0,
655
    0x0391 | HAS_VOWEL | HAS_ACCENT,
656
    0,
657
    0x0395 | HAS_VOWEL | HAS_ACCENT,
658
    0x0397 | HAS_VOWEL | HAS_ACCENT,
659
    0x0399 | HAS_VOWEL | HAS_ACCENT,
660
    0,
661
    0x039F | HAS_VOWEL | HAS_ACCENT,
662
    0,
663
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
664
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
665
    0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
666
    0x0391 | HAS_VOWEL,
667
    0x0392,
668
    0x0393,
669
    0x0394,
670
    0x0395 | HAS_VOWEL,
671
    0x0396,
672
    0x0397 | HAS_VOWEL,
673
    0x0398,
674
    0x0399 | HAS_VOWEL,
675
    0x039A,
676
    0x039B,
677
    0x039C,
678
    0x039D,
679
    0x039E,
680
    0x039F | HAS_VOWEL,
681
    0x03A0,
682
    0x03A1,
683
    0,
684
    0x03A3,
685
    0x03A4,
686
    0x03A5 | HAS_VOWEL,
687
    0x03A6,
688
    0x03A7,
689
    0x03A8,
690
    0x03A9 | HAS_VOWEL,
691
    0x0399 | HAS_VOWEL | HAS_DIALYTIKA,
692
    0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,
693
    0x0391 | HAS_VOWEL | HAS_ACCENT,
694
    0x0395 | HAS_VOWEL | HAS_ACCENT,
695
    0x0397 | HAS_VOWEL | HAS_ACCENT,
696
    0x0399 | HAS_VOWEL | HAS_ACCENT,
697
    0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
698
    0x0391 | HAS_VOWEL,
699
    0x0392,
700
    0x0393,
701
    0x0394,
702
    0x0395 | HAS_VOWEL,
703
    0x0396,
704
    0x0397 | HAS_VOWEL,
705
    0x0398,
706
    0x0399 | HAS_VOWEL,
707
    0x039A,
708
    0x039B,
709
    0x039C,
710
    0x039D,
711
    0x039E,
712
    0x039F | HAS_VOWEL,
713
    0x03A0,
714
    0x03A1,
715
    0x03A3,
716
    0x03A3,
717
    0x03A4,
718
    0x03A5 | HAS_VOWEL,
719
    0x03A6,
720
    0x03A7,
721
    0x03A8,
722
    0x03A9 | HAS_VOWEL,
723
    0x0399 | HAS_VOWEL | HAS_DIALYTIKA,
724
    0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,
725
    0x039F | HAS_VOWEL | HAS_ACCENT,
726
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
727
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
728
    0x03CF,
729
    0x0392,
730
    0x0398,
731
    0x03D2,
732
    0x03D2 | HAS_ACCENT,
733
    0x03D2 | HAS_DIALYTIKA,
734
    0x03A6,
735
    0x03A0,
736
    0x03CF,
737
    0x03D8,
738
    0x03D8,
739
    0x03DA,
740
    0x03DA,
741
    0x03DC,
742
    0x03DC,
743
    0x03DE,
744
    0x03DE,
745
    0x03E0,
746
    0x03E0,
747
    0,
748
    0,
749
    0,
750
    0,
751
    0,
752
    0,
753
    0,
754
    0,
755
    0,
756
    0,
757
    0,
758
    0,
759
    0,
760
    0,
761
    0x039A,
762
    0x03A1,
763
    0x03F9,
764
    0x037F,
765
    0x03F4,
766
    0x0395 | HAS_VOWEL,
767
    0,
768
    0x03F7,
769
    0x03F7,
770
    0x03F9,
771
    0x03FA,
772
    0x03FA,
773
    0x03FC,
774
    0x03FD,
775
    0x03FE,
776
    0x03FF,
777
};
778
779
// Per-code-point Greek letter data for the Greek Extended block.
// getLetterData() indexes this table with (c - 0x1f00), so entry i describes U+1F00 + i.
// Each non-zero entry is a base capital letter code point OR-ed with HAS_* flags;
// a 0 entry means the code point carries no Greek letter data.
static const uint16_t data1F00[] = {
780
    // U+1F00..1FFF (256 entries, 16 per marked row below)
781
    // U+1F00..1F0F
    0x0391 | HAS_VOWEL,
782
    0x0391 | HAS_VOWEL,
783
    0x0391 | HAS_VOWEL | HAS_ACCENT,
784
    0x0391 | HAS_VOWEL | HAS_ACCENT,
785
    0x0391 | HAS_VOWEL | HAS_ACCENT,
786
    0x0391 | HAS_VOWEL | HAS_ACCENT,
787
    0x0391 | HAS_VOWEL | HAS_ACCENT,
788
    0x0391 | HAS_VOWEL | HAS_ACCENT,
789
    0x0391 | HAS_VOWEL,
790
    0x0391 | HAS_VOWEL,
791
    0x0391 | HAS_VOWEL | HAS_ACCENT,
792
    0x0391 | HAS_VOWEL | HAS_ACCENT,
793
    0x0391 | HAS_VOWEL | HAS_ACCENT,
794
    0x0391 | HAS_VOWEL | HAS_ACCENT,
795
    0x0391 | HAS_VOWEL | HAS_ACCENT,
796
    0x0391 | HAS_VOWEL | HAS_ACCENT,
797
    // U+1F10..1F1F
    0x0395 | HAS_VOWEL,
798
    0x0395 | HAS_VOWEL,
799
    0x0395 | HAS_VOWEL | HAS_ACCENT,
800
    0x0395 | HAS_VOWEL | HAS_ACCENT,
801
    0x0395 | HAS_VOWEL | HAS_ACCENT,
802
    0x0395 | HAS_VOWEL | HAS_ACCENT,
803
    0,
804
    0,
805
    0x0395 | HAS_VOWEL,
806
    0x0395 | HAS_VOWEL,
807
    0x0395 | HAS_VOWEL | HAS_ACCENT,
808
    0x0395 | HAS_VOWEL | HAS_ACCENT,
809
    0x0395 | HAS_VOWEL | HAS_ACCENT,
810
    0x0395 | HAS_VOWEL | HAS_ACCENT,
811
    0,
812
    0,
813
    // U+1F20..1F2F
    0x0397 | HAS_VOWEL,
814
    0x0397 | HAS_VOWEL,
815
    0x0397 | HAS_VOWEL | HAS_ACCENT,
816
    0x0397 | HAS_VOWEL | HAS_ACCENT,
817
    0x0397 | HAS_VOWEL | HAS_ACCENT,
818
    0x0397 | HAS_VOWEL | HAS_ACCENT,
819
    0x0397 | HAS_VOWEL | HAS_ACCENT,
820
    0x0397 | HAS_VOWEL | HAS_ACCENT,
821
    0x0397 | HAS_VOWEL,
822
    0x0397 | HAS_VOWEL,
823
    0x0397 | HAS_VOWEL | HAS_ACCENT,
824
    0x0397 | HAS_VOWEL | HAS_ACCENT,
825
    0x0397 | HAS_VOWEL | HAS_ACCENT,
826
    0x0397 | HAS_VOWEL | HAS_ACCENT,
827
    0x0397 | HAS_VOWEL | HAS_ACCENT,
828
    0x0397 | HAS_VOWEL | HAS_ACCENT,
829
    // U+1F30..1F3F
    0x0399 | HAS_VOWEL,
830
    0x0399 | HAS_VOWEL,
831
    0x0399 | HAS_VOWEL | HAS_ACCENT,
832
    0x0399 | HAS_VOWEL | HAS_ACCENT,
833
    0x0399 | HAS_VOWEL | HAS_ACCENT,
834
    0x0399 | HAS_VOWEL | HAS_ACCENT,
835
    0x0399 | HAS_VOWEL | HAS_ACCENT,
836
    0x0399 | HAS_VOWEL | HAS_ACCENT,
837
    0x0399 | HAS_VOWEL,
838
    0x0399 | HAS_VOWEL,
839
    0x0399 | HAS_VOWEL | HAS_ACCENT,
840
    0x0399 | HAS_VOWEL | HAS_ACCENT,
841
    0x0399 | HAS_VOWEL | HAS_ACCENT,
842
    0x0399 | HAS_VOWEL | HAS_ACCENT,
843
    0x0399 | HAS_VOWEL | HAS_ACCENT,
844
    0x0399 | HAS_VOWEL | HAS_ACCENT,
845
    // U+1F40..1F4F
    0x039F | HAS_VOWEL,
846
    0x039F | HAS_VOWEL,
847
    0x039F | HAS_VOWEL | HAS_ACCENT,
848
    0x039F | HAS_VOWEL | HAS_ACCENT,
849
    0x039F | HAS_VOWEL | HAS_ACCENT,
850
    0x039F | HAS_VOWEL | HAS_ACCENT,
851
    0,
852
    0,
853
    0x039F | HAS_VOWEL,
854
    0x039F | HAS_VOWEL,
855
    0x039F | HAS_VOWEL | HAS_ACCENT,
856
    0x039F | HAS_VOWEL | HAS_ACCENT,
857
    0x039F | HAS_VOWEL | HAS_ACCENT,
858
    0x039F | HAS_VOWEL | HAS_ACCENT,
859
    0,
860
    0,
861
    // U+1F50..1F5F
    0x03A5 | HAS_VOWEL,
862
    0x03A5 | HAS_VOWEL,
863
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
864
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
865
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
866
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
867
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
868
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
869
    0,
870
    0x03A5 | HAS_VOWEL,
871
    0,
872
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
873
    0,
874
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
875
    0,
876
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
877
    // U+1F60..1F6F
    0x03A9 | HAS_VOWEL,
878
    0x03A9 | HAS_VOWEL,
879
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
880
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
881
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
882
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
883
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
884
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
885
    0x03A9 | HAS_VOWEL,
886
    0x03A9 | HAS_VOWEL,
887
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
888
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
889
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
890
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
891
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
892
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
893
    // U+1F70..1F7F
    0x0391 | HAS_VOWEL | HAS_ACCENT,
894
    0x0391 | HAS_VOWEL | HAS_ACCENT,
895
    0x0395 | HAS_VOWEL | HAS_ACCENT,
896
    0x0395 | HAS_VOWEL | HAS_ACCENT,
897
    0x0397 | HAS_VOWEL | HAS_ACCENT,
898
    0x0397 | HAS_VOWEL | HAS_ACCENT,
899
    0x0399 | HAS_VOWEL | HAS_ACCENT,
900
    0x0399 | HAS_VOWEL | HAS_ACCENT,
901
    0x039F | HAS_VOWEL | HAS_ACCENT,
902
    0x039F | HAS_VOWEL | HAS_ACCENT,
903
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
904
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
905
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
906
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
907
    0,
908
    0,
909
    // U+1F80..1F8F
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
910
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
911
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
912
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
913
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
914
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
915
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
916
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
917
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
918
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
919
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
920
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
921
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
922
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
923
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
924
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
925
    // U+1F90..1F9F
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
926
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
927
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
928
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
929
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
930
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
931
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
932
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
933
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
934
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
935
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
936
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
937
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
938
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
939
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
940
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
941
    // U+1FA0..1FAF
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
942
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
943
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
944
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
945
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
946
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
947
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
948
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
949
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
950
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
951
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
952
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
953
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
954
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
955
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
956
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
957
    // U+1FB0..1FBF
    0x0391 | HAS_VOWEL,
958
    0x0391 | HAS_VOWEL,
959
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
960
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
961
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
962
    0,
963
    0x0391 | HAS_VOWEL | HAS_ACCENT,
964
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
965
    0x0391 | HAS_VOWEL,
966
    0x0391 | HAS_VOWEL,
967
    0x0391 | HAS_VOWEL | HAS_ACCENT,
968
    0x0391 | HAS_VOWEL | HAS_ACCENT,
969
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
970
    0,
971
    0x0399 | HAS_VOWEL,
972
    0,
973
    // U+1FC0..1FCF
    0,
974
    0,
975
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
976
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
977
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
978
    0,
979
    0x0397 | HAS_VOWEL | HAS_ACCENT,
980
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
981
    0x0395 | HAS_VOWEL | HAS_ACCENT,
982
    0x0395 | HAS_VOWEL | HAS_ACCENT,
983
    0x0397 | HAS_VOWEL | HAS_ACCENT,
984
    0x0397 | HAS_VOWEL | HAS_ACCENT,
985
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
986
    0,
987
    0,
988
    0,
989
    // U+1FD0..1FDF
    0x0399 | HAS_VOWEL,
990
    0x0399 | HAS_VOWEL,
991
    0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
992
    0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
993
    0,
994
    0,
995
    0x0399 | HAS_VOWEL | HAS_ACCENT,
996
    0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
997
    0x0399 | HAS_VOWEL,
998
    0x0399 | HAS_VOWEL,
999
    0x0399 | HAS_VOWEL | HAS_ACCENT,
1000
    0x0399 | HAS_VOWEL | HAS_ACCENT,
1001
    0,
1002
    0,
1003
    0,
1004
    0,
1005
    // U+1FE0..1FEF
    0x03A5 | HAS_VOWEL,
1006
    0x03A5 | HAS_VOWEL,
1007
    0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
1008
    0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
1009
    0x03A1,
1010
    0x03A1,
1011
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
1012
    0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
1013
    0x03A5 | HAS_VOWEL,
1014
    0x03A5 | HAS_VOWEL,
1015
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
1016
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
1017
    0x03A1,
1018
    0,
1019
    0,
1020
    0,
1021
    // U+1FF0..1FFF
    0,
1022
    0,
1023
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
1024
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
1025
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
1026
    0,
1027
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
1028
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
1029
    0x039F | HAS_VOWEL | HAS_ACCENT,
1030
    0x039F | HAS_VOWEL | HAS_ACCENT,
1031
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
1032
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
1033
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
1034
    0,
1035
    0,
1036
    0,
1037
};
1038
1039
// U+2126 Ohm sign
1040
static const uint16_t data2126 = 0x03A9 | HAS_VOWEL;
1041
1042
13.9M
/**
 * Looks up the Greek letter data for a code point.
 *
 * @param c the code point to look up
 * @return the table entry for c when it lies in U+0370..03FF or U+1F00..1FFF,
 *         the Ohm sign entry for U+2126, and 0 for every other code point
 */
uint32_t getLetterData(UChar32 c) {
    if (0x370 <= c && c <= 0x3ff) {
        // Greek and Coptic block.
        return data0370[c - 0x370];
    }
    if (0x1f00 <= c && c <= 0x1fff) {
        // Greek Extended block.
        return data1F00[c - 0x1f00];
    }
    if (c == 0x2126) {
        return data2126;
    }
    // Everything else has no Greek letter data.
    return 0;
}
1055
1056
435k
/**
 * Classifies a combining mark that may follow a Greek letter.
 *
 * @param c the code point to classify
 * @return a combination of HAS_* flags describing the diacritic,
 *         or 0 if c is not one of the diacritics handled here
 */
uint32_t getDiacriticData(UChar32 c) {
    uint32_t flags = 0;
    switch (c) {
    // Accents, including marks that can look like a perispomeni.
    case 0x0300:  // varia
    case 0x0301:  // tonos = oxia
    case 0x0342:  // perispomeni
    case 0x0302:  // circumflex can look like perispomeni
    case 0x0303:  // tilde can look like perispomeni
    case 0x0311:  // inverted breve can look like perispomeni
        flags = HAS_ACCENT;
        break;
    case 0x0308:  // dialytika = diaeresis
        flags = HAS_COMBINING_DIALYTIKA;
        break;
    case 0x0344:  // dialytika tonos
        flags = HAS_COMBINING_DIALYTIKA | HAS_ACCENT;
        break;
    case 0x0345:  // ypogegrammeni = iota subscript
        flags = HAS_YPOGEGRAMMENI;
        break;
    // Remaining Greek diacritics that carry no special handling of their own.
    case 0x0304:  // macron
    case 0x0306:  // breve
    case 0x0313:  // comma above
    case 0x0314:  // reversed comma above
    case 0x0343:  // koronis
        flags = HAS_OTHER_GREEK_DIACRITIC;
        break;
    default:
        break;
    }
    return flags;
}
1081
1082
0
/**
 * Scans forward from index i and reports whether the first code point that is
 * not case-ignorable is a cased letter.
 *
 * @param s      the UTF-16 text
 * @param i      index at which to start scanning
 * @param length limit index of the text
 * @return true if a cased letter follows (skipping case-ignorable code points),
 *         false if an uncased code point or the end of the text is reached first
 */
UBool isFollowedByCasedLetter(const char16_t *s, int32_t i, int32_t length) {
    for (int32_t pos = i; pos < length;) {
        UChar32 cp;
        U16_NEXT(s, pos, length, cp);
        const int32_t caseType = ucase_getTypeOrIgnorable(cp);
        if ((caseType & UCASE_IGNORABLE) == 0) {
            // The first code point that is not case-ignorable decides the answer:
            // cased -> followed by a cased letter; uncased -> not.
            return caseType != UCASE_NONE;
        }
        // Case-ignorable: keep scanning.
    }
    return false;  // Reached the end without finding a cased letter.
}
1097
1098
/**
1099
 * Greek string uppercasing with a state machine.
1100
 * Probably simpler than a stateless function that has to figure out complex context-before
1101
 * for each character.
1102
 * TODO: Try to re-consolidate one way or another with the non-Greek function.
1103
 */
1104
int32_t toUpper(uint32_t options,
1105
                char16_t *dest, int32_t destCapacity,
1106
                const char16_t *src, int32_t srcLength,
1107
                Edits *edits,
1108
0
                UErrorCode &errorCode) {
1109
0
    int32_t destIndex=0;
1110
0
    uint32_t state = 0;
1111
0
    for (int32_t i = 0; i < srcLength;) {
1112
0
        int32_t nextIndex = i;
1113
0
        UChar32 c;
1114
0
        U16_NEXT(src, nextIndex, srcLength, c);
1115
0
        uint32_t nextState = 0;
1116
0
        int32_t type = ucase_getTypeOrIgnorable(c);
1117
0
        if ((type & UCASE_IGNORABLE) != 0) {
1118
            // c is case-ignorable
1119
0
            nextState |= (state & AFTER_CASED);
1120
0
        } else if (type != UCASE_NONE) {
1121
            // c is cased
1122
0
            nextState |= AFTER_CASED;
1123
0
        }
1124
0
        uint32_t data = getLetterData(c);
1125
0
        if (data > 0) {
1126
0
            uint32_t upper = data & UPPER_MASK;
1127
            // Add a dialytika to this iota or ypsilon vowel
1128
            // if we removed a tonos from the previous vowel,
1129
            // and that previous vowel did not also have (or gain) a dialytika.
1130
            // Adding one only to the final vowel in a longer sequence
1131
            // (which does not occur in normal writing) would require lookahead.
1132
            // Set the same flag as for preserving an existing dialytika.
1133
0
            if ((data & HAS_VOWEL) != 0 &&
1134
0
                (state & (AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT | AFTER_VOWEL_WITH_COMBINING_ACCENT)) !=
1135
0
                    0 &&
1136
0
                (upper == 0x399 || upper == 0x3A5)) {
1137
0
                data |= (state & AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT) ? HAS_DIALYTIKA
1138
0
                                                                      : HAS_COMBINING_DIALYTIKA;
1139
0
            }
1140
0
            int32_t numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
1141
0
            if ((data & HAS_YPOGEGRAMMENI) != 0) {
1142
0
                numYpogegrammeni = 1;
1143
0
            }
1144
0
            const UBool hasPrecomposedAccent = (data & HAS_ACCENT) != 0;
1145
            // Skip combining diacritics after this Greek letter.
1146
0
            while (nextIndex < srcLength) {
1147
0
                uint32_t diacriticData = getDiacriticData(src[nextIndex]);
1148
0
                if (diacriticData != 0) {
1149
0
                    data |= diacriticData;
1150
0
                    if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
1151
0
                        ++numYpogegrammeni;
1152
0
                    }
1153
0
                    ++nextIndex;
1154
0
                } else {
1155
0
                    break;  // not a Greek diacritic
1156
0
                }
1157
0
            }
1158
0
            if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
1159
0
                nextState |= hasPrecomposedAccent ? AFTER_VOWEL_WITH_PRECOMPOSED_ACCENT
1160
0
                                                  : AFTER_VOWEL_WITH_COMBINING_ACCENT;
1161
0
            }
1162
            // Map according to Greek rules.
1163
0
            UBool addTonos = false;
1164
0
            if (upper == 0x397 &&
1165
0
                    (data & HAS_ACCENT) != 0 &&
1166
0
                    numYpogegrammeni == 0 &&
1167
0
                    (state & AFTER_CASED) == 0 &&
1168
0
                    !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
1169
                // Keep disjunctive "or" with (only) a tonos.
1170
                // We use the same "word boundary" conditions as for the Final_Sigma test.
1171
0
                if (hasPrecomposedAccent) {
1172
0
                    upper = 0x389;  // Preserve the precomposed form.
1173
0
                } else {
1174
0
                    addTonos = true;
1175
0
                }
1176
0
            } else if ((data & HAS_DIALYTIKA) != 0) {
1177
                // Preserve a vowel with dialytika in precomposed form if it exists.
1178
0
                if (upper == 0x399) {
1179
0
                    upper = 0x3AA;
1180
0
                    data &= ~HAS_EITHER_DIALYTIKA;
1181
0
                } else if (upper == 0x3A5) {
1182
0
                    upper = 0x3AB;
1183
0
                    data &= ~HAS_EITHER_DIALYTIKA;
1184
0
                }
1185
0
            }
1186
1187
0
            UBool change;
1188
0
            if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
1189
0
                change = true;  // common, simple usage
1190
0
            } else {
1191
                // Find out first whether we are changing the text.
1192
0
                change = src[i] != upper || numYpogegrammeni > 0;
1193
0
                int32_t i2 = i + 1;
1194
0
                if ((data & HAS_EITHER_DIALYTIKA) != 0) {
1195
0
                    change |= i2 >= nextIndex || src[i2] != 0x308;
1196
0
                    ++i2;
1197
0
                }
1198
0
                if (addTonos) {
1199
0
                    change |= i2 >= nextIndex || src[i2] != 0x301;
1200
0
                    ++i2;
1201
0
                }
1202
0
                int32_t oldLength = nextIndex - i;
1203
0
                int32_t newLength = (i2 - i) + numYpogegrammeni;
1204
0
                change |= oldLength != newLength;
1205
0
                if (change) {
1206
0
                    if (edits != nullptr) {
1207
0
                        edits->addReplace(oldLength, newLength);
1208
0
                    }
1209
0
                } else {
1210
0
                    if (edits != nullptr) {
1211
0
                        edits->addUnchanged(oldLength);
1212
0
                    }
1213
                    // Write unchanged text?
1214
0
                    change = (options & U_OMIT_UNCHANGED_TEXT) == 0;
1215
0
                }
1216
0
            }
1217
1218
0
            if (change) {
1219
0
                destIndex = appendUChar(dest, destIndex, destCapacity, static_cast<char16_t>(upper));
1220
0
                if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
1221
0
                    destIndex=appendUChar(dest, destIndex, destCapacity, 0x308);  // restore or add a dialytika
1222
0
                }
1223
0
                if (destIndex >= 0 && addTonos) {
1224
0
                    destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
1225
0
                }
1226
0
                while (destIndex >= 0 && numYpogegrammeni > 0) {
1227
0
                    destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);
1228
0
                    --numYpogegrammeni;
1229
0
                }
1230
0
                if(destIndex<0) {
1231
0
                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1232
0
                    return 0;
1233
0
                }
1234
0
            }
1235
0
        } else {
1236
0
            const char16_t *s;
1237
0
            c=ucase_toFullUpper(c, nullptr, nullptr, &s, UCASE_LOC_GREEK);
1238
0
            destIndex = appendResult(dest, destIndex, destCapacity, c, s,
1239
0
                                     nextIndex - i, options, edits);
1240
0
            if (destIndex < 0) {
1241
0
                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1242
0
                return 0;
1243
0
            }
1244
0
        }
1245
0
        i = nextIndex;
1246
0
        state = nextState;
1247
0
    }
1248
1249
0
    return destIndex;
1250
0
}
1251
1252
}  // namespace GreekUpper
1253
U_NAMESPACE_END
1254
1255
/* functions available in the common library (for unistr_case.cpp) */
1256
1257
/*
 * UStringCaseMapper-style entry point for lowercasing: sets up a UCaseContext
 * over the whole source string, runs toLower() on it, and then applies the
 * shared overflow/Edits error check to the resulting length.
 */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
                         char16_t *dest, int32_t destCapacity,
                         const char16_t *src, int32_t srcLength,
                         icu::Edits *edits,
                         UErrorCode &errorCode) {
    UCaseContext context = UCASECONTEXT_INITIALIZER;
    context.p = const_cast<char16_t *>(src);
    context.limit = srcLength;

    const int32_t length = toLower(caseLocale, options,
                                   dest, destCapacity,
                                   src, &context, 0, srcLength,
                                   edits, errorCode);
    return checkOverflowAndEditsError(length, destCapacity, edits, errorCode);
}
1273
1274
/*
 * UStringCaseMapper-style entry point for uppercasing.
 * The Greek case locale is routed to GreekUpper::toUpper(); every other locale
 * uses the generic toUpper() with a UCaseContext over the whole source string.
 * The resulting length then goes through the shared overflow/Edits error check.
 */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
                         char16_t *dest, int32_t destCapacity,
                         const char16_t *src, int32_t srcLength,
                         icu::Edits *edits,
                         UErrorCode &errorCode) {
    if (caseLocale == UCASE_LOC_GREEK) {
        const int32_t greekLength = GreekUpper::toUpper(options, dest, destCapacity,
                                                        src, srcLength, edits, errorCode);
        return checkOverflowAndEditsError(greekLength, destCapacity, edits, errorCode);
    }

    UCaseContext context = UCASECONTEXT_INITIALIZER;
    context.p = const_cast<char16_t *>(src);
    context.limit = srcLength;
    const int32_t length = toUpper(caseLocale, options,
                                   dest, destCapacity,
                                   src, &context, srcLength,
                                   edits, errorCode);
    return checkOverflowAndEditsError(length, destCapacity, edits, errorCode);
}
1296
1297
/*
 * UStringCaseMapper-style entry point for case folding.
 * Delegates to toLower() with a case locale of -1 and no UCaseContext
 * (presumably how toLower() is told to fold instead of lowercase - confirm
 * against toLower()), then applies the shared overflow/Edits error check.
 */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
                      char16_t *dest, int32_t destCapacity,
                      const char16_t *src, int32_t srcLength,
                      icu::Edits *edits,
                      UErrorCode &errorCode) {
    const int32_t length = toLower(-1, options,
                                   dest, destCapacity,
                                   src, nullptr, 0, srcLength,
                                   edits, errorCode);
    return checkOverflowAndEditsError(length, destCapacity, edits, errorCode);
}
1310
1311
/*
 * Validates the arguments, rejects overlapping source and destination buffers,
 * optionally resets the Edits object, runs the given string case mapper, and
 * terminates the destination string.
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for invalid arguments or for
 * overlapping buffers. Returns 0 immediately if errorCode already indicates a failure.
 */
U_CFUNC int32_t
ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
             char16_t *dest, int32_t destCapacity,
             const char16_t *src, int32_t srcLength,
             UStringCaseMapper *stringCaseMapper,
             icu::Edits *edits,
             UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return 0;
    }

    /* check argument values */
    const bool badDest = destCapacity < 0 || (dest == nullptr && destCapacity > 0);
    const bool badSrc = src == nullptr || srcLength < -1;
    if (badDest || badSrc) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* get the string length */
    if (srcLength == -1) {
        srcLength = u_strlen(src);
    }

    /* check for overlapping source and destination */
    if (dest != nullptr) {
        const bool srcStartsInDest = src >= dest && src < (dest + destCapacity);
        const bool destStartsInSrc = dest >= src && dest < (src + srcLength);
        if (srcStartsInDest || destStartsInSrc) {
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
    }

    /* start from a clean Edits object unless the caller asked to keep it */
    if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
        edits->reset();
    }

    const int32_t mappedLength =
        stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
                         dest, destCapacity, src, srcLength, edits, errorCode);
    return u_terminateUChars(dest, destCapacity, mappedLength, &errorCode);
}
1354
1355
/*
 * Runs the given string case mapper like ustrcase_map() does, but without Edits
 * and with support for overlapping source and destination buffers: when they
 * overlap, the mapping is written to a temporary buffer (on the stack if
 * destCapacity fits, otherwise heap-allocated) and then copied to dest.
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for invalid arguments, and
 * U_MEMORY_ALLOCATION_ERROR and returns 0 if the temporary buffer cannot be
 * allocated. Returns 0 immediately if errorCode already indicates a failure.
 */
U_CFUNC int32_t
ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
                        char16_t *dest, int32_t destCapacity,
                        const char16_t *src, int32_t srcLength,
                        UStringCaseMapper *stringCaseMapper,
                        UErrorCode &errorCode) {
    /* check argument values */
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    if( destCapacity<0 ||
        (dest==nullptr && destCapacity>0) ||
        src==nullptr ||
        srcLength<-1
    ) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* get the string length */
    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    char16_t buffer[300];
    char16_t *temp=dest;  /* no overlap: map directly into dest */

    /* check for overlapping source and destination */
    if( dest!=nullptr &&
        ((src>=dest && src<(dest+destCapacity)) ||
         (dest>=src && dest<(src+srcLength)))
    ) {
        /* overlap: provide a temporary destination buffer and later copy the result */
        if(destCapacity<=UPRV_LENGTHOF(buffer)) {
            /* the stack buffer is large enough */
            temp=buffer;
        } else {
            /*
             * allocate a buffer;
             * compute the byte count in size_t so that a capacity of 2^30 or more
             * code units does not overflow the int32_t multiplication
             */
            temp=static_cast<char16_t *>(
                uprv_malloc(static_cast<size_t>(destCapacity)*U_SIZEOF_UCHAR));
            if(temp==nullptr) {
                errorCode=U_MEMORY_ALLOCATION_ERROR;
                return 0;
            }
        }
    }

    const int32_t destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
                                              temp, destCapacity, src, srcLength, nullptr, errorCode);
    if(temp!=dest) {
        /* copy the result string to the destination buffer, but only if it fits */
        if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) {
            u_memmove(dest, temp, destLength);
        }
        if(temp!=buffer) {
            uprv_free(temp);
        }
    }

    return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
}
1419
1420
/* public API functions */
1421
1422
/*
 * Public C API: case-folds src into dest using the root case locale.
 * Forwards to ustrcase_mapWithOverlap() with ustrcase_internalFold as the mapper.
 * pErrorCode is dereferenced unconditionally, so it must not be null.
 */
U_CAPI int32_t U_EXPORT2
u_strFoldCase(char16_t *dest, int32_t destCapacity,
              const char16_t *src, int32_t srcLength,
              uint32_t options,
              UErrorCode *pErrorCode) {
    UErrorCode &errorCode = *pErrorCode;
    return ustrcase_mapWithOverlap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
                                   dest, destCapacity,
                                   src, srcLength,
                                   ustrcase_internalFold, errorCode);
}
1433
1434
U_NAMESPACE_BEGIN
1435
1436
int32_t CaseMap::fold(
1437
        uint32_t options,
1438
        const char16_t *src, int32_t srcLength,
1439
        char16_t *dest, int32_t destCapacity, Edits *edits,
1440
0
        UErrorCode &errorCode) {
1441
0
    return ustrcase_map(
1442
0
        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
1443
0
        dest, destCapacity,
1444
0
        src, srcLength,
1445
0
        ustrcase_internalFold, edits, errorCode);
1446
0
}
1447
1448
U_NAMESPACE_END
1449
1450
/* case-insensitive string comparisons -------------------------------------- */
1451
1452
/*
1453
 * This function is a copy of unorm_cmpEquivFold() minus the parts for
1454
 * canonical equivalence.
1455
 * Keep the functions in sync, and see there for how this works.
1456
 * The duplication is for modularization:
1457
 * It makes caseless (but not canonical caseless) matches independent of
1458
 * the normalization code.
1459
 */
1460
1461
/*
 * One saved level of source/decomposition pointers:
 * the start, current position, and limit of a previous-level string.
 */
struct CmpEquivLevel {
    const char16_t *start;
    const char16_t *s;
    const char16_t *limit;
};
typedef struct CmpEquivLevel CmpEquivLevel;
1466
1467
/**
1468
 * Internal implementation code comparing string with case fold.
1469
 * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch().
1470
 *
1471
 * @param s1            input string 1
1472
 * @param length1       length of string 1, or -1 (NUL terminated)
1473
 * @param s2            input string 2
1474
 * @param length2       length of string 2, or -1 (NUL terminated)
1475
 * @param options       compare options
1476
 * @param matchLen1     (output) length of partial prefix match in s1
1477
 * @param matchLen2     (output) length of partial prefix match in s2
1478
 * @param pErrorCode    receives error status
1479
 * @return The result of comparison
1480
 */
1481
static int32_t _cmpFold(
1482
            const char16_t *s1, int32_t length1,
1483
            const char16_t *s2, int32_t length2,
1484
            uint32_t options,
1485
            int32_t *matchLen1, int32_t *matchLen2,
1486
34.0k
            UErrorCode *pErrorCode) {
1487
34.0k
    int32_t cmpRes = 0;
1488
1489
    /* current-level start/limit - s1/s2 as current */
1490
34.0k
    const char16_t *start1, *start2, *limit1, *limit2;
1491
1492
    /* points to the original start address */
1493
34.0k
    const char16_t *org1, *org2;
1494
1495
    /* points to the end of match + 1 */
1496
34.0k
    const char16_t *m1, *m2;
1497
1498
    /* case folding variables */
1499
34.0k
    const char16_t *p;
1500
34.0k
    int32_t length;
1501
1502
    /* stacks of previous-level start/current/limit */
1503
34.0k
    CmpEquivLevel stack1[2], stack2[2];
1504
1505
    /* case folding buffers, only use current-level start/limit */
1506
34.0k
    char16_t fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
1507
1508
    /* track which is the current level per string */
1509
34.0k
    int32_t level1, level2;
1510
1511
    /* current code units, and code points for lookups */
1512
34.0k
    UChar32 c1, c2, cp1, cp2;
1513
1514
    /* no argument error checking because this itself is not an API */
1515
1516
    /*
1517
     * assume that at least the option U_COMPARE_IGNORE_CASE is set
1518
     * otherwise this function would have to behave exactly as uprv_strCompare()
1519
     */
1520
34.0k
    if(U_FAILURE(*pErrorCode)) {
1521
0
        return 0;
1522
0
    }
1523
1524
    /* initialize */
1525
34.0k
    if(matchLen1) {
1526
0
        U_ASSERT(matchLen2 !=nullptr);
1527
0
        *matchLen1=0;
1528
0
        *matchLen2=0;
1529
0
    }
1530
1531
34.0k
    start1=m1=org1=s1;
1532
34.0k
    if(length1==-1) {
1533
0
        limit1=nullptr;
1534
34.0k
    } else {
1535
34.0k
        limit1=s1+length1;
1536
34.0k
    }
1537
1538
34.0k
    start2=m2=org2=s2;
1539
34.0k
    if(length2==-1) {
1540
29.0k
        limit2=nullptr;
1541
29.0k
    } else {
1542
5.08k
        limit2=s2+length2;
1543
5.08k
    }
1544
1545
34.0k
    level1=level2=0;
1546
34.0k
    c1=c2=-1;
1547
1548
    /* comparison loop */
1549
82.1k
    for(;;) {
1550
        /*
1551
         * here a code unit value of -1 means "get another code unit"
1552
         * below it will mean "this source is finished"
1553
         */
1554
1555
82.1k
        if(c1<0) {
1556
            /* get next code unit from string 1, post-increment */
1557
71.5k
            for(;;) {
1558
71.5k
                if(s1==limit1 || ((c1=*s1)==0 && (limit1==nullptr || (options&_STRNCMP_STYLE)))) {
1559
4.22k
                    if(level1==0) {
1560
3.20k
                        c1=-1;
1561
3.20k
                        break;
1562
3.20k
                    }
1563
67.3k
                } else {
1564
67.3k
                    ++s1;
1565
67.3k
                    break;
1566
67.3k
                }
1567
1568
                /* reached end of level buffer, pop one level */
1569
1.02k
                do {
1570
1.02k
                    --level1;
1571
1.02k
                    start1=stack1[level1].start;    /*Not uninitialized*/
1572
1.02k
                } while(start1==nullptr);
1573
1.02k
                s1=stack1[level1].s;                /*Not uninitialized*/
1574
1.02k
                limit1=stack1[level1].limit;        /*Not uninitialized*/
1575
1.02k
            }
1576
70.5k
        }
1577
1578
82.1k
        if(c2<0) {
1579
            /* get next code unit from string 2, post-increment */
1580
58.5k
            for(;;) {
1581
58.5k
                if(s2==limit2 || ((c2=*s2)==0 && (limit2==nullptr || (options&_STRNCMP_STYLE)))) {
1582
5.54k
                    if(level2==0) {
1583
3.11k
                        c2=-1;
1584
3.11k
                        break;
1585
3.11k
                    }
1586
52.9k
                } else {
1587
52.9k
                    ++s2;
1588
52.9k
                    break;
1589
52.9k
                }
1590
1591
                /* reached end of level buffer, pop one level */
1592
2.43k
                do {
1593
2.43k
                    --level2;
1594
2.43k
                    start2=stack2[level2].start;    /*Not uninitialized*/
1595
2.43k
                } while(start2==nullptr);
1596
2.43k
                s2=stack2[level2].s;                /*Not uninitialized*/
1597
2.43k
                limit2=stack2[level2].limit;        /*Not uninitialized*/
1598
2.43k
            }
1599
56.0k
        }
1600
1601
        /*
1602
         * compare c1 and c2
1603
         * either variable c1, c2 is -1 only if the corresponding string is finished
1604
         */
1605
82.1k
        if(c1==c2) {
1606
13.4k
            const char16_t *next1, *next2;
1607
1608
13.4k
            if(c1<0) {
1609
3.10k
                cmpRes=0;   /* c1==c2==-1 indicating end of strings */
1610
3.10k
                break;
1611
3.10k
            }
1612
1613
            /*
1614
             * Note: Move the match positions in both strings at the same time
1615
             *      only when corresponding code point(s) in the original strings
1616
             *      are fully consumed. For example, when comparing s1="Fust" and
1617
             *      s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches
1618
             *      the first code point in the case-folded data. But the second "s"
1619
             *      has no matching code point in s1, so this implementation returns
1620
             *      2 as the prefix match length ("Fu").
1621
             */
1622
10.3k
            next1=next2=nullptr;
1623
10.3k
            if(level1==0) {
1624
9.32k
                next1=s1;
1625
9.32k
            } else if(s1==limit1) {
1626
                /* Note: This implementation only use a single level of stack.
1627
                 *      If this code needs to be changed to use multiple levels
1628
                 *      of stacks, the code above should check if the current
1629
                 *      code is at the end of all stacks.
1630
                 */
1631
1.02k
                U_ASSERT(level1==1);
1632
1633
                /* is s1 at the end of the current stack? */
1634
1.02k
                next1=stack1[0].s;
1635
1.02k
            }
1636
1637
10.3k
            if (next1!=nullptr) {
1638
10.3k
                if(level2==0) {
1639
7.91k
                    next2=s2;
1640
7.91k
                } else if(s2==limit2) {
1641
2.43k
                    U_ASSERT(level2==1);
1642
1643
                    /* is s2 at the end of the current stack? */
1644
2.43k
                    next2=stack2[0].s;
1645
2.43k
                }
1646
10.3k
                if(next2!=nullptr) {
1647
10.3k
                    m1=next1;
1648
10.3k
                    m2=next2;
1649
10.3k
                }
1650
10.3k
            }
1651
10.3k
            c1=c2=-1;       /* make us fetch new code units */
1652
10.3k
            continue;
1653
68.7k
        } else if(c1<0) {
1654
102
            cmpRes=-1;      /* string 1 ends before string 2 */
1655
102
            break;
1656
68.6k
        } else if(c2<0) {
1657
11
            cmpRes=1;       /* string 2 ends before string 1 */
1658
11
            break;
1659
11
        }
1660
        /* c1!=c2 && c1>=0 && c2>=0 */
1661
1662
        /* get complete code points for c1, c2 for lookups if either is a surrogate */
1663
68.6k
        cp1=c1;
1664
68.6k
        if(U_IS_SURROGATE(c1)) {
1665
758
            char16_t c;
1666
1667
758
            if(U_IS_SURROGATE_LEAD(c1)) {
1668
725
                if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
1669
                    /* advance ++s1; only below if cp1 decomposes/case-folds */
1670
512
                    cp1=U16_GET_SUPPLEMENTARY(c1, c);
1671
512
                }
1672
725
            } else /* isTrail(c1) */ {
1673
33
                if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
1674
0
                    cp1=U16_GET_SUPPLEMENTARY(c, c1);
1675
0
                }
1676
33
            }
1677
758
        }
1678
1679
68.6k
        cp2=c2;
1680
68.6k
        if(U_IS_SURROGATE(c2)) {
1681
0
            char16_t c;
1682
1683
0
            if(U_IS_SURROGATE_LEAD(c2)) {
1684
0
                if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
1685
                    /* advance ++s2; only below if cp2 decomposes/case-folds */
1686
0
                    cp2=U16_GET_SUPPLEMENTARY(c2, c);
1687
0
                }
1688
0
            } else /* isTrail(c2) */ {
1689
0
                if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
1690
0
                    cp2=U16_GET_SUPPLEMENTARY(c, c2);
1691
0
                }
1692
0
            }
1693
0
        }
1694
1695
        /*
1696
         * go down one level for each string
1697
         * continue with the main loop as soon as there is a real change
1698
         */
1699
1700
68.6k
        if( level1==0 &&
1701
37.4k
            (length = ucase_toFullFolding(cp1, &p, options)) >= 0
1702
68.6k
        ) {
1703
            /* cp1 case-folds to the code point "length" or to p[length] */
1704
26.1k
            if(U_IS_SURROGATE(c1)) {
1705
24
                if(U_IS_SURROGATE_LEAD(c1)) {
1706
                    /* advance beyond source surrogate pair if it case-folds */
1707
24
                    ++s1;
1708
24
                } else /* isTrail(c1) */ {
1709
                    /*
1710
                     * we got a supplementary code point when hitting its trail surrogate,
1711
                     * therefore the lead surrogate must have been the same as in the other string;
1712
                     * compare this decomposition with the lead surrogate in the other string
1713
                     * remember that this simulates bulk text replacement:
1714
                     * the decomposition would replace the entire code point
1715
                     */
1716
0
                    --s2;
1717
0
                    --m2;
1718
0
                    c2=*(s2-1);
1719
0
                }
1720
24
            }
1721
1722
            /* push current level pointers */
1723
26.1k
            stack1[0].start=start1;
1724
26.1k
            stack1[0].s=s1;
1725
26.1k
            stack1[0].limit=limit1;
1726
26.1k
            ++level1;
1727
1728
            /* copy the folding result to fold1[] */
1729
26.1k
            if(length<=UCASE_MAX_STRING_LENGTH) {
1730
27
                u_memcpy(fold1, p, length);
1731
26.0k
            } else {
1732
26.0k
                int32_t i=0;
1733
26.0k
                U16_APPEND_UNSAFE(fold1, i, length);
1734
26.0k
                length=i;
1735
26.0k
            }
1736
1737
            /* set next level pointers to case folding */
1738
26.1k
            start1=s1=fold1;
1739
26.1k
            limit1=fold1+length;
1740
1741
            /* get ready to read from decomposition, continue with loop */
1742
26.1k
            c1=-1;
1743
26.1k
            continue;
1744
26.1k
        }
1745
1746
42.4k
        if( level2==0 &&
1747
33.3k
            (length = ucase_toFullFolding(cp2, &p, options)) >= 0
1748
42.4k
        ) {
1749
            /* cp2 case-folds to the code point "length" or to p[length] */
1750
11.6k
            if(U_IS_SURROGATE(c2)) {
1751
0
                if(U_IS_SURROGATE_LEAD(c2)) {
1752
                    /* advance beyond source surrogate pair if it case-folds */
1753
0
                    ++s2;
1754
0
                } else /* isTrail(c2) */ {
1755
                    /*
1756
                     * we got a supplementary code point when hitting its trail surrogate,
1757
                     * therefore the lead surrogate must have been the same as in the other string;
1758
                     * compare this decomposition with the lead surrogate in the other string
1759
                     * remember that this simulates bulk text replacement:
1760
                     * the decomposition would replace the entire code point
1761
                     */
1762
0
                    --s1;
1763
0
                    --m2;
1764
0
                    c1=*(s1-1);
1765
0
                }
1766
0
            }
1767
1768
            /* push current level pointers */
1769
11.6k
            stack2[0].start=start2;
1770
11.6k
            stack2[0].s=s2;
1771
11.6k
            stack2[0].limit=limit2;
1772
11.6k
            ++level2;
1773
1774
            /* copy the folding result to fold2[] */
1775
11.6k
            if(length<=UCASE_MAX_STRING_LENGTH) {
1776
0
                u_memcpy(fold2, p, length);
1777
11.6k
            } else {
1778
11.6k
                int32_t i=0;
1779
11.6k
                U16_APPEND_UNSAFE(fold2, i, length);
1780
11.6k
                length=i;
1781
11.6k
            }
1782
1783
            /* set next level pointers to case folding */
1784
11.6k
            start2=s2=fold2;
1785
11.6k
            limit2=fold2+length;
1786
1787
            /* get ready to read from decomposition, continue with loop */
1788
11.6k
            c2=-1;
1789
11.6k
            continue;
1790
11.6k
        }
1791
1792
        /*
1793
         * no decomposition/case folding, max level for both sides:
1794
         * return difference result
1795
         *
1796
         * code point order comparison must not just return cp1-cp2
1797
         * because when single surrogates are present then the surrogate pairs
1798
         * that formed cp1 and cp2 may be from different string indexes
1799
         *
1800
         * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
1801
         * c1=d800 cp1=10001 c2=dc00 cp2=10000
1802
         * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
1803
         *
1804
         * therefore, use same fix-up as in ustring.c/uprv_strCompare()
1805
         * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
1806
         * so we have slightly different pointer/start/limit comparisons here
1807
         */
1808
1809
30.8k
        if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
1810
            /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
1811
0
            if(
1812
0
                (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
1813
0
                (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
1814
0
            ) {
1815
                /* part of a surrogate pair, leave >=d800 */
1816
0
            } else {
1817
                /* BMP code point - may be surrogate code point - make <d800 */
1818
0
                c1-=0x2800;
1819
0
            }
1820
1821
0
            if(
1822
0
                (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
1823
0
                (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
1824
0
            ) {
1825
                /* part of a surrogate pair, leave >=d800 */
1826
0
            } else {
1827
                /* BMP code point - may be surrogate code point - make <d800 */
1828
0
                c2-=0x2800;
1829
0
            }
1830
0
        }
1831
1832
30.8k
        cmpRes=c1-c2;
1833
30.8k
        break;
1834
42.4k
    }
1835
1836
34.0k
    if(matchLen1) {
1837
0
        *matchLen1=static_cast<int32_t>(m1-org1);
1838
0
        *matchLen2=static_cast<int32_t>(m2-org2);
1839
0
    }
1840
34.0k
    return cmpRes;
1841
34.0k
}
1842
1843
/* internal function */
1844
U_CFUNC int32_t
1845
u_strcmpFold(const char16_t *s1, int32_t length1,
1846
             const char16_t *s2, int32_t length2,
1847
             uint32_t options,
1848
34.0k
             UErrorCode *pErrorCode) {
1849
34.0k
    return _cmpFold(s1, length1, s2, length2, options, nullptr, nullptr, pErrorCode);
1850
34.0k
}
1851
1852
/* public API functions */
1853
1854
/*
 * Case-insensitive comparison of two strings with explicit lengths
 * (a length of -1 is accepted and passed through).
 *
 * Returns 0 without comparing when pErrorCode is nullptr or already holds a
 * failure. Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 when either string
 * pointer is nullptr or either length is below -1. Otherwise returns the
 * result of u_strcmpFold() with U_COMPARE_IGNORE_CASE added to options.
 */
U_CAPI int32_t U_EXPORT2
u_strCaseCompare(const char16_t *s1, int32_t length1,
                 const char16_t *s2, int32_t length2,
                 uint32_t options,
                 UErrorCode *pErrorCode) {
    /* a missing or already-failed status means there is nothing to do */
    if (pErrorCode == nullptr) {
        return 0;
    }
    if (U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* validate the arguments in the original left-to-right order */
    const bool firstIsBad = (s1 == nullptr) || (length1 < -1);
    const bool secondIsBad = (s2 == nullptr) || (length2 < -1);
    if (firstIsBad || secondIsBad) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    return u_strcmpFold(s1, length1, s2, length2,
                        options | U_COMPARE_IGNORE_CASE,
                        pErrorCode);
}
1871
1872
/*
 * Case-insensitive comparison of two NUL-terminated strings.
 * Both lengths are passed as -1 and U_COMPARE_IGNORE_CASE is added to options;
 * the local error code is not reported to the caller.
 */
U_CAPI int32_t U_EXPORT2
u_strcasecmp(const char16_t *s1, const char16_t *s2, uint32_t options) {
    UErrorCode status = U_ZERO_ERROR;
    const int32_t nulTerminated = -1;
    return u_strcmpFold(s1, nulTerminated,
                        s2, nulTerminated,
                        options | U_COMPARE_IGNORE_CASE,
                        &status);
}
1879
1880
/*
 * Case-insensitive comparison of two buffers that share the same length.
 * "length" is used for both strings and U_COMPARE_IGNORE_CASE is added to
 * options; the local error code is not reported to the caller.
 */
U_CAPI int32_t U_EXPORT2
u_memcasecmp(const char16_t *s1, const char16_t *s2, int32_t length, uint32_t options) {
    UErrorCode status = U_ZERO_ERROR;
    const uint32_t foldingOptions = options | U_COMPARE_IGNORE_CASE;
    return u_strcmpFold(s1, length,
                        s2, length,
                        foldingOptions,
                        &status);
}
1887
1888
/*
 * Case-insensitive comparison of at most n code units of each string.
 * n is used as the length of both strings; U_COMPARE_IGNORE_CASE and
 * _STRNCMP_STYLE are added to options (the latter makes the comparison also
 * stop at a NUL code unit). The local error code is not reported to the caller.
 */
U_CAPI int32_t U_EXPORT2
u_strncasecmp(const char16_t *s1, const char16_t *s2, int32_t n, uint32_t options) {
    UErrorCode status = U_ZERO_ERROR;
    const uint32_t foldingOptions = options | (U_COMPARE_IGNORE_CASE | _STRNCMP_STYLE);
    return u_strcmpFold(s1, n,
                        s2, n,
                        foldingOptions,
                        &status);
}
1895
1896
/* internal API - detect length of shared prefix */
1897
U_CAPI void
1898
u_caseInsensitivePrefixMatch(const char16_t *s1, int32_t length1,
1899
                             const char16_t *s2, int32_t length2,
1900
                             uint32_t options,
1901
                             int32_t *matchLen1, int32_t *matchLen2,
1902
0
                             UErrorCode *pErrorCode) {
1903
0
    _cmpFold(s1, length1, s2, length2, options,
1904
0
        matchLen1, matchLen2, pErrorCode);
1905
0
}