Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/ustrcase.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 2001-2015, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  ustrcase.cpp
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2002feb20
16
*   created by: Markus W. Scherer
17
*
18
*   Implementation file for string casing C API functions.
19
*   Uses functions from uchar.c for basic functionality that requires access
20
*   to the Unicode Character Database (uprops.dat).
21
*/
22
23
#include "unicode/utypes.h"
24
#include "unicode/brkiter.h"
25
#include "unicode/casemap.h"
26
#include "unicode/edits.h"
27
#include "unicode/stringoptions.h"
28
#include "unicode/ustring.h"
29
#include "unicode/ucasemap.h"
30
#include "unicode/ubrk.h"
31
#include "unicode/utf.h"
32
#include "unicode/utf16.h"
33
#include "cmemory.h"
34
#include "ucase.h"
35
#include "ucasemap_imp.h"
36
#include "ustr_imp.h"
37
#include "uassert.h"
38
39
U_NAMESPACE_BEGIN
40
41
namespace {
42
43
int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
44
0
                                   Edits *edits, UErrorCode &errorCode) {
45
0
    if (U_SUCCESS(errorCode)) {
46
0
        if (destIndex > destCapacity) {
47
0
            errorCode = U_BUFFER_OVERFLOW_ERROR;
48
0
        } else if (edits != NULL) {
49
0
            edits->copyErrorTo(errorCode);
50
0
        }
51
0
    }
52
0
    return destIndex;
53
0
}
54
55
/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
56
inline int32_t
57
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
58
             int32_t result, const UChar *s,
59
0
             int32_t cpLength, uint32_t options, icu::Edits *edits) {
60
0
    UChar32 c;
61
0
    int32_t length;
62
63
    /* decode the result */
64
0
    if(result<0) {
65
        /* (not) original code point */
66
0
        if(edits!=NULL) {
67
0
            edits->addUnchanged(cpLength);
68
0
        }
69
0
        if(options & U_OMIT_UNCHANGED_TEXT) {
70
0
            return destIndex;
71
0
        }
72
0
        c=~result;
73
0
        if(destIndex<destCapacity && c<=0xffff) {  // BMP slightly-fastpath
74
0
            dest[destIndex++]=(UChar)c;
75
0
            return destIndex;
76
0
        }
77
0
        length=cpLength;
78
0
    } else {
79
0
        if(result<=UCASE_MAX_STRING_LENGTH) {
80
0
            c=U_SENTINEL;
81
0
            length=result;
82
0
        } else if(destIndex<destCapacity && result<=0xffff) {  // BMP slightly-fastpath
83
0
            dest[destIndex++]=(UChar)result;
84
0
            if(edits!=NULL) {
85
0
                edits->addReplace(cpLength, 1);
86
0
            }
87
0
            return destIndex;
88
0
        } else {
89
0
            c=result;
90
0
            length=U16_LENGTH(c);
91
0
        }
92
0
        if(edits!=NULL) {
93
0
            edits->addReplace(cpLength, length);
94
0
        }
95
0
    }
96
0
    if(length>(INT32_MAX-destIndex)) {
97
0
        return -1;  // integer overflow
98
0
    }
99
100
0
    if(destIndex<destCapacity) {
101
        /* append the result */
102
0
        if(c>=0) {
103
            /* code point */
104
0
            UBool isError=FALSE;
105
0
            U16_APPEND(dest, destIndex, destCapacity, c, isError);
106
0
            if(isError) {
107
                /* overflow, nothing written */
108
0
                destIndex+=length;
109
0
            }
110
0
        } else {
111
            /* string */
112
0
            if((destIndex+length)<=destCapacity) {
113
0
                while(length>0) {
114
0
                    dest[destIndex++]=*s++;
115
0
                    --length;
116
0
                }
117
0
            } else {
118
                /* overflow */
119
0
                destIndex+=length;
120
0
            }
121
0
        }
122
0
    } else {
123
        /* preflight */
124
0
        destIndex+=length;
125
0
    }
126
0
    return destIndex;
127
0
}
128
129
inline int32_t
130
0
appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
131
0
    if(destIndex<destCapacity) {
132
0
        dest[destIndex]=c;
133
0
    } else if(destIndex==INT32_MAX) {
134
0
        return -1;  // integer overflow
135
0
    }
136
0
    return destIndex+1;
137
0
}
138
139
int32_t
140
appendNonEmptyUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
141
0
                        const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
142
0
    if(edits!=NULL) {
143
0
        edits->addUnchanged(length);
144
0
    }
145
0
    if(options & U_OMIT_UNCHANGED_TEXT) {
146
0
        return destIndex;
147
0
    }
148
0
    if(length>(INT32_MAX-destIndex)) {
149
0
        return -1;  // integer overflow
150
0
    }
151
0
    if((destIndex+length)<=destCapacity) {
152
0
        u_memcpy(dest+destIndex, s, length);
153
0
    }
154
0
    return destIndex + length;
155
0
}
156
157
inline int32_t
158
appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
159
0
                const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
160
0
    if (length <= 0) {
161
0
        return destIndex;
162
0
    }
163
0
    return appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits);
164
0
}
165
166
/**
 * Context iterator over the UTF-16 text described by a UCaseContext.
 *
 * dir < 0: restart backward iteration at cpStart;
 * dir > 0: restart forward iteration at cpLimit;
 * dir == 0: continue in the direction stored by the previous call.
 *
 * @return the next code point in that direction, or U_SENTINEL when the iteration
 *         reaches start (backward) or limit (forward).
 */
UChar32 U_CALLCONV
utf16_caseContextIterator(void *context, int8_t dir) {
    UCaseContext *ctx = static_cast<UCaseContext *>(context);

    if (dir != 0) {
        /* reset: backward starts before the code point, forward starts after it */
        ctx->index = (dir < 0) ? ctx->cpStart : ctx->cpLimit;
        ctx->dir = dir;
    } else {
        /* continue current iteration direction */
        dir = ctx->dir;
    }

    UChar32 c = U_SENTINEL;
    if (dir < 0) {
        if (ctx->start < ctx->index) {
            U16_PREV((const UChar *)ctx->p, ctx->start, ctx->index, c);
        }
    } else if (ctx->index < ctx->limit) {
        U16_NEXT((const UChar *)ctx->p, ctx->index, ctx->limit, c);
    }
    return c;
}
197
198
/**
199
 * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.
200
 * caseLocale < 0: Case-folds [srcStart..srcLimit[.
201
 */
202
int32_t toLower(int32_t caseLocale, uint32_t options,
203
                UChar *dest, int32_t destCapacity,
204
                const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,
205
0
                icu::Edits *edits, UErrorCode &errorCode) {
206
0
    const int8_t *latinToLower;
207
0
    if (caseLocale == UCASE_LOC_ROOT ||
208
0
            (caseLocale >= 0 ?
209
0
                !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :
210
0
                (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) {
211
0
        latinToLower = LatinCase::TO_LOWER_NORMAL;
212
0
    } else {
213
0
        latinToLower = LatinCase::TO_LOWER_TR_LT;
214
0
    }
215
0
    const UTrie2 *trie = ucase_getTrie();
216
0
    int32_t destIndex = 0;
217
0
    int32_t prev = srcStart;
218
0
    int32_t srcIndex = srcStart;
219
0
    for (;;) {
220
        // fast path for simple cases
221
0
        UChar lead = 0;
222
0
        while (srcIndex < srcLimit) {
223
0
            lead = src[srcIndex];
224
0
            int32_t delta;
225
0
            if (lead < LatinCase::LONG_S) {
226
0
                int8_t d = latinToLower[lead];
227
0
                if (d == LatinCase::EXC) { break; }
228
0
                ++srcIndex;
229
0
                if (d == 0) { continue; }
230
0
                delta = d;
231
0
            } else if (lead >= 0xd800) {
232
0
                break;  // surrogate or higher
233
0
            } else {
234
0
                uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
235
0
                if (UCASE_HAS_EXCEPTION(props)) { break; }
236
0
                ++srcIndex;
237
0
                if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) {
238
0
                    continue;
239
0
                }
240
0
            }
241
0
            lead += static_cast<UChar>(delta);
242
0
            destIndex = appendUnchanged(dest, destIndex, destCapacity,
243
0
                                        src + prev, srcIndex - 1 - prev, options, edits);
244
0
            if (destIndex >= 0) {
245
0
                destIndex = appendUChar(dest, destIndex, destCapacity, lead);
246
0
                if (edits != nullptr) {
247
0
                    edits->addReplace(1, 1);
248
0
                }
249
0
            }
250
0
            if (destIndex < 0) {
251
0
                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
252
0
                return 0;
253
0
            }
254
0
            prev = srcIndex;
255
0
        }
256
0
        if (srcIndex >= srcLimit) {
257
0
            break;
258
0
        }
259
        // slow path
260
0
        int32_t cpStart = srcIndex++;
261
0
        UChar trail;
262
0
        UChar32 c;
263
0
        if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) {
264
0
            c = U16_GET_SUPPLEMENTARY(lead, trail);
265
0
            ++srcIndex;
266
0
        } else {
267
0
            c = lead;
268
0
        }
269
0
        const UChar *s;
270
0
        if (caseLocale >= 0) {
271
0
            csc->cpStart = cpStart;
272
0
            csc->cpLimit = srcIndex;
273
0
            c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale);
274
0
        } else {
275
0
            c = ucase_toFullFolding(c, &s, options);
276
0
        }
277
0
        if (c >= 0) {
278
0
            destIndex = appendUnchanged(dest, destIndex, destCapacity,
279
0
                                        src + prev, cpStart - prev, options, edits);
280
0
            if (destIndex >= 0) {
281
0
                destIndex = appendResult(dest, destIndex, destCapacity, c, s,
282
0
                                         srcIndex - cpStart, options, edits);
283
0
            }
284
0
            if (destIndex < 0) {
285
0
                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
286
0
                return 0;
287
0
            }
288
0
            prev = srcIndex;
289
0
        }
290
0
    }
291
0
    destIndex = appendUnchanged(dest, destIndex, destCapacity,
292
0
                                src + prev, srcIndex - prev, options, edits);
293
0
    if (destIndex < 0) {
294
0
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
295
0
        return 0;
296
0
    }
297
0
    return destIndex;
298
0
}
299
300
int32_t toUpper(int32_t caseLocale, uint32_t options,
301
                UChar *dest, int32_t destCapacity,
302
                const UChar *src, UCaseContext *csc, int32_t srcLength,
303
0
                icu::Edits *edits, UErrorCode &errorCode) {
304
0
    const int8_t *latinToUpper;
305
0
    if (caseLocale == UCASE_LOC_TURKISH) {
306
0
        latinToUpper = LatinCase::TO_UPPER_TR;
307
0
    } else {
308
0
        latinToUpper = LatinCase::TO_UPPER_NORMAL;
309
0
    }
310
0
    const UTrie2 *trie = ucase_getTrie();
311
0
    int32_t destIndex = 0;
312
0
    int32_t prev = 0;
313
0
    int32_t srcIndex = 0;
314
0
    for (;;) {
315
        // fast path for simple cases
316
0
        UChar lead = 0;
317
0
        while (srcIndex < srcLength) {
318
0
            lead = src[srcIndex];
319
0
            int32_t delta;
320
0
            if (lead < LatinCase::LONG_S) {
321
0
                int8_t d = latinToUpper[lead];
322
0
                if (d == LatinCase::EXC) { break; }
323
0
                ++srcIndex;
324
0
                if (d == 0) { continue; }
325
0
                delta = d;
326
0
            } else if (lead >= 0xd800) {
327
0
                break;  // surrogate or higher
328
0
            } else {
329
0
                uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);
330
0
                if (UCASE_HAS_EXCEPTION(props)) { break; }
331
0
                ++srcIndex;
332
0
                if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) {
333
0
                    continue;
334
0
                }
335
0
            }
336
0
            lead += static_cast<UChar>(delta);
337
0
            destIndex = appendUnchanged(dest, destIndex, destCapacity,
338
0
                                        src + prev, srcIndex - 1 - prev, options, edits);
339
0
            if (destIndex >= 0) {
340
0
                destIndex = appendUChar(dest, destIndex, destCapacity, lead);
341
0
                if (edits != nullptr) {
342
0
                    edits->addReplace(1, 1);
343
0
                }
344
0
            }
345
0
            if (destIndex < 0) {
346
0
                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
347
0
                return 0;
348
0
            }
349
0
            prev = srcIndex;
350
0
        }
351
0
        if (srcIndex >= srcLength) {
352
0
            break;
353
0
        }
354
        // slow path
355
0
        int32_t cpStart;
356
0
        csc->cpStart = cpStart = srcIndex++;
357
0
        UChar trail;
358
0
        UChar32 c;
359
0
        if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) {
360
0
            c = U16_GET_SUPPLEMENTARY(lead, trail);
361
0
            ++srcIndex;
362
0
        } else {
363
0
            c = lead;
364
0
        }
365
0
        csc->cpLimit = srcIndex;
366
0
        const UChar *s;
367
0
        c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale);
368
0
        if (c >= 0) {
369
0
            destIndex = appendUnchanged(dest, destIndex, destCapacity,
370
0
                                        src + prev, cpStart - prev, options, edits);
371
0
            if (destIndex >= 0) {
372
0
                destIndex = appendResult(dest, destIndex, destCapacity, c, s,
373
0
                                         srcIndex - cpStart, options, edits);
374
0
            }
375
0
            if (destIndex < 0) {
376
0
                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
377
0
                return 0;
378
0
            }
379
0
            prev = srcIndex;
380
0
        }
381
0
    }
382
0
    destIndex = appendUnchanged(dest, destIndex, destCapacity,
383
0
                                src + prev, srcIndex - prev, options, edits);
384
0
    if (destIndex < 0) {
385
0
        errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
386
0
        return 0;
387
0
    }
388
0
    return destIndex;
389
0
}
390
391
}  // namespace
392
393
U_NAMESPACE_END
394
395
U_NAMESPACE_USE
396
397
#if !UCONFIG_NO_BREAK_ITERATION
398
399
/**
 * Titlecases src[0..srcLength[ into dest.
 *
 * iter supplies the segment boundaries (first(), then next()); a boundary of UBRK_DONE
 * or beyond srcLength is clamped to srcLength. In each segment the first character to
 * titlecase is found (subject to the title adjustment bits in options), mapped with
 * ucase_toFullTitle(), and the rest of the segment is lowercased via toLower() unless
 * U_TITLECASE_NO_LOWERCASE is set, in which case it is copied unchanged.
 *
 * @param caseLocale   case-mapping locale id passed on to the ucase_ functions
 * @param options      titlecasing and U_OMIT_UNCHANGED_TEXT option bits
 * @param iter         break iterator that yields the titlecasing segment boundaries
 * @param dest         output buffer; may be null when destCapacity is 0 (preflighting)
 * @param destCapacity capacity of dest in UChars
 * @param src          source text
 * @param srcLength    length of src in UChars
 * @param edits        optional record of changes; may be null
 * @param errorCode    set to U_INDEX_OUTOFBOUNDS_ERROR on int32_t index overflow and to
 *                     U_BUFFER_OVERFLOW_ERROR if the result does not fit into dest
 * @return the length the result needs; 0 if the options are rejected or an index overflows
 */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,
                         UChar *dest, int32_t destCapacity,
                         const UChar *src, int32_t srcLength,
                         icu::Edits *edits,
                         UErrorCode &errorCode) {
    if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) {
        return 0;
    }

    /* set up local variables */
    UCaseContext csc=UCASECONTEXT_INITIALIZER;
    csc.p=(void *)src;
    csc.limit=srcLength;
    int32_t destIndex=0;
    int32_t prev=0;
    UBool isFirstIndex=TRUE;

    /* titlecasing loop */
    while(prev<srcLength) {
        /* find next index where to titlecase */
        int32_t index;
        if(isFirstIndex) {
            isFirstIndex=FALSE;
            index=iter->first();
        } else {
            index=iter->next();
        }
        if(index==UBRK_DONE || index>srcLength) {
            index=srcLength;
        }

        /*
         * Segment [prev..index[ into 3 parts:
         * a) skipped characters (copy as-is) [prev..titleStart[
         * b) first letter (titlecase)              [titleStart..titleLimit[
         * c) subsequent characters (lowercase)                 [titleLimit..index[
         */
        if(prev<index) {
            // Find and copy skipped characters [prev..titleStart[
            int32_t titleStart=prev;
            int32_t titleLimit=prev;
            UChar32 c;
            U16_NEXT(src, titleLimit, index, c);
            if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) {
                // Adjust the titlecasing index to the next cased character,
                // or to the next letter/number/symbol/private use.
                // Stop with titleStart<titleLimit<=index
                // if there is a character to be titlecased,
                // or else stop with titleStart==titleLimit==index.
                UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;
                while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) {
                    titleStart=titleLimit;
                    if(titleLimit==index) {
                        break;
                    }
                    U16_NEXT(src, titleLimit, index, c);
                }
                if (prev < titleStart) {
                    destIndex=appendUnchanged(dest, destIndex, destCapacity,
                                              src+prev, titleStart-prev, options, edits);
                    if(destIndex<0) {
                        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                        return 0;
                    }
                }
            }

            if(titleStart<titleLimit) {
                /* titlecase c which is from [titleStart..titleLimit[ */
                csc.cpStart=titleStart;
                csc.cpLimit=titleLimit;
                const UChar *s;
                c=ucase_toFullTitle(c, utf16_caseContextIterator, &csc, &s, caseLocale);
                destIndex=appendResult(dest, destIndex, destCapacity, c, s,
                                       titleLimit-titleStart, options, edits);
                if(destIndex<0) {
                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                    return 0;
                }

                /* Special case Dutch IJ titlecasing */
                if (titleStart+1 < index &&
                        caseLocale == UCASE_LOC_DUTCH &&
                        (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
                    if (src[titleStart+1] == 0x006A) {
                        // Lowercase j after I/i: write a capital J instead.
                        destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
                        if(destIndex<0) {
                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                            return 0;
                        }
                        if(edits!=nullptr) {
                            edits->addReplace(1, 1);
                        }
                        titleLimit++;
                    } else if (src[titleStart+1] == 0x004A) {
                        // Keep the capital J from getting lowercased.
                        destIndex=appendUnchanged(dest, destIndex, destCapacity,
                                                  src+titleStart+1, 1, options, edits);
                        if(destIndex<0) {
                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                            return 0;
                        }
                        titleLimit++;
                    }
                }

                /* lowercase [titleLimit..index[ */
                if(titleLimit<index) {
                    if((options&U_TITLECASE_NO_LOWERCASE)==0) {
                        /* Normal operation: Lowercase the rest of the word. */
                        // Only form dest+destIndex when it points into the buffer.
                        // With a null dest (preflighting) or with destIndex already at or
                        // past destCapacity the pointer arithmetic would be undefined
                        // behavior; toLower() never writes in those cases because the
                        // remaining capacity passed to it is <= 0.
                        UChar *lowerDest=
                            (dest!=nullptr && destIndex<destCapacity) ? dest+destIndex : nullptr;
                        int32_t lowerLength=
                            toLower(
                                caseLocale, options,
                                lowerDest, destCapacity-destIndex,
                                src, &csc, titleLimit, index,
                                edits, errorCode);
                        // Guard the accumulation like the append helpers guard theirs.
                        if(lowerLength>(INT32_MAX-destIndex)) {
                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                            return 0;
                        }
                        destIndex+=lowerLength;
                        if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
                            // Overflow is re-evaluated for the whole string at the end.
                            errorCode=U_ZERO_ERROR;
                        }
                        if(U_FAILURE(errorCode)) {
                            return destIndex;
                        }
                    } else {
                        /* Optionally just copy the rest of the word unchanged. */
                        destIndex=appendUnchanged(dest, destIndex, destCapacity,
                                                  src+titleLimit, index-titleLimit, options, edits);
                        if(destIndex<0) {
                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                            return 0;
                        }
                    }
                }
            }
        }

        prev=index;
    }

    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}
540
541
#endif  // !UCONFIG_NO_BREAK_ITERATION
542
543
U_NAMESPACE_BEGIN
544
namespace GreekUpper {
545
546
// Data generated by prototype code, see
547
// http://site.icu-project.org/design/case/greek-upper
548
// TODO: Move this data into ucase.icu.
549
static const uint16_t data0370[] = {
550
    // U+0370..03FF
551
    0x0370,
552
    0x0370,
553
    0x0372,
554
    0x0372,
555
    0,
556
    0,
557
    0x0376,
558
    0x0376,
559
    0,
560
    0,
561
    0x037A,
562
    0x03FD,
563
    0x03FE,
564
    0x03FF,
565
    0,
566
    0x037F,
567
    0,
568
    0,
569
    0,
570
    0,
571
    0,
572
    0,
573
    0x0391 | HAS_VOWEL | HAS_ACCENT,
574
    0,
575
    0x0395 | HAS_VOWEL | HAS_ACCENT,
576
    0x0397 | HAS_VOWEL | HAS_ACCENT,
577
    0x0399 | HAS_VOWEL | HAS_ACCENT,
578
    0,
579
    0x039F | HAS_VOWEL | HAS_ACCENT,
580
    0,
581
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
582
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
583
    0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
584
    0x0391 | HAS_VOWEL,
585
    0x0392,
586
    0x0393,
587
    0x0394,
588
    0x0395 | HAS_VOWEL,
589
    0x0396,
590
    0x0397 | HAS_VOWEL,
591
    0x0398,
592
    0x0399 | HAS_VOWEL,
593
    0x039A,
594
    0x039B,
595
    0x039C,
596
    0x039D,
597
    0x039E,
598
    0x039F | HAS_VOWEL,
599
    0x03A0,
600
    0x03A1,
601
    0,
602
    0x03A3,
603
    0x03A4,
604
    0x03A5 | HAS_VOWEL,
605
    0x03A6,
606
    0x03A7,
607
    0x03A8,
608
    0x03A9 | HAS_VOWEL,
609
    0x0399 | HAS_VOWEL | HAS_DIALYTIKA,
610
    0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,
611
    0x0391 | HAS_VOWEL | HAS_ACCENT,
612
    0x0395 | HAS_VOWEL | HAS_ACCENT,
613
    0x0397 | HAS_VOWEL | HAS_ACCENT,
614
    0x0399 | HAS_VOWEL | HAS_ACCENT,
615
    0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
616
    0x0391 | HAS_VOWEL,
617
    0x0392,
618
    0x0393,
619
    0x0394,
620
    0x0395 | HAS_VOWEL,
621
    0x0396,
622
    0x0397 | HAS_VOWEL,
623
    0x0398,
624
    0x0399 | HAS_VOWEL,
625
    0x039A,
626
    0x039B,
627
    0x039C,
628
    0x039D,
629
    0x039E,
630
    0x039F | HAS_VOWEL,
631
    0x03A0,
632
    0x03A1,
633
    0x03A3,
634
    0x03A3,
635
    0x03A4,
636
    0x03A5 | HAS_VOWEL,
637
    0x03A6,
638
    0x03A7,
639
    0x03A8,
640
    0x03A9 | HAS_VOWEL,
641
    0x0399 | HAS_VOWEL | HAS_DIALYTIKA,
642
    0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,
643
    0x039F | HAS_VOWEL | HAS_ACCENT,
644
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
645
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
646
    0x03CF,
647
    0x0392,
648
    0x0398,
649
    0x03D2,
650
    0x03D2 | HAS_ACCENT,
651
    0x03D2 | HAS_DIALYTIKA,
652
    0x03A6,
653
    0x03A0,
654
    0x03CF,
655
    0x03D8,
656
    0x03D8,
657
    0x03DA,
658
    0x03DA,
659
    0x03DC,
660
    0x03DC,
661
    0x03DE,
662
    0x03DE,
663
    0x03E0,
664
    0x03E0,
665
    0,
666
    0,
667
    0,
668
    0,
669
    0,
670
    0,
671
    0,
672
    0,
673
    0,
674
    0,
675
    0,
676
    0,
677
    0,
678
    0,
679
    0x039A,
680
    0x03A1,
681
    0x03F9,
682
    0x037F,
683
    0x03F4,
684
    0x0395 | HAS_VOWEL,
685
    0,
686
    0x03F7,
687
    0x03F7,
688
    0x03F9,
689
    0x03FA,
690
    0x03FA,
691
    0x03FC,
692
    0x03FD,
693
    0x03FE,
694
    0x03FF,
695
};
696
697
static const uint16_t data1F00[] = {
698
    // U+1F00..1FFF
699
    0x0391 | HAS_VOWEL,
700
    0x0391 | HAS_VOWEL,
701
    0x0391 | HAS_VOWEL | HAS_ACCENT,
702
    0x0391 | HAS_VOWEL | HAS_ACCENT,
703
    0x0391 | HAS_VOWEL | HAS_ACCENT,
704
    0x0391 | HAS_VOWEL | HAS_ACCENT,
705
    0x0391 | HAS_VOWEL | HAS_ACCENT,
706
    0x0391 | HAS_VOWEL | HAS_ACCENT,
707
    0x0391 | HAS_VOWEL,
708
    0x0391 | HAS_VOWEL,
709
    0x0391 | HAS_VOWEL | HAS_ACCENT,
710
    0x0391 | HAS_VOWEL | HAS_ACCENT,
711
    0x0391 | HAS_VOWEL | HAS_ACCENT,
712
    0x0391 | HAS_VOWEL | HAS_ACCENT,
713
    0x0391 | HAS_VOWEL | HAS_ACCENT,
714
    0x0391 | HAS_VOWEL | HAS_ACCENT,
715
    0x0395 | HAS_VOWEL,
716
    0x0395 | HAS_VOWEL,
717
    0x0395 | HAS_VOWEL | HAS_ACCENT,
718
    0x0395 | HAS_VOWEL | HAS_ACCENT,
719
    0x0395 | HAS_VOWEL | HAS_ACCENT,
720
    0x0395 | HAS_VOWEL | HAS_ACCENT,
721
    0,
722
    0,
723
    0x0395 | HAS_VOWEL,
724
    0x0395 | HAS_VOWEL,
725
    0x0395 | HAS_VOWEL | HAS_ACCENT,
726
    0x0395 | HAS_VOWEL | HAS_ACCENT,
727
    0x0395 | HAS_VOWEL | HAS_ACCENT,
728
    0x0395 | HAS_VOWEL | HAS_ACCENT,
729
    0,
730
    0,
731
    0x0397 | HAS_VOWEL,
732
    0x0397 | HAS_VOWEL,
733
    0x0397 | HAS_VOWEL | HAS_ACCENT,
734
    0x0397 | HAS_VOWEL | HAS_ACCENT,
735
    0x0397 | HAS_VOWEL | HAS_ACCENT,
736
    0x0397 | HAS_VOWEL | HAS_ACCENT,
737
    0x0397 | HAS_VOWEL | HAS_ACCENT,
738
    0x0397 | HAS_VOWEL | HAS_ACCENT,
739
    0x0397 | HAS_VOWEL,
740
    0x0397 | HAS_VOWEL,
741
    0x0397 | HAS_VOWEL | HAS_ACCENT,
742
    0x0397 | HAS_VOWEL | HAS_ACCENT,
743
    0x0397 | HAS_VOWEL | HAS_ACCENT,
744
    0x0397 | HAS_VOWEL | HAS_ACCENT,
745
    0x0397 | HAS_VOWEL | HAS_ACCENT,
746
    0x0397 | HAS_VOWEL | HAS_ACCENT,
747
    0x0399 | HAS_VOWEL,
748
    0x0399 | HAS_VOWEL,
749
    0x0399 | HAS_VOWEL | HAS_ACCENT,
750
    0x0399 | HAS_VOWEL | HAS_ACCENT,
751
    0x0399 | HAS_VOWEL | HAS_ACCENT,
752
    0x0399 | HAS_VOWEL | HAS_ACCENT,
753
    0x0399 | HAS_VOWEL | HAS_ACCENT,
754
    0x0399 | HAS_VOWEL | HAS_ACCENT,
755
    0x0399 | HAS_VOWEL,
756
    0x0399 | HAS_VOWEL,
757
    0x0399 | HAS_VOWEL | HAS_ACCENT,
758
    0x0399 | HAS_VOWEL | HAS_ACCENT,
759
    0x0399 | HAS_VOWEL | HAS_ACCENT,
760
    0x0399 | HAS_VOWEL | HAS_ACCENT,
761
    0x0399 | HAS_VOWEL | HAS_ACCENT,
762
    0x0399 | HAS_VOWEL | HAS_ACCENT,
763
    0x039F | HAS_VOWEL,
764
    0x039F | HAS_VOWEL,
765
    0x039F | HAS_VOWEL | HAS_ACCENT,
766
    0x039F | HAS_VOWEL | HAS_ACCENT,
767
    0x039F | HAS_VOWEL | HAS_ACCENT,
768
    0x039F | HAS_VOWEL | HAS_ACCENT,
769
    0,
770
    0,
771
    0x039F | HAS_VOWEL,
772
    0x039F | HAS_VOWEL,
773
    0x039F | HAS_VOWEL | HAS_ACCENT,
774
    0x039F | HAS_VOWEL | HAS_ACCENT,
775
    0x039F | HAS_VOWEL | HAS_ACCENT,
776
    0x039F | HAS_VOWEL | HAS_ACCENT,
777
    0,
778
    0,
779
    0x03A5 | HAS_VOWEL,
780
    0x03A5 | HAS_VOWEL,
781
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
782
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
783
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
784
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
785
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
786
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
787
    0,
788
    0x03A5 | HAS_VOWEL,
789
    0,
790
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
791
    0,
792
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
793
    0,
794
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
795
    0x03A9 | HAS_VOWEL,
796
    0x03A9 | HAS_VOWEL,
797
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
798
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
799
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
800
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
801
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
802
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
803
    0x03A9 | HAS_VOWEL,
804
    0x03A9 | HAS_VOWEL,
805
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
806
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
807
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
808
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
809
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
810
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
811
    0x0391 | HAS_VOWEL | HAS_ACCENT,
812
    0x0391 | HAS_VOWEL | HAS_ACCENT,
813
    0x0395 | HAS_VOWEL | HAS_ACCENT,
814
    0x0395 | HAS_VOWEL | HAS_ACCENT,
815
    0x0397 | HAS_VOWEL | HAS_ACCENT,
816
    0x0397 | HAS_VOWEL | HAS_ACCENT,
817
    0x0399 | HAS_VOWEL | HAS_ACCENT,
818
    0x0399 | HAS_VOWEL | HAS_ACCENT,
819
    0x039F | HAS_VOWEL | HAS_ACCENT,
820
    0x039F | HAS_VOWEL | HAS_ACCENT,
821
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
822
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
823
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
824
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
825
    0,
826
    0,
827
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
828
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
829
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
830
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
831
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
832
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
833
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
834
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
835
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
836
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
837
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
838
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
839
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
840
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
841
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
842
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
843
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
844
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
845
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
846
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
847
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
848
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
849
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
850
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
851
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
852
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
853
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
854
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
855
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
856
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
857
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
858
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
859
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
860
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
861
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
862
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
863
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
864
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
865
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
866
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
867
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
868
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
869
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
870
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
871
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
872
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
873
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
874
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
875
    0x0391 | HAS_VOWEL,
876
    0x0391 | HAS_VOWEL,
877
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
878
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
879
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
880
    0,
881
    0x0391 | HAS_VOWEL | HAS_ACCENT,
882
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
883
    0x0391 | HAS_VOWEL,
884
    0x0391 | HAS_VOWEL,
885
    0x0391 | HAS_VOWEL | HAS_ACCENT,
886
    0x0391 | HAS_VOWEL | HAS_ACCENT,
887
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
888
    0,
889
    0x0399 | HAS_VOWEL,
890
    0,
891
    0,
892
    0,
893
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
894
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
895
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
896
    0,
897
    0x0397 | HAS_VOWEL | HAS_ACCENT,
898
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
899
    0x0395 | HAS_VOWEL | HAS_ACCENT,
900
    0x0395 | HAS_VOWEL | HAS_ACCENT,
901
    0x0397 | HAS_VOWEL | HAS_ACCENT,
902
    0x0397 | HAS_VOWEL | HAS_ACCENT,
903
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
904
    0,
905
    0,
906
    0,
907
    0x0399 | HAS_VOWEL,
908
    0x0399 | HAS_VOWEL,
909
    0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
910
    0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
911
    0,
912
    0,
913
    0x0399 | HAS_VOWEL | HAS_ACCENT,
914
    0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
915
    0x0399 | HAS_VOWEL,
916
    0x0399 | HAS_VOWEL,
917
    0x0399 | HAS_VOWEL | HAS_ACCENT,
918
    0x0399 | HAS_VOWEL | HAS_ACCENT,
919
    0,
920
    0,
921
    0,
922
    0,
923
    0x03A5 | HAS_VOWEL,
924
    0x03A5 | HAS_VOWEL,
925
    0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
926
    0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
927
    0x03A1,
928
    0x03A1,
929
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
930
    0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
931
    0x03A5 | HAS_VOWEL,
932
    0x03A5 | HAS_VOWEL,
933
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
934
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
935
    0x03A1,
936
    0,
937
    0,
938
    0,
939
    0,
940
    0,
941
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
942
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
943
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
944
    0,
945
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
946
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
947
    0x039F | HAS_VOWEL | HAS_ACCENT,
948
    0x039F | HAS_VOWEL | HAS_ACCENT,
949
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
950
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
951
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
952
    0,
953
    0,
954
    0,
955
};
956
957
// U+2126 Ohm sign
958
static const uint16_t data2126 = 0x03A9 | HAS_VOWEL;  // data word returned by getLetterData() for c == 0x2126: code point U+03A9 plus the vowel flag
959
960
0
/**
 * Returns the Greek uppercasing data word for c, or 0 if there is none.
 * Data exists only for U+0370..U+03FF (data0370), U+1F00..U+1FFF (data1F00)
 * and the single code point U+2126 (data2126).
 */
uint32_t getLetterData(UChar32 c) {
    // Greek and Coptic block
    if (0x370 <= c && c <= 0x3ff) {
        return data0370[c - 0x370];
    }
    // Greek Extended block
    if (0x1f00 <= c && c <= 0x1fff) {
        return data1F00[c - 0x1f00];
    }
    // Ohm sign
    if (c == 0x2126) {
        return data2126;
    }
    // Everything else has no Greek letter data.
    return 0;
}
973
974
0
/**
 * Returns the flag bits that describe a combining mark relevant to Greek
 * uppercasing, or 0 if c is not one of the marks handled here.
 */
uint32_t getDiacriticData(UChar32 c) {
    uint32_t flags;
    switch (c) {
    case 0x0308:  // dialytika = diaeresis
        flags = HAS_COMBINING_DIALYTIKA;
        break;
    case 0x0344:  // dialytika tonos
        flags = HAS_COMBINING_DIALYTIKA | HAS_ACCENT;
        break;
    case 0x0345:  // ypogegrammeni = iota subscript
        flags = HAS_YPOGEGRAMMENI;
        break;
    case 0x0300:  // varia
    case 0x0301:  // tonos = oxia
    case 0x0302:  // circumflex can look like perispomeni
    case 0x0303:  // tilde can look like perispomeni
    case 0x0311:  // inverted breve can look like perispomeni
    case 0x0342:  // perispomeni
        flags = HAS_ACCENT;
        break;
    case 0x0304:  // macron
    case 0x0306:  // breve
    case 0x0313:  // comma above
    case 0x0314:  // reversed comma above
    case 0x0343:  // koronis
        flags = HAS_OTHER_GREEK_DIACRITIC;
        break;
    default:
        flags = 0;
        break;
    }
    return flags;
}
999
1000
0
/**
 * Scans s[i..length) and reports whether the first code point that is not
 * case-ignorable is a cased letter.
 * Returns FALSE when the text ends before such a code point is found.
 */
UBool isFollowedByCasedLetter(const UChar *s, int32_t i, int32_t length) {
    for (int32_t pos = i; pos < length;) {
        UChar32 cp;
        U16_NEXT(s, pos, length, cp);
        const int32_t props = ucase_getTypeOrIgnorable(cp);
        if ((props & UCASE_IGNORABLE) == 0) {
            // The first non-ignorable code point decides the answer:
            // cased -> TRUE, uncased -> FALSE.
            return props != UCASE_NONE ? TRUE : FALSE;
        }
        // Case-ignorable: keep looking.
    }
    return FALSE;  // Ran out of text without meeting a cased letter.
}
1015
1016
/**
1017
 * Greek string uppercasing with a state machine.
1018
 * Probably simpler than a stateless function that has to figure out complex context-before
1019
 * for each character.
1020
 * TODO: Try to re-consolidate one way or another with the non-Greek function.
1021
 */
1022
int32_t toUpper(uint32_t options,
1023
                UChar *dest, int32_t destCapacity,
1024
                const UChar *src, int32_t srcLength,
1025
                Edits *edits,
1026
0
                UErrorCode &errorCode) {
1027
0
    int32_t destIndex=0;
1028
0
    uint32_t state = 0;
1029
0
    for (int32_t i = 0; i < srcLength;) {
1030
0
        int32_t nextIndex = i;
1031
0
        UChar32 c;
1032
0
        U16_NEXT(src, nextIndex, srcLength, c);
1033
0
        uint32_t nextState = 0;
1034
0
        int32_t type = ucase_getTypeOrIgnorable(c);
1035
0
        if ((type & UCASE_IGNORABLE) != 0) {
1036
            // c is case-ignorable
1037
0
            nextState |= (state & AFTER_CASED);
1038
0
        } else if (type != UCASE_NONE) {
1039
            // c is cased
1040
0
            nextState |= AFTER_CASED;
1041
0
        }
1042
0
        uint32_t data = getLetterData(c);
1043
0
        if (data > 0) {
1044
0
            uint32_t upper = data & UPPER_MASK;
1045
            // Add a dialytika to this iota or ypsilon vowel
1046
            // if we removed a tonos from the previous vowel,
1047
            // and that previous vowel did not also have (or gain) a dialytika.
1048
            // Adding one only to the final vowel in a longer sequence
1049
            // (which does not occur in normal writing) would require lookahead.
1050
            // Set the same flag as for preserving an existing dialytika.
1051
0
            if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
1052
0
                    (upper == 0x399 || upper == 0x3A5)) {
1053
0
                data |= HAS_DIALYTIKA;
1054
0
            }
1055
0
            int32_t numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
1056
0
            if ((data & HAS_YPOGEGRAMMENI) != 0) {
1057
0
                numYpogegrammeni = 1;
1058
0
            }
1059
            // Skip combining diacritics after this Greek letter.
1060
0
            while (nextIndex < srcLength) {
1061
0
                uint32_t diacriticData = getDiacriticData(src[nextIndex]);
1062
0
                if (diacriticData != 0) {
1063
0
                    data |= diacriticData;
1064
0
                    if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
1065
0
                        ++numYpogegrammeni;
1066
0
                    }
1067
0
                    ++nextIndex;
1068
0
                } else {
1069
0
                    break;  // not a Greek diacritic
1070
0
                }
1071
0
            }
1072
0
            if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
1073
0
                nextState |= AFTER_VOWEL_WITH_ACCENT;
1074
0
            }
1075
            // Map according to Greek rules.
1076
0
            UBool addTonos = FALSE;
1077
0
            if (upper == 0x397 &&
1078
0
                    (data & HAS_ACCENT) != 0 &&
1079
0
                    numYpogegrammeni == 0 &&
1080
0
                    (state & AFTER_CASED) == 0 &&
1081
0
                    !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
1082
                // Keep disjunctive "or" with (only) a tonos.
1083
                // We use the same "word boundary" conditions as for the Final_Sigma test.
1084
0
                if (i == nextIndex) {
1085
0
                    upper = 0x389;  // Preserve the precomposed form.
1086
0
                } else {
1087
0
                    addTonos = TRUE;
1088
0
                }
1089
0
            } else if ((data & HAS_DIALYTIKA) != 0) {
1090
                // Preserve a vowel with dialytika in precomposed form if it exists.
1091
0
                if (upper == 0x399) {
1092
0
                    upper = 0x3AA;
1093
0
                    data &= ~HAS_EITHER_DIALYTIKA;
1094
0
                } else if (upper == 0x3A5) {
1095
0
                    upper = 0x3AB;
1096
0
                    data &= ~HAS_EITHER_DIALYTIKA;
1097
0
                }
1098
0
            }
1099
1100
0
            UBool change;
1101
0
            if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) {
1102
0
                change = TRUE;  // common, simple usage
1103
0
            } else {
1104
                // Find out first whether we are changing the text.
1105
0
                change = src[i] != upper || numYpogegrammeni > 0;
1106
0
                int32_t i2 = i + 1;
1107
0
                if ((data & HAS_EITHER_DIALYTIKA) != 0) {
1108
0
                    change |= i2 >= nextIndex || src[i2] != 0x308;
1109
0
                    ++i2;
1110
0
                }
1111
0
                if (addTonos) {
1112
0
                    change |= i2 >= nextIndex || src[i2] != 0x301;
1113
0
                    ++i2;
1114
0
                }
1115
0
                int32_t oldLength = nextIndex - i;
1116
0
                int32_t newLength = (i2 - i) + numYpogegrammeni;
1117
0
                change |= oldLength != newLength;
1118
0
                if (change) {
1119
0
                    if (edits != NULL) {
1120
0
                        edits->addReplace(oldLength, newLength);
1121
0
                    }
1122
0
                } else {
1123
0
                    if (edits != NULL) {
1124
0
                        edits->addUnchanged(oldLength);
1125
0
                    }
1126
                    // Write unchanged text?
1127
0
                    change = (options & U_OMIT_UNCHANGED_TEXT) == 0;
1128
0
                }
1129
0
            }
1130
1131
0
            if (change) {
1132
0
                destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
1133
0
                if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
1134
0
                    destIndex=appendUChar(dest, destIndex, destCapacity, 0x308);  // restore or add a dialytika
1135
0
                }
1136
0
                if (destIndex >= 0 && addTonos) {
1137
0
                    destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
1138
0
                }
1139
0
                while (destIndex >= 0 && numYpogegrammeni > 0) {
1140
0
                    destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);
1141
0
                    --numYpogegrammeni;
1142
0
                }
1143
0
                if(destIndex<0) {
1144
0
                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1145
0
                    return 0;
1146
0
                }
1147
0
            }
1148
0
        } else {
1149
0
            const UChar *s;
1150
0
            c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
1151
0
            destIndex = appendResult(dest, destIndex, destCapacity, c, s,
1152
0
                                     nextIndex - i, options, edits);
1153
0
            if (destIndex < 0) {
1154
0
                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
1155
0
                return 0;
1156
0
            }
1157
0
        }
1158
0
        i = nextIndex;
1159
0
        state = nextState;
1160
0
    }
1161
1162
0
    return destIndex;
1163
0
}
1164
1165
}  // namespace GreekUpper
1166
U_NAMESPACE_END
1167
1168
/* functions available in the common library (for unistr_case.cpp) */
1169
1170
/*
 * Lowercases src[0..srcLength) into dest via toLower(), then lets
 * checkOverflowAndEditsError() translate an over-long result or an Edits
 * failure into errorCode. Returns the output length.
 */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
                         UChar *dest, int32_t destCapacity,
                         const UChar *src, int32_t srcLength,
                         icu::Edits *edits,
                         UErrorCode &errorCode) {
    /* context object through which toLower() reaches the source text */
    UCaseContext context = UCASECONTEXT_INITIALIZER;
    context.p = const_cast<UChar *>(src);
    context.limit = srcLength;

    const int32_t length = toLower(caseLocale, options,
                                   dest, destCapacity,
                                   src, &context, 0, srcLength,
                                   edits, errorCode);
    return checkOverflowAndEditsError(length, destCapacity, edits, errorCode);
}
1186
1187
/*
 * Uppercases src[0..srcLength) into dest. Greek (UCASE_LOC_GREEK) is handled
 * by GreekUpper::toUpper(); every other locale uses the generic toUpper().
 * checkOverflowAndEditsError() then translates an over-long result or an
 * Edits failure into errorCode. Returns the output length.
 */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
                         UChar *dest, int32_t destCapacity,
                         const UChar *src, int32_t srcLength,
                         icu::Edits *edits,
                         UErrorCode &errorCode) {
    if (caseLocale == UCASE_LOC_GREEK) {
        const int32_t greekLength = GreekUpper::toUpper(options, dest, destCapacity,
                                                        src, srcLength, edits, errorCode);
        return checkOverflowAndEditsError(greekLength, destCapacity, edits, errorCode);
    }

    /* context object through which toUpper() reaches the source text */
    UCaseContext context = UCASECONTEXT_INITIALIZER;
    context.p = const_cast<UChar *>(src);
    context.limit = srcLength;

    const int32_t length = toUpper(caseLocale, options,
                                   dest, destCapacity,
                                   src, &context, srcLength,
                                   edits, errorCode);
    return checkOverflowAndEditsError(length, destCapacity, edits, errorCode);
}
1209
1210
/*
 * Case-folds src[0..srcLength) into dest by calling toLower() with -1 in
 * place of a case locale and without a case context. The caseLocale parameter
 * is ignored. checkOverflowAndEditsError() then translates an over-long result
 * or an Edits failure into errorCode. Returns the output length.
 */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
                      UChar *dest, int32_t destCapacity,
                      const UChar *src, int32_t srcLength,
                      icu::Edits *edits,
                      UErrorCode &errorCode) {
    const int32_t foldedLength = toLower(-1, options,
                                         dest, destCapacity,
                                         src, nullptr, 0, srcLength,
                                         edits, errorCode);
    return checkOverflowAndEditsError(foldedLength, destCapacity, edits, errorCode);
}
1223
1224
/*
 * Validates the arguments, rejects overlapping source and destination buffers,
 * optionally resets edits, runs stringCaseMapper and terminates the result
 * with u_terminateUChars().
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a negative destCapacity,
 * a NULL dest with positive capacity, a NULL src, a srcLength below -1,
 * or overlapping buffers. A srcLength of -1 means src is NUL-terminated.
 */
U_CFUNC int32_t
ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
             UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             UStringCaseMapper *stringCaseMapper,
             icu::Edits *edits,
             UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return 0;
    }

    /* argument sanity */
    const bool badDest = destCapacity < 0 || (dest == NULL && destCapacity > 0);
    const bool badSrc = src == NULL || srcLength < -1;
    if (badDest || badSrc) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* resolve a NUL-terminated source to its real length */
    if (srcLength == -1) {
        srcLength = u_strlen(src);
    }

    /* source and destination must not overlap */
    if (dest != NULL) {
        const bool srcStartsInDest = src >= dest && src < (dest + destCapacity);
        if (srcStartsInDest || (dest >= src && dest < (src + srcLength))) {
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
    }

    /* start from an empty edit list unless the caller asked to keep it */
    if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
        edits->reset();
    }

    const int32_t mappedLength =
        stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
                         dest, destCapacity, src, srcLength, edits, errorCode);
    return u_terminateUChars(dest, destCapacity, mappedLength, &errorCode);
}
1267
1268
/*
 * Like ustrcase_map() but tolerates overlapping source and destination
 * buffers, and never records Edits (the mapper is called with NULL edits).
 *
 * When the buffers overlap, the mapping is written to a temporary buffer
 * (a 300-unit stack array, or a heap block when destCapacity is larger)
 * and copied to dest afterwards.
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for a negative destCapacity,
 * a NULL dest with positive capacity, a NULL src, or a srcLength below -1.
 * Sets U_MEMORY_ALLOCATION_ERROR and returns 0 when the temporary buffer
 * cannot be allocated. A srcLength of -1 means src is NUL-terminated.
 * Otherwise returns the result of u_terminateUChars() for the mapped length.
 */
U_CFUNC int32_t
ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
                        UChar *dest, int32_t destCapacity,
                        const UChar *src, int32_t srcLength,
                        UStringCaseMapper *stringCaseMapper,
                        UErrorCode &errorCode) {
    UChar buffer[300];
    UChar *temp;

    int32_t destLength;

    /* check argument values */
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    if( destCapacity<0 ||
        (dest==NULL && destCapacity>0) ||
        src==NULL ||
        srcLength<-1
    ) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* get the string length */
    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    /* check for overlapping source and destination */
    if( dest!=NULL &&
        ((src>=dest && src<(dest+destCapacity)) ||
         (dest>=src && dest<(src+srcLength)))
    ) {
        /* overlap: provide a temporary destination buffer and later copy the result */
        if(destCapacity<=UPRV_LENGTHOF(buffer)) {
            /* the stack buffer is large enough */
            temp=buffer;
        } else {
            /*
             * allocate a buffer
             * Widen to size_t before multiplying: destCapacity*U_SIZEOF_UCHAR
             * in 32-bit signed arithmetic overflows for capacities above
             * 0x3fffffff.
             */
            temp=(UChar *)uprv_malloc(static_cast<size_t>(destCapacity)*U_SIZEOF_UCHAR);
            if(temp==NULL) {
                errorCode=U_MEMORY_ALLOCATION_ERROR;
                return 0;
            }
        }
    } else {
        temp=dest;
    }

    destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
                                temp, destCapacity, src, srcLength, NULL, errorCode);
    if(temp!=dest) {
        /* copy the result string to the destination buffer, but only if it fits */
        if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) {
            u_memmove(dest, temp, destLength);
        }
        /* release the heap buffer on every path, including mapper failure */
        if(temp!=buffer) {
            uprv_free(temp);
        }
    }

    return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
}
1332
1333
/* public API functions */
1334
1335
/*
 * Public case-folding entry point: forwards to ustrcase_mapWithOverlap()
 * with the root case locale, no break iterator and ustrcase_internalFold
 * as the mapper. pErrorCode is dereferenced unconditionally.
 */
U_CAPI int32_t U_EXPORT2
u_strFoldCase(UChar *dest, int32_t destCapacity,
              const UChar *src, int32_t srcLength,
              uint32_t options,
              UErrorCode *pErrorCode) {
    UErrorCode &status = *pErrorCode;
    return ustrcase_mapWithOverlap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
                                   dest, destCapacity,
                                   src, srcLength,
                                   ustrcase_internalFold, status);
}
1346
1347
U_NAMESPACE_BEGIN
1348
1349
/*
 * Case-folds src into dest through ustrcase_map() with the root case locale,
 * no break iterator and ustrcase_internalFold as the mapper; edits (may be
 * null) is passed through. Returns the value produced by ustrcase_map().
 */
int32_t CaseMap::fold(
        uint32_t options,
        const UChar *src, int32_t srcLength,
        UChar *dest, int32_t destCapacity, Edits *edits,
        UErrorCode &errorCode) {
    const int32_t foldedLength = ustrcase_map(
        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
        dest, destCapacity,
        src, srcLength,
        ustrcase_internalFold, edits, errorCode);
    return foldedLength;
}
1360
1361
U_NAMESPACE_END
1362
1363
/* case-insensitive string comparisons -------------------------------------- */
1364
1365
/*
1366
 * This function is a copy of unorm_cmpEquivFold() minus the parts for
1367
 * canonical equivalence.
1368
 * Keep the functions in sync, and see there for how this works.
1369
 * The duplication is for modularization:
1370
 * It makes caseless (but not canonical caseless) matches independent of
1371
 * the normalization code.
1372
 */
1373
1374
/* stack element for previous-level source/decomposition pointers */
1375
struct CmpEquivLevel {
1376
    const UChar *start, *s, *limit;
1377
};
1378
typedef struct CmpEquivLevel CmpEquivLevel;
1379
1380
/**
 * Internal implementation code comparing string with case fold.
 * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch().
 *
 * Both strings are read code unit by code unit. When two units differ, the
 * code point on either side is case-folded via ucase_toFullFolding() and,
 * if it folds, reading continues from a small per-string fold buffer
 * (one extra "level") before returning to the original text.
 *
 * No argument checking is done other than the error code test; a failing
 * *pErrorCode on entry makes the function return 0 immediately.
 *
 * @param s1            input string 1
 * @param length1       length of string 1, or -1 (NULL terminated)
 * @param s2            input string 2
 * @param length2       length of string 2, or -1 (NULL terminated)
 * @param options       compare options; U_COMPARE_CODE_POINT_ORDER and
 *                      _STRNCMP_STYLE are tested here, and the value is also
 *                      passed on to ucase_toFullFolding()
 * @param matchLen1     (output) length of partial prefix match in s1;
 *                      may be NULL, in which case matchLen2 is not written either
 * @param matchLen2     (output) length of partial prefix match in s2;
 *                      must be non-NULL whenever matchLen1 is non-NULL
 * @param pErrorCode    receives error status
 * @return The result of comparison: 0 if both strings end together,
 *         -1 if string 1 ends first, 1 if string 2 ends first,
 *         otherwise the difference of the first differing code units
 *         (after the code point order fix-up when requested)
 */
1394
static int32_t _cmpFold(
1395
            const UChar *s1, int32_t length1,
1396
            const UChar *s2, int32_t length2,
1397
            uint32_t options,
1398
            int32_t *matchLen1, int32_t *matchLen2,
1399
0
            UErrorCode *pErrorCode) {
1400
0
    int32_t cmpRes = 0;
1401
1402
    /* current-level start/limit - s1/s2 as current */
1403
0
    const UChar *start1, *start2, *limit1, *limit2;
1404
1405
    /* points to the original start address */
1406
0
    const UChar *org1, *org2;
1407
1408
    /* points to the end of match + 1 */
1409
0
    const UChar *m1, *m2;
1410
1411
    /* case folding variables */
1412
0
    const UChar *p;
1413
0
    int32_t length;
1414
1415
    /* stacks of previous-level start/current/limit */
1416
0
    CmpEquivLevel stack1[2], stack2[2];
1417
1418
    /* case folding buffers, only use current-level start/limit */
1419
0
    UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
1420
1421
    /* track which is the current level per string */
1422
0
    int32_t level1, level2;
1423
1424
    /* current code units, and code points for lookups */
1425
0
    UChar32 c1, c2, cp1, cp2;
1426
1427
    /* no argument error checking because this itself is not an API */
1428
1429
    /*
1430
     * assume that at least the option U_COMPARE_IGNORE_CASE is set
1431
     * otherwise this function would have to behave exactly as uprv_strCompare()
1432
     */
1433
0
    if(U_FAILURE(*pErrorCode)) {
1434
0
        return 0;
1435
0
    }
1436
1437
    /* initialize */
    /* the two match length outputs are either both provided or both omitted */
1438
0
    if(matchLen1) {
1439
0
        U_ASSERT(matchLen2 !=NULL);
1440
0
        *matchLen1=0;
1441
0
        *matchLen2=0;
1442
0
    }
1443
1444
0
    start1=m1=org1=s1;
1445
0
    if(length1==-1) {
        /* NUL-terminated: no known limit, the read loop stops at the first 0 code unit */
1446
0
        limit1=NULL;
1447
0
    } else {
1448
0
        limit1=s1+length1;
1449
0
    }
1450
1451
0
    start2=m2=org2=s2;
1452
0
    if(length2==-1) {
        /* NUL-terminated: no known limit, the read loop stops at the first 0 code unit */
1453
0
        limit2=NULL;
1454
0
    } else {
1455
0
        limit2=s2+length2;
1456
0
    }
1457
1458
0
    level1=level2=0;
1459
0
    c1=c2=-1;
1460
1461
    /* comparison loop */
1462
0
    for(;;) {
1463
        /*
1464
         * here a code unit value of -1 means "get another code unit"
1465
         * below it will mean "this source is finished"
1466
         */
1467
1468
0
        if(c1<0) {
1469
            /* get next code unit from string 1, post-increment */
1470
0
            for(;;) {
1471
0
                if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
1472
0
                    if(level1==0) {
1473
0
                        c1=-1;
1474
0
                        break;
1475
0
                    }
1476
0
                } else {
1477
0
                    ++s1;
1478
0
                    break;
1479
0
                }
1480
1481
                /* reached end of level buffer, pop one level */
1482
0
                do {
1483
0
                    --level1;
1484
0
                    start1=stack1[level1].start;    /*Not uninitialized*/
1485
0
                } while(start1==NULL);
1486
0
                s1=stack1[level1].s;                /*Not uninitialized*/
1487
0
                limit1=stack1[level1].limit;        /*Not uninitialized*/
1488
0
            }
1489
0
        }
1490
1491
0
        if(c2<0) {
1492
            /* get next code unit from string 2, post-increment */
1493
0
            for(;;) {
1494
0
                if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
1495
0
                    if(level2==0) {
1496
0
                        c2=-1;
1497
0
                        break;
1498
0
                    }
1499
0
                } else {
1500
0
                    ++s2;
1501
0
                    break;
1502
0
                }
1503
1504
                /* reached end of level buffer, pop one level */
1505
0
                do {
1506
0
                    --level2;
1507
0
                    start2=stack2[level2].start;    /*Not uninitialized*/
1508
0
                } while(start2==NULL);
1509
0
                s2=stack2[level2].s;                /*Not uninitialized*/
1510
0
                limit2=stack2[level2].limit;        /*Not uninitialized*/
1511
0
            }
1512
0
        }
1513
1514
        /*
1515
         * compare c1 and c2
1516
         * either variable c1, c2 is -1 only if the corresponding string is finished
1517
         */
1518
0
        if(c1==c2) {
1519
0
            const UChar *next1, *next2;
1520
1521
0
            if(c1<0) {
1522
0
                cmpRes=0;   /* c1==c2==-1 indicating end of strings */
1523
0
                break;
1524
0
            }
1525
1526
            /*
1527
             * Note: Move the match positions in both strings at the same time
1528
             *      only when corresponding code point(s) in the original strings
1529
             *      are fully consumed. For example, when comparing s1="Fust" and
1530
             *      s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches
1531
             *      the first code point in the case-folded data. But the second "s"
1532
             *      has no matching code point in s1, so this implementation returns
1533
             *      2 as the prefix match length ("Fu").
1534
             */
1535
0
            next1=next2=NULL;
1536
0
            if(level1==0) {
1537
0
                next1=s1;
1538
0
            } else if(s1==limit1) {
1539
                /* Note: This implementation only use a single level of stack.
1540
                 *      If this code needs to be changed to use multiple levels
1541
                 *      of stacks, the code above should check if the current
1542
                 *      code is at the end of all stacks.
1543
                 */
1544
0
                U_ASSERT(level1==1);
1545
1546
                /* is s1 at the end of the current stack? */
1547
0
                next1=stack1[0].s;
1548
0
            }
1549
1550
0
            if (next1!=NULL) {
1551
0
                if(level2==0) {
1552
0
                    next2=s2;
1553
0
                } else if(s2==limit2) {
1554
0
                    U_ASSERT(level2==1);
1555
1556
                    /* is s2 at the end of the current stack? */
1557
0
                    next2=stack2[0].s;
1558
0
                }
1559
0
                if(next2!=NULL) {
1560
0
                    m1=next1;
1561
0
                    m2=next2;
1562
0
                }
1563
0
            }
1564
0
            c1=c2=-1;       /* make us fetch new code units */
1565
0
            continue;
1566
0
        } else if(c1<0) {
1567
0
            cmpRes=-1;      /* string 1 ends before string 2 */
1568
0
            break;
1569
0
        } else if(c2<0) {
1570
0
            cmpRes=1;       /* string 2 ends before string 1 */
1571
0
            break;
1572
0
        }
1573
        /* c1!=c2 && c1>=0 && c2>=0 */
1574
1575
        /* get complete code points for c1, c2 for lookups if either is a surrogate */
1576
0
        cp1=c1;
1577
0
        if(U_IS_SURROGATE(c1)) {
1578
0
            UChar c;
1579
1580
0
            if(U_IS_SURROGATE_LEAD(c1)) {
1581
0
                if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
1582
                    /* advance ++s1; only below if cp1 decomposes/case-folds */
1583
0
                    cp1=U16_GET_SUPPLEMENTARY(c1, c);
1584
0
                }
1585
0
            } else /* isTrail(c1) */ {
1586
0
                if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
1587
0
                    cp1=U16_GET_SUPPLEMENTARY(c, c1);
1588
0
                }
1589
0
            }
1590
0
        }
1591
1592
0
        cp2=c2;
1593
0
        if(U_IS_SURROGATE(c2)) {
1594
0
            UChar c;
1595
1596
0
            if(U_IS_SURROGATE_LEAD(c2)) {
1597
0
                if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
1598
                    /* advance ++s2; only below if cp2 decomposes/case-folds */
1599
0
                    cp2=U16_GET_SUPPLEMENTARY(c2, c);
1600
0
                }
1601
0
            } else /* isTrail(c2) */ {
1602
0
                if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
1603
0
                    cp2=U16_GET_SUPPLEMENTARY(c, c2);
1604
0
                }
1605
0
            }
1606
0
        }
1607
1608
        /*
1609
         * go down one level for each string
1610
         * continue with the main loop as soon as there is a real change
1611
         */
1612
1613
0
        if( level1==0 &&
1614
0
            (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
1615
0
        ) {
1616
            /* cp1 case-folds to the code point "length" or to p[length] */
1617
0
            if(U_IS_SURROGATE(c1)) {
1618
0
                if(U_IS_SURROGATE_LEAD(c1)) {
1619
                    /* advance beyond source surrogate pair if it case-folds */
1620
0
                    ++s1;
1621
0
                } else /* isTrail(c1) */ {
1622
                    /*
1623
                     * we got a supplementary code point when hitting its trail surrogate,
1624
                     * therefore the lead surrogate must have been the same as in the other string;
1625
                     * compare this decomposition with the lead surrogate in the other string
1626
                     * remember that this simulates bulk text replacement:
1627
                     * the decomposition would replace the entire code point
1628
                     */
1629
0
                    --s2;
1630
0
                    --m2;
1631
0
                    c2=*(s2-1);
1632
0
                }
1633
0
            }
1634
1635
            /* push current level pointers */
1636
0
            stack1[0].start=start1;
1637
0
            stack1[0].s=s1;
1638
0
            stack1[0].limit=limit1;
1639
0
            ++level1;
1640
1641
            /* copy the folding result to fold1[] */
1642
0
            if(length<=UCASE_MAX_STRING_LENGTH) {
                /* string result: p points to "length" code units */
1643
0
                u_memcpy(fold1, p, length);
1644
0
            } else {
                /* single code point result: "length" is the code point, encode it into fold1[] */
1645
0
                int32_t i=0;
1646
0
                U16_APPEND_UNSAFE(fold1, i, length);
1647
0
                length=i;
1648
0
            }
1649
1650
            /* set next level pointers to case folding */
1651
0
            start1=s1=fold1;
1652
0
            limit1=fold1+length;
1653
1654
            /* get ready to read from decomposition, continue with loop */
1655
0
            c1=-1;
1656
0
            continue;
1657
0
        }
1658
1659
0
        if( level2==0 &&
1660
0
            (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
1661
0
        ) {
1662
            /* cp2 case-folds to the code point "length" or to p[length] */
1663
0
            if(U_IS_SURROGATE(c2)) {
1664
0
                if(U_IS_SURROGATE_LEAD(c2)) {
1665
                    /* advance beyond source surrogate pair if it case-folds */
1666
0
                    ++s2;
1667
0
                } else /* isTrail(c2) */ {
1668
                    /*
1669
                     * we got a supplementary code point when hitting its trail surrogate,
1670
                     * therefore the lead surrogate must have been the same as in the other string;
1671
                     * compare this decomposition with the lead surrogate in the other string
1672
                     * remember that this simulates bulk text replacement:
1673
                     * the decomposition would replace the entire code point
1674
                     */
1675
0
                    --s1;
                    /*
                     * NOTE(review): this branch steps s1 back but adjusts m2, the same
                     * match pointer that the mirrored cp1 branch above adjusts when it
                     * steps s2 back; m1 is not touched in either branch.
                     * Confirm whether m1 was intended here.
                     */
1676
0
                    --m2;
1677
0
                    c1=*(s1-1);
1678
0
                }
1679
0
            }
1680
1681
            /* push current level pointers */
1682
0
            stack2[0].start=start2;
1683
0
            stack2[0].s=s2;
1684
0
            stack2[0].limit=limit2;
1685
0
            ++level2;
1686
1687
            /* copy the folding result to fold2[] */
1688
0
            if(length<=UCASE_MAX_STRING_LENGTH) {
                /* string result: p points to "length" code units */
1689
0
                u_memcpy(fold2, p, length);
1690
0
            } else {
                /* single code point result: "length" is the code point, encode it into fold2[] */
1691
0
                int32_t i=0;
1692
0
                U16_APPEND_UNSAFE(fold2, i, length);
1693
0
                length=i;
1694
0
            }
1695
1696
            /* set next level pointers to case folding */
1697
0
            start2=s2=fold2;
1698
0
            limit2=fold2+length;
1699
1700
            /* get ready to read from decomposition, continue with loop */
1701
0
            c2=-1;
1702
0
            continue;
1703
0
        }
1704
1705
        /*
1706
         * no decomposition/case folding, max level for both sides:
1707
         * return difference result
1708
         *
1709
         * code point order comparison must not just return cp1-cp2
1710
         * because when single surrogates are present then the surrogate pairs
1711
         * that formed cp1 and cp2 may be from different string indexes
1712
         *
1713
         * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
1714
         * c1=d800 cp1=10001 c2=dc00 cp2=10000
1715
         * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
1716
         *
1717
         * therefore, use same fix-up as in ustring.c/uprv_strCompare()
1718
         * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
1719
         * so we have slightly different pointer/start/limit comparisons here
1720
         */
1721
1722
0
        if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
1723
            /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
1724
0
            if(
1725
0
                (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
1726
0
                (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
1727
0
            ) {
1728
                /* part of a surrogate pair, leave >=d800 */
1729
0
            } else {
1730
                /* BMP code point - may be surrogate code point - make <d800 */
1731
0
                c1-=0x2800;
1732
0
            }
1733
1734
0
            if(
1735
0
                (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
1736
0
                (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
1737
0
            ) {
1738
                /* part of a surrogate pair, leave >=d800 */
1739
0
            } else {
1740
                /* BMP code point - may be surrogate code point - make <d800 */
1741
0
                c2-=0x2800;
1742
0
            }
1743
0
        }
1744
1745
0
        cmpRes=c1-c2;
1746
0
        break;
1747
0
    }
1748
1749
    /* report how far the two original strings matched, as offsets from their starts */
0
    if(matchLen1) {
1750
0
        *matchLen1=static_cast<int32_t>(m1-org1);
1751
0
        *matchLen2=static_cast<int32_t>(m2-org2);
1752
0
    }
1753
0
    return cmpRes;
1754
0
}
1755
1756
/* internal function */
1757
U_CFUNC int32_t
1758
u_strcmpFold(const UChar *s1, int32_t length1,
1759
             const UChar *s2, int32_t length2,
1760
             uint32_t options,
1761
0
             UErrorCode *pErrorCode) {
1762
0
    return _cmpFold(s1, length1, s2, length2, options, NULL, NULL, pErrorCode);
1763
0
}
1764
1765
/* public API functions */
1766
1767
/*
 * Compares two UChar strings with explicit lengths, always ORing
 * U_COMPARE_IGNORE_CASE into the caller's options.
 *
 * Returns 0 immediately if pErrorCode is null or already holds a failure.
 * Sets *pErrorCode to U_ILLEGAL_ARGUMENT_ERROR and returns 0 if either string
 * pointer is NULL or either length is less than -1.
 * Otherwise returns the result of u_strcmpFold().
 */
U_CAPI int32_t U_EXPORT2
u_strCaseCompare(const UChar *s1, int32_t length1,
                 const UChar *s2, int32_t length2,
                 uint32_t options,
                 UErrorCode *pErrorCode) {
    /* bail out when there is no error code to use or it already failed */
    if(pErrorCode==NULL) {
        return 0;
    }
    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* both strings must be present and no length may be below -1 */
    if(!(s1!=NULL && length1>=-1 && s2!=NULL && length2>=-1)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const uint32_t foldOptions=options|U_COMPARE_IGNORE_CASE;
    return u_strcmpFold(s1, length1, s2, length2, foldOptions, pErrorCode);
}
1784
1785
/*
 * Compares s1 and s2 through u_strcmpFold(), passing -1 for both lengths
 * (presumably meaning NUL-terminated input - confirm against _cmpFold())
 * and ORing U_COMPARE_IGNORE_CASE into options.
 *
 * The error code is local to this function and is not reported to the caller.
 */
U_CAPI int32_t U_EXPORT2
u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
    UErrorCode status=U_ZERO_ERROR;
    const uint32_t foldOptions=options|U_COMPARE_IGNORE_CASE;
    return u_strcmpFold(s1, -1, s2, -1, foldOptions, &status);
}
1792
1793
/*
 * Compares s1 and s2 through u_strcmpFold(), using the same caller-supplied
 * length for both strings and ORing U_COMPARE_IGNORE_CASE into options.
 *
 * The error code is local to this function and is not reported to the caller.
 */
U_CAPI int32_t U_EXPORT2
u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
    UErrorCode status=U_ZERO_ERROR;
    const uint32_t foldOptions=options|U_COMPARE_IGNORE_CASE;
    return u_strcmpFold(s1, length, s2, length, foldOptions, &status);
}
1800
1801
/*
 * Compares s1 and s2 through u_strcmpFold(), passing n as the length of both
 * strings and ORing both U_COMPARE_IGNORE_CASE and _STRNCMP_STYLE into
 * options.
 *
 * The error code is local to this function and is not reported to the caller.
 */
U_CAPI int32_t U_EXPORT2
u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
    UErrorCode status=U_ZERO_ERROR;
    const uint32_t foldOptions=options|U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE;
    return u_strcmpFold(s1, n, s2, n, foldOptions, &status);
}
1808
1809
/* internal API - detect length of shared prefix */
1810
U_CAPI void
1811
u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
1812
                             const UChar *s2, int32_t length2,
1813
                             uint32_t options,
1814
                             int32_t *matchLen1, int32_t *matchLen2,
1815
0
                             UErrorCode *pErrorCode) {
1816
0
    _cmpFold(s1, length1, s2, length2, options,
1817
0
        matchLen1, matchLen2, pErrorCode);
1818
0
}