Coverage Report

Created: 2023-06-07 07:17

/src/icu/source/common/ustrcase.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 2001-2015, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  ustrcase.cpp
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2002feb20
16
*   created by: Markus W. Scherer
17
*
18
*   Implementation file for string casing C API functions.
19
*   Uses functions from uchar.c for basic functionality that requires access
20
*   to the Unicode Character Database (uprops.dat).
21
*/
22
23
#include "unicode/utypes.h"
24
#include "unicode/brkiter.h"
25
#include "unicode/casemap.h"
26
#include "unicode/edits.h"
27
#include "unicode/ustring.h"
28
#include "unicode/ucasemap.h"
29
#include "unicode/ubrk.h"
30
#include "unicode/utf.h"
31
#include "unicode/utf16.h"
32
#include "cmemory.h"
33
#include "ucase.h"
34
#include "ucasemap_imp.h"
35
#include "ustr_imp.h"
36
#include "uassert.h"
37
38
U_NAMESPACE_BEGIN
39
40
namespace {
41
42
int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
43
3.57k
                                   Edits *edits, UErrorCode &errorCode) {
44
3.57k
    if (U_SUCCESS(errorCode)) {
45
3.57k
        if (destIndex > destCapacity) {
46
0
            errorCode = U_BUFFER_OVERFLOW_ERROR;
47
3.57k
        } else if (edits != NULL) {
48
0
            edits->copyErrorTo(errorCode);
49
0
        }
50
3.57k
    }
51
3.57k
    return destIndex;
52
3.57k
}
53
54
}  // namespace
55
56
U_NAMESPACE_END
57
58
U_NAMESPACE_USE
59
60
/* string casing ------------------------------------------------------------ */
61
62
/* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */
63
static inline int32_t
64
appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,
65
             int32_t result, const UChar *s,
66
7.31M
             int32_t cpLength, uint32_t options, icu::Edits *edits) {
67
7.31M
    UChar32 c;
68
7.31M
    int32_t length;
69
70
    /* decode the result */
71
7.31M
    if(result<0) {
72
        /* (not) original code point */
73
6.79M
        if(edits!=NULL) {
74
0
            edits->addUnchanged(cpLength);
75
0
            if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
76
0
                return destIndex;
77
0
            }
78
0
        }
79
6.79M
        c=~result;
80
6.79M
        if(destIndex<destCapacity && c<=0xffff) {  // BMP slightly-fastpath
81
6.77M
            dest[destIndex++]=(UChar)c;
82
6.77M
            return destIndex;
83
6.77M
        }
84
19.4k
        length=cpLength;
85
516k
    } else {
86
516k
        if(result<=UCASE_MAX_STRING_LENGTH) {
87
128k
            c=U_SENTINEL;
88
128k
            length=result;
89
388k
        } else if(destIndex<destCapacity && result<=0xffff) {  // BMP slightly-fastpath
90
383k
            dest[destIndex++]=(UChar)result;
91
383k
            if(edits!=NULL) {
92
0
                edits->addReplace(cpLength, 1);
93
0
            }
94
383k
            return destIndex;
95
383k
        } else {
96
5.29k
            c=result;
97
5.29k
            length=U16_LENGTH(c);
98
5.29k
        }
99
133k
        if(edits!=NULL) {
100
0
            edits->addReplace(cpLength, length);
101
0
        }
102
133k
    }
103
152k
    if(length>(INT32_MAX-destIndex)) {
104
0
        return -1;  // integer overflow
105
0
    }
106
107
152k
    if(destIndex<destCapacity) {
108
        /* append the result */
109
152k
        if(c>=0) {
110
            /* code point */
111
24.7k
            UBool isError=FALSE;
112
24.7k
            U16_APPEND(dest, destIndex, destCapacity, c, isError);
113
24.7k
            if(isError) {
114
                /* overflow, nothing written */
115
0
                destIndex+=length;
116
0
            }
117
128k
        } else {
118
            /* string */
119
128k
            if((destIndex+length)<=destCapacity) {
120
384k
                while(length>0) {
121
256k
                    dest[destIndex++]=*s++;
122
256k
                    --length;
123
256k
                }
124
128k
            } else {
125
                /* overflow */
126
0
                destIndex+=length;
127
0
            }
128
128k
        }
129
152k
    } else {
130
        /* preflight */
131
0
        destIndex+=length;
132
0
    }
133
152k
    return destIndex;
134
152k
}
135
136
static inline int32_t
137
0
appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
138
0
    if(destIndex<destCapacity) {
139
0
        dest[destIndex]=c;
140
0
    } else if(destIndex==INT32_MAX) {
141
0
        return -1;  // integer overflow
142
0
    }
143
0
    return destIndex+1;
144
0
}
145
146
static inline int32_t
147
appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,
148
0
                const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) {
149
0
    if(length>0) {
150
0
        if(edits!=NULL) {
151
0
            edits->addUnchanged(length);
152
0
            if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
153
0
                return destIndex;
154
0
            }
155
0
        }
156
0
        if(length>(INT32_MAX-destIndex)) {
157
0
            return -1;  // integer overflow
158
0
        }
159
0
        if((destIndex+length)<=destCapacity) {
160
0
            u_memcpy(dest+destIndex, s, length);
161
0
        }
162
0
        destIndex+=length;
163
0
    }
164
0
    return destIndex;
165
0
}
166
167
/*
 * Context iterator over the UTF-16 text described by a UCaseContext.
 *
 * dir<0: restart backward iteration from cpStart.
 * dir>0: restart forward iteration from cpLimit.
 * dir==0: continue in the direction stored by the previous reset.
 *
 * Returns the next code point in the active direction, or U_SENTINEL once the
 * start (backward) or limit (forward) of the text has been reached.
 */
static UChar32 U_CALLCONV
utf16_caseContextIterator(void *context, int8_t dir) {
    UCaseContext *ctx=(UCaseContext *)context;

    if(dir!=0) {
        /* reset: backward starts before the code point, forward after it */
        ctx->index= dir<0 ? ctx->cpStart : ctx->cpLimit;
        ctx->dir=dir;
    } else {
        /* continue current iteration direction */
        dir=ctx->dir;
    }

    UChar32 c;
    if(dir<0) {
        if(ctx->start>=ctx->index) {
            return U_SENTINEL;
        }
        U16_PREV((const UChar *)ctx->p, ctx->start, ctx->index, c);
        return c;
    }

    if(ctx->index>=ctx->limit) {
        return U_SENTINEL;
    }
    U16_NEXT((const UChar *)ctx->p, ctx->index, ctx->limit, c);
    return c;
}
198
199
/*
200
 * Case-maps [srcStart..srcLimit[ but takes
201
 * context [0..srcLength[ into account.
202
 */
203
static int32_t
204
_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
205
         UChar *dest, int32_t destCapacity,
206
         const UChar *src, UCaseContext *csc,
207
         int32_t srcStart, int32_t srcLimit,
208
         icu::Edits *edits,
209
3.57k
         UErrorCode &errorCode) {
210
    /* case mapping loop */
211
3.57k
    int32_t srcIndex=srcStart;
212
3.57k
    int32_t destIndex=0;
213
7.31M
    while(srcIndex<srcLimit) {
214
7.31M
        int32_t cpStart;
215
7.31M
        csc->cpStart=cpStart=srcIndex;
216
7.31M
        UChar32 c;
217
7.31M
        U16_NEXT(src, srcIndex, srcLimit, c);
218
7.31M
        csc->cpLimit=srcIndex;
219
7.31M
        const UChar *s;
220
7.31M
        c=map(c, utf16_caseContextIterator, csc, &s, caseLocale);
221
7.31M
        destIndex = appendResult(dest, destIndex, destCapacity, c, s,
222
7.31M
                                 srcIndex - cpStart, options, edits);
223
7.31M
        if (destIndex < 0) {
224
0
            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
225
0
            return 0;
226
0
        }
227
7.31M
    }
228
229
3.57k
    return destIndex;
230
3.57k
}
231
232
#if !UCONFIG_NO_BREAK_ITERATION
233
234
/**
 * Titlecases src[0..srcLength[ into dest.
 *
 * Segments are delimited by the boundaries that iter returns from first() and
 * then next(); UBRK_DONE or a boundary beyond srcLength is clamped to
 * srcLength. Within each segment the first cased character is titlecased and
 * the remainder is lowercased (or copied as-is with U_TITLECASE_NO_LOWERCASE).
 * With U_TITLECASE_NO_BREAK_ADJUSTMENT the first character of the segment is
 * titlecased even if it is uncased. For UCASE_LOC_DUTCH, a 'j'/'J' directly
 * after an initial 'I'/'i' is written as 'J'.
 *
 * @param caseLocale   Case locale constant passed to the ucase mapping functions.
 * @param options      Bit set; this function reads U_TITLECASE_NO_BREAK_ADJUSTMENT
 *                     and U_TITLECASE_NO_LOWERCASE and forwards the rest to the
 *                     append helpers (which honor UCASEMAP_OMIT_UNCHANGED_TEXT).
 * @param iter         Break iterator supplying segment boundaries.
 * @param dest         Output buffer; not written beyond destCapacity.
 * @param destCapacity Capacity of dest in code units.
 * @param src          Source text, also used as context for the mappings.
 * @param srcLength    Length of src in code units.
 * @param edits        Optional edit recorder, may be NULL.
 * @param errorCode    In/out status. Returns 0 immediately if already a failure.
 *                     Set to U_INDEX_OUTOFBOUNDS_ERROR if the output length
 *                     would overflow int32_t, and to U_BUFFER_OVERFLOW_ERROR by
 *                     the final check if the result does not fit.
 * @return The length the output requires; 0 on int32 overflow.
 */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,
                         UChar *dest, int32_t destCapacity,
                         const UChar *src, int32_t srcLength,
                         icu::Edits *edits,
                         UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return 0;
    }

    /* set up local variables */
    UCaseContext csc=UCASECONTEXT_INITIALIZER;
    csc.p=(void *)src;
    csc.limit=srcLength;
    int32_t destIndex=0;
    int32_t prev=0;
    UBool isFirstIndex=TRUE;

    /* titlecasing loop */
    while(prev<srcLength) {
        /* find next index where to titlecase */
        int32_t index;
        if(isFirstIndex) {
            isFirstIndex=FALSE;
            index=iter->first();
        } else {
            index=iter->next();
        }
        if(index==UBRK_DONE || index>srcLength) {
            index=srcLength;
        }

        /*
         * Unicode 4 & 5 section 3.13 Default Case Operations:
         *
         * R3  toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
         * #29, "Text Boundaries." Between each pair of word boundaries, find the first
         * cased character F. If F exists, map F to default_title(F); then map each
         * subsequent character C to default_lower(C).
         *
         * In this implementation, segment [prev..index[ into 3 parts:
         * a) uncased characters (copy as-is) [prev..titleStart[
         * b) first case letter (titlecase)         [titleStart..titleLimit[
         * c) subsequent characters (lowercase)                 [titleLimit..index[
         */
        if(prev<index) {
            /* find and copy uncased characters [prev..titleStart[ */
            int32_t titleStart=prev;
            int32_t titleLimit=prev;
            UChar32 c;
            U16_NEXT(src, titleLimit, index, c);
            if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(c)) {
                /* Adjust the titlecasing index (titleStart) to the next cased character. */
                for(;;) {
                    titleStart=titleLimit;
                    if(titleLimit==index) {
                        /*
                         * only uncased characters in [prev..index[
                         * stop with titleStart==titleLimit==index
                         */
                        break;
                    }
                    U16_NEXT(src, titleLimit, index, c);
                    if(UCASE_NONE!=ucase_getType(c)) {
                        break; /* cased letter at [titleStart..titleLimit[ */
                    }
                }
                destIndex=appendUnchanged(dest, destIndex, destCapacity,
                                          src+prev, titleStart-prev, options, edits);
                if(destIndex<0) {
                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                    return 0;
                }
            }

            if(titleStart<titleLimit) {
                /* titlecase c which is from [titleStart..titleLimit[ */
                csc.cpStart=titleStart;
                csc.cpLimit=titleLimit;
                const UChar *s;
                c=ucase_toFullTitle(c, utf16_caseContextIterator, &csc, &s, caseLocale);
                destIndex=appendResult(dest, destIndex, destCapacity, c, s,
                                       titleLimit-titleStart, options, edits);
                if(destIndex<0) {
                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                    return 0;
                }

                /* Special case Dutch IJ titlecasing */
                if (titleStart+1 < index &&
                        caseLocale == UCASE_LOC_DUTCH &&
                        (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
                    if (src[titleStart+1] == 0x006A) {
                        /* lowercase j after I/i: write a capital J instead */
                        destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
                        if(destIndex<0) {
                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                            return 0;
                        }
                        if(edits!=NULL) {
                            edits->addReplace(1, 1);
                        }
                        titleLimit++;
                    } else if (src[titleStart+1] == 0x004A) {
                        // Keep the capital J from getting lowercased.
                        destIndex=appendUnchanged(dest, destIndex, destCapacity,
                                                  src+titleStart+1, 1, options, edits);
                        if(destIndex<0) {
                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                            return 0;
                        }
                        titleLimit++;
                    }
                }

                /* lowercase [titleLimit..index[ */
                if(titleLimit<index) {
                    if((options&U_TITLECASE_NO_LOWERCASE)==0) {
                        /* Normal operation: Lowercase the rest of the word. */
                        /*
                         * Hand _caseMap the unused tail of dest. Only form
                         * dest+destIndex while it stays inside the buffer:
                         * when preflighting (dest==NULL) or once the output
                         * has overflowed, pointer arithmetic would be
                         * undefined, so pass an empty window instead.
                         * _caseMap then only counts.
                         */
                        UChar *tail=NULL;
                        int32_t tailCapacity=0;
                        if(dest!=NULL && destIndex<destCapacity) {
                            tail=dest+destIndex;
                            tailCapacity=destCapacity-destIndex;
                        }
                        int32_t lowerLength=
                            _caseMap(
                                caseLocale, options, ucase_toFullLower,
                                tail, tailCapacity,
                                src, &csc,
                                titleLimit, index,
                                edits, errorCode);
                        if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
                            errorCode=U_ZERO_ERROR;
                        }
                        if(U_FAILURE(errorCode)) {
                            return destIndex;
                        }
                        /* same int32 overflow guard as the other append paths */
                        if(lowerLength>(INT32_MAX-destIndex)) {
                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                            return 0;
                        }
                        destIndex+=lowerLength;
                    } else {
                        /* Optionally just copy the rest of the word unchanged. */
                        destIndex=appendUnchanged(dest, destIndex, destCapacity,
                                                  src+titleLimit, index-titleLimit, options, edits);
                        if(destIndex<0) {
                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                            return 0;
                        }
                    }
                }
            }
        }

        prev=index;
    }

    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}
383
384
#endif  // !UCONFIG_NO_BREAK_ITERATION
385
386
U_NAMESPACE_BEGIN
387
namespace GreekUpper {
388
389
// Data generated by prototype code, see
390
// http://site.icu-project.org/design/case/greek-upper
391
// TODO: Move this data into ucase.icu.
392
static const uint16_t data0370[] = {
393
    // U+0370..03FF
394
    0x0370,
395
    0x0370,
396
    0x0372,
397
    0x0372,
398
    0,
399
    0,
400
    0x0376,
401
    0x0376,
402
    0,
403
    0,
404
    0x037A,
405
    0x03FD,
406
    0x03FE,
407
    0x03FF,
408
    0,
409
    0x037F,
410
    0,
411
    0,
412
    0,
413
    0,
414
    0,
415
    0,
416
    0x0391 | HAS_VOWEL | HAS_ACCENT,
417
    0,
418
    0x0395 | HAS_VOWEL | HAS_ACCENT,
419
    0x0397 | HAS_VOWEL | HAS_ACCENT,
420
    0x0399 | HAS_VOWEL | HAS_ACCENT,
421
    0,
422
    0x039F | HAS_VOWEL | HAS_ACCENT,
423
    0,
424
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
425
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
426
    0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
427
    0x0391 | HAS_VOWEL,
428
    0x0392,
429
    0x0393,
430
    0x0394,
431
    0x0395 | HAS_VOWEL,
432
    0x0396,
433
    0x0397 | HAS_VOWEL,
434
    0x0398,
435
    0x0399 | HAS_VOWEL,
436
    0x039A,
437
    0x039B,
438
    0x039C,
439
    0x039D,
440
    0x039E,
441
    0x039F | HAS_VOWEL,
442
    0x03A0,
443
    0x03A1,
444
    0,
445
    0x03A3,
446
    0x03A4,
447
    0x03A5 | HAS_VOWEL,
448
    0x03A6,
449
    0x03A7,
450
    0x03A8,
451
    0x03A9 | HAS_VOWEL,
452
    0x0399 | HAS_VOWEL | HAS_DIALYTIKA,
453
    0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,
454
    0x0391 | HAS_VOWEL | HAS_ACCENT,
455
    0x0395 | HAS_VOWEL | HAS_ACCENT,
456
    0x0397 | HAS_VOWEL | HAS_ACCENT,
457
    0x0399 | HAS_VOWEL | HAS_ACCENT,
458
    0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
459
    0x0391 | HAS_VOWEL,
460
    0x0392,
461
    0x0393,
462
    0x0394,
463
    0x0395 | HAS_VOWEL,
464
    0x0396,
465
    0x0397 | HAS_VOWEL,
466
    0x0398,
467
    0x0399 | HAS_VOWEL,
468
    0x039A,
469
    0x039B,
470
    0x039C,
471
    0x039D,
472
    0x039E,
473
    0x039F | HAS_VOWEL,
474
    0x03A0,
475
    0x03A1,
476
    0x03A3,
477
    0x03A3,
478
    0x03A4,
479
    0x03A5 | HAS_VOWEL,
480
    0x03A6,
481
    0x03A7,
482
    0x03A8,
483
    0x03A9 | HAS_VOWEL,
484
    0x0399 | HAS_VOWEL | HAS_DIALYTIKA,
485
    0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,
486
    0x039F | HAS_VOWEL | HAS_ACCENT,
487
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
488
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
489
    0x03CF,
490
    0x0392,
491
    0x0398,
492
    0x03D2,
493
    0x03D2 | HAS_ACCENT,
494
    0x03D2 | HAS_DIALYTIKA,
495
    0x03A6,
496
    0x03A0,
497
    0x03CF,
498
    0x03D8,
499
    0x03D8,
500
    0x03DA,
501
    0x03DA,
502
    0x03DC,
503
    0x03DC,
504
    0x03DE,
505
    0x03DE,
506
    0x03E0,
507
    0x03E0,
508
    0,
509
    0,
510
    0,
511
    0,
512
    0,
513
    0,
514
    0,
515
    0,
516
    0,
517
    0,
518
    0,
519
    0,
520
    0,
521
    0,
522
    0x039A,
523
    0x03A1,
524
    0x03F9,
525
    0x037F,
526
    0x03F4,
527
    0x0395 | HAS_VOWEL,
528
    0,
529
    0x03F7,
530
    0x03F7,
531
    0x03F9,
532
    0x03FA,
533
    0x03FA,
534
    0x03FC,
535
    0x03FD,
536
    0x03FE,
537
    0x03FF,
538
};
539
540
static const uint16_t data1F00[] = {
541
    // U+1F00..1FFF
542
    0x0391 | HAS_VOWEL,
543
    0x0391 | HAS_VOWEL,
544
    0x0391 | HAS_VOWEL | HAS_ACCENT,
545
    0x0391 | HAS_VOWEL | HAS_ACCENT,
546
    0x0391 | HAS_VOWEL | HAS_ACCENT,
547
    0x0391 | HAS_VOWEL | HAS_ACCENT,
548
    0x0391 | HAS_VOWEL | HAS_ACCENT,
549
    0x0391 | HAS_VOWEL | HAS_ACCENT,
550
    0x0391 | HAS_VOWEL,
551
    0x0391 | HAS_VOWEL,
552
    0x0391 | HAS_VOWEL | HAS_ACCENT,
553
    0x0391 | HAS_VOWEL | HAS_ACCENT,
554
    0x0391 | HAS_VOWEL | HAS_ACCENT,
555
    0x0391 | HAS_VOWEL | HAS_ACCENT,
556
    0x0391 | HAS_VOWEL | HAS_ACCENT,
557
    0x0391 | HAS_VOWEL | HAS_ACCENT,
558
    0x0395 | HAS_VOWEL,
559
    0x0395 | HAS_VOWEL,
560
    0x0395 | HAS_VOWEL | HAS_ACCENT,
561
    0x0395 | HAS_VOWEL | HAS_ACCENT,
562
    0x0395 | HAS_VOWEL | HAS_ACCENT,
563
    0x0395 | HAS_VOWEL | HAS_ACCENT,
564
    0,
565
    0,
566
    0x0395 | HAS_VOWEL,
567
    0x0395 | HAS_VOWEL,
568
    0x0395 | HAS_VOWEL | HAS_ACCENT,
569
    0x0395 | HAS_VOWEL | HAS_ACCENT,
570
    0x0395 | HAS_VOWEL | HAS_ACCENT,
571
    0x0395 | HAS_VOWEL | HAS_ACCENT,
572
    0,
573
    0,
574
    0x0397 | HAS_VOWEL,
575
    0x0397 | HAS_VOWEL,
576
    0x0397 | HAS_VOWEL | HAS_ACCENT,
577
    0x0397 | HAS_VOWEL | HAS_ACCENT,
578
    0x0397 | HAS_VOWEL | HAS_ACCENT,
579
    0x0397 | HAS_VOWEL | HAS_ACCENT,
580
    0x0397 | HAS_VOWEL | HAS_ACCENT,
581
    0x0397 | HAS_VOWEL | HAS_ACCENT,
582
    0x0397 | HAS_VOWEL,
583
    0x0397 | HAS_VOWEL,
584
    0x0397 | HAS_VOWEL | HAS_ACCENT,
585
    0x0397 | HAS_VOWEL | HAS_ACCENT,
586
    0x0397 | HAS_VOWEL | HAS_ACCENT,
587
    0x0397 | HAS_VOWEL | HAS_ACCENT,
588
    0x0397 | HAS_VOWEL | HAS_ACCENT,
589
    0x0397 | HAS_VOWEL | HAS_ACCENT,
590
    0x0399 | HAS_VOWEL,
591
    0x0399 | HAS_VOWEL,
592
    0x0399 | HAS_VOWEL | HAS_ACCENT,
593
    0x0399 | HAS_VOWEL | HAS_ACCENT,
594
    0x0399 | HAS_VOWEL | HAS_ACCENT,
595
    0x0399 | HAS_VOWEL | HAS_ACCENT,
596
    0x0399 | HAS_VOWEL | HAS_ACCENT,
597
    0x0399 | HAS_VOWEL | HAS_ACCENT,
598
    0x0399 | HAS_VOWEL,
599
    0x0399 | HAS_VOWEL,
600
    0x0399 | HAS_VOWEL | HAS_ACCENT,
601
    0x0399 | HAS_VOWEL | HAS_ACCENT,
602
    0x0399 | HAS_VOWEL | HAS_ACCENT,
603
    0x0399 | HAS_VOWEL | HAS_ACCENT,
604
    0x0399 | HAS_VOWEL | HAS_ACCENT,
605
    0x0399 | HAS_VOWEL | HAS_ACCENT,
606
    0x039F | HAS_VOWEL,
607
    0x039F | HAS_VOWEL,
608
    0x039F | HAS_VOWEL | HAS_ACCENT,
609
    0x039F | HAS_VOWEL | HAS_ACCENT,
610
    0x039F | HAS_VOWEL | HAS_ACCENT,
611
    0x039F | HAS_VOWEL | HAS_ACCENT,
612
    0,
613
    0,
614
    0x039F | HAS_VOWEL,
615
    0x039F | HAS_VOWEL,
616
    0x039F | HAS_VOWEL | HAS_ACCENT,
617
    0x039F | HAS_VOWEL | HAS_ACCENT,
618
    0x039F | HAS_VOWEL | HAS_ACCENT,
619
    0x039F | HAS_VOWEL | HAS_ACCENT,
620
    0,
621
    0,
622
    0x03A5 | HAS_VOWEL,
623
    0x03A5 | HAS_VOWEL,
624
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
625
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
626
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
627
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
628
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
629
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
630
    0,
631
    0x03A5 | HAS_VOWEL,
632
    0,
633
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
634
    0,
635
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
636
    0,
637
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
638
    0x03A9 | HAS_VOWEL,
639
    0x03A9 | HAS_VOWEL,
640
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
641
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
642
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
643
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
644
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
645
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
646
    0x03A9 | HAS_VOWEL,
647
    0x03A9 | HAS_VOWEL,
648
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
649
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
650
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
651
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
652
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
653
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
654
    0x0391 | HAS_VOWEL | HAS_ACCENT,
655
    0x0391 | HAS_VOWEL | HAS_ACCENT,
656
    0x0395 | HAS_VOWEL | HAS_ACCENT,
657
    0x0395 | HAS_VOWEL | HAS_ACCENT,
658
    0x0397 | HAS_VOWEL | HAS_ACCENT,
659
    0x0397 | HAS_VOWEL | HAS_ACCENT,
660
    0x0399 | HAS_VOWEL | HAS_ACCENT,
661
    0x0399 | HAS_VOWEL | HAS_ACCENT,
662
    0x039F | HAS_VOWEL | HAS_ACCENT,
663
    0x039F | HAS_VOWEL | HAS_ACCENT,
664
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
665
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
666
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
667
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
668
    0,
669
    0,
670
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
671
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
672
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
673
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
674
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
675
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
676
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
677
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
678
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
679
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
680
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
681
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
682
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
683
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
684
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
685
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
686
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
687
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
688
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
689
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
690
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
691
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
692
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
693
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
694
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
695
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
696
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
697
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
698
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
699
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
700
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
701
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
702
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
703
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
704
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
705
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
706
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
707
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
708
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
709
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
710
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
711
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
712
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
713
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
714
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
715
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
716
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
717
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
718
    0x0391 | HAS_VOWEL,
719
    0x0391 | HAS_VOWEL,
720
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
721
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
722
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
723
    0,
724
    0x0391 | HAS_VOWEL | HAS_ACCENT,
725
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
726
    0x0391 | HAS_VOWEL,
727
    0x0391 | HAS_VOWEL,
728
    0x0391 | HAS_VOWEL | HAS_ACCENT,
729
    0x0391 | HAS_VOWEL | HAS_ACCENT,
730
    0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
731
    0,
732
    0x0399 | HAS_VOWEL,
733
    0,
734
    0,
735
    0,
736
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
737
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
738
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
739
    0,
740
    0x0397 | HAS_VOWEL | HAS_ACCENT,
741
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
742
    0x0395 | HAS_VOWEL | HAS_ACCENT,
743
    0x0395 | HAS_VOWEL | HAS_ACCENT,
744
    0x0397 | HAS_VOWEL | HAS_ACCENT,
745
    0x0397 | HAS_VOWEL | HAS_ACCENT,
746
    0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
747
    0,
748
    0,
749
    0,
750
    0x0399 | HAS_VOWEL,
751
    0x0399 | HAS_VOWEL,
752
    0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
753
    0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
754
    0,
755
    0,
756
    0x0399 | HAS_VOWEL | HAS_ACCENT,
757
    0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
758
    0x0399 | HAS_VOWEL,
759
    0x0399 | HAS_VOWEL,
760
    0x0399 | HAS_VOWEL | HAS_ACCENT,
761
    0x0399 | HAS_VOWEL | HAS_ACCENT,
762
    0,
763
    0,
764
    0,
765
    0,
766
    0x03A5 | HAS_VOWEL,
767
    0x03A5 | HAS_VOWEL,
768
    0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
769
    0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
770
    0x03A1,
771
    0x03A1,
772
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
773
    0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,
774
    0x03A5 | HAS_VOWEL,
775
    0x03A5 | HAS_VOWEL,
776
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
777
    0x03A5 | HAS_VOWEL | HAS_ACCENT,
778
    0x03A1,
779
    0,
780
    0,
781
    0,
782
    0,
783
    0,
784
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
785
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
786
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
787
    0,
788
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
789
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,
790
    0x039F | HAS_VOWEL | HAS_ACCENT,
791
    0x039F | HAS_VOWEL | HAS_ACCENT,
792
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
793
    0x03A9 | HAS_VOWEL | HAS_ACCENT,
794
    0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,
795
    0,
796
    0,
797
    0,
798
};
799
800
// U+2126 Ohm sign
801
// Letter data that getLetterData() returns for U+2126:
// the code point 0x03A9 in the low bits, combined with the HAS_VOWEL flag.
static const uint16_t data2126 = 0x03A9 | HAS_VOWEL;
802
803
0
/**
 * Looks up the Greek letter data word for a code point.
 *
 * @param c the code point to look up
 * @return the table entry for c, or 0 when c lies outside
 *         U+0370..U+03FF, U+1F00..U+1FFF and U+2126
 */
uint32_t getLetterData(UChar32 c) {
    // U+0370..U+03FF is served by the data0370 table.
    if (0x370 <= c && c <= 0x3ff) {
        return data0370[c - 0x370];
    }
    // U+1F00..U+1FFF is served by the data1F00 table.
    if (0x1f00 <= c && c <= 0x1fff) {
        return data1F00[c - 0x1f00];
    }
    // U+2126 has a single dedicated entry.
    if (c == 0x2126) {
        return data2126;
    }
    // Everything else carries no Greek letter data.
    return 0;
}
816
817
0
/**
 * Classifies a combining mark that may follow a Greek letter.
 *
 * @param c the code point to classify
 * @return a combination of HAS_ACCENT, HAS_COMBINING_DIALYTIKA, HAS_YPOGEGRAMMENI
 *         and HAS_OTHER_GREEK_DIACRITIC, or 0 if c is none of the listed marks
 */
uint32_t getDiacriticData(UChar32 c) {
    uint32_t flags = 0;
    switch (c) {
    // Accents, including look-alikes of the perispomeni.
    case 0x0300:  // varia
    case 0x0301:  // tonos = oxia
    case 0x0302:  // circumflex can look like perispomeni
    case 0x0303:  // tilde can look like perispomeni
    case 0x0311:  // inverted breve can look like perispomeni
    case 0x0342:  // perispomeni
        flags = HAS_ACCENT;
        break;
    case 0x0308:  // dialytika = diaeresis
        flags = HAS_COMBINING_DIALYTIKA;
        break;
    case 0x0344:  // dialytika tonos
        flags = HAS_COMBINING_DIALYTIKA | HAS_ACCENT;
        break;
    case 0x0345:  // ypogegrammeni = iota subscript
        flags = HAS_YPOGEGRAMMENI;
        break;
    // Other marks that are recognized but carry no accent/dialytika meaning here.
    case 0x0304:  // macron
    case 0x0306:  // breve
    case 0x0313:  // comma above
    case 0x0314:  // reversed comma above
    case 0x0343:  // koronis
        flags = HAS_OTHER_GREEK_DIACRITIC;
        break;
    default:
        break;
    }
    return flags;
}
842
843
0
/**
 * Scans forward from index i and reports whether the first code point that is
 * not case-ignorable is cased.
 *
 * @param s      UTF-16 text
 * @param i      index at which to start scanning
 * @param length number of code units in s
 * @return TRUE if a cased letter follows (skipping case-ignorable code points),
 *         FALSE if an uncased, non-ignorable code point or the end of the text comes first
 */
UBool isFollowedByCasedLetter(const UChar *s, int32_t i, int32_t length) {
    while (i < length) {
        UChar32 cp;
        U16_NEXT(s, i, length, cp);
        const int32_t caseType = ucase_getTypeOrIgnorable(cp);
        if ((caseType & UCASE_IGNORABLE) == 0) {
            // First code point that is not case-ignorable decides the answer.
            return (caseType != UCASE_NONE) ? TRUE : FALSE;
        }
        // Case-ignorable: keep looking.
    }
    return FALSE;  // Ran off the end without meeting a cased letter.
}
858
859
/**
860
 * Greek string uppercasing with a state machine.
861
 * Probably simpler than a stateless function that has to figure out complex context-before
862
 * for each character.
863
 * TODO: Try to re-consolidate one way or another with the non-Greek function.
864
 */
865
int32_t toUpper(uint32_t options,
866
                UChar *dest, int32_t destCapacity,
867
                const UChar *src, int32_t srcLength,
868
                Edits *edits,
869
0
                UErrorCode &errorCode) {
870
0
    int32_t destIndex=0;
871
0
    uint32_t state = 0;
872
0
    for (int32_t i = 0; i < srcLength;) {
873
0
        int32_t nextIndex = i;
874
0
        UChar32 c;
875
0
        U16_NEXT(src, nextIndex, srcLength, c);
876
0
        uint32_t nextState = 0;
877
0
        int32_t type = ucase_getTypeOrIgnorable(c);
878
0
        if ((type & UCASE_IGNORABLE) != 0) {
879
            // c is case-ignorable
880
0
            nextState |= (state & AFTER_CASED);
881
0
        } else if (type != UCASE_NONE) {
882
            // c is cased
883
0
            nextState |= AFTER_CASED;
884
0
        }
885
0
        uint32_t data = getLetterData(c);
886
0
        if (data > 0) {
887
0
            uint32_t upper = data & UPPER_MASK;
888
            // Add a dialytika to this iota or ypsilon vowel
889
            // if we removed a tonos from the previous vowel,
890
            // and that previous vowel did not also have (or gain) a dialytika.
891
            // Adding one only to the final vowel in a longer sequence
892
            // (which does not occur in normal writing) would require lookahead.
893
            // Set the same flag as for preserving an existing dialytika.
894
0
            if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&
895
0
                    (upper == 0x399 || upper == 0x3A5)) {
896
0
                data |= HAS_DIALYTIKA;
897
0
            }
898
0
            int32_t numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.
899
0
            if ((data & HAS_YPOGEGRAMMENI) != 0) {
900
0
                numYpogegrammeni = 1;
901
0
            }
902
            // Skip combining diacritics after this Greek letter.
903
0
            while (nextIndex < srcLength) {
904
0
                uint32_t diacriticData = getDiacriticData(src[nextIndex]);
905
0
                if (diacriticData != 0) {
906
0
                    data |= diacriticData;
907
0
                    if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) {
908
0
                        ++numYpogegrammeni;
909
0
                    }
910
0
                    ++nextIndex;
911
0
                } else {
912
0
                    break;  // not a Greek diacritic
913
0
                }
914
0
            }
915
0
            if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) {
916
0
                nextState |= AFTER_VOWEL_WITH_ACCENT;
917
0
            }
918
            // Map according to Greek rules.
919
0
            UBool addTonos = FALSE;
920
0
            if (upper == 0x397 &&
921
0
                    (data & HAS_ACCENT) != 0 &&
922
0
                    numYpogegrammeni == 0 &&
923
0
                    (state & AFTER_CASED) == 0 &&
924
0
                    !isFollowedByCasedLetter(src, nextIndex, srcLength)) {
925
                // Keep disjunctive "or" with (only) a tonos.
926
                // We use the same "word boundary" conditions as for the Final_Sigma test.
927
0
                if (i == nextIndex) {
928
0
                    upper = 0x389;  // Preserve the precomposed form.
929
0
                } else {
930
0
                    addTonos = TRUE;
931
0
                }
932
0
            } else if ((data & HAS_DIALYTIKA) != 0) {
933
                // Preserve a vowel with dialytika in precomposed form if it exists.
934
0
                if (upper == 0x399) {
935
0
                    upper = 0x3AA;
936
0
                    data &= ~HAS_EITHER_DIALYTIKA;
937
0
                } else if (upper == 0x3A5) {
938
0
                    upper = 0x3AB;
939
0
                    data &= ~HAS_EITHER_DIALYTIKA;
940
0
                }
941
0
            }
942
943
0
            UBool change = TRUE;
944
0
            if (edits != NULL) {
945
                // Find out first whether we are changing the text.
946
0
                change = src[i] != upper || numYpogegrammeni > 0;
947
0
                int32_t i2 = i + 1;
948
0
                if ((data & HAS_EITHER_DIALYTIKA) != 0) {
949
0
                    change |= i2 >= nextIndex || src[i2] != 0x308;
950
0
                    ++i2;
951
0
                }
952
0
                if (addTonos) {
953
0
                    change |= i2 >= nextIndex || src[i2] != 0x301;
954
0
                    ++i2;
955
0
                }
956
0
                int32_t oldLength = nextIndex - i;
957
0
                int32_t newLength = (i2 - i) + numYpogegrammeni;
958
0
                change |= oldLength != newLength;
959
0
                if (change) {
960
0
                    if (edits != NULL) {
961
0
                        edits->addReplace(oldLength, newLength);
962
0
                    }
963
0
                } else {
964
0
                    if (edits != NULL) {
965
0
                        edits->addUnchanged(oldLength);
966
0
                    }
967
                    // Write unchanged text?
968
0
                    change = (options & UCASEMAP_OMIT_UNCHANGED_TEXT) == 0;
969
0
                }
970
0
            }
971
972
0
            if (change) {
973
0
                destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
974
0
                if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
975
0
                    destIndex=appendUChar(dest, destIndex, destCapacity, 0x308);  // restore or add a dialytika
976
0
                }
977
0
                if (destIndex >= 0 && addTonos) {
978
0
                    destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
979
0
                }
980
0
                while (destIndex >= 0 && numYpogegrammeni > 0) {
981
0
                    destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);
982
0
                    --numYpogegrammeni;
983
0
                }
984
0
                if(destIndex<0) {
985
0
                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
986
0
                    return 0;
987
0
                }
988
0
            }
989
0
        } else {
990
0
            const UChar *s;
991
0
            c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
992
0
            destIndex = appendResult(dest, destIndex, destCapacity, c, s,
993
0
                                     nextIndex - i, options, edits);
994
0
            if (destIndex < 0) {
995
0
                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
996
0
                return 0;
997
0
            }
998
0
        }
999
0
        i = nextIndex;
1000
0
        state = nextState;
1001
0
    }
1002
1003
0
    return destIndex;
1004
0
}
1005
1006
}  // namespace GreekUpper
1007
U_NAMESPACE_END
1008
1009
/* functions available in the common library (for unistr_case.cpp) */
1010
1011
/*
 * Lowercases src into dest by running _caseMap() with ucase_toFullLower over the
 * whole string, then reports overflow / Edits errors through errorCode.
 * Returns the destination index produced by _caseMap().
 */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
                         UChar *dest, int32_t destCapacity,
                         const UChar *src, int32_t srcLength,
                         icu::Edits *edits,
                         UErrorCode &errorCode) {
    // Context object handed to the mapping function; it spans the full source.
    UCaseContext context = UCASECONTEXT_INITIALIZER;
    context.p = (void *)src;
    context.limit = srcLength;

    const int32_t mappedLength = _caseMap(caseLocale, options, ucase_toFullLower,
                                          dest, destCapacity,
                                          src, &context, 0, srcLength,
                                          edits, errorCode);
    return checkOverflowAndEditsError(mappedLength, destCapacity, edits, errorCode);
}
1027
1028
/*
 * Uppercases src into dest. The Greek case locale is routed to GreekUpper::toUpper();
 * every other locale goes through _caseMap() with ucase_toFullUpper.
 * Overflow / Edits errors are reported through errorCode before returning the
 * destination index.
 */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
                         UChar *dest, int32_t destCapacity,
                         const UChar *src, int32_t srcLength,
                         icu::Edits *edits,
                         UErrorCode &errorCode) {
    if (caseLocale == UCASE_LOC_GREEK) {
        const int32_t greekLength = GreekUpper::toUpper(options, dest, destCapacity,
                                                        src, srcLength, edits, errorCode);
        return checkOverflowAndEditsError(greekLength, destCapacity, edits, errorCode);
    }

    // Generic path: context object spanning the full source.
    UCaseContext context = UCASECONTEXT_INITIALIZER;
    context.p = (void *)src;
    context.limit = srcLength;
    const int32_t mappedLength = _caseMap(caseLocale, options, ucase_toFullUpper,
                                          dest, destCapacity,
                                          src, &context, 0, srcLength,
                                          edits, errorCode);
    return checkOverflowAndEditsError(mappedLength, destCapacity, edits, errorCode);
}
1050
1051
/*
 * Case-folds src into dest one code point at a time using ucase_toFullFolding().
 * The case locale argument is ignored. If appendResult() returns a negative index,
 * errorCode is set to U_INDEX_OUTOFBOUNDS_ERROR and 0 is returned; otherwise the
 * destination index is returned after the overflow / Edits error check.
 */
U_CFUNC int32_t U_CALLCONV
ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
                      UChar *dest, int32_t destCapacity,
                      const UChar *src, int32_t srcLength,
                      icu::Edits *edits,
                      UErrorCode &errorCode) {
    int32_t destIndex = 0;
    for (int32_t srcIndex = 0; srcIndex < srcLength;) {
        const int32_t cpStart = srcIndex;
        UChar32 c;
        U16_NEXT(src, srcIndex, srcLength, c);  // advances srcIndex past the code point

        const UChar *s;
        c = ucase_toFullFolding(c, &s, options);
        destIndex = appendResult(dest, destIndex, destCapacity, c, s,
                                 srcIndex - cpStart, options, edits);
        if (destIndex < 0) {
            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
            return 0;
        }
    }

    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
}
1076
1077
/*
 * Validates the arguments, rejects overlapping source and destination buffers,
 * resets the optional Edits object, runs the supplied string case mapper and
 * terminates the result with u_terminateUChars().
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 for bad arguments or overlap;
 * returns 0 immediately if errorCode already indicates a failure.
 */
U_CFUNC int32_t
ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
             UChar *dest, int32_t destCapacity,
             const UChar *src, int32_t srcLength,
             UStringCaseMapper *stringCaseMapper,
             icu::Edits *edits,
             UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) {
        return 0;
    }

    /* check argument values */
    if (destCapacity < 0 || srcLength < -1 || src == NULL ||
            (dest == NULL && destCapacity > 0)) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* a length of -1 means the source is NUL-terminated */
    if (srcLength == -1) {
        srcLength = u_strlen(src);
    }

    /* overlapping source and destination are not supported here */
    if (dest != NULL) {
        const UBool srcStartsInDest = src >= dest && src < (dest + destCapacity);
        const UBool destStartsInSrc = dest >= src && dest < (src + srcLength);
        if (srcStartsInDest || destStartsInSrc) {
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
            return 0;
        }
    }

    if (edits != NULL) {
        edits->reset();
    }
    const int32_t destLength = stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
                                                dest, destCapacity, src, srcLength,
                                                edits, errorCode);
    return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
}
1120
1121
/*
 * Like ustrcase_map(), but tolerates overlapping source and destination buffers
 * and never records edits (the mapper is called with a NULL Edits pointer).
 *
 * When the buffers overlap, the mapping is written into a temporary buffer
 * (a 300-unit stack array, or a heap allocation for larger capacities) and
 * then copied into dest.
 *
 * Sets U_ILLEGAL_ARGUMENT_ERROR for bad arguments and U_MEMORY_ALLOCATION_ERROR
 * if the temporary buffer cannot be allocated; returns 0 in those cases and when
 * errorCode already indicates a failure on entry. Otherwise returns the result of
 * u_terminateUChars() for the mapped length.
 */
U_CFUNC int32_t
ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
                        UChar *dest, int32_t destCapacity,
                        const UChar *src, int32_t srcLength,
                        UStringCaseMapper *stringCaseMapper,
                        UErrorCode &errorCode) {
    UChar buffer[300];
    UChar *temp;

    int32_t destLength;

    /* check argument values */
    if(U_FAILURE(errorCode)) {
        return 0;
    }
    if( destCapacity<0 ||
        (dest==NULL && destCapacity>0) ||
        src==NULL ||
        srcLength<-1
    ) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* get the string length */
    if(srcLength==-1) {
        srcLength=u_strlen(src);
    }

    /* check for overlapping source and destination */
    if( dest!=NULL &&
        ((src>=dest && src<(dest+destCapacity)) ||
         (dest>=src && dest<(src+srcLength)))
    ) {
        /* overlap: provide a temporary destination buffer and later copy the result */
        if(destCapacity<=UPRV_LENGTHOF(buffer)) {
            /* the stack buffer is large enough */
            temp=buffer;
        } else {
            /*
             * allocate a buffer; widen to size_t before multiplying so that a
             * large destCapacity cannot overflow signed 32-bit arithmetic
             */
            temp=(UChar *)uprv_malloc((size_t)destCapacity*U_SIZEOF_UCHAR);
            if(temp==NULL) {
                errorCode=U_MEMORY_ALLOCATION_ERROR;
                return 0;
            }
        }
    } else {
        temp=dest;
    }

    destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
                                temp, destCapacity, src, srcLength, NULL, errorCode);
    if(temp!=dest) {
        /* copy the result string to the destination buffer, but only if it fits */
        if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) {
            u_memmove(dest, temp, destLength);
        }
        /* release the heap buffer; the stack buffer needs no cleanup */
        if(temp!=buffer) {
            uprv_free(temp);
        }
    }

    return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
}
1185
1186
/* public API functions */
1187
1188
/*
 * Public C entry point for case folding: forwards to ustrcase_mapWithOverlap()
 * with the root case locale, no break iterator, and ustrcase_internalFold as the mapper.
 * pErrorCode is dereferenced unconditionally.
 */
U_CAPI int32_t U_EXPORT2
u_strFoldCase(UChar *dest, int32_t destCapacity,
              const UChar *src, int32_t srcLength,
              uint32_t options,
              UErrorCode *pErrorCode) {
    UErrorCode &status = *pErrorCode;
    return ustrcase_mapWithOverlap(UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
                                   dest, destCapacity,
                                   src, srcLength,
                                   ustrcase_internalFold, status);
}
1199
1200
U_NAMESPACE_BEGIN
1201
1202
/*
 * Case-folds src into dest via ustrcase_map() using the root case locale,
 * no break iterator, and ustrcase_internalFold as the mapper; edits may be recorded.
 */
int32_t CaseMap::fold(
        uint32_t options,
        const UChar *src, int32_t srcLength,
        UChar *dest, int32_t destCapacity, Edits *edits,
        UErrorCode &errorCode) {
    const int32_t foldedLength = ustrcase_map(
        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
        dest, destCapacity,
        src, srcLength,
        ustrcase_internalFold, edits, errorCode);
    return foldedLength;
}
1213
1214
U_NAMESPACE_END
1215
1216
/* case-insensitive string comparisons -------------------------------------- */
1217
1218
/*
1219
 * This function is a copy of unorm_cmpEquivFold() minus the parts for
1220
 * canonical equivalence.
1221
 * Keep the functions in sync, and see there for how this works.
1222
 * The duplication is for modularization:
1223
 * It makes caseless (but not canonical caseless) matches independent of
1224
 * the normalization code.
1225
 */
1226
1227
/* stack element for previous-level source/decomposition pointers */
1228
struct CmpEquivLevel {
1229
    const UChar *start, *s, *limit;
1230
};
1231
typedef struct CmpEquivLevel CmpEquivLevel;
1232
1233
/**
1234
 * Internal implementation code comparing string with case fold.
1235
 * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch().
1236
 *
1237
 * @param s1            input string 1
1238
 * @param length1       length of string 1, or -1 (NULL terminated)
1239
 * @param s2            input string 2
1240
 * @param length2       length of string 2, or -1 (NULL terminated)
1241
 * @param options       compare options
1242
 * @param matchLen1     (output) length of partial prefix match in s1
1243
 * @param matchLen2     (output) length of partial prefix match in s2
1244
 * @param pErrorCode    receives error status
1245
 * @return The result of comparison
1246
 */
1247
static int32_t _cmpFold(
1248
            const UChar *s1, int32_t length1,
1249
            const UChar *s2, int32_t length2,
1250
            uint32_t options,
1251
            int32_t *matchLen1, int32_t *matchLen2,
1252
0
            UErrorCode *pErrorCode) {
1253
0
    int32_t cmpRes = 0;
1254
1255
    /* current-level start/limit - s1/s2 as current */
1256
0
    const UChar *start1, *start2, *limit1, *limit2;
1257
1258
    /* points to the original start address */
1259
0
    const UChar *org1, *org2;
1260
1261
    /* points to the end of match + 1 */
1262
0
    const UChar *m1, *m2;
1263
1264
    /* case folding variables */
1265
0
    const UChar *p;
1266
0
    int32_t length;
1267
1268
    /* stacks of previous-level start/current/limit */
1269
0
    CmpEquivLevel stack1[2], stack2[2];
1270
1271
    /* case folding buffers, only use current-level start/limit */
1272
0
    UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
1273
1274
    /* track which is the current level per string */
1275
0
    int32_t level1, level2;
1276
1277
    /* current code units, and code points for lookups */
1278
0
    UChar32 c1, c2, cp1, cp2;
1279
1280
    /* no argument error checking because this itself is not an API */
1281
1282
    /*
1283
     * assume that at least the option U_COMPARE_IGNORE_CASE is set
1284
     * otherwise this function would have to behave exactly as uprv_strCompare()
1285
     */
1286
0
    if(U_FAILURE(*pErrorCode)) {
1287
0
        return 0;
1288
0
    }
1289
1290
    /* initialize */
1291
0
    if(matchLen1) {
1292
0
        U_ASSERT(matchLen2 !=NULL);
1293
0
        *matchLen1=0;
1294
0
        *matchLen2=0;
1295
0
    }
1296
1297
0
    start1=m1=org1=s1;
1298
0
    if(length1==-1) {
1299
0
        limit1=NULL;
1300
0
    } else {
1301
0
        limit1=s1+length1;
1302
0
    }
1303
1304
0
    start2=m2=org2=s2;
1305
0
    if(length2==-1) {
1306
0
        limit2=NULL;
1307
0
    } else {
1308
0
        limit2=s2+length2;
1309
0
    }
1310
1311
0
    level1=level2=0;
1312
0
    c1=c2=-1;
1313
1314
    /* comparison loop */
1315
0
    for(;;) {
1316
        /*
1317
         * here a code unit value of -1 means "get another code unit"
1318
         * below it will mean "this source is finished"
1319
         */
1320
1321
0
        if(c1<0) {
1322
            /* get next code unit from string 1, post-increment */
1323
0
            for(;;) {
1324
0
                if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
1325
0
                    if(level1==0) {
1326
0
                        c1=-1;
1327
0
                        break;
1328
0
                    }
1329
0
                } else {
1330
0
                    ++s1;
1331
0
                    break;
1332
0
                }
1333
1334
                /* reached end of level buffer, pop one level */
1335
0
                do {
1336
0
                    --level1;
1337
0
                    start1=stack1[level1].start;    /*Not uninitialized*/
1338
0
                } while(start1==NULL);
1339
0
                s1=stack1[level1].s;                /*Not uninitialized*/
1340
0
                limit1=stack1[level1].limit;        /*Not uninitialized*/
1341
0
            }
1342
0
        }
1343
1344
0
        if(c2<0) {
1345
            /* get next code unit from string 2, post-increment */
1346
0
            for(;;) {
1347
0
                if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
1348
0
                    if(level2==0) {
1349
0
                        c2=-1;
1350
0
                        break;
1351
0
                    }
1352
0
                } else {
1353
0
                    ++s2;
1354
0
                    break;
1355
0
                }
1356
1357
                /* reached end of level buffer, pop one level */
1358
0
                do {
1359
0
                    --level2;
1360
0
                    start2=stack2[level2].start;    /*Not uninitialized*/
1361
0
                } while(start2==NULL);
1362
0
                s2=stack2[level2].s;                /*Not uninitialized*/
1363
0
                limit2=stack2[level2].limit;        /*Not uninitialized*/
1364
0
            }
1365
0
        }
1366
1367
        /*
1368
         * compare c1 and c2
1369
         * either variable c1, c2 is -1 only if the corresponding string is finished
1370
         */
1371
0
        if(c1==c2) {
1372
0
            const UChar *next1, *next2;
1373
1374
0
            if(c1<0) {
1375
0
                cmpRes=0;   /* c1==c2==-1 indicating end of strings */
1376
0
                break;
1377
0
            }
1378
1379
            /*
1380
             * Note: Move the match positions in both strings at the same time
1381
             *      only when corresponding code point(s) in the original strings
1382
             *      are fully consumed. For example, when comparing s1="Fust" and
1383
             *      s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches
1384
             *      the first code point in the case-folded data. But the second "s"
1385
             *      has no matching code point in s1, so this implementation returns
1386
             *      2 as the prefix match length ("Fu").
1387
             */
1388
0
            next1=next2=NULL;
1389
0
            if(level1==0) {
1390
0
                next1=s1;
1391
0
            } else if(s1==limit1) {
1392
                /* Note: This implementation only use a single level of stack.
1393
                 *      If this code needs to be changed to use multiple levels
1394
                 *      of stacks, the code above should check if the current
1395
                 *      code is at the end of all stacks.
1396
                 */
1397
0
                U_ASSERT(level1==1);
1398
1399
                /* is s1 at the end of the current stack? */
1400
0
                next1=stack1[0].s;
1401
0
            }
1402
1403
0
            if (next1!=NULL) {
1404
0
                if(level2==0) {
1405
0
                    next2=s2;
1406
0
                } else if(s2==limit2) {
1407
0
                    U_ASSERT(level2==1);
1408
1409
                    /* is s2 at the end of the current stack? */
1410
0
                    next2=stack2[0].s;
1411
0
                }
1412
0
                if(next2!=NULL) {
1413
0
                    m1=next1;
1414
0
                    m2=next2;
1415
0
                }
1416
0
            }
1417
0
            c1=c2=-1;       /* make us fetch new code units */
1418
0
            continue;
1419
0
        } else if(c1<0) {
1420
0
            cmpRes=-1;      /* string 1 ends before string 2 */
1421
0
            break;
1422
0
        } else if(c2<0) {
1423
0
            cmpRes=1;       /* string 2 ends before string 1 */
1424
0
            break;
1425
0
        }
1426
        /* c1!=c2 && c1>=0 && c2>=0 */
1427
1428
        /* get complete code points for c1, c2 for lookups if either is a surrogate */
1429
0
        cp1=c1;
1430
0
        if(U_IS_SURROGATE(c1)) {
1431
0
            UChar c;
1432
1433
0
            if(U_IS_SURROGATE_LEAD(c1)) {
1434
0
                if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
1435
                    /* advance ++s1; only below if cp1 decomposes/case-folds */
1436
0
                    cp1=U16_GET_SUPPLEMENTARY(c1, c);
1437
0
                }
1438
0
            } else /* isTrail(c1) */ {
1439
0
                if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
1440
0
                    cp1=U16_GET_SUPPLEMENTARY(c, c1);
1441
0
                }
1442
0
            }
1443
0
        }
1444
1445
0
        cp2=c2;
1446
0
        if(U_IS_SURROGATE(c2)) {
1447
0
            UChar c;
1448
1449
0
            if(U_IS_SURROGATE_LEAD(c2)) {
1450
0
                if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
1451
                    /* advance ++s2; only below if cp2 decomposes/case-folds */
1452
0
                    cp2=U16_GET_SUPPLEMENTARY(c2, c);
1453
0
                }
1454
0
            } else /* isTrail(c2) */ {
1455
0
                if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
1456
0
                    cp2=U16_GET_SUPPLEMENTARY(c, c2);
1457
0
                }
1458
0
            }
1459
0
        }
1460
1461
        /*
1462
         * go down one level for each string
1463
         * continue with the main loop as soon as there is a real change
1464
         */
1465
1466
0
        if( level1==0 &&
1467
0
            (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
1468
0
        ) {
1469
            /* cp1 case-folds to the code point "length" or to p[length] */
1470
0
            if(U_IS_SURROGATE(c1)) {
1471
0
                if(U_IS_SURROGATE_LEAD(c1)) {
1472
                    /* advance beyond source surrogate pair if it case-folds */
1473
0
                    ++s1;
1474
0
                } else /* isTrail(c1) */ {
1475
                    /*
1476
                     * we got a supplementary code point when hitting its trail surrogate,
1477
                     * therefore the lead surrogate must have been the same as in the other string;
1478
                     * compare this decomposition with the lead surrogate in the other string
1479
                     * remember that this simulates bulk text replacement:
1480
                     * the decomposition would replace the entire code point
1481
                     */
1482
0
                    --s2;
1483
0
                    --m2;
1484
0
                    c2=*(s2-1);
1485
0
                }
1486
0
            }
1487
1488
            /* push current level pointers */
1489
0
            stack1[0].start=start1;
1490
0
            stack1[0].s=s1;
1491
0
            stack1[0].limit=limit1;
1492
0
            ++level1;
1493
1494
            /* copy the folding result to fold1[] */
1495
0
            if(length<=UCASE_MAX_STRING_LENGTH) {
1496
0
                u_memcpy(fold1, p, length);
1497
0
            } else {
1498
0
                int32_t i=0;
1499
0
                U16_APPEND_UNSAFE(fold1, i, length);
1500
0
                length=i;
1501
0
            }
1502
1503
            /* set next level pointers to case folding */
1504
0
            start1=s1=fold1;
1505
0
            limit1=fold1+length;
1506
1507
            /* get ready to read from decomposition, continue with loop */
1508
0
            c1=-1;
1509
0
            continue;
1510
0
        }
1511
1512
0
        if( level2==0 &&
1513
0
            (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
1514
0
        ) {
1515
            /* cp2 case-folds to the code point "length" or to p[length] */
1516
0
            if(U_IS_SURROGATE(c2)) {
1517
0
                if(U_IS_SURROGATE_LEAD(c2)) {
1518
                    /* advance beyond source surrogate pair if it case-folds */
1519
0
                    ++s2;
1520
0
                } else /* isTrail(c2) */ {
1521
                    /*
1522
                     * we got a supplementary code point when hitting its trail surrogate,
1523
                     * therefore the lead surrogate must have been the same as in the other string;
1524
                     * compare this decomposition with the lead surrogate in the other string
1525
                     * remember that this simulates bulk text replacement:
1526
                     * the decomposition would replace the entire code point
1527
                     */
1528
0
                    --s1;
1529
0
                    --m2;
1530
0
                    c1=*(s1-1);
1531
0
                }
1532
0
            }
1533
1534
            /* push current level pointers */
1535
0
            stack2[0].start=start2;
1536
0
            stack2[0].s=s2;
1537
0
            stack2[0].limit=limit2;
1538
0
            ++level2;
1539
1540
            /* copy the folding result to fold2[] */
1541
0
            if(length<=UCASE_MAX_STRING_LENGTH) {
1542
0
                u_memcpy(fold2, p, length);
1543
0
            } else {
1544
0
                int32_t i=0;
1545
0
                U16_APPEND_UNSAFE(fold2, i, length);
1546
0
                length=i;
1547
0
            }
1548
1549
            /* set next level pointers to case folding */
1550
0
            start2=s2=fold2;
1551
0
            limit2=fold2+length;
1552
1553
            /* get ready to read from decomposition, continue with loop */
1554
0
            c2=-1;
1555
0
            continue;
1556
0
        }
1557
1558
        /*
1559
         * no decomposition/case folding, max level for both sides:
1560
         * return difference result
1561
         *
1562
         * code point order comparison must not just return cp1-cp2
1563
         * because when single surrogates are present then the surrogate pairs
1564
         * that formed cp1 and cp2 may be from different string indexes
1565
         *
1566
         * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
1567
         * c1=d800 cp1=10001 c2=dc00 cp2=10000
1568
         * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
1569
         *
1570
         * therefore, use same fix-up as in ustring.c/uprv_strCompare()
1571
         * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
1572
         * so we have slightly different pointer/start/limit comparisons here
1573
         */
1574
1575
0
        if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
1576
            /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
1577
0
            if(
1578
0
                (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
1579
0
                (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
1580
0
            ) {
1581
                /* part of a surrogate pair, leave >=d800 */
1582
0
            } else {
1583
                /* BMP code point - may be surrogate code point - make <d800 */
1584
0
                c1-=0x2800;
1585
0
            }
1586
1587
0
            if(
1588
0
                (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
1589
0
                (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
1590
0
            ) {
1591
                /* part of a surrogate pair, leave >=d800 */
1592
0
            } else {
1593
                /* BMP code point - may be surrogate code point - make <d800 */
1594
0
                c2-=0x2800;
1595
0
            }
1596
0
        }
1597
1598
0
        cmpRes=c1-c2;
1599
0
        break;
1600
0
    }
1601
1602
0
    if(matchLen1) {
1603
0
        *matchLen1=m1-org1;
1604
0
        *matchLen2=m2-org2;
1605
0
    }
1606
0
    return cmpRes;
1607
0
}
1608
1609
/* internal function */
1610
U_CFUNC int32_t
1611
u_strcmpFold(const UChar *s1, int32_t length1,
1612
             const UChar *s2, int32_t length2,
1613
             uint32_t options,
1614
0
             UErrorCode *pErrorCode) {
1615
0
    return _cmpFold(s1, length1, s2, length2, options, NULL, NULL, pErrorCode);
1616
0
}
1617
1618
/* public API functions */
1619
1620
/*
 * Public API: compare two strings with U_COMPARE_IGNORE_CASE forced on.
 *
 * Returns 0 without doing any work when pErrorCode is NULL or already
 * holds a failure code. Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 when
 * either string pointer is NULL or either length is less than -1.
 * Otherwise the comparison is delegated to u_strcmpFold().
 *
 * @param s1         first string, must not be NULL
 * @param length1    length of s1; values below -1 are rejected
 * @param s2         second string, must not be NULL
 * @param length2    length of s2; values below -1 are rejected
 * @param options    comparison option bits; U_COMPARE_IGNORE_CASE is OR-ed in
 * @param pErrorCode in/out error code
 * @return the result of u_strcmpFold(), or 0 on any of the early exits
 */
U_CAPI int32_t U_EXPORT2
u_strCaseCompare(const UChar *s1, int32_t length1,
                 const UChar *s2, int32_t length2,
                 uint32_t options,
                 UErrorCode *pErrorCode) {
    /* nothing to do without a usable, non-failed error code */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }

    /* both strings must be present and both lengths must be at least -1 */
    const bool argumentsValid=
        s1!=NULL && length1>=-1 &&
        s2!=NULL && length2>=-1;
    if(!argumentsValid) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const uint32_t foldOptions=options|U_COMPARE_IGNORE_CASE;
    return u_strcmpFold(s1, length1, s2, length2, foldOptions, pErrorCode);
}
1637
1638
/*
 * Public API: compare two strings with U_COMPARE_IGNORE_CASE forced on.
 *
 * Both lengths are passed to u_strcmpFold() as -1 (presumably meaning
 * NUL-terminated input, per the usual ICU convention - confirm against
 * _cmpFold()). The error code is local to this function and is not
 * reported to the caller.
 *
 * @param s1      first string
 * @param s2      second string
 * @param options comparison option bits; U_COMPARE_IGNORE_CASE is OR-ed in
 * @return the result of u_strcmpFold()
 */
U_CAPI int32_t U_EXPORT2
u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) {
    const uint32_t foldOptions=options|U_COMPARE_IGNORE_CASE;
    UErrorCode status=U_ZERO_ERROR;   /* discarded on return */
    return u_strcmpFold(s1, -1, s2, -1, foldOptions, &status);
}
1645
1646
/*
 * Public API: compare two buffers with U_COMPARE_IGNORE_CASE forced on.
 *
 * The same length value is passed to u_strcmpFold() for both strings.
 * The error code is local to this function and is not reported to the
 * caller.
 *
 * @param s1      first string
 * @param s2      second string
 * @param length  length used for both s1 and s2
 * @param options comparison option bits; U_COMPARE_IGNORE_CASE is OR-ed in
 * @return the result of u_strcmpFold()
 */
U_CAPI int32_t U_EXPORT2
u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) {
    const uint32_t foldOptions=options|U_COMPARE_IGNORE_CASE;
    UErrorCode status=U_ZERO_ERROR;   /* discarded on return */
    return u_strcmpFold(s1, length, s2, length, foldOptions, &status);
}
1653
1654
/*
 * Public API: compare two strings with both U_COMPARE_IGNORE_CASE and
 * _STRNCMP_STYLE forced on.
 *
 * The value n is passed to u_strcmpFold() as the length of both strings.
 * The error code is local to this function and is not reported to the
 * caller.
 *
 * @param s1      first string
 * @param s2      second string
 * @param n       length value used for both s1 and s2
 * @param options comparison option bits; U_COMPARE_IGNORE_CASE and
 *                _STRNCMP_STYLE are OR-ed in
 * @return the result of u_strcmpFold()
 */
U_CAPI int32_t U_EXPORT2
u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) {
    const uint32_t foldOptions=options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE);
    UErrorCode status=U_ZERO_ERROR;   /* discarded on return */
    return u_strcmpFold(s1, n, s2, n, foldOptions, &status);
}
1661
1662
/* internal API - detect length of shared prefix */
1663
U_CAPI void
1664
u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
1665
                             const UChar *s2, int32_t length2,
1666
                             uint32_t options,
1667
                             int32_t *matchLen1, int32_t *matchLen2,
1668
0
                             UErrorCode *pErrorCode) {
1669
0
    _cmpFold(s1, length1, s2, length2, options,
1670
0
        matchLen1, matchLen2, pErrorCode);
1671
0
}