/src/icu/source/common/ustrcase.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | *******************************************************************************  | 
5  |  | *  | 
6  |  | *   Copyright (C) 2001-2015, International Business Machines  | 
7  |  | *   Corporation and others.  All Rights Reserved.  | 
8  |  | *  | 
9  |  | *******************************************************************************  | 
10  |  | *   file name:  ustrcase.cpp  | 
11  |  | *   encoding:   UTF-8  | 
12  |  | *   tab size:   8 (not used)  | 
13  |  | *   indentation:4  | 
14  |  | *  | 
15  |  | *   created on: 2002feb20  | 
16  |  | *   created by: Markus W. Scherer  | 
17  |  | *  | 
18  |  | *   Implementation file for string casing C API functions.  | 
19  |  | *   Uses functions from uchar.c for basic functionality that requires access  | 
20  |  | *   to the Unicode Character Database (uprops.dat).  | 
21  |  | */  | 
22  |  |  | 
23  |  | #include "unicode/utypes.h"  | 
24  |  | #include "unicode/brkiter.h"  | 
25  |  | #include "unicode/casemap.h"  | 
26  |  | #include "unicode/edits.h"  | 
27  |  | #include "unicode/stringoptions.h"  | 
28  |  | #include "unicode/ustring.h"  | 
29  |  | #include "unicode/ucasemap.h"  | 
30  |  | #include "unicode/ubrk.h"  | 
31  |  | #include "unicode/utf.h"  | 
32  |  | #include "unicode/utf16.h"  | 
33  |  | #include "cmemory.h"  | 
34  |  | #include "ucase.h"  | 
35  |  | #include "ucasemap_imp.h"  | 
36  |  | #include "ustr_imp.h"  | 
37  |  | #include "uassert.h"  | 
38  |  |  | 
39  |  | U_NAMESPACE_BEGIN  | 
40  |  |  | 
41  |  | namespace { | 
42  |  |  | 
43  |  | int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,  | 
44  | 0  |                                    Edits *edits, UErrorCode &errorCode) { | 
45  | 0  |     if (U_SUCCESS(errorCode)) { | 
46  | 0  |         if (destIndex > destCapacity) { | 
47  | 0  |             errorCode = U_BUFFER_OVERFLOW_ERROR;  | 
48  | 0  |         } else if (edits != NULL) { | 
49  | 0  |             edits->copyErrorTo(errorCode);  | 
50  | 0  |         }  | 
51  | 0  |     }  | 
52  | 0  |     return destIndex;  | 
53  | 0  | }  | 
54  |  |  | 
55  |  | /* Appends a full case mapping result, see UCASE_MAX_STRING_LENGTH. */  | 
56  |  | inline int32_t  | 
57  |  | appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity,  | 
58  |  |              int32_t result, const UChar *s,  | 
59  | 0  |              int32_t cpLength, uint32_t options, icu::Edits *edits) { | 
60  | 0  |     UChar32 c;  | 
61  | 0  |     int32_t length;  | 
62  |  |  | 
63  |  |     /* decode the result */  | 
64  | 0  |     if(result<0) { | 
65  |  |         /* (not) original code point */  | 
66  | 0  |         if(edits!=NULL) { | 
67  | 0  |             edits->addUnchanged(cpLength);  | 
68  | 0  |         }  | 
69  | 0  |         if(options & U_OMIT_UNCHANGED_TEXT) { | 
70  | 0  |             return destIndex;  | 
71  | 0  |         }  | 
72  | 0  |         c=~result;  | 
73  | 0  |         if(destIndex<destCapacity && c<=0xffff) {  // BMP slightly-fastpath | 
74  | 0  |             dest[destIndex++]=(UChar)c;  | 
75  | 0  |             return destIndex;  | 
76  | 0  |         }  | 
77  | 0  |         length=cpLength;  | 
78  | 0  |     } else { | 
79  | 0  |         if(result<=UCASE_MAX_STRING_LENGTH) { | 
80  | 0  |             c=U_SENTINEL;  | 
81  | 0  |             length=result;  | 
82  | 0  |         } else if(destIndex<destCapacity && result<=0xffff) {  // BMP slightly-fastpath | 
83  | 0  |             dest[destIndex++]=(UChar)result;  | 
84  | 0  |             if(edits!=NULL) { | 
85  | 0  |                 edits->addReplace(cpLength, 1);  | 
86  | 0  |             }  | 
87  | 0  |             return destIndex;  | 
88  | 0  |         } else { | 
89  | 0  |             c=result;  | 
90  | 0  |             length=U16_LENGTH(c);  | 
91  | 0  |         }  | 
92  | 0  |         if(edits!=NULL) { | 
93  | 0  |             edits->addReplace(cpLength, length);  | 
94  | 0  |         }  | 
95  | 0  |     }  | 
96  | 0  |     if(length>(INT32_MAX-destIndex)) { | 
97  | 0  |         return -1;  // integer overflow  | 
98  | 0  |     }  | 
99  |  |  | 
100  | 0  |     if(destIndex<destCapacity) { | 
101  |  |         /* append the result */  | 
102  | 0  |         if(c>=0) { | 
103  |  |             /* code point */  | 
104  | 0  |             UBool isError=FALSE;  | 
105  | 0  |             U16_APPEND(dest, destIndex, destCapacity, c, isError);  | 
106  | 0  |             if(isError) { | 
107  |  |                 /* overflow, nothing written */  | 
108  | 0  |                 destIndex+=length;  | 
109  | 0  |             }  | 
110  | 0  |         } else { | 
111  |  |             /* string */  | 
112  | 0  |             if((destIndex+length)<=destCapacity) { | 
113  | 0  |                 while(length>0) { | 
114  | 0  |                     dest[destIndex++]=*s++;  | 
115  | 0  |                     --length;  | 
116  | 0  |                 }  | 
117  | 0  |             } else { | 
118  |  |                 /* overflow */  | 
119  | 0  |                 destIndex+=length;  | 
120  | 0  |             }  | 
121  | 0  |         }  | 
122  | 0  |     } else { | 
123  |  |         /* preflight */  | 
124  | 0  |         destIndex+=length;  | 
125  | 0  |     }  | 
126  | 0  |     return destIndex;  | 
127  | 0  | }  | 
128  |  |  | 
129  |  | inline int32_t  | 
130  | 0  | appendUChar(UChar *dest, int32_t destIndex, int32_t destCapacity, UChar c) { | 
131  | 0  |     if(destIndex<destCapacity) { | 
132  | 0  |         dest[destIndex]=c;  | 
133  | 0  |     } else if(destIndex==INT32_MAX) { | 
134  | 0  |         return -1;  // integer overflow  | 
135  | 0  |     }  | 
136  | 0  |     return destIndex+1;  | 
137  | 0  | }  | 
138  |  |  | 
139  |  | int32_t  | 
140  |  | appendNonEmptyUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,  | 
141  | 0  |                         const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) { | 
142  | 0  |     if(edits!=NULL) { | 
143  | 0  |         edits->addUnchanged(length);  | 
144  | 0  |     }  | 
145  | 0  |     if(options & U_OMIT_UNCHANGED_TEXT) { | 
146  | 0  |         return destIndex;  | 
147  | 0  |     }  | 
148  | 0  |     if(length>(INT32_MAX-destIndex)) { | 
149  | 0  |         return -1;  // integer overflow  | 
150  | 0  |     }  | 
151  | 0  |     if((destIndex+length)<=destCapacity) { | 
152  | 0  |         u_memcpy(dest+destIndex, s, length);  | 
153  | 0  |     }  | 
154  | 0  |     return destIndex + length;  | 
155  | 0  | }  | 
156  |  |  | 
157  |  | inline int32_t  | 
158  |  | appendUnchanged(UChar *dest, int32_t destIndex, int32_t destCapacity,  | 
159  | 0  |                 const UChar *s, int32_t length, uint32_t options, icu::Edits *edits) { | 
160  | 0  |     if (length <= 0) { | 
161  | 0  |         return destIndex;  | 
162  | 0  |     }  | 
163  | 0  |     return appendNonEmptyUnchanged(dest, destIndex, destCapacity, s, length, options, edits);  | 
164  | 0  | }  | 
165  |  |  | 
166  |  | UChar32 U_CALLCONV  | 
167  | 0  | utf16_caseContextIterator(void *context, int8_t dir) { | 
168  | 0  |     UCaseContext *csc=(UCaseContext *)context;  | 
169  | 0  |     UChar32 c;  | 
170  |  | 
  | 
171  | 0  |     if(dir<0) { | 
172  |  |         /* reset for backward iteration */  | 
173  | 0  |         csc->index=csc->cpStart;  | 
174  | 0  |         csc->dir=dir;  | 
175  | 0  |     } else if(dir>0) { | 
176  |  |         /* reset for forward iteration */  | 
177  | 0  |         csc->index=csc->cpLimit;  | 
178  | 0  |         csc->dir=dir;  | 
179  | 0  |     } else { | 
180  |  |         /* continue current iteration direction */  | 
181  | 0  |         dir=csc->dir;  | 
182  | 0  |     }  | 
183  |  | 
  | 
184  | 0  |     if(dir<0) { | 
185  | 0  |         if(csc->start<csc->index) { | 
186  | 0  |             U16_PREV((const UChar *)csc->p, csc->start, csc->index, c);  | 
187  | 0  |             return c;  | 
188  | 0  |         }  | 
189  | 0  |     } else { | 
190  | 0  |         if(csc->index<csc->limit) { | 
191  | 0  |             U16_NEXT((const UChar *)csc->p, csc->index, csc->limit, c);  | 
192  | 0  |             return c;  | 
193  | 0  |         }  | 
194  | 0  |     }  | 
195  | 0  |     return U_SENTINEL;  | 
196  | 0  | }  | 
197  |  |  | 
198  |  | /**  | 
199  |  |  * caseLocale >= 0: Lowercases [srcStart..srcLimit[ but takes context [0..srcLength[ into account.  | 
200  |  |  * caseLocale < 0: Case-folds [srcStart..srcLimit[.  | 
201  |  |  */  | 
202  |  | int32_t toLower(int32_t caseLocale, uint32_t options,  | 
203  |  |                 UChar *dest, int32_t destCapacity,  | 
204  |  |                 const UChar *src, UCaseContext *csc, int32_t srcStart, int32_t srcLimit,  | 
205  | 0  |                 icu::Edits *edits, UErrorCode &errorCode) { | 
206  | 0  |     const int8_t *latinToLower;  | 
207  | 0  |     if (caseLocale == UCASE_LOC_ROOT ||  | 
208  | 0  |             (caseLocale >= 0 ?  | 
209  | 0  |                 !(caseLocale == UCASE_LOC_TURKISH || caseLocale == UCASE_LOC_LITHUANIAN) :  | 
210  | 0  |                 (options & _FOLD_CASE_OPTIONS_MASK) == U_FOLD_CASE_DEFAULT)) { | 
211  | 0  |         latinToLower = LatinCase::TO_LOWER_NORMAL;  | 
212  | 0  |     } else { | 
213  | 0  |         latinToLower = LatinCase::TO_LOWER_TR_LT;  | 
214  | 0  |     }  | 
215  | 0  |     const UTrie2 *trie = ucase_getTrie();  | 
216  | 0  |     int32_t destIndex = 0;  | 
217  | 0  |     int32_t prev = srcStart;  | 
218  | 0  |     int32_t srcIndex = srcStart;  | 
219  | 0  |     for (;;) { | 
220  |  |         // fast path for simple cases  | 
221  | 0  |         UChar lead = 0;  | 
222  | 0  |         while (srcIndex < srcLimit) { | 
223  | 0  |             lead = src[srcIndex];  | 
224  | 0  |             int32_t delta;  | 
225  | 0  |             if (lead < LatinCase::LONG_S) { | 
226  | 0  |                 int8_t d = latinToLower[lead];  | 
227  | 0  |                 if (d == LatinCase::EXC) { break; } | 
228  | 0  |                 ++srcIndex;  | 
229  | 0  |                 if (d == 0) { continue; } | 
230  | 0  |                 delta = d;  | 
231  | 0  |             } else if (lead >= 0xd800) { | 
232  | 0  |                 break;  // surrogate or higher  | 
233  | 0  |             } else { | 
234  | 0  |                 uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);  | 
235  | 0  |                 if (UCASE_HAS_EXCEPTION(props)) { break; } | 
236  | 0  |                 ++srcIndex;  | 
237  | 0  |                 if (!UCASE_IS_UPPER_OR_TITLE(props) || (delta = UCASE_GET_DELTA(props)) == 0) { | 
238  | 0  |                     continue;  | 
239  | 0  |                 }  | 
240  | 0  |             }  | 
241  | 0  |             lead += static_cast<UChar>(delta);  | 
242  | 0  |             destIndex = appendUnchanged(dest, destIndex, destCapacity,  | 
243  | 0  |                                         src + prev, srcIndex - 1 - prev, options, edits);  | 
244  | 0  |             if (destIndex >= 0) { | 
245  | 0  |                 destIndex = appendUChar(dest, destIndex, destCapacity, lead);  | 
246  | 0  |                 if (edits != nullptr) { | 
247  | 0  |                     edits->addReplace(1, 1);  | 
248  | 0  |                 }  | 
249  | 0  |             }  | 
250  | 0  |             if (destIndex < 0) { | 
251  | 0  |                 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;  | 
252  | 0  |                 return 0;  | 
253  | 0  |             }  | 
254  | 0  |             prev = srcIndex;  | 
255  | 0  |         }  | 
256  | 0  |         if (srcIndex >= srcLimit) { | 
257  | 0  |             break;  | 
258  | 0  |         }  | 
259  |  |         // slow path  | 
260  | 0  |         int32_t cpStart = srcIndex++;  | 
261  | 0  |         UChar trail;  | 
262  | 0  |         UChar32 c;  | 
263  | 0  |         if (U16_IS_LEAD(lead) && srcIndex < srcLimit && U16_IS_TRAIL(trail = src[srcIndex])) { | 
264  | 0  |             c = U16_GET_SUPPLEMENTARY(lead, trail);  | 
265  | 0  |             ++srcIndex;  | 
266  | 0  |         } else { | 
267  | 0  |             c = lead;  | 
268  | 0  |         }  | 
269  | 0  |         const UChar *s;  | 
270  | 0  |         if (caseLocale >= 0) { | 
271  | 0  |             csc->cpStart = cpStart;  | 
272  | 0  |             csc->cpLimit = srcIndex;  | 
273  | 0  |             c = ucase_toFullLower(c, utf16_caseContextIterator, csc, &s, caseLocale);  | 
274  | 0  |         } else { | 
275  | 0  |             c = ucase_toFullFolding(c, &s, options);  | 
276  | 0  |         }  | 
277  | 0  |         if (c >= 0) { | 
278  | 0  |             destIndex = appendUnchanged(dest, destIndex, destCapacity,  | 
279  | 0  |                                         src + prev, cpStart - prev, options, edits);  | 
280  | 0  |             if (destIndex >= 0) { | 
281  | 0  |                 destIndex = appendResult(dest, destIndex, destCapacity, c, s,  | 
282  | 0  |                                          srcIndex - cpStart, options, edits);  | 
283  | 0  |             }  | 
284  | 0  |             if (destIndex < 0) { | 
285  | 0  |                 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;  | 
286  | 0  |                 return 0;  | 
287  | 0  |             }  | 
288  | 0  |             prev = srcIndex;  | 
289  | 0  |         }  | 
290  | 0  |     }  | 
291  | 0  |     destIndex = appendUnchanged(dest, destIndex, destCapacity,  | 
292  | 0  |                                 src + prev, srcIndex - prev, options, edits);  | 
293  | 0  |     if (destIndex < 0) { | 
294  | 0  |         errorCode = U_INDEX_OUTOFBOUNDS_ERROR;  | 
295  | 0  |         return 0;  | 
296  | 0  |     }  | 
297  | 0  |     return destIndex;  | 
298  | 0  | }  | 
299  |  |  | 
300  |  | int32_t toUpper(int32_t caseLocale, uint32_t options,  | 
301  |  |                 UChar *dest, int32_t destCapacity,  | 
302  |  |                 const UChar *src, UCaseContext *csc, int32_t srcLength,  | 
303  | 0  |                 icu::Edits *edits, UErrorCode &errorCode) { | 
304  | 0  |     const int8_t *latinToUpper;  | 
305  | 0  |     if (caseLocale == UCASE_LOC_TURKISH) { | 
306  | 0  |         latinToUpper = LatinCase::TO_UPPER_TR;  | 
307  | 0  |     } else { | 
308  | 0  |         latinToUpper = LatinCase::TO_UPPER_NORMAL;  | 
309  | 0  |     }  | 
310  | 0  |     const UTrie2 *trie = ucase_getTrie();  | 
311  | 0  |     int32_t destIndex = 0;  | 
312  | 0  |     int32_t prev = 0;  | 
313  | 0  |     int32_t srcIndex = 0;  | 
314  | 0  |     for (;;) { | 
315  |  |         // fast path for simple cases  | 
316  | 0  |         UChar lead = 0;  | 
317  | 0  |         while (srcIndex < srcLength) { | 
318  | 0  |             lead = src[srcIndex];  | 
319  | 0  |             int32_t delta;  | 
320  | 0  |             if (lead < LatinCase::LONG_S) { | 
321  | 0  |                 int8_t d = latinToUpper[lead];  | 
322  | 0  |                 if (d == LatinCase::EXC) { break; } | 
323  | 0  |                 ++srcIndex;  | 
324  | 0  |                 if (d == 0) { continue; } | 
325  | 0  |                 delta = d;  | 
326  | 0  |             } else if (lead >= 0xd800) { | 
327  | 0  |                 break;  // surrogate or higher  | 
328  | 0  |             } else { | 
329  | 0  |                 uint16_t props = UTRIE2_GET16_FROM_U16_SINGLE_LEAD(trie, lead);  | 
330  | 0  |                 if (UCASE_HAS_EXCEPTION(props)) { break; } | 
331  | 0  |                 ++srcIndex;  | 
332  | 0  |                 if (UCASE_GET_TYPE(props) != UCASE_LOWER || (delta = UCASE_GET_DELTA(props)) == 0) { | 
333  | 0  |                     continue;  | 
334  | 0  |                 }  | 
335  | 0  |             }  | 
336  | 0  |             lead += static_cast<UChar>(delta);  | 
337  | 0  |             destIndex = appendUnchanged(dest, destIndex, destCapacity,  | 
338  | 0  |                                         src + prev, srcIndex - 1 - prev, options, edits);  | 
339  | 0  |             if (destIndex >= 0) { | 
340  | 0  |                 destIndex = appendUChar(dest, destIndex, destCapacity, lead);  | 
341  | 0  |                 if (edits != nullptr) { | 
342  | 0  |                     edits->addReplace(1, 1);  | 
343  | 0  |                 }  | 
344  | 0  |             }  | 
345  | 0  |             if (destIndex < 0) { | 
346  | 0  |                 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;  | 
347  | 0  |                 return 0;  | 
348  | 0  |             }  | 
349  | 0  |             prev = srcIndex;  | 
350  | 0  |         }  | 
351  | 0  |         if (srcIndex >= srcLength) { | 
352  | 0  |             break;  | 
353  | 0  |         }  | 
354  |  |         // slow path  | 
355  | 0  |         int32_t cpStart;  | 
356  | 0  |         csc->cpStart = cpStart = srcIndex++;  | 
357  | 0  |         UChar trail;  | 
358  | 0  |         UChar32 c;  | 
359  | 0  |         if (U16_IS_LEAD(lead) && srcIndex < srcLength && U16_IS_TRAIL(trail = src[srcIndex])) { | 
360  | 0  |             c = U16_GET_SUPPLEMENTARY(lead, trail);  | 
361  | 0  |             ++srcIndex;  | 
362  | 0  |         } else { | 
363  | 0  |             c = lead;  | 
364  | 0  |         }  | 
365  | 0  |         csc->cpLimit = srcIndex;  | 
366  | 0  |         const UChar *s;  | 
367  | 0  |         c = ucase_toFullUpper(c, utf16_caseContextIterator, csc, &s, caseLocale);  | 
368  | 0  |         if (c >= 0) { | 
369  | 0  |             destIndex = appendUnchanged(dest, destIndex, destCapacity,  | 
370  | 0  |                                         src + prev, cpStart - prev, options, edits);  | 
371  | 0  |             if (destIndex >= 0) { | 
372  | 0  |                 destIndex = appendResult(dest, destIndex, destCapacity, c, s,  | 
373  | 0  |                                          srcIndex - cpStart, options, edits);  | 
374  | 0  |             }  | 
375  | 0  |             if (destIndex < 0) { | 
376  | 0  |                 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;  | 
377  | 0  |                 return 0;  | 
378  | 0  |             }  | 
379  | 0  |             prev = srcIndex;  | 
380  | 0  |         }  | 
381  | 0  |     }  | 
382  | 0  |     destIndex = appendUnchanged(dest, destIndex, destCapacity,  | 
383  | 0  |                                 src + prev, srcIndex - prev, options, edits);  | 
384  | 0  |     if (destIndex < 0) { | 
385  | 0  |         errorCode = U_INDEX_OUTOFBOUNDS_ERROR;  | 
386  | 0  |         return 0;  | 
387  | 0  |     }  | 
388  | 0  |     return destIndex;  | 
389  | 0  | }  | 
390  |  |  | 
391  |  | }  // namespace  | 
392  |  |  | 
393  |  | U_NAMESPACE_END  | 
394  |  |  | 
395  |  | U_NAMESPACE_USE  | 
396  |  |  | 
397  |  | #if !UCONFIG_NO_BREAK_ITERATION  | 
398  |  |  | 
399  |  | U_CFUNC int32_t U_CALLCONV  | 
400  |  | ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *iter,  | 
401  |  |                          UChar *dest, int32_t destCapacity,  | 
402  |  |                          const UChar *src, int32_t srcLength,  | 
403  |  |                          icu::Edits *edits,  | 
404  | 0  |                          UErrorCode &errorCode) { | 
405  | 0  |     if (!ustrcase_checkTitleAdjustmentOptions(options, errorCode)) { | 
406  | 0  |         return 0;  | 
407  | 0  |     }  | 
408  |  |  | 
409  |  |     /* set up local variables */  | 
410  | 0  |     UCaseContext csc=UCASECONTEXT_INITIALIZER;  | 
411  | 0  |     csc.p=(void *)src;  | 
412  | 0  |     csc.limit=srcLength;  | 
413  | 0  |     int32_t destIndex=0;  | 
414  | 0  |     int32_t prev=0;  | 
415  | 0  |     UBool isFirstIndex=TRUE;  | 
416  |  |  | 
417  |  |     /* titlecasing loop */  | 
418  | 0  |     while(prev<srcLength) { | 
419  |  |         /* find next index where to titlecase */  | 
420  | 0  |         int32_t index;  | 
421  | 0  |         if(isFirstIndex) { | 
422  | 0  |             isFirstIndex=FALSE;  | 
423  | 0  |             index=iter->first();  | 
424  | 0  |         } else { | 
425  | 0  |             index=iter->next();  | 
426  | 0  |         }  | 
427  | 0  |         if(index==UBRK_DONE || index>srcLength) { | 
428  | 0  |             index=srcLength;  | 
429  | 0  |         }  | 
430  |  |  | 
431  |  |         /*  | 
432  |  |          * Segment [prev..index[ into 3 parts:  | 
433  |  |          * a) skipped characters (copy as-is) [prev..titleStart[  | 
434  |  |          * b) first letter (titlecase)              [titleStart..titleLimit[  | 
435  |  |          * c) subsequent characters (lowercase)                 [titleLimit..index[  | 
436  |  |          */  | 
437  | 0  |         if(prev<index) { | 
438  |  |             // Find and copy skipped characters [prev..titleStart[  | 
439  | 0  |             int32_t titleStart=prev;  | 
440  | 0  |             int32_t titleLimit=prev;  | 
441  | 0  |             UChar32 c;  | 
442  | 0  |             U16_NEXT(src, titleLimit, index, c);  | 
443  | 0  |             if ((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0) { | 
444  |  |                 // Adjust the titlecasing index to the next cased character,  | 
445  |  |                 // or to the next letter/number/symbol/private use.  | 
446  |  |                 // Stop with titleStart<titleLimit<=index  | 
447  |  |                 // if there is a character to be titlecased,  | 
448  |  |                 // or else stop with titleStart==titleLimit==index.  | 
449  | 0  |                 UBool toCased = (options&U_TITLECASE_ADJUST_TO_CASED) != 0;  | 
450  | 0  |                 while (toCased ? UCASE_NONE==ucase_getType(c) : !ustrcase_isLNS(c)) { | 
451  | 0  |                     titleStart=titleLimit;  | 
452  | 0  |                     if(titleLimit==index) { | 
453  | 0  |                         break;  | 
454  | 0  |                     }  | 
455  | 0  |                     U16_NEXT(src, titleLimit, index, c);  | 
456  | 0  |                 }  | 
457  | 0  |                 if (prev < titleStart) { | 
458  | 0  |                     destIndex=appendUnchanged(dest, destIndex, destCapacity,  | 
459  | 0  |                                               src+prev, titleStart-prev, options, edits);  | 
460  | 0  |                     if(destIndex<0) { | 
461  | 0  |                         errorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
462  | 0  |                         return 0;  | 
463  | 0  |                     }  | 
464  | 0  |                 }  | 
465  | 0  |             }  | 
466  |  |  | 
467  | 0  |             if(titleStart<titleLimit) { | 
468  |  |                 /* titlecase c which is from [titleStart..titleLimit[ */  | 
469  | 0  |                 csc.cpStart=titleStart;  | 
470  | 0  |                 csc.cpLimit=titleLimit;  | 
471  | 0  |                 const UChar *s;  | 
472  | 0  |                 c=ucase_toFullTitle(c, utf16_caseContextIterator, &csc, &s, caseLocale);  | 
473  | 0  |                 destIndex=appendResult(dest, destIndex, destCapacity, c, s,  | 
474  | 0  |                                        titleLimit-titleStart, options, edits);  | 
475  | 0  |                 if(destIndex<0) { | 
476  | 0  |                     errorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
477  | 0  |                     return 0;  | 
478  | 0  |                 }  | 
479  |  |  | 
480  |  |                 /* Special case Dutch IJ titlecasing */  | 
481  | 0  |                 if (titleStart+1 < index &&  | 
482  | 0  |                         caseLocale == UCASE_LOC_DUTCH &&  | 
483  | 0  |                         (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) { | 
484  | 0  |                     if (src[titleStart+1] == 0x006A) { | 
485  | 0  |                         destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);  | 
486  | 0  |                         if(destIndex<0) { | 
487  | 0  |                             errorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
488  | 0  |                             return 0;  | 
489  | 0  |                         }  | 
490  | 0  |                         if(edits!=NULL) { | 
491  | 0  |                             edits->addReplace(1, 1);  | 
492  | 0  |                         }  | 
493  | 0  |                         titleLimit++;  | 
494  | 0  |                     } else if (src[titleStart+1] == 0x004A) { | 
495  |  |                         // Keep the capital J from getting lowercased.  | 
496  | 0  |                         destIndex=appendUnchanged(dest, destIndex, destCapacity,  | 
497  | 0  |                                                   src+titleStart+1, 1, options, edits);  | 
498  | 0  |                         if(destIndex<0) { | 
499  | 0  |                             errorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
500  | 0  |                             return 0;  | 
501  | 0  |                         }  | 
502  | 0  |                         titleLimit++;  | 
503  | 0  |                     }  | 
504  | 0  |                 }  | 
505  |  |  | 
506  |  |                 /* lowercase [titleLimit..index[ */  | 
507  | 0  |                 if(titleLimit<index) { | 
508  | 0  |                     if((options&U_TITLECASE_NO_LOWERCASE)==0) { | 
509  |  |                         /* Normal operation: Lowercase the rest of the word. */  | 
510  | 0  |                         destIndex+=  | 
511  | 0  |                             toLower(  | 
512  | 0  |                                 caseLocale, options,  | 
513  | 0  |                                 dest+destIndex, destCapacity-destIndex,  | 
514  | 0  |                                 src, &csc, titleLimit, index,  | 
515  | 0  |                                 edits, errorCode);  | 
516  | 0  |                         if(errorCode==U_BUFFER_OVERFLOW_ERROR) { | 
517  | 0  |                             errorCode=U_ZERO_ERROR;  | 
518  | 0  |                         }  | 
519  | 0  |                         if(U_FAILURE(errorCode)) { | 
520  | 0  |                             return destIndex;  | 
521  | 0  |                         }  | 
522  | 0  |                     } else { | 
523  |  |                         /* Optionally just copy the rest of the word unchanged. */  | 
524  | 0  |                         destIndex=appendUnchanged(dest, destIndex, destCapacity,  | 
525  | 0  |                                                   src+titleLimit, index-titleLimit, options, edits);  | 
526  | 0  |                         if(destIndex<0) { | 
527  | 0  |                             errorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
528  | 0  |                             return 0;  | 
529  | 0  |                         }  | 
530  | 0  |                     }  | 
531  | 0  |                 }  | 
532  | 0  |             }  | 
533  | 0  |         }  | 
534  |  |  | 
535  | 0  |         prev=index;  | 
536  | 0  |     }  | 
537  |  |  | 
538  | 0  |     return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);  | 
539  | 0  | }  | 
540  |  |  | 
541  |  | #endif  // !UCONFIG_NO_BREAK_ITERATION  | 
542  |  |  | 
543  |  | U_NAMESPACE_BEGIN  | 
544  |  | namespace GreekUpper { | 
545  |  |  | 
546  |  | // Data generated by prototype code, see  | 
547  |  | // http://site.icu-project.org/design/case/greek-upper  | 
548  |  | // TODO: Move this data into ucase.icu.  | 
549  |  | static const uint16_t data0370[] = { | 
550  |  |     // U+0370..03FF  | 
551  |  |     0x0370,  | 
552  |  |     0x0370,  | 
553  |  |     0x0372,  | 
554  |  |     0x0372,  | 
555  |  |     0,  | 
556  |  |     0,  | 
557  |  |     0x0376,  | 
558  |  |     0x0376,  | 
559  |  |     0,  | 
560  |  |     0,  | 
561  |  |     0x037A,  | 
562  |  |     0x03FD,  | 
563  |  |     0x03FE,  | 
564  |  |     0x03FF,  | 
565  |  |     0,  | 
566  |  |     0x037F,  | 
567  |  |     0,  | 
568  |  |     0,  | 
569  |  |     0,  | 
570  |  |     0,  | 
571  |  |     0,  | 
572  |  |     0,  | 
573  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
574  |  |     0,  | 
575  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
576  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
577  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
578  |  |     0,  | 
579  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
580  |  |     0,  | 
581  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
582  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
583  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  | 
584  |  |     0x0391 | HAS_VOWEL,  | 
585  |  |     0x0392,  | 
586  |  |     0x0393,  | 
587  |  |     0x0394,  | 
588  |  |     0x0395 | HAS_VOWEL,  | 
589  |  |     0x0396,  | 
590  |  |     0x0397 | HAS_VOWEL,  | 
591  |  |     0x0398,  | 
592  |  |     0x0399 | HAS_VOWEL,  | 
593  |  |     0x039A,  | 
594  |  |     0x039B,  | 
595  |  |     0x039C,  | 
596  |  |     0x039D,  | 
597  |  |     0x039E,  | 
598  |  |     0x039F | HAS_VOWEL,  | 
599  |  |     0x03A0,  | 
600  |  |     0x03A1,  | 
601  |  |     0,  | 
602  |  |     0x03A3,  | 
603  |  |     0x03A4,  | 
604  |  |     0x03A5 | HAS_VOWEL,  | 
605  |  |     0x03A6,  | 
606  |  |     0x03A7,  | 
607  |  |     0x03A8,  | 
608  |  |     0x03A9 | HAS_VOWEL,  | 
609  |  |     0x0399 | HAS_VOWEL | HAS_DIALYTIKA,  | 
610  |  |     0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,  | 
611  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
612  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
613  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
614  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
615  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  | 
616  |  |     0x0391 | HAS_VOWEL,  | 
617  |  |     0x0392,  | 
618  |  |     0x0393,  | 
619  |  |     0x0394,  | 
620  |  |     0x0395 | HAS_VOWEL,  | 
621  |  |     0x0396,  | 
622  |  |     0x0397 | HAS_VOWEL,  | 
623  |  |     0x0398,  | 
624  |  |     0x0399 | HAS_VOWEL,  | 
625  |  |     0x039A,  | 
626  |  |     0x039B,  | 
627  |  |     0x039C,  | 
628  |  |     0x039D,  | 
629  |  |     0x039E,  | 
630  |  |     0x039F | HAS_VOWEL,  | 
631  |  |     0x03A0,  | 
632  |  |     0x03A1,  | 
633  |  |     0x03A3,  | 
634  |  |     0x03A3,  | 
635  |  |     0x03A4,  | 
636  |  |     0x03A5 | HAS_VOWEL,  | 
637  |  |     0x03A6,  | 
638  |  |     0x03A7,  | 
639  |  |     0x03A8,  | 
640  |  |     0x03A9 | HAS_VOWEL,  | 
641  |  |     0x0399 | HAS_VOWEL | HAS_DIALYTIKA,  | 
642  |  |     0x03A5 | HAS_VOWEL | HAS_DIALYTIKA,  | 
643  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
644  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
645  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
646  |  |     0x03CF,  | 
647  |  |     0x0392,  | 
648  |  |     0x0398,  | 
649  |  |     0x03D2,  | 
650  |  |     0x03D2 | HAS_ACCENT,  | 
651  |  |     0x03D2 | HAS_DIALYTIKA,  | 
652  |  |     0x03A6,  | 
653  |  |     0x03A0,  | 
654  |  |     0x03CF,  | 
655  |  |     0x03D8,  | 
656  |  |     0x03D8,  | 
657  |  |     0x03DA,  | 
658  |  |     0x03DA,  | 
659  |  |     0x03DC,  | 
660  |  |     0x03DC,  | 
661  |  |     0x03DE,  | 
662  |  |     0x03DE,  | 
663  |  |     0x03E0,  | 
664  |  |     0x03E0,  | 
665  |  |     0,  | 
666  |  |     0,  | 
667  |  |     0,  | 
668  |  |     0,  | 
669  |  |     0,  | 
670  |  |     0,  | 
671  |  |     0,  | 
672  |  |     0,  | 
673  |  |     0,  | 
674  |  |     0,  | 
675  |  |     0,  | 
676  |  |     0,  | 
677  |  |     0,  | 
678  |  |     0,  | 
679  |  |     0x039A,  | 
680  |  |     0x03A1,  | 
681  |  |     0x03F9,  | 
682  |  |     0x037F,  | 
683  |  |     0x03F4,  | 
684  |  |     0x0395 | HAS_VOWEL,  | 
685  |  |     0,  | 
686  |  |     0x03F7,  | 
687  |  |     0x03F7,  | 
688  |  |     0x03F9,  | 
689  |  |     0x03FA,  | 
690  |  |     0x03FA,  | 
691  |  |     0x03FC,  | 
692  |  |     0x03FD,  | 
693  |  |     0x03FE,  | 
694  |  |     0x03FF,  | 
695  |  | };  | 
696  |  |  | 
// Letter data for the Greek Extended block, indexed by (c - 0x1F00); see getLetterData().
// Each nonzero entry is an uppercase base letter OR-ed with HAS_* flags describing the
// precomposed character; toUpper() extracts the letter with UPPER_MASK.
// A 0 entry means "no letter data": toUpper() sends such code points through the generic path.
697  |  | static const uint16_t data1F00[] = { | 
698  |  |     // U+1F00..1FFF  | 
    // U+1F00..1F0F
699  |  |     0x0391 | HAS_VOWEL,  | 
700  |  |     0x0391 | HAS_VOWEL,  | 
701  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
702  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
703  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
704  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
705  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
706  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
707  |  |     0x0391 | HAS_VOWEL,  | 
708  |  |     0x0391 | HAS_VOWEL,  | 
709  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
710  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
711  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
712  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
713  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
714  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
    // U+1F10..1F1F
715  |  |     0x0395 | HAS_VOWEL,  | 
716  |  |     0x0395 | HAS_VOWEL,  | 
717  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
718  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
719  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
720  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
721  |  |     0,  | 
722  |  |     0,  | 
723  |  |     0x0395 | HAS_VOWEL,  | 
724  |  |     0x0395 | HAS_VOWEL,  | 
725  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
726  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
727  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
728  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
729  |  |     0,  | 
730  |  |     0,  | 
    // U+1F20..1F2F
731  |  |     0x0397 | HAS_VOWEL,  | 
732  |  |     0x0397 | HAS_VOWEL,  | 
733  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
734  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
735  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
736  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
737  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
738  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
739  |  |     0x0397 | HAS_VOWEL,  | 
740  |  |     0x0397 | HAS_VOWEL,  | 
741  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
742  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
743  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
744  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
745  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
746  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
    // U+1F30..1F3F
747  |  |     0x0399 | HAS_VOWEL,  | 
748  |  |     0x0399 | HAS_VOWEL,  | 
749  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
750  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
751  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
752  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
753  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
754  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
755  |  |     0x0399 | HAS_VOWEL,  | 
756  |  |     0x0399 | HAS_VOWEL,  | 
757  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
758  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
759  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
760  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
761  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
762  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
    // U+1F40..1F4F
763  |  |     0x039F | HAS_VOWEL,  | 
764  |  |     0x039F | HAS_VOWEL,  | 
765  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
766  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
767  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
768  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
769  |  |     0,  | 
770  |  |     0,  | 
771  |  |     0x039F | HAS_VOWEL,  | 
772  |  |     0x039F | HAS_VOWEL,  | 
773  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
774  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
775  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
776  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
777  |  |     0,  | 
778  |  |     0,  | 
    // U+1F50..1F5F
779  |  |     0x03A5 | HAS_VOWEL,  | 
780  |  |     0x03A5 | HAS_VOWEL,  | 
781  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
782  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
783  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
784  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
785  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
786  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
787  |  |     0,  | 
788  |  |     0x03A5 | HAS_VOWEL,  | 
789  |  |     0,  | 
790  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
791  |  |     0,  | 
792  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
793  |  |     0,  | 
794  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
    // U+1F60..1F6F
795  |  |     0x03A9 | HAS_VOWEL,  | 
796  |  |     0x03A9 | HAS_VOWEL,  | 
797  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
798  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
799  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
800  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
801  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
802  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
803  |  |     0x03A9 | HAS_VOWEL,  | 
804  |  |     0x03A9 | HAS_VOWEL,  | 
805  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
806  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
807  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
808  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
809  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
810  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
    // U+1F70..1F7F
811  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
812  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
813  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
814  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
815  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
816  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
817  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
818  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
819  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
820  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
821  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
822  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
823  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
824  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
825  |  |     0,  | 
826  |  |     0,  | 
    // U+1F80..1F8F
827  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
828  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
829  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
830  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
831  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
832  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
833  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
834  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
835  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
836  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
837  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
838  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
839  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
840  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
841  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
842  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
    // U+1F90..1F9F
843  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
844  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
845  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
846  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
847  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
848  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
849  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
850  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
851  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
852  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
853  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
854  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
855  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
856  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
857  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
858  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
    // U+1FA0..1FAF
859  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
860  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
861  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
862  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
863  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
864  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
865  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
866  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
867  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
868  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
869  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
870  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
871  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
872  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
873  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
874  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
    // U+1FB0..1FBF
875  |  |     0x0391 | HAS_VOWEL,  | 
876  |  |     0x0391 | HAS_VOWEL,  | 
877  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
878  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
879  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
880  |  |     0,  | 
881  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
882  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
883  |  |     0x0391 | HAS_VOWEL,  | 
884  |  |     0x0391 | HAS_VOWEL,  | 
885  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
886  |  |     0x0391 | HAS_VOWEL | HAS_ACCENT,  | 
887  |  |     0x0391 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
888  |  |     0,  | 
889  |  |     0x0399 | HAS_VOWEL,  | 
890  |  |     0,  | 
    // U+1FC0..1FCF
891  |  |     0,  | 
892  |  |     0,  | 
893  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
894  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
895  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
896  |  |     0,  | 
897  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
898  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
899  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
900  |  |     0x0395 | HAS_VOWEL | HAS_ACCENT,  | 
901  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
902  |  |     0x0397 | HAS_VOWEL | HAS_ACCENT,  | 
903  |  |     0x0397 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
904  |  |     0,  | 
905  |  |     0,  | 
906  |  |     0,  | 
    // U+1FD0..1FDF
907  |  |     0x0399 | HAS_VOWEL,  | 
908  |  |     0x0399 | HAS_VOWEL,  | 
909  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  | 
910  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  | 
911  |  |     0,  | 
912  |  |     0,  | 
913  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
914  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  | 
915  |  |     0x0399 | HAS_VOWEL,  | 
916  |  |     0x0399 | HAS_VOWEL,  | 
917  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
918  |  |     0x0399 | HAS_VOWEL | HAS_ACCENT,  | 
919  |  |     0,  | 
920  |  |     0,  | 
921  |  |     0,  | 
922  |  |     0,  | 
    // U+1FE0..1FEF
923  |  |     0x03A5 | HAS_VOWEL,  | 
924  |  |     0x03A5 | HAS_VOWEL,  | 
925  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  | 
926  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  | 
927  |  |     0x03A1,  | 
928  |  |     0x03A1,  | 
929  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
930  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT | HAS_DIALYTIKA,  | 
931  |  |     0x03A5 | HAS_VOWEL,  | 
932  |  |     0x03A5 | HAS_VOWEL,  | 
933  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
934  |  |     0x03A5 | HAS_VOWEL | HAS_ACCENT,  | 
935  |  |     0x03A1,  | 
936  |  |     0,  | 
937  |  |     0,  | 
938  |  |     0,  | 
    // U+1FF0..1FFF
939  |  |     0,  | 
940  |  |     0,  | 
941  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
942  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
943  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
944  |  |     0,  | 
945  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
946  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI | HAS_ACCENT,  | 
947  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
948  |  |     0x039F | HAS_VOWEL | HAS_ACCENT,  | 
949  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
950  |  |     0x03A9 | HAS_VOWEL | HAS_ACCENT,  | 
951  |  |     0x03A9 | HAS_VOWEL | HAS_YPOGEGRAMMENI,  | 
952  |  |     0,  | 
953  |  |     0,  | 
954  |  |     0,  | 
955  |  | };  | 
956  |  |  | 
// Letter data returned by getLetterData() for the one code point handled outside the
// two tables: the base letter 0x03A9 flagged as a vowel.
957  |  | // U+2126 Ohm sign  | 
958  |  | static const uint16_t data2126 = 0x03A9 | HAS_VOWEL;  | 
959  |  |  | 
960  | 0  | uint32_t getLetterData(UChar32 c) { | 
961  | 0  |     if (c < 0x370 || 0x2126 < c || (0x3ff < c && c < 0x1f00)) { | 
962  | 0  |         return 0;  | 
963  | 0  |     } else if (c <= 0x3ff) { | 
964  | 0  |         return data0370[c - 0x370];  | 
965  | 0  |     } else if (c <= 0x1fff) { | 
966  | 0  |         return data1F00[c - 0x1f00];  | 
967  | 0  |     } else if (c == 0x2126) { | 
968  | 0  |         return data2126;  | 
969  | 0  |     } else { | 
970  | 0  |         return 0;  | 
971  | 0  |     }  | 
972  | 0  | }  | 
973  |  |  | 
974  | 0  | uint32_t getDiacriticData(UChar32 c) { | 
975  | 0  |     switch (c) { | 
976  | 0  |     case 0x0300:  // varia  | 
977  | 0  |     case 0x0301:  // tonos = oxia  | 
978  | 0  |     case 0x0342:  // perispomeni  | 
979  | 0  |     case 0x0302:  // circumflex can look like perispomeni  | 
980  | 0  |     case 0x0303:  // tilde can look like perispomeni  | 
981  | 0  |     case 0x0311:  // inverted breve can look like perispomeni  | 
982  | 0  |         return HAS_ACCENT;  | 
983  | 0  |     case 0x0308:  // dialytika = diaeresis  | 
984  | 0  |         return HAS_COMBINING_DIALYTIKA;  | 
985  | 0  |     case 0x0344:  // dialytika tonos  | 
986  | 0  |         return HAS_COMBINING_DIALYTIKA | HAS_ACCENT;  | 
987  | 0  |     case 0x0345:  // ypogegrammeni = iota subscript  | 
988  | 0  |         return HAS_YPOGEGRAMMENI;  | 
989  | 0  |     case 0x0304:  // macron  | 
990  | 0  |     case 0x0306:  // breve  | 
991  | 0  |     case 0x0313:  // comma above  | 
992  | 0  |     case 0x0314:  // reversed comma above  | 
993  | 0  |     case 0x0343:  // koronis  | 
994  | 0  |         return HAS_OTHER_GREEK_DIACRITIC;  | 
995  | 0  |     default:  | 
996  | 0  |         return 0;  | 
997  | 0  |     }  | 
998  | 0  | }  | 
999  |  |  | 
1000  | 0  | UBool isFollowedByCasedLetter(const UChar *s, int32_t i, int32_t length) { | 
1001  | 0  |     while (i < length) { | 
1002  | 0  |         UChar32 c;  | 
1003  | 0  |         U16_NEXT(s, i, length, c);  | 
1004  | 0  |         int32_t type = ucase_getTypeOrIgnorable(c);  | 
1005  | 0  |         if ((type & UCASE_IGNORABLE) != 0) { | 
1006  |  |             // Case-ignorable, continue with the loop.  | 
1007  | 0  |         } else if (type != UCASE_NONE) { | 
1008  | 0  |             return TRUE;  // Followed by cased letter.  | 
1009  | 0  |         } else { | 
1010  | 0  |             return FALSE;  // Uncased and not case-ignorable.  | 
1011  | 0  |         }  | 
1012  | 0  |     }  | 
1013  | 0  |     return FALSE;  // Not followed by cased letter.  | 
1014  | 0  | }  | 
1015  |  |  | 
1016  |  | /**  | 
1017  |  |  * Greek string uppercasing with a state machine.  | 
1018  |  |  * Probably simpler than a stateless function that has to figure out complex context-before  | 
1019  |  |  * for each character.  | 
1020  |  |  * TODO: Try to re-consolidate one way or another with the non-Greek function.  | 
 *
 * Code points with nonzero getLetterData() are mapped by the Greek-specific rules below,
 * together with any immediately following marks recognized by getDiacriticData();
 * all other code points go through ucase_toFullUpper() and appendResult().
 *
 * @param options      Option bits; U_OMIT_UNCHANGED_TEXT is tested here, and the whole
 *                     value is forwarded to appendResult() on the non-Greek path.
 * @param dest         Output buffer, written via appendUChar()/appendResult().
 * @param destCapacity Capacity handed to those append helpers.
 * @param src          UTF-16 input text.
 * @param srcLength    Number of code units of src to process.
 * @param edits        May be nullptr; otherwise receives addReplace()/addUnchanged() records.
 * @param errorCode    Set to U_INDEX_OUTOFBOUNDS_ERROR when an append helper returns
 *                     a negative index.
 * @return The destination index after the last append, or 0 after such an error.
1021  |  |  */  | 
1022  |  | int32_t toUpper(uint32_t options,  | 
1023  |  |                 UChar *dest, int32_t destCapacity,  | 
1024  |  |                 const UChar *src, int32_t srcLength,  | 
1025  |  |                 Edits *edits,  | 
1026  | 0  |                 UErrorCode &errorCode) { | 
1027  | 0  |     int32_t destIndex=0;  | 
    // Context carried over from the previous iteration:
    // AFTER_CASED and/or AFTER_VOWEL_WITH_ACCENT bits.
1028  | 0  |     uint32_t state = 0;  | 
1029  | 0  |     for (int32_t i = 0; i < srcLength;) { | 
1030  | 0  |         int32_t nextIndex = i;  | 
1031  | 0  |         UChar32 c;  | 
1032  | 0  |         U16_NEXT(src, nextIndex, srcLength, c);  | 
1033  | 0  |         uint32_t nextState = 0;  | 
1034  | 0  |         int32_t type = ucase_getTypeOrIgnorable(c);  | 
1035  | 0  |         if ((type & UCASE_IGNORABLE) != 0) { | 
1036  |  |             // c is case-ignorable  | 
1037  | 0  |             nextState |= (state & AFTER_CASED);  | 
1038  | 0  |         } else if (type != UCASE_NONE) { | 
1039  |  |             // c is cased  | 
1040  | 0  |             nextState |= AFTER_CASED;  | 
1041  | 0  |         }  | 
        // Nonzero letter data selects the Greek-specific path; otherwise fall through
        // to the generic full uppercase mapping in the else branch at the bottom.
1042  | 0  |         uint32_t data = getLetterData(c);  | 
1043  | 0  |         if (data > 0) { | 
1044  | 0  |             uint32_t upper = data & UPPER_MASK;  | 
1045  |  |             // Add a dialytika to this iota or ypsilon vowel  | 
1046  |  |             // if we removed a tonos from the previous vowel,  | 
1047  |  |             // and that previous vowel did not also have (or gain) a dialytika.  | 
1048  |  |             // Adding one only to the final vowel in a longer sequence  | 
1049  |  |             // (which does not occur in normal writing) would require lookahead.  | 
1050  |  |             // Set the same flag as for preserving an existing dialytika.  | 
1051  | 0  |             if ((data & HAS_VOWEL) != 0 && (state & AFTER_VOWEL_WITH_ACCENT) != 0 &&  | 
1052  | 0  |                     (upper == 0x399 || upper == 0x3A5)) { | 
1053  | 0  |                 data |= HAS_DIALYTIKA;  | 
1054  | 0  |             }  | 
1055  | 0  |             int32_t numYpogegrammeni = 0;  // Map each one to a trailing, spacing, capital iota.  | 
1056  | 0  |             if ((data & HAS_YPOGEGRAMMENI) != 0) { | 
1057  | 0  |                 numYpogegrammeni = 1;  | 
1058  | 0  |             }  | 
1059  |  |             // Skip combining diacritics after this Greek letter.  | 
            // Their flags are merged into data, so the decisions below see the precomposed
            // and the combining-mark spellings alike.
1060  | 0  |             while (nextIndex < srcLength) { | 
1061  | 0  |                 uint32_t diacriticData = getDiacriticData(src[nextIndex]);  | 
1062  | 0  |                 if (diacriticData != 0) { | 
1063  | 0  |                     data |= diacriticData;  | 
1064  | 0  |                     if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) { | 
1065  | 0  |                         ++numYpogegrammeni;  | 
1066  | 0  |                     }  | 
1067  | 0  |                     ++nextIndex;  | 
1068  | 0  |                 } else { | 
1069  | 0  |                     break;  // not a Greek diacritic  | 
1070  | 0  |                 }  | 
1071  | 0  |             }  | 
            // Remember an accented vowel without dialytika so that the next iota/ypsilon
            // can receive a dialytika (see the check near the top of this branch).
1072  | 0  |             if ((data & HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA) == HAS_VOWEL_AND_ACCENT) { | 
1073  | 0  |                 nextState |= AFTER_VOWEL_WITH_ACCENT;  | 
1074  | 0  |             }  | 
1075  |  |             // Map according to Greek rules.  | 
1076  | 0  |             UBool addTonos = FALSE;  | 
1077  | 0  |             if (upper == 0x397 &&  | 
1078  | 0  |                     (data & HAS_ACCENT) != 0 &&  | 
1079  | 0  |                     numYpogegrammeni == 0 &&  | 
1080  | 0  |                     (state & AFTER_CASED) == 0 &&  | 
1081  | 0  |                     !isFollowedByCasedLetter(src, nextIndex, srcLength)) { | 
1082  |  |                 // Keep disjunctive "or" with (only) a tonos.  | 
1083  |  |                 // We use the same "word boundary" conditions as for the Final_Sigma test.  | 
                // NOTE(review): nextIndex was already advanced past c by U16_NEXT above,
                // so i == nextIndex looks unreachable and the addTonos branch always runs
                // (output U+0397 U+0301 rather than U+0389). Confirm whether a test such as
                // nextIndex == i + 1 was intended before changing behavior.
1084  | 0  |                 if (i == nextIndex) { | 
1085  | 0  |                     upper = 0x389;  // Preserve the precomposed form.  | 
1086  | 0  |                 } else { | 
1087  | 0  |                     addTonos = TRUE;  | 
1088  | 0  |                 }  | 
1089  | 0  |             } else if ((data & HAS_DIALYTIKA) != 0) { | 
1090  |  |                 // Preserve a vowel with dialytika in precomposed form if it exists.  | 
1091  | 0  |                 if (upper == 0x399) { | 
1092  | 0  |                     upper = 0x3AA;  | 
1093  | 0  |                     data &= ~HAS_EITHER_DIALYTIKA;  | 
1094  | 0  |                 } else if (upper == 0x3A5) { | 
1095  | 0  |                     upper = 0x3AB;  | 
1096  | 0  |                     data &= ~HAS_EITHER_DIALYTIKA;  | 
1097  | 0  |                 }  | 
1098  | 0  |             }  | 
1099  |  | 

1100  | 0  |             UBool change;  | 
1101  | 0  |             if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) { | 
1102  | 0  |                 change = TRUE;  // common, simple usage  | 
1103  | 0  |             } else { | 
1104  |  |                 // Find out first whether we are changing the text.  | 
                // i2 walks the source alongside the expected output shape
                // (letter, optional U+0308, optional U+0301) to compare unit by unit.
1105  | 0  |                 change = src[i] != upper || numYpogegrammeni > 0;  | 
1106  | 0  |                 int32_t i2 = i + 1;  | 
1107  | 0  |                 if ((data & HAS_EITHER_DIALYTIKA) != 0) { | 
1108  | 0  |                     change |= i2 >= nextIndex || src[i2] != 0x308;  | 
1109  | 0  |                     ++i2;  | 
1110  | 0  |                 }  | 
1111  | 0  |                 if (addTonos) { | 
1112  | 0  |                     change |= i2 >= nextIndex || src[i2] != 0x301;  | 
1113  | 0  |                     ++i2;  | 
1114  | 0  |                 }  | 
1115  | 0  |                 int32_t oldLength = nextIndex - i;  | 
1116  | 0  |                 int32_t newLength = (i2 - i) + numYpogegrammeni;  | 
1117  | 0  |                 change |= oldLength != newLength;  | 
1118  | 0  |                 if (change) { | 
1119  | 0  |                     if (edits != NULL) { | 
1120  | 0  |                         edits->addReplace(oldLength, newLength);  | 
1121  | 0  |                     }  | 
1122  | 0  |                 } else { | 
1123  | 0  |                     if (edits != NULL) { | 
1124  | 0  |                         edits->addUnchanged(oldLength);  | 
1125  | 0  |                     }  | 
1126  |  |                     // Write unchanged text?  | 
1127  | 0  |                     change = (options & U_OMIT_UNCHANGED_TEXT) == 0;  | 
1128  | 0  |                 }  | 
1129  | 0  |             }  | 
1130  |  | 

            // From here on, change means "write this sequence to dest".
            // Output order: base letter, U+0308 if kept/added, U+0301 if added,
            // then one U+0399 per ypogegrammeni.
1131  | 0  |             if (change) { | 
1132  | 0  |                 destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);  | 
1133  | 0  |                 if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) { | 
1134  | 0  |                     destIndex=appendUChar(dest, destIndex, destCapacity, 0x308);  // restore or add a dialytika  | 
1135  | 0  |                 }  | 
1136  | 0  |                 if (destIndex >= 0 && addTonos) { | 
1137  | 0  |                     destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);  | 
1138  | 0  |                 }  | 
1139  | 0  |                 while (destIndex >= 0 && numYpogegrammeni > 0) { | 
1140  | 0  |                     destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);  | 
1141  | 0  |                     --numYpogegrammeni;  | 
1142  | 0  |                 }  | 
1143  | 0  |                 if(destIndex<0) { | 
1144  | 0  |                     errorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
1145  | 0  |                     return 0;  | 
1146  | 0  |                 }  | 
1147  | 0  |             }  | 
1148  | 0  |         } else { | 
            // No Greek letter data: use the generic full uppercase mapping with the Greek locale id.
1149  | 0  |             const UChar *s;  | 
1150  | 0  |             c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);  | 
1151  | 0  |             destIndex = appendResult(dest, destIndex, destCapacity, c, s,  | 
1152  | 0  |                                      nextIndex - i, options, edits);  | 
1153  | 0  |             if (destIndex < 0) { | 
1154  | 0  |                 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;  | 
1155  | 0  |                 return 0;  | 
1156  | 0  |             }  | 
1157  | 0  |         }  | 
1158  | 0  |         i = nextIndex;  | 
1159  | 0  |         state = nextState;  | 
1160  | 0  |     }  | 
1161  |  |  | 
1162  | 0  |     return destIndex;  | 
1163  | 0  | }  | 
1164  |  |  | 
1165  |  | }  // namespace GreekUpper  | 
1166  |  | U_NAMESPACE_END  | 
1167  |  |  | 
1168  |  | /* functions available in the common library (for unistr_case.cpp) */  | 
1169  |  |  | 
1170  |  | U_CFUNC int32_t U_CALLCONV  | 
1171  |  | ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED  | 
1172  |  |                          UChar *dest, int32_t destCapacity,  | 
1173  |  |                          const UChar *src, int32_t srcLength,  | 
1174  |  |                          icu::Edits *edits,  | 
1175  | 0  |                          UErrorCode &errorCode) { | 
1176  | 0  |     UCaseContext csc=UCASECONTEXT_INITIALIZER;  | 
1177  | 0  |     csc.p=(void *)src;  | 
1178  | 0  |     csc.limit=srcLength;  | 
1179  | 0  |     int32_t destIndex = toLower(  | 
1180  | 0  |         caseLocale, options,  | 
1181  | 0  |         dest, destCapacity,  | 
1182  | 0  |         src, &csc, 0, srcLength,  | 
1183  | 0  |         edits, errorCode);  | 
1184  | 0  |     return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);  | 
1185  | 0  | }  | 
1186  |  |  | 
1187  |  | U_CFUNC int32_t U_CALLCONV  | 
1188  |  | ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED  | 
1189  |  |                          UChar *dest, int32_t destCapacity,  | 
1190  |  |                          const UChar *src, int32_t srcLength,  | 
1191  |  |                          icu::Edits *edits,  | 
1192  | 0  |                          UErrorCode &errorCode) { | 
1193  | 0  |     int32_t destIndex;  | 
1194  | 0  |     if (caseLocale == UCASE_LOC_GREEK) { | 
1195  | 0  |         destIndex = GreekUpper::toUpper(options, dest, destCapacity,  | 
1196  | 0  |                                         src, srcLength, edits, errorCode);  | 
1197  | 0  |     } else { | 
1198  | 0  |         UCaseContext csc=UCASECONTEXT_INITIALIZER;  | 
1199  | 0  |         csc.p=(void *)src;  | 
1200  | 0  |         csc.limit=srcLength;  | 
1201  | 0  |         destIndex = toUpper(  | 
1202  | 0  |             caseLocale, options,  | 
1203  | 0  |             dest, destCapacity,  | 
1204  | 0  |             src, &csc, srcLength,  | 
1205  | 0  |             edits, errorCode);  | 
1206  | 0  |     }  | 
1207  | 0  |     return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);  | 
1208  | 0  | }  | 
1209  |  |  | 
1210  |  | U_CFUNC int32_t U_CALLCONV  | 
1211  |  | ustrcase_internalFold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED  | 
1212  |  |                       UChar *dest, int32_t destCapacity,  | 
1213  |  |                       const UChar *src, int32_t srcLength,  | 
1214  |  |                       icu::Edits *edits,  | 
1215  | 0  |                       UErrorCode &errorCode) { | 
1216  | 0  |     int32_t destIndex = toLower(  | 
1217  | 0  |         -1, options,  | 
1218  | 0  |         dest, destCapacity,  | 
1219  | 0  |         src, nullptr, 0, srcLength,  | 
1220  | 0  |         edits, errorCode);  | 
1221  | 0  |     return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);  | 
1222  | 0  | }  | 
1223  |  |  | 
1224  |  | U_CFUNC int32_t  | 
1225  |  | ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM  | 
1226  |  |              UChar *dest, int32_t destCapacity,  | 
1227  |  |              const UChar *src, int32_t srcLength,  | 
1228  |  |              UStringCaseMapper *stringCaseMapper,  | 
1229  |  |              icu::Edits *edits,  | 
1230  | 0  |              UErrorCode &errorCode) { | 
1231  | 0  |     int32_t destLength;  | 
1232  |  |  | 
1233  |  |     /* check argument values */  | 
1234  | 0  |     if(U_FAILURE(errorCode)) { | 
1235  | 0  |         return 0;  | 
1236  | 0  |     }  | 
1237  | 0  |     if( destCapacity<0 ||  | 
1238  | 0  |         (dest==NULL && destCapacity>0) ||  | 
1239  | 0  |         src==NULL ||  | 
1240  | 0  |         srcLength<-1  | 
1241  | 0  |     ) { | 
1242  | 0  |         errorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
1243  | 0  |         return 0;  | 
1244  | 0  |     }  | 
1245  |  |  | 
1246  |  |     /* get the string length */  | 
1247  | 0  |     if(srcLength==-1) { | 
1248  | 0  |         srcLength=u_strlen(src);  | 
1249  | 0  |     }  | 
1250  |  |  | 
1251  |  |     /* check for overlapping source and destination */  | 
1252  | 0  |     if( dest!=NULL &&  | 
1253  | 0  |         ((src>=dest && src<(dest+destCapacity)) ||  | 
1254  | 0  |          (dest>=src && dest<(src+srcLength)))  | 
1255  | 0  |     ) { | 
1256  | 0  |         errorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
1257  | 0  |         return 0;  | 
1258  | 0  |     }  | 
1259  |  |  | 
1260  | 0  |     if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { | 
1261  | 0  |         edits->reset();  | 
1262  | 0  |     }  | 
1263  | 0  |     destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR  | 
1264  | 0  |                                 dest, destCapacity, src, srcLength, edits, errorCode);  | 
1265  | 0  |     return u_terminateUChars(dest, destCapacity, destLength, &errorCode);  | 
1266  | 0  | }  | 
1267  |  |  | 
1268  |  | U_CFUNC int32_t  | 
1269  |  | ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM  | 
1270  |  |                         UChar *dest, int32_t destCapacity,  | 
1271  |  |                         const UChar *src, int32_t srcLength,  | 
1272  |  |                         UStringCaseMapper *stringCaseMapper,  | 
1273  | 0  |                         UErrorCode &errorCode) { | 
1274  | 0  |     UChar buffer[300];  | 
1275  | 0  |     UChar *temp;  | 
1276  |  | 
  | 
1277  | 0  |     int32_t destLength;  | 
1278  |  |  | 
1279  |  |     /* check argument values */  | 
1280  | 0  |     if(U_FAILURE(errorCode)) { | 
1281  | 0  |         return 0;  | 
1282  | 0  |     }  | 
1283  | 0  |     if( destCapacity<0 ||  | 
1284  | 0  |         (dest==NULL && destCapacity>0) ||  | 
1285  | 0  |         src==NULL ||  | 
1286  | 0  |         srcLength<-1  | 
1287  | 0  |     ) { | 
1288  | 0  |         errorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
1289  | 0  |         return 0;  | 
1290  | 0  |     }  | 
1291  |  |  | 
1292  |  |     /* get the string length */  | 
1293  | 0  |     if(srcLength==-1) { | 
1294  | 0  |         srcLength=u_strlen(src);  | 
1295  | 0  |     }  | 
1296  |  |  | 
1297  |  |     /* check for overlapping source and destination */  | 
1298  | 0  |     if( dest!=NULL &&  | 
1299  | 0  |         ((src>=dest && src<(dest+destCapacity)) ||  | 
1300  | 0  |          (dest>=src && dest<(src+srcLength)))  | 
1301  | 0  |     ) { | 
1302  |  |         /* overlap: provide a temporary destination buffer and later copy the result */  | 
1303  | 0  |         if(destCapacity<=UPRV_LENGTHOF(buffer)) { | 
1304  |  |             /* the stack buffer is large enough */  | 
1305  | 0  |             temp=buffer;  | 
1306  | 0  |         } else { | 
1307  |  |             /* allocate a buffer */  | 
1308  | 0  |             temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);  | 
1309  | 0  |             if(temp==NULL) { | 
1310  | 0  |                 errorCode=U_MEMORY_ALLOCATION_ERROR;  | 
1311  | 0  |                 return 0;  | 
1312  | 0  |             }  | 
1313  | 0  |         }  | 
1314  | 0  |     } else { | 
1315  | 0  |         temp=dest;  | 
1316  | 0  |     }  | 
1317  |  |  | 
1318  | 0  |     destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR  | 
1319  | 0  |                                 temp, destCapacity, src, srcLength, NULL, errorCode);  | 
1320  | 0  |     if(temp!=dest) { | 
1321  |  |         /* copy the result string to the destination buffer */  | 
1322  | 0  |         if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) { | 
1323  | 0  |             u_memmove(dest, temp, destLength);  | 
1324  | 0  |         }  | 
1325  | 0  |         if(temp!=buffer) { | 
1326  | 0  |             uprv_free(temp);  | 
1327  | 0  |         }  | 
1328  | 0  |     }  | 
1329  |  | 
  | 
1330  | 0  |     return u_terminateUChars(dest, destCapacity, destLength, &errorCode);  | 
1331  | 0  | }  | 
1332  |  |  | 
1333  |  | /* public API functions */  | 
1334  |  |  | 
1335  |  | U_CAPI int32_t U_EXPORT2  | 
1336  |  | u_strFoldCase(UChar *dest, int32_t destCapacity,  | 
1337  |  |               const UChar *src, int32_t srcLength,  | 
1338  |  |               uint32_t options,  | 
1339  | 0  |               UErrorCode *pErrorCode) { | 
1340  | 0  |     return ustrcase_mapWithOverlap(  | 
1341  | 0  |         UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL  | 
1342  | 0  |         dest, destCapacity,  | 
1343  | 0  |         src, srcLength,  | 
1344  | 0  |         ustrcase_internalFold, *pErrorCode);  | 
1345  | 0  | }  | 
1346  |  |  | 
1347  |  | U_NAMESPACE_BEGIN  | 
1348  |  |  | 
1349  |  | int32_t CaseMap::fold(  | 
1350  |  |         uint32_t options,  | 
1351  |  |         const UChar *src, int32_t srcLength,  | 
1352  |  |         UChar *dest, int32_t destCapacity, Edits *edits,  | 
1353  | 0  |         UErrorCode &errorCode) { | 
1354  | 0  |     return ustrcase_map(  | 
1355  | 0  |         UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL  | 
1356  | 0  |         dest, destCapacity,  | 
1357  | 0  |         src, srcLength,  | 
1358  | 0  |         ustrcase_internalFold, edits, errorCode);  | 
1359  | 0  | }  | 
1360  |  |  | 
1361  |  | U_NAMESPACE_END  | 
1362  |  |  | 
1363  |  | /* case-insensitive string comparisons -------------------------------------- */  | 
1364  |  |  | 
1365  |  | /*  | 
1366  |  |  * This function is a copy of unorm_cmpEquivFold() minus the parts for  | 
1367  |  |  * canonical equivalence.  | 
1368  |  |  * Keep the functions in sync, and see there for how this works.  | 
1369  |  |  * The duplication is for modularization:  | 
1370  |  |  * It makes caseless (but not canonical caseless) matches independent of  | 
1371  |  |  * the normalization code.  | 
1372  |  |  */  | 
1373  |  |  | 
1374  |  | /* one saved input level: the pointers of the string being read before a folding buffer took over */  | 
1375  |  | struct CmpEquivLevel { | 
1376  |  |     const UChar *start;   /* start of the saved level's text */  | 
      |  |     const UChar *s;       /* read position to resume from */  | 
      |  |     const UChar *limit;   /* end of the saved level's text (may be NULL) */  | 
1377  |  | };  | 
1378  |  | typedef struct CmpEquivLevel CmpEquivLevel;  | 
1379  |  |  | 
1380  |  | /**  | 
1381  |  |  * Internal implementation code comparing string with case fold.  | 
1382  |  |  * This function is called from u_strcmpFold() and u_caseInsensitivePrefixMatch().  | 
1383  |  |  *  | 
      |  |  * Both strings are read one UTF-16 code unit at a time. When two code units  | 
      |  |  * differ, the code point on each side is looked up with ucase_toFullFolding().  | 
      |  |  * A folding result is copied into a small buffer (fold1/fold2) which then  | 
      |  |  * temporarily becomes that side's input ("level 1"); once the buffer is  | 
      |  |  * exhausted, reading resumes in the original string ("level 0") from the  | 
      |  |  * pointers saved in stack1/stack2.  | 
      |  |  *  | 
1384  |  |  * @param s1            input string 1  | 
1385  |  |  * @param length1       length of string 1, or -1 (NUL-terminated)  | 
1386  |  |  * @param s2            input string 2  | 
1387  |  |  * @param length2       length of string 2, or -1 (NUL-terminated)  | 
1388  |  |  * @param options       compare options; also passed to ucase_toFullFolding().  | 
      |  |  *                      With _STRNCMP_STYLE a NUL code unit ends a string even  | 
      |  |  *                      when a length is given; with U_COMPARE_CODE_POINT_ORDER  | 
      |  |  *                      the final difference is computed in code point order.  | 
1389  |  |  * @param matchLen1     (output) length of partial prefix match in s1; may be NULL,  | 
      |  |  *                      in which case neither match length is written  | 
1390  |  |  * @param matchLen2     (output) length of partial prefix match in s2; must not be  | 
      |  |  *                      NULL when matchLen1 is not NULL (asserted)  | 
1391  |  |  * @param pErrorCode    receives error status; if it already indicates a failure,  | 
      |  |  *                      0 is returned immediately. It is only read here.  | 
1392  |  |  * @return The result of comparison: 0 if the strings are equal under case folding,  | 
      |  |  *         -1 if string 1 ends first, 1 if string 2 ends first, otherwise the  | 
      |  |  *         difference c1-c2 of the first differing code units  | 
1393  |  |  */  | 
1394  |  | static int32_t _cmpFold(  | 
1395  |  |             const UChar *s1, int32_t length1,  | 
1396  |  |             const UChar *s2, int32_t length2,  | 
1397  |  |             uint32_t options,  | 
1398  |  |             int32_t *matchLen1, int32_t *matchLen2,  | 
1399  | 0  |             UErrorCode *pErrorCode) { | 
1400  | 0  |     int32_t cmpRes = 0;  | 
1401  |  |  | 
1402  |  |     /* current-level start/limit - s1/s2 as current */  | 
1403  | 0  |     const UChar *start1, *start2, *limit1, *limit2;  | 
1404  |  |  | 
1405  |  |     /* points to the original start address */  | 
1406  | 0  |     const UChar *org1, *org2;  | 
1407  |  |  | 
1408  |  |     /* points to the end of match + 1 */  | 
1409  | 0  |     const UChar *m1, *m2;  | 
1410  |  |  | 
1411  |  |     /* case folding variables */  | 
1412  | 0  |     const UChar *p;  | 
1413  | 0  |     int32_t length;  | 
1414  |  |  | 
1415  |  |     /* stacks of previous-level start/current/limit */  | 
1416  | 0  |     CmpEquivLevel stack1[2], stack2[2];  | 
1417  |  |  | 
1418  |  |     /* case folding buffers, only use current-level start/limit */  | 
1419  | 0  |     UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];  | 
1420  |  |  | 
1421  |  |     /* track which is the current level per string */  | 
1422  | 0  |     int32_t level1, level2;  | 
1423  |  |  | 
1424  |  |     /* current code units, and code points for lookups */  | 
1425  | 0  |     UChar32 c1, c2, cp1, cp2;  | 
1426  |  |  | 
1427  |  |     /* no argument error checking because this itself is not an API */  | 
1428  |  |  | 
1429  |  |     /*  | 
1430  |  |      * assume that at least the option U_COMPARE_IGNORE_CASE is set  | 
1431  |  |      * otherwise this function would have to behave exactly as uprv_strCompare()  | 
1432  |  |      */  | 
1433  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
1434  | 0  |         return 0;  | 
1435  | 0  |     }  | 
1436  |  |  | 
1437  |  |     /* initialize */  | 
1438  | 0  |     if(matchLen1) { | 
1439  | 0  |         U_ASSERT(matchLen2 !=NULL);  | 
1440  | 0  |         *matchLen1=0;  | 
1441  | 0  |         *matchLen2=0;  | 
1442  | 0  |     }  | 
1443  |  | 
  | 
1444  | 0  |     start1=m1=org1=s1;  | 
1445  | 0  |     if(length1==-1) { | 
1446  | 0  |         limit1=NULL;  | 
1447  | 0  |     } else { | 
1448  | 0  |         limit1=s1+length1;  | 
1449  | 0  |     }  | 
1450  |  | 
  | 
1451  | 0  |     start2=m2=org2=s2;  | 
1452  | 0  |     if(length2==-1) { | 
1453  | 0  |         limit2=NULL;  | 
1454  | 0  |     } else { | 
1455  | 0  |         limit2=s2+length2;  | 
1456  | 0  |     }  | 
1457  |  | 
  | 
1458  | 0  |     level1=level2=0;  | 
1459  | 0  |     c1=c2=-1;  | 
1460  |  |  | 
1461  |  |     /* comparison loop */  | 
1462  | 0  |     for(;;) { | 
1463  |  |         /*  | 
1464  |  |          * here a code unit value of -1 means "get another code unit"  | 
1465  |  |          * below it will mean "this source is finished"  | 
1466  |  |          */  | 
1467  |  | 
  | 
1468  | 0  |         if(c1<0) { | 
1469  |  |             /* get next code unit from string 1, post-increment */  | 
1470  | 0  |             for(;;) { | 
1471  | 0  |                 if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) { | 
1472  | 0  |                     if(level1==0) { | 
1473  | 0  |                         c1=-1;  | 
1474  | 0  |                         break;  | 
1475  | 0  |                     }  | 
1476  | 0  |                 } else { | 
1477  | 0  |                     ++s1;  | 
1478  | 0  |                     break;  | 
1479  | 0  |                 }  | 
1480  |  |  | 
1481  |  |                 /* reached end of level buffer, pop one level */  | 
1482  | 0  |                 do { | 
1483  | 0  |                     --level1;  | 
1484  | 0  |                     start1=stack1[level1].start;    /*Not uninitialized*/  | 
1485  | 0  |                 } while(start1==NULL);  | 
1486  | 0  |                 s1=stack1[level1].s;                /*Not uninitialized*/  | 
1487  | 0  |                 limit1=stack1[level1].limit;        /*Not uninitialized*/  | 
1488  | 0  |             }  | 
1489  | 0  |         }  | 
1490  |  | 
  | 
1491  | 0  |         if(c2<0) { | 
1492  |  |             /* get next code unit from string 2, post-increment */  | 
1493  | 0  |             for(;;) { | 
1494  | 0  |                 if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) { | 
1495  | 0  |                     if(level2==0) { | 
1496  | 0  |                         c2=-1;  | 
1497  | 0  |                         break;  | 
1498  | 0  |                     }  | 
1499  | 0  |                 } else { | 
1500  | 0  |                     ++s2;  | 
1501  | 0  |                     break;  | 
1502  | 0  |                 }  | 
1503  |  |  | 
1504  |  |                 /* reached end of level buffer, pop one level */  | 
1505  | 0  |                 do { | 
1506  | 0  |                     --level2;  | 
1507  | 0  |                     start2=stack2[level2].start;    /*Not uninitialized*/  | 
1508  | 0  |                 } while(start2==NULL);  | 
1509  | 0  |                 s2=stack2[level2].s;                /*Not uninitialized*/  | 
1510  | 0  |                 limit2=stack2[level2].limit;        /*Not uninitialized*/  | 
1511  | 0  |             }  | 
1512  | 0  |         }  | 
1513  |  |  | 
1514  |  |         /*  | 
1515  |  |          * compare c1 and c2  | 
1516  |  |          * either variable c1, c2 is -1 only if the corresponding string is finished  | 
1517  |  |          */  | 
1518  | 0  |         if(c1==c2) { | 
1519  | 0  |             const UChar *next1, *next2;  | 
1520  |  | 
  | 
1521  | 0  |             if(c1<0) { | 
1522  | 0  |                 cmpRes=0;   /* c1==c2==-1 indicating end of strings */  | 
1523  | 0  |                 break;  | 
1524  | 0  |             }  | 
1525  |  |  | 
1526  |  |             /*  | 
1527  |  |              * Note: Move the match positions in both strings at the same time  | 
1528  |  |              *      only when corresponding code point(s) in the original strings  | 
1529  |  |              *      are fully consumed. For example, when comparing s1="Fust" and  | 
1530  |  |              *      s2="Fu\u00dfball", s2[2] is folded into "ss", and s1[2] matches  | 
1531  |  |              *      the first code point in the case-folded data. But the second "s"  | 
1532  |  |              *      has no matching code point in s1, so this implementation returns  | 
1533  |  |              *      2 as the prefix match length ("Fu"). | 
1534  |  |              */  | 
1535  | 0  |             next1=next2=NULL;  | 
1536  | 0  |             if(level1==0) { | 
1537  | 0  |                 next1=s1;  | 
1538  | 0  |             } else if(s1==limit1) { | 
1539  |  |                 /* Note: This implementation only uses a single level of stack.  | 
1540  |  |                  *      If this code needs to be changed to use multiple levels  | 
1541  |  |                  *      of stacks, the code above should check if the current  | 
1542  |  |                  *      code is at the end of all stacks.  | 
1543  |  |                  */  | 
1544  | 0  |                 U_ASSERT(level1==1);  | 
1545  |  |  | 
1546  |  |                 /* is s1 at the end of the current stack? */  | 
1547  | 0  |                 next1=stack1[0].s;  | 
1548  | 0  |             }  | 
1549  |  | 
  | 
1550  | 0  |             if (next1!=NULL) { | 
1551  | 0  |                 if(level2==0) { | 
1552  | 0  |                     next2=s2;  | 
1553  | 0  |                 } else if(s2==limit2) { | 
1554  | 0  |                     U_ASSERT(level2==1);  | 
1555  |  |  | 
1556  |  |                     /* is s2 at the end of the current stack? */  | 
1557  | 0  |                     next2=stack2[0].s;  | 
1558  | 0  |                 }  | 
1559  | 0  |                 if(next2!=NULL) { | 
1560  | 0  |                     m1=next1;  | 
1561  | 0  |                     m2=next2;  | 
1562  | 0  |                 }  | 
1563  | 0  |             }  | 
1564  | 0  |             c1=c2=-1;       /* make us fetch new code units */  | 
1565  | 0  |             continue;  | 
1566  | 0  |         } else if(c1<0) { | 
1567  | 0  |             cmpRes=-1;      /* string 1 ends before string 2 */  | 
1568  | 0  |             break;  | 
1569  | 0  |         } else if(c2<0) { | 
1570  | 0  |             cmpRes=1;       /* string 2 ends before string 1 */  | 
1571  | 0  |             break;  | 
1572  | 0  |         }  | 
1573  |  |         /* c1!=c2 && c1>=0 && c2>=0 */  | 
1574  |  |  | 
1575  |  |         /* get complete code points for c1, c2 for lookups if either is a surrogate */  | 
1576  | 0  |         cp1=c1;  | 
1577  | 0  |         if(U_IS_SURROGATE(c1)) { | 
1578  | 0  |             UChar c;  | 
1579  |  | 
  | 
1580  | 0  |             if(U_IS_SURROGATE_LEAD(c1)) { | 
1581  | 0  |                 if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) { | 
1582  |  |                     /* advance ++s1; only below if cp1 decomposes/case-folds */  | 
1583  | 0  |                     cp1=U16_GET_SUPPLEMENTARY(c1, c);  | 
1584  | 0  |                 }  | 
1585  | 0  |             } else /* isTrail(c1) */ { | 
1586  | 0  |                 if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) { | 
1587  | 0  |                     cp1=U16_GET_SUPPLEMENTARY(c, c1);  | 
1588  | 0  |                 }  | 
1589  | 0  |             }  | 
1590  | 0  |         }  | 
1591  |  | 
  | 
1592  | 0  |         cp2=c2;  | 
1593  | 0  |         if(U_IS_SURROGATE(c2)) { | 
1594  | 0  |             UChar c;  | 
1595  |  | 
  | 
1596  | 0  |             if(U_IS_SURROGATE_LEAD(c2)) { | 
1597  | 0  |                 if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) { | 
1598  |  |                     /* advance ++s2; only below if cp2 decomposes/case-folds */  | 
1599  | 0  |                     cp2=U16_GET_SUPPLEMENTARY(c2, c);  | 
1600  | 0  |                 }  | 
1601  | 0  |             } else /* isTrail(c2) */ { | 
1602  | 0  |                 if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) { | 
1603  | 0  |                     cp2=U16_GET_SUPPLEMENTARY(c, c2);  | 
1604  | 0  |                 }  | 
1605  | 0  |             }  | 
1606  | 0  |         }  | 
1607  |  |  | 
1608  |  |         /*  | 
1609  |  |          * go down one level for each string  | 
1610  |  |          * continue with the main loop as soon as there is a real change  | 
1611  |  |          */  | 
1612  |  | 
  | 
1613  | 0  |         if( level1==0 &&  | 
1614  | 0  |             (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0  | 
1615  | 0  |         ) { | 
1616  |  |             /* cp1 case-folds to the code point "length" or to p[length] */  | 
1617  | 0  |             if(U_IS_SURROGATE(c1)) { | 
1618  | 0  |                 if(U_IS_SURROGATE_LEAD(c1)) { | 
1619  |  |                     /* advance beyond source surrogate pair if it case-folds */  | 
1620  | 0  |                     ++s1;  | 
1621  | 0  |                 } else /* isTrail(c1) */ { | 
1622  |  |                     /*  | 
1623  |  |                      * we got a supplementary code point when hitting its trail surrogate,  | 
1624  |  |                      * therefore the lead surrogate must have been the same as in the other string;  | 
1625  |  |                      * compare this decomposition with the lead surrogate in the other string  | 
1626  |  |                      * remember that this simulates bulk text replacement:  | 
1627  |  |                      * the decomposition would replace the entire code point  | 
1628  |  |                      */  | 
1629  | 0  |                     --s2;  | 
1630  | 0  |                     --m2;  | 
1631  | 0  |                     c2=*(s2-1);  | 
1632  | 0  |                 }  | 
1633  | 0  |             }  | 
1634  |  |  | 
1635  |  |             /* push current level pointers */  | 
1636  | 0  |             stack1[0].start=start1;  | 
1637  | 0  |             stack1[0].s=s1;  | 
1638  | 0  |             stack1[0].limit=limit1;  | 
1639  | 0  |             ++level1;  | 
1640  |  |  | 
1641  |  |             /* copy the folding result to fold1[]: the string p[length], or else the single code point "length" as UTF-16 */  | 
1642  | 0  |             if(length<=UCASE_MAX_STRING_LENGTH) { | 
1643  | 0  |                 u_memcpy(fold1, p, length);  | 
1644  | 0  |             } else { | 
1645  | 0  |                 int32_t i=0;  | 
1646  | 0  |                 U16_APPEND_UNSAFE(fold1, i, length);  | 
1647  | 0  |                 length=i;  | 
1648  | 0  |             }  | 
1649  |  |  | 
1650  |  |             /* set next level pointers to case folding */  | 
1651  | 0  |             start1=s1=fold1;  | 
1652  | 0  |             limit1=fold1+length;  | 
1653  |  |  | 
1654  |  |             /* get ready to read from decomposition, continue with loop */  | 
1655  | 0  |             c1=-1;  | 
1656  | 0  |             continue;  | 
1657  | 0  |         }  | 
1658  |  |  | 
1659  | 0  |         if( level2==0 &&  | 
1660  | 0  |             (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0  | 
1661  | 0  |         ) { | 
1662  |  |             /* cp2 case-folds to the code point "length" or to p[length] */  | 
1663  | 0  |             if(U_IS_SURROGATE(c2)) { | 
1664  | 0  |                 if(U_IS_SURROGATE_LEAD(c2)) { | 
1665  |  |                     /* advance beyond source surrogate pair if it case-folds */  | 
1666  | 0  |                     ++s2;  | 
1667  | 0  |                 } else /* isTrail(c2) */ { | 
1668  |  |                     /*  | 
1669  |  |                      * we got a supplementary code point when hitting its trail surrogate,  | 
1670  |  |                      * therefore the lead surrogate must have been the same as in the other string;  | 
1671  |  |                      * compare this decomposition with the lead surrogate in the other string  | 
1672  |  |                      * remember that this simulates bulk text replacement:  | 
1673  |  |                      * the decomposition would replace the entire code point  | 
      |  |                      *  | 
      |  |                      * NOTE(review): this branch decrements m2, exactly like the mirrored  | 
      |  |                      * isTrail(c1) branch above, although it backs up s1 rather than s2.  | 
      |  |                      * By symmetry m1 might be intended here -- confirm before changing.  | 
1674  |  |                      */  | 
1675  | 0  |                     --s1;  | 
1676  | 0  |                     --m2;  | 
1677  | 0  |                     c1=*(s1-1);  | 
1678  | 0  |                 }  | 
1679  | 0  |             }  | 
1680  |  |  | 
1681  |  |             /* push current level pointers */  | 
1682  | 0  |             stack2[0].start=start2;  | 
1683  | 0  |             stack2[0].s=s2;  | 
1684  | 0  |             stack2[0].limit=limit2;  | 
1685  | 0  |             ++level2;  | 
1686  |  |  | 
1687  |  |             /* copy the folding result to fold2[]: the string p[length], or else the single code point "length" as UTF-16 */  | 
1688  | 0  |             if(length<=UCASE_MAX_STRING_LENGTH) { | 
1689  | 0  |                 u_memcpy(fold2, p, length);  | 
1690  | 0  |             } else { | 
1691  | 0  |                 int32_t i=0;  | 
1692  | 0  |                 U16_APPEND_UNSAFE(fold2, i, length);  | 
1693  | 0  |                 length=i;  | 
1694  | 0  |             }  | 
1695  |  |  | 
1696  |  |             /* set next level pointers to case folding */  | 
1697  | 0  |             start2=s2=fold2;  | 
1698  | 0  |             limit2=fold2+length;  | 
1699  |  |  | 
1700  |  |             /* get ready to read from decomposition, continue with loop */  | 
1701  | 0  |             c2=-1;  | 
1702  | 0  |             continue;  | 
1703  | 0  |         }  | 
1704  |  |  | 
1705  |  |         /*  | 
1706  |  |          * no decomposition/case folding, max level for both sides:  | 
1707  |  |          * return difference result  | 
1708  |  |          *  | 
1709  |  |          * code point order comparison must not just return cp1-cp2  | 
1710  |  |          * because when single surrogates are present then the surrogate pairs  | 
1711  |  |          * that formed cp1 and cp2 may be from different string indexes  | 
1712  |  |          *  | 
1713  |  |          * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units | 
1714  |  |          * c1=d800 cp1=10001 c2=dc00 cp2=10000  | 
1715  |  |          * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 } | 
1716  |  |          *  | 
1717  |  |          * therefore, use same fix-up as in ustring.c/uprv_strCompare()  | 
1718  |  |          * except: uprv_strCompare() fetches c=*s while this function fetches c=*s++  | 
1719  |  |          * so we have slightly different pointer/start/limit comparisons here  | 
1720  |  |          */  | 
1721  |  |  | 
1722  | 0  |         if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) { | 
1723  |  |             /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */  | 
1724  | 0  |             if(  | 
1725  | 0  |                 (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||  | 
1726  | 0  |                 (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))  | 
1727  | 0  |             ) { | 
1728  |  |                 /* part of a surrogate pair, leave >=d800 */  | 
1729  | 0  |             } else { | 
1730  |  |                 /* BMP code point - may be surrogate code point - make <d800 */  | 
1731  | 0  |                 c1-=0x2800;  | 
1732  | 0  |             }  | 
1733  |  | 
  | 
1734  | 0  |             if(  | 
1735  | 0  |                 (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||  | 
1736  | 0  |                 (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))  | 
1737  | 0  |             ) { | 
1738  |  |                 /* part of a surrogate pair, leave >=d800 */  | 
1739  | 0  |             } else { | 
1740  |  |                 /* BMP code point - may be surrogate code point - make <d800 */  | 
1741  | 0  |                 c2-=0x2800;  | 
1742  | 0  |             }  | 
1743  | 0  |         }  | 
1744  |  | 
  | 
1745  | 0  |         cmpRes=c1-c2;  | 
1746  | 0  |         break;  | 
1747  | 0  |     }  | 
1748  |  | 
  | 
1749  | 0  |     if(matchLen1) { | 
1750  | 0  |         *matchLen1=static_cast<int32_t>(m1-org1);  | 
1751  | 0  |         *matchLen2=static_cast<int32_t>(m2-org2);  | 
1752  | 0  |     }  | 
1753  | 0  |     return cmpRes;  | 
1754  | 0  | }  | 
1755  |  |  | 
1756  |  | /* internal function: case-folding comparison that does not report prefix match lengths */  | 
1757  |  | U_CFUNC int32_t  | 
1758  |  | u_strcmpFold(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2,  | 
1759  | 0  |              uint32_t options, UErrorCode *pErrorCode) { | 
1760  |  |     /* NULL match-length pointers: _cmpFold() then skips writing those outputs */  | 
1761  | 0  |     int32_t *const noMatchLen=NULL;  | 
1762  | 0  |     return _cmpFold(s1, length1, s2, length2, options, noMatchLen, noMatchLen, pErrorCode);  | 
1763  | 0  | }  | 
1764  |  |  | 
1765  |  | /* public API functions */  | 
1766  |  |  | 
1767  |  | U_CAPI int32_t U_EXPORT2  | 
1768  |  | u_strCaseCompare(const UChar *s1, int32_t length1,  | 
1769  |  |                  const UChar *s2, int32_t length2,  | 
1770  |  |                  uint32_t options,  | 
1771  | 0  |                  UErrorCode *pErrorCode) { | 
1772  |  |     /* argument checking */  | 
1773  | 0  |     if(pErrorCode==0 || U_FAILURE(*pErrorCode)) { | 
1774  | 0  |         return 0;  | 
1775  | 0  |     }  | 
1776  | 0  |     if(s1==NULL || length1<-1 || s2==NULL || length2<-1) { | 
1777  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
1778  | 0  |         return 0;  | 
1779  | 0  |     }  | 
1780  | 0  |     return u_strcmpFold(s1, length1, s2, length2,  | 
1781  | 0  |                         options|U_COMPARE_IGNORE_CASE,  | 
1782  | 0  |                         pErrorCode);  | 
1783  | 0  | }  | 
1784  |  |  | 
1785  |  | U_CAPI int32_t U_EXPORT2  | 
1786  | 0  | u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options) { | 
1787  | 0  |     UErrorCode errorCode=U_ZERO_ERROR;  | 
1788  | 0  |     return u_strcmpFold(s1, -1, s2, -1,  | 
1789  | 0  |                         options|U_COMPARE_IGNORE_CASE,  | 
1790  | 0  |                         &errorCode);  | 
1791  | 0  | }  | 
1792  |  |  | 
1793  |  | U_CAPI int32_t U_EXPORT2  | 
1794  | 0  | u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options) { | 
1795  | 0  |     UErrorCode errorCode=U_ZERO_ERROR;  | 
1796  | 0  |     return u_strcmpFold(s1, length, s2, length,  | 
1797  | 0  |                         options|U_COMPARE_IGNORE_CASE,  | 
1798  | 0  |                         &errorCode);  | 
1799  | 0  | }  | 
1800  |  |  | 
1801  |  | U_CAPI int32_t U_EXPORT2  | 
1802  | 0  | u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options) { | 
1803  | 0  |     UErrorCode errorCode=U_ZERO_ERROR;  | 
1804  | 0  |     return u_strcmpFold(s1, n, s2, n,  | 
1805  | 0  |                         options|(U_COMPARE_IGNORE_CASE|_STRNCMP_STYLE),  | 
1806  | 0  |                         &errorCode);  | 
1807  | 0  | }  | 
1808  |  |  | 
1809  |  | /* internal API - reports via matchLen1/matchLen2 how long a prefix of each string matches under case folding */  | 
1810  |  | U_CAPI void  | 
1811  |  | u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,  | 
1812  |  |                              const UChar *s2, int32_t length2,  | 
1813  |  |                              uint32_t options,  | 
1814  |  |                              int32_t *matchLen1, int32_t *matchLen2,  | 
1815  | 0  |                              UErrorCode *pErrorCode) { | 
1816  |  |     /* only the match lengths are of interest; the comparison result is dropped */  | 
1817  | 0  |     (void)_cmpFold(s1, length1, s2, length2, options, matchLen1, matchLen2, pErrorCode);  | 
1818  | 0  | }  | 