/src/icu/source/common/ucasemap_imp.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2017 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  |  | 
4  |  | // ucasemap_imp.h  | 
5  |  | // created: 2017feb08 Markus W. Scherer  | 
6  |  |  | 
7  |  | #ifndef __UCASEMAP_IMP_H__  | 
8  |  | #define __UCASEMAP_IMP_H__  | 
9  |  |  | 
10  |  | #include "unicode/utypes.h"  | 
11  |  | #include "unicode/ucasemap.h"  | 
12  |  | #include "unicode/uchar.h"  | 
13  |  | #include "ucase.h"  | 
14  |  |  | 
/**
 * Mask that isolates the titlecasing iterator option field of an options word.
 * The field is three bits wide, so it could encode eight values; at present
 * only three are assigned: 0 (words), U_TITLECASE_WHOLE_STRING and
 * U_TITLECASE_SENTENCES. The option constants live in stringoptions.h.
 * @internal
 */
#define U_TITLECASE_ITERATOR_MASK 0xe0
23  |  |  | 
/**
 * Mask that covers the titlecasing index-adjustment option bits of an options word.
 * Two bits are defined so far: U_TITLECASE_NO_BREAK_ADJUSTMENT and
 * U_TITLECASE_ADJUST_TO_CASED (both declared in stringoptions.h).
 * @internal
 */
#define U_TITLECASE_ADJUSTMENT_MASK 0x600
32  |  |  | 
33  |  | /**  | 
34  |  |  * Internal API, used by u_strcasecmp() etc.  | 
35  |  |  * Compare strings case-insensitively,  | 
36  |  |  * in code point order or code unit order.  | 
37  |  |  */  | 
38  |  | U_CFUNC int32_t  | 
39  |  | u_strcmpFold(const UChar *s1, int32_t length1,  | 
40  |  |              const UChar *s2, int32_t length2,  | 
41  |  |              uint32_t options,  | 
42  |  |              UErrorCode *pErrorCode);  | 
43  |  |  | 
44  |  | /**  | 
45  |  |  * Internal API, used for detecting length of  | 
46  |  |  * shared prefix case-insensitively.  | 
47  |  |  * @param s1            input string 1  | 
48  |  |  * @param length1       length of string 1, or -1 (NULL terminated)  | 
49  |  |  * @param s2            input string 2  | 
50  |  |  * @param length2       length of string 2, or -1 (NULL terminated)  | 
51  |  |  * @param options       compare options  | 
52  |  |  * @param matchLen1     (output) length of partial prefix match in s1  | 
53  |  |  * @param matchLen2     (output) length of partial prefix match in s2  | 
54  |  |  * @param pErrorCode    receives error status  | 
55  |  |  */  | 
56  |  | U_CAPI void  | 
57  |  | u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,  | 
58  |  |                              const UChar *s2, int32_t length2,  | 
59  |  |                              uint32_t options,  | 
60  |  |                              int32_t *matchLen1, int32_t *matchLen2,  | 
61  |  |                              UErrorCode *pErrorCode);  | 
62  |  |  | 
63  |  | #ifdef __cplusplus  | 
64  |  |  | 
65  |  | U_NAMESPACE_BEGIN  | 
66  |  |  | 
// Forward declarations only; each trailing comment names the public header
// that holds the full class definition.
class BreakIterator;        // unicode/brkiter.h
class ByteSink;             // unicode/bytestream.h
class Locale;               // unicode/locid.h
70  |  |  | 
71  |  | /** Returns true if the options are valid. Otherwise false, and sets an error. */  | 
72  | 0  | inline UBool ustrcase_checkTitleAdjustmentOptions(uint32_t options, UErrorCode &errorCode) { | 
73  | 0  |     if (U_FAILURE(errorCode)) { return false; } | 
74  | 0  |     if ((options & U_TITLECASE_ADJUSTMENT_MASK) == U_TITLECASE_ADJUSTMENT_MASK) { | 
75  |  |         // Both options together.  | 
76  | 0  |         errorCode = U_ILLEGAL_ARGUMENT_ERROR;  | 
77  | 0  |         return false;  | 
78  | 0  |     }  | 
79  | 0  |     return true;  | 
80  | 0  | }  | 
81  |  |  | 
82  | 0  | inline UBool ustrcase_isLNS(UChar32 c) { | 
83  |  |     // Letter, number, symbol,  | 
84  |  |     // or a private use code point because those are typically used as letters or numbers.  | 
85  |  |     // Consider modifier letters only if they are cased.  | 
86  | 0  |     const uint32_t LNS = (U_GC_L_MASK|U_GC_N_MASK|U_GC_S_MASK|U_GC_CO_MASK) & ~U_GC_LM_MASK;  | 
87  | 0  |     int gc = u_charType(c);  | 
88  | 0  |     return (U_MASK(gc) & LNS) != 0 || (gc == U_MODIFIER_LETTER && ucase_getType(c) != UCASE_NONE);  | 
89  | 0  | }  | 
90  |  |  | 
91  |  | #if !UCONFIG_NO_BREAK_ITERATION  | 
92  |  |  | 
93  |  | /** Returns nullptr if error. Pass in either locale or locID, not both. */  | 
94  |  | U_CFUNC  | 
95  |  | BreakIterator *ustrcase_getTitleBreakIterator(  | 
96  |  |         const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter,  | 
97  |  |         LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode);  | 
98  |  |  | 
99  |  | #endif  | 
100  |  |  | 
101  |  | U_NAMESPACE_END  | 
102  |  |  | 
103  |  | #include "unicode/unistr.h"  // for UStringCaseMapper  | 
104  |  |  | 
105  |  | /*  | 
106  |  |  * Internal string casing functions implementing  | 
107  |  |  * ustring.h/ustrcase.cpp and UnicodeString case mapping functions.  | 
108  |  |  */  | 
109  |  |  | 
110  |  | struct UCaseMap : public icu::UMemory { | 
111  |  |     /** Implements most of ucasemap_open(). */  | 
112  |  |     UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);  | 
113  |  |     ~UCaseMap();  | 
114  |  |  | 
115  |  | #if !UCONFIG_NO_BREAK_ITERATION  | 
116  |  |     icu::BreakIterator *iter;  /* We adopt the iterator, so we own it. */  | 
117  |  | #endif  | 
118  |  |     char locale[32];  | 
119  |  |     int32_t caseLocale;  | 
120  |  |     uint32_t options;  | 
121  |  | };  | 
122  |  |  | 
/*
 * Helper macros for the BreakIterator parameter/argument that exists only
 * when break iteration is compiled in.
 *
 * With UCONFIG_NO_BREAK_ITERATION set, all four expand to nothing.
 * Otherwise each one expands to its parameter or argument INCLUDING the
 * trailing comma, so use sites must not write a comma after the macro:
 *   UCASEMAP_BREAK_ITERATOR_PARAM   named parameter declaration
 *   UCASEMAP_BREAK_ITERATOR_UNUSED  unnamed parameter declaration
 *   UCASEMAP_BREAK_ITERATOR         forwards the parameter named iter
 *   UCASEMAP_BREAK_ITERATOR_NULL    passes a null iterator
 */
#if UCONFIG_NO_BREAK_ITERATION
#   define UCASEMAP_BREAK_ITERATOR_PARAM
#   define UCASEMAP_BREAK_ITERATOR_UNUSED
#   define UCASEMAP_BREAK_ITERATOR
#   define UCASEMAP_BREAK_ITERATOR_NULL
#else
#   define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
#   define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
#   define UCASEMAP_BREAK_ITERATOR iter,
    // This section is C++-only, so use the typed null pointer constant
    // rather than NULL (which is an integer constant in C++).
#   define UCASEMAP_BREAK_ITERATOR_NULL nullptr,
#endif
134  |  |  | 
135  |  | U_CFUNC int32_t  | 
136  |  | ustrcase_getCaseLocale(const char *locale);  | 
137  |  |  | 
138  |  | // TODO: swap src / dest if approved for new public api  | 
139  |  | /** Implements UStringCaseMapper. */  | 
140  |  | U_CFUNC int32_t U_CALLCONV  | 
141  |  | ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM  | 
142  |  |                          UChar *dest, int32_t destCapacity,  | 
143  |  |                          const UChar *src, int32_t srcLength,  | 
144  |  |                          icu::Edits *edits,  | 
145  |  |                          UErrorCode &errorCode);  | 
146  |  |  | 
147  |  | /** Implements UStringCaseMapper. */  | 
148  |  | U_CFUNC int32_t U_CALLCONV  | 
149  |  | ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM  | 
150  |  |                          UChar *dest, int32_t destCapacity,  | 
151  |  |                          const UChar *src, int32_t srcLength,  | 
152  |  |                          icu::Edits *edits,  | 
153  |  |                          UErrorCode &errorCode);  | 
154  |  |  | 
155  |  | #if !UCONFIG_NO_BREAK_ITERATION  | 
156  |  |  | 
157  |  | /** Implements UStringCaseMapper. */  | 
158  |  | U_CFUNC int32_t U_CALLCONV  | 
159  |  | ustrcase_internalToTitle(int32_t caseLocale, uint32_t options,  | 
160  |  |                          icu::BreakIterator *iter,  | 
161  |  |                          UChar *dest, int32_t destCapacity,  | 
162  |  |                          const UChar *src, int32_t srcLength,  | 
163  |  |                          icu::Edits *edits,  | 
164  |  |                          UErrorCode &errorCode);  | 
165  |  |  | 
166  |  | #endif  | 
167  |  |  | 
168  |  | /** Implements UStringCaseMapper. */  | 
169  |  | U_CFUNC int32_t U_CALLCONV  | 
170  |  | ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM  | 
171  |  |                       UChar *dest, int32_t destCapacity,  | 
172  |  |                       const UChar *src, int32_t srcLength,  | 
173  |  |                       icu::Edits *edits,  | 
174  |  |                       UErrorCode &errorCode);  | 
175  |  |  | 
176  |  | /**  | 
177  |  |  * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().  | 
178  |  |  * Implements argument checking.  | 
179  |  |  */  | 
180  |  | U_CFUNC int32_t  | 
181  |  | ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM  | 
182  |  |              UChar *dest, int32_t destCapacity,  | 
183  |  |              const UChar *src, int32_t srcLength,  | 
184  |  |              UStringCaseMapper *stringCaseMapper,  | 
185  |  |              icu::Edits *edits,  | 
186  |  |              UErrorCode &errorCode);  | 
187  |  |  | 
188  |  | /**  | 
189  |  |  * Common string case mapping implementation for old-fashioned u_strToXyz() functions  | 
190  |  |  * that allow the source string to overlap the destination buffer.  | 
191  |  |  * Implements argument checking and internally works with an intermediate buffer if necessary.  | 
192  |  |  */  | 
193  |  | U_CFUNC int32_t  | 
194  |  | ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM  | 
195  |  |                         UChar *dest, int32_t destCapacity,  | 
196  |  |                         const UChar *src, int32_t srcLength,  | 
197  |  |                         UStringCaseMapper *stringCaseMapper,  | 
198  |  |                         UErrorCode &errorCode);  | 
199  |  |  | 
200  |  | /**  | 
201  |  |  * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().  | 
202  |  |  * UTF-8 version of UStringCaseMapper.  | 
203  |  |  * All error checking must be done.  | 
204  |  |  * The UCaseMap must be fully initialized, with locale and/or iter set as needed.  | 
205  |  |  */  | 
206  |  | typedef void U_CALLCONV  | 
207  |  | UTF8CaseMapper(int32_t caseLocale, uint32_t options,  | 
208  |  | #if !UCONFIG_NO_BREAK_ITERATION  | 
209  |  |                icu::BreakIterator *iter,  | 
210  |  | #endif  | 
211  |  |                const uint8_t *src, int32_t srcLength,  | 
212  |  |                icu::ByteSink &sink, icu::Edits *edits,  | 
213  |  |                UErrorCode &errorCode);  | 
214  |  |  | 
215  |  | #if !UCONFIG_NO_BREAK_ITERATION  | 
216  |  |  | 
217  |  | /** Implements UTF8CaseMapper. */  | 
218  |  | U_CFUNC void U_CALLCONV  | 
219  |  | ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,  | 
220  |  |         icu::BreakIterator *iter,  | 
221  |  |         const uint8_t *src, int32_t srcLength,  | 
222  |  |         icu::ByteSink &sink, icu::Edits *edits,  | 
223  |  |         UErrorCode &errorCode);  | 
224  |  |  | 
225  |  | #endif  | 
226  |  |  | 
227  |  | void  | 
228  |  | ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM  | 
229  |  |                  const char *src, int32_t srcLength,  | 
230  |  |                  UTF8CaseMapper *stringCaseMapper,  | 
231  |  |                  icu::ByteSink &sink, icu::Edits *edits,  | 
232  |  |                  UErrorCode &errorCode);  | 
233  |  |  | 
234  |  | /**  | 
235  |  |  * Implements argument checking and buffer handling  | 
236  |  |  * for UTF-8 string case mapping as a common function.  | 
237  |  |  */  | 
238  |  | int32_t  | 
239  |  | ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM  | 
240  |  |                  char *dest, int32_t destCapacity,  | 
241  |  |                  const char *src, int32_t srcLength,  | 
242  |  |                  UTF8CaseMapper *stringCaseMapper,  | 
243  |  |                  icu::Edits *edits,  | 
244  |  |                  UErrorCode &errorCode);  | 
245  |  |  | 
246  |  | U_NAMESPACE_BEGIN  | 
247  |  | namespace GreekUpper { | 
248  |  |  | 
249  |  | // Data bits.  | 
250  |  | static const uint32_t UPPER_MASK = 0x3ff;  | 
251  |  | static const uint32_t HAS_VOWEL = 0x1000;  | 
252  |  | static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;  | 
253  |  | static const uint32_t HAS_ACCENT = 0x4000;  | 
254  |  | static const uint32_t HAS_DIALYTIKA = 0x8000;  | 
255  |  | // Further bits during data building and processing, not stored in the data map.  | 
256  |  | static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;  | 
257  |  | static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;  | 
258  |  |  | 
259  |  | static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;  | 
260  |  | static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =  | 
261  |  |         HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;  | 
262  |  | static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;  | 
263  |  |  | 
264  |  | // State bits.  | 
265  |  | static const uint32_t AFTER_CASED = 1;  | 
266  |  | static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;  | 
267  |  |  | 
268  |  | uint32_t getLetterData(UChar32 c);  | 
269  |  |  | 
270  |  | /**  | 
271  |  |  * Returns a non-zero value for each of the Greek combining diacritics  | 
272  |  |  * listed in The Unicode Standard, version 8, chapter 7.2 Greek,  | 
273  |  |  * plus some perispomeni look-alikes.  | 
274  |  |  */  | 
275  |  | uint32_t getDiacriticData(UChar32 c);  | 
276  |  |  | 
277  |  | }  // namespace GreekUpper  | 
278  |  | U_NAMESPACE_END  | 
279  |  |  | 
280  |  | #endif  // __cplusplus  | 
281  |  |  | 
282  |  | #endif  // __UCASEMAP_IMP_H__  |