Coverage Report

Created: 2021-08-22 09:07

/src/skia/third_party/externals/icu/source/common/ucasemap_imp.h
Line
Count
Source (jump to first uncovered line)
1
// © 2017 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// ucasemap_imp.h
5
// created: 2017feb08 Markus W. Scherer
6
7
#ifndef __UCASEMAP_IMP_H__
8
#define __UCASEMAP_IMP_H__
9
10
#include "unicode/utypes.h"
11
#include "unicode/ucasemap.h"
12
#include "unicode/uchar.h"
13
#include "ucase.h"
14
15
/**
16
 * Bit mask for the titlecasing iterator options bit field.
17
 * Currently only 3 out of 8 values are used:
18
 * 0 (words), U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
19
 * See stringoptions.h.
20
 * @internal
21
 */
22
#define U_TITLECASE_ITERATOR_MASK 0xe0
23
24
/**
25
 * Bit mask for the titlecasing index adjustment options bit set.
26
 * Currently two bits are defined:
27
 * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED.
28
 * See stringoptions.h.
29
 * @internal
30
 */
31
0
#define U_TITLECASE_ADJUSTMENT_MASK 0x600
32
33
/**
34
 * Internal API, used by u_strcasecmp() etc.
35
 * Compare strings case-insensitively,
36
 * in code point order or code unit order.
37
 */
38
U_CFUNC int32_t
39
u_strcmpFold(const UChar *s1, int32_t length1,
40
             const UChar *s2, int32_t length2,
41
             uint32_t options,
42
             UErrorCode *pErrorCode);
43
44
/**
45
 * Internal API, used for detecting length of
46
 * shared prefix case-insensitively.
47
 * @param s1            input string 1
48
 * @param length1       length of string 1, or -1 if s1 is NUL-terminated
49
 * @param s2            input string 2
50
 * @param length2       length of string 2, or -1 if s2 is NUL-terminated
51
 * @param options       compare options
52
 * @param matchLen1     (output) length of partial prefix match in s1
53
 * @param matchLen2     (output) length of partial prefix match in s2
54
 * @param pErrorCode    receives error status
55
 */
56
U_CAPI void
57
u_caseInsensitivePrefixMatch(const UChar *s1, int32_t length1,
58
                             const UChar *s2, int32_t length2,
59
                             uint32_t options,
60
                             int32_t *matchLen1, int32_t *matchLen2,
61
                             UErrorCode *pErrorCode);
62
63
#ifdef __cplusplus
64
65
U_NAMESPACE_BEGIN
66
67
class BreakIterator;        // unicode/brkiter.h
68
class ByteSink;
69
class Locale;               // unicode/locid.h
70
71
/** Validates the titlecasing index-adjustment bits in options. Returns false without touching errorCode if it already indicates a failure; returns false and sets errorCode to U_ILLEGAL_ARGUMENT_ERROR if every bit of U_TITLECASE_ADJUSTMENT_MASK is set; otherwise returns true. */
72
0
inline UBool ustrcase_checkTitleAdjustmentOptions(uint32_t options, UErrorCode &errorCode) {
73
0
    if (U_FAILURE(errorCode)) { return false; }
74
0
    if ((options & U_TITLECASE_ADJUSTMENT_MASK) == U_TITLECASE_ADJUSTMENT_MASK) {
75
        // All adjustment bits are set, i.e. both adjustment options were requested together; reject.
76
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
77
0
        return false;
78
0
    }
79
0
    return true;
80
0
}
81
82
0
inline UBool ustrcase_isLNS(UChar32 c) {
83
    // Returns true if the general category of c is a letter, number, or symbol,
84
    // or a private use code point because those are typically used as letters or numbers.
85
    // Modifier letters (Lm) are masked out of LNS below and count only if they are cased, i.e. ucase_getType(c) is not UCASE_NONE.
86
0
    const uint32_t LNS = (U_GC_L_MASK|U_GC_N_MASK|U_GC_S_MASK|U_GC_CO_MASK) & ~U_GC_LM_MASK;
87
0
    int gc = u_charType(c);
88
0
    return (U_MASK(gc) & LNS) != 0 || (gc == U_MODIFIER_LETTER && ucase_getType(c) != UCASE_NONE);
89
0
}
90
91
#if !UCONFIG_NO_BREAK_ITERATION
92
93
/** Returns nullptr if error. Pass in either locale or locID, not both. */
94
U_CFUNC
95
BreakIterator *ustrcase_getTitleBreakIterator(
96
        const Locale *locale, const char *locID, uint32_t options, BreakIterator *iter,
97
        LocalPointer<BreakIterator> &ownedIter, UErrorCode &errorCode);
98
99
#endif
100
101
U_NAMESPACE_END
102
103
#include "unicode/unistr.h"  // for UStringCaseMapper
104
105
/*
106
 * Internal string casing functions implementing
107
 * ustring.h/ustrcase.cpp and UnicodeString case mapping functions.
108
 */
109
110
struct UCaseMap : public icu::UMemory {
111
    /** Implements most of ucasemap_open(). Errors are reported through pErrorCode. */
112
    UCaseMap(const char *localeID, uint32_t opts, UErrorCode *pErrorCode);
113
    ~UCaseMap();
114
115
#if !UCONFIG_NO_BREAK_ITERATION
116
    icu::BreakIterator *iter;  /* We adopt the iterator, so we own it. Only present when break iteration is compiled in. */
117
#endif
118
    char locale[32];  // NOTE(review): presumably holds the locale ID given to the constructor; fixed 32-char buffer -- confirm truncation handling in the definition
119
    int32_t caseLocale;  // NOTE(review): presumably the value ustrcase_getCaseLocale() yields for locale -- confirm
120
    uint32_t options;  // NOTE(review): presumably the opts passed to the constructor -- confirm
121
};
122
123
#if UCONFIG_NO_BREAK_ITERATION
124
#   define UCASEMAP_BREAK_ITERATOR_PARAM
125
#   define UCASEMAP_BREAK_ITERATOR_UNUSED
126
#   define UCASEMAP_BREAK_ITERATOR
127
#   define UCASEMAP_BREAK_ITERATOR_NULL
128
#else
129
#   define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
130
#   define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
131
0
#   define UCASEMAP_BREAK_ITERATOR iter,
132
0
#   define UCASEMAP_BREAK_ITERATOR_NULL NULL,
133
#endif
134
135
U_CFUNC int32_t
136
ustrcase_getCaseLocale(const char *locale);
137
138
// TODO: swap src / dest if approved for new public api
139
/** Implements UStringCaseMapper. */
140
U_CFUNC int32_t U_CALLCONV
141
ustrcase_internalToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
142
                         UChar *dest, int32_t destCapacity,
143
                         const UChar *src, int32_t srcLength,
144
                         icu::Edits *edits,
145
                         UErrorCode &errorCode);
146
147
/** Implements UStringCaseMapper. */
148
U_CFUNC int32_t U_CALLCONV
149
ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
150
                         UChar *dest, int32_t destCapacity,
151
                         const UChar *src, int32_t srcLength,
152
                         icu::Edits *edits,
153
                         UErrorCode &errorCode);
154
155
#if !UCONFIG_NO_BREAK_ITERATION
156
157
/** Implements UStringCaseMapper. */
158
U_CFUNC int32_t U_CALLCONV
159
ustrcase_internalToTitle(int32_t caseLocale, uint32_t options,
160
                         icu::BreakIterator *iter,
161
                         UChar *dest, int32_t destCapacity,
162
                         const UChar *src, int32_t srcLength,
163
                         icu::Edits *edits,
164
                         UErrorCode &errorCode);
165
166
#endif
167
168
/** Implements UStringCaseMapper. */
169
U_CFUNC int32_t U_CALLCONV
170
ustrcase_internalFold(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
171
                      UChar *dest, int32_t destCapacity,
172
                      const UChar *src, int32_t srcLength,
173
                      icu::Edits *edits,
174
                      UErrorCode &errorCode);
175
176
/**
177
 * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
178
 * Implements argument checking.
179
 */
180
U_CFUNC int32_t
181
ustrcase_map(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
182
             UChar *dest, int32_t destCapacity,
183
             const UChar *src, int32_t srcLength,
184
             UStringCaseMapper *stringCaseMapper,
185
             icu::Edits *edits,
186
             UErrorCode &errorCode);
187
188
/**
189
 * Common string case mapping implementation for old-fashioned u_strToXyz() functions
190
 * that allow the source string to overlap the destination buffer.
191
 * Implements argument checking and internally works with an intermediate buffer if necessary.
192
 */
193
U_CFUNC int32_t
194
ustrcase_mapWithOverlap(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
195
                        UChar *dest, int32_t destCapacity,
196
                        const UChar *src, int32_t srcLength,
197
                        UStringCaseMapper *stringCaseMapper,
198
                        UErrorCode &errorCode);
199
200
/**
201
 * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
202
 * UTF-8 version of UStringCaseMapper.
203
 * All error checking must be done (presumably by the caller, before invoking the mapper -- TODO confirm).
204
 * The caseLocale and/or iter arguments must be set as needed; this signature takes them directly rather than a UCaseMap.
205
 */
206
typedef void U_CALLCONV
207
UTF8CaseMapper(int32_t caseLocale, uint32_t options,
208
#if !UCONFIG_NO_BREAK_ITERATION
209
               icu::BreakIterator *iter,
210
#endif
211
               const uint8_t *src, int32_t srcLength,
212
               icu::ByteSink &sink, icu::Edits *edits,
213
               UErrorCode &errorCode);
214
215
#if !UCONFIG_NO_BREAK_ITERATION
216
217
/** Implements UTF8CaseMapper. */
218
U_CFUNC void U_CALLCONV
219
ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
220
        icu::BreakIterator *iter,
221
        const uint8_t *src, int32_t srcLength,
222
        icu::ByteSink &sink, icu::Edits *edits,
223
        UErrorCode &errorCode);
224
225
#endif
226
227
void
228
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
229
                 const char *src, int32_t srcLength,
230
                 UTF8CaseMapper *stringCaseMapper,
231
                 icu::ByteSink &sink, icu::Edits *edits,
232
                 UErrorCode &errorCode);
233
234
/**
235
 * Implements argument checking and buffer handling
236
 * for UTF-8 string case mapping as a common function.
237
 */
238
int32_t
239
ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_PARAM
240
                 char *dest, int32_t destCapacity,
241
                 const char *src, int32_t srcLength,
242
                 UTF8CaseMapper *stringCaseMapper,
243
                 icu::Edits *edits,
244
                 UErrorCode &errorCode);
245
246
U_NAMESPACE_BEGIN
247
namespace GreekUpper {
248
249
// Data bits: UPPER_MASK covers the low 10 bits; the HAS_* values are single-bit flags from 0x1000 to 0x8000.
250
static const uint32_t UPPER_MASK = 0x3ff;  // NOTE(review): presumably selects an uppercase code point from a data word -- confirm
251
static const uint32_t HAS_VOWEL = 0x1000;
252
static const uint32_t HAS_YPOGEGRAMMENI = 0x2000;
253
static const uint32_t HAS_ACCENT = 0x4000;
254
static const uint32_t HAS_DIALYTIKA = 0x8000;
255
// Further single-bit flags (0x10000, 0x20000) used during data building and processing, not stored in the data map.
256
static const uint32_t HAS_COMBINING_DIALYTIKA = 0x10000;
257
static const uint32_t HAS_OTHER_GREEK_DIACRITIC = 0x20000;
258
259
static const uint32_t HAS_VOWEL_AND_ACCENT = HAS_VOWEL | HAS_ACCENT;  // combinations of the flags above
260
static const uint32_t HAS_VOWEL_AND_ACCENT_AND_DIALYTIKA =
261
        HAS_VOWEL_AND_ACCENT | HAS_DIALYTIKA;
262
static const uint32_t HAS_EITHER_DIALYTIKA = HAS_DIALYTIKA | HAS_COMBINING_DIALYTIKA;
263
264
// State bits (values 1 and 2). NOTE(review): these overlap UPPER_MASK numerically, so presumably they live in a separate state word -- confirm.
265
static const uint32_t AFTER_CASED = 1;
266
static const uint32_t AFTER_VOWEL_WITH_ACCENT = 2;
267
268
uint32_t getLetterData(UChar32 c);  // NOTE(review): presumably returns the data bits defined above for c -- confirm against the definition
269
270
/**
271
 * Returns a non-zero value for each of the Greek combining diacritics
272
 * listed in The Unicode Standard, version 8, chapter 7.2 Greek,
273
 * plus some perispomeni look-alikes.
274
 */
275
uint32_t getDiacriticData(UChar32 c);
276
277
}  // namespace GreekUpper
278
U_NAMESPACE_END
279
280
#endif  // __cplusplus
281
282
#endif  // __UCASEMAP_IMP_H__