Coverage Report

Created: 2021-08-22 09:07

/src/skia/third_party/externals/icu/source/common/characterproperties.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2018 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// characterproperties.cpp
5
// created: 2018sep03 Markus W. Scherer
6
7
#include "unicode/utypes.h"
8
#include "unicode/localpointer.h"
9
#include "unicode/uchar.h"
10
#include "unicode/ucpmap.h"
11
#include "unicode/ucptrie.h"
12
#include "unicode/umutablecptrie.h"
13
#include "unicode/uniset.h"
14
#include "unicode/uscript.h"
15
#include "unicode/uset.h"
16
#include "cmemory.h"
17
#include "mutex.h"
18
#include "normalizer2impl.h"
19
#include "uassert.h"
20
#include "ubidi_props.h"
21
#include "ucase.h"
22
#include "ucln_cmn.h"
23
#include "umutex.h"
24
#include "uprops.h"
25
26
using icu::LocalPointer;
27
#if !UCONFIG_NO_NORMALIZATION
28
using icu::Normalizer2Factory;
29
using icu::Normalizer2Impl;
30
#endif
31
using icu::UInitOnce;
32
using icu::UnicodeSet;
33
34
namespace {
35
36
UBool U_CALLCONV characterproperties_cleanup();
37
38
constexpr int32_t NUM_INCLUSIONS = UPROPS_SRC_COUNT + UCHAR_INT_LIMIT - UCHAR_INT_START;
39
40
struct Inclusion {
41
    UnicodeSet  *fSet = nullptr;
42
    UInitOnce    fInitOnce = U_INITONCE_INITIALIZER;
43
};
44
Inclusion gInclusions[NUM_INCLUSIONS]; // cached getInclusions()
45
46
UnicodeSet *sets[UCHAR_BINARY_LIMIT] = {};
47
48
UCPMap *maps[UCHAR_INT_LIMIT - UCHAR_INT_START] = {};
49
50
icu::UMutex cpMutex;
51
52
//----------------------------------------------------------------
53
// Inclusions list
54
//----------------------------------------------------------------
55
56
// USetAdder implementation
57
// Does not use uset.h to reduce code dependencies
58
void U_CALLCONV
59
24.9k
_set_add(USet *set, UChar32 c) {
60
24.9k
    ((UnicodeSet *)set)->add(c);
61
24.9k
}
62
63
void U_CALLCONV
64
0
_set_addRange(USet *set, UChar32 start, UChar32 end) {
65
0
    ((UnicodeSet *)set)->add(start, end);
66
0
}
67
68
void U_CALLCONV
69
0
_set_addString(USet *set, const UChar *str, int32_t length) {
70
0
    ((UnicodeSet *)set)->add(icu::UnicodeString((UBool)(length<0), str, length));
71
0
}
72
73
0
// Library cleanup hook: releases every cached inclusion set, binary-property
// set and integer-property map, and resets the inclusion init-once guards.
// Always returns TRUE.
UBool U_CALLCONV characterproperties_cleanup() {
    for (Inclusion &entry : gInclusions) {
        delete entry.fSet;
        entry.fSet = nullptr;
        entry.fInitOnce.reset();
    }
    for (UnicodeSet *&cachedSet : sets) {
        delete cachedSet;
        cachedSet = nullptr;
    }
    for (UCPMap *&cachedMap : maps) {
        // The maps are closed through the UCPTrie API.
        ucptrie_close(reinterpret_cast<UCPTrie *>(cachedMap));
        cachedMap = nullptr;
    }
    return TRUE;
}
89
90
4
// Builds the inclusion set for one property source and stores it in
// gInclusions[src].
//
// src        the property source; UPROPS_SRC_NONE and unknown sources are
//            reported as U_INTERNAL_PROGRAM_ERROR.
// errorCode  receives U_MEMORY_ALLOCATION_ERROR if the set cannot be
//            allocated or ends up bogus, or whatever error the data
//            providers report.
//
// On any failure gInclusions[src].fSet is left untouched.
void U_CALLCONV initInclusion(UPropertySource src, UErrorCode &errorCode) {
    // This function is invoked only via umtx_initOnce().
    U_ASSERT(0 <= src && src < UPROPS_SRC_COUNT);
    if (src == UPROPS_SRC_NONE) {
        errorCode = U_INTERNAL_PROGRAM_ERROR;
        return;
    }
    U_ASSERT(gInclusions[src].fSet == nullptr);

    LocalPointer<UnicodeSet> starts(new UnicodeSet());
    if (starts.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // Callback table handed to the data providers; it funnels everything
    // they report into `starts`.
    USetAdder adder = {
        reinterpret_cast<USet *>(starts.getAlias()),
        _set_add,
        _set_addRange,
        _set_addString,
        nullptr, // don't need remove()
        nullptr // don't need removeRange()
    };

    switch (src) {
    case UPROPS_SRC_CHAR:
        uchar_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_PROPSVEC:
        upropsvec_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_CHAR_AND_PROPSVEC:
        uchar_addPropertyStarts(&adder, &errorCode);
        upropsvec_addPropertyStarts(&adder, &errorCode);
        break;
#if !UCONFIG_NO_NORMALIZATION
    case UPROPS_SRC_CASE_AND_NORM: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_SUCCESS(errorCode)) {
            nfc->addPropertyStarts(&adder, errorCode);
        }
        // Called regardless of the outcome above.
        ucase_addPropertyStarts(&adder, &errorCode);
        break;
    }
    case UPROPS_SRC_NFC: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_FAILURE(errorCode)) { break; }
        nfc->addPropertyStarts(&adder, errorCode);
        break;
    }
    case UPROPS_SRC_NFKC: {
        const Normalizer2Impl *nfkc = Normalizer2Factory::getNFKCImpl(errorCode);
        if (U_FAILURE(errorCode)) { break; }
        nfkc->addPropertyStarts(&adder, errorCode);
        break;
    }
    case UPROPS_SRC_NFKC_CF: {
        const Normalizer2Impl *nfkcCf = Normalizer2Factory::getNFKC_CFImpl(errorCode);
        if (U_FAILURE(errorCode)) { break; }
        nfkcCf->addPropertyStarts(&adder, errorCode);
        break;
    }
    case UPROPS_SRC_NFC_CANON_ITER: {
        const Normalizer2Impl *nfc = Normalizer2Factory::getNFCImpl(errorCode);
        if (U_FAILURE(errorCode)) { break; }
        nfc->addCanonIterPropertyStarts(&adder, errorCode);
        break;
    }
#endif
    case UPROPS_SRC_CASE:
        ucase_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_BIDI:
        ubidi_addPropertyStarts(&adder, &errorCode);
        break;
    case UPROPS_SRC_INPC:
    case UPROPS_SRC_INSC:
    case UPROPS_SRC_VO:
        uprops_addPropertyStarts(src, &adder, &errorCode);
        break;
    default:
        errorCode = U_INTERNAL_PROGRAM_ERROR;
        break;
    }

    if (U_FAILURE(errorCode)) {
        return;
    }
    if (starts->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // Compact for caching.
    starts->compact();
    gInclusions[src].fSet = starts.orphan();
    ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
}
190
191
12
// Returns the cached inclusion set for a property source, building it on
// first use through umtx_initOnce().
//
// Returns nullptr if errorCode already indicates failure on entry, or if src
// is outside [0, UPROPS_SRC_COUNT) (errorCode = U_ILLEGAL_ARGUMENT_ERROR).
const UnicodeSet *getInclusionsForSource(UPropertySource src, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    const bool inRange = 0 <= src && src < UPROPS_SRC_COUNT;
    if (!inRange) {
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Inclusion &entry = gInclusions[src];
    umtx_initOnce(entry.fInitOnce, &initInclusion, src, errorCode);
    return entry.fSet;
}
201
202
4
// Builds the inclusion set for one integer property and stores it in
// gInclusions[UPROPS_SRC_COUNT + prop - UCHAR_INT_START].
//
// The result starts out containing code point 0. It then receives every code
// point from the property source's inclusion set at which the property value
// differs from the value seen at the previously visited code point.
//
// errorCode receives U_MEMORY_ALLOCATION_ERROR if the set cannot be
// allocated or ends up bogus, or the error from fetching the source set.
void U_CALLCONV initIntPropInclusion(UProperty prop, UErrorCode &errorCode) {
    // This function is invoked only via umtx_initOnce().
    U_ASSERT(UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT);
    const int32_t slot = UPROPS_SRC_COUNT + prop - UCHAR_INT_START;
    U_ASSERT(gInclusions[slot].fSet == nullptr);
    const UnicodeSet *sourceStarts =
        getInclusionsForSource(uprops_getSource(prop), errorCode);
    if (U_FAILURE(errorCode)) {
        return;
    }

    LocalPointer<UnicodeSet> valueStarts(new UnicodeSet(0, 0));
    if (valueStarts.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    const int32_t rangeCount = sourceStarts->getRangeCount();
    int32_t lastValue = 0;
    for (int32_t r = 0; r < rangeCount; ++r) {
        const UChar32 first = sourceStarts->getRangeStart(r);
        const UChar32 last = sourceStarts->getRangeEnd(r);
        for (UChar32 c = first; c <= last; ++c) {
            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
            const int32_t value = u_getIntPropertyValue(c, prop);
            if (value == lastValue) {
                continue;
            }
            valueStarts->add(c);
            lastValue = value;
        }
    }

    if (valueStarts->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    // Compact for caching.
    valueStarts->compact();
    gInclusions[slot].fSet = valueStarts.orphan();
    ucln_common_registerCleanup(UCLN_COMMON_CHARACTERPROPERTIES, characterproperties_cleanup);
}
241
242
}  // namespace
243
244
U_NAMESPACE_BEGIN
245
246
// Returns the cached inclusion set to iterate over when evaluating `prop`.
//
// Integer properties (UCHAR_INT_START <= prop < UCHAR_INT_LIMIT) use a
// per-property set built on first use by initIntPropInclusion(). Every other
// property uses the set of its property source.
// Returns nullptr if errorCode already indicates failure on entry.
const UnicodeSet *CharacterProperties::getInclusionsForProperty(
        UProperty prop, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    const bool isIntProperty = UCHAR_INT_START <= prop && prop < UCHAR_INT_LIMIT;
    if (!isIntProperty) {
        return getInclusionsForSource(uprops_getSource(prop), errorCode);
    }
    Inclusion &entry = gInclusions[UPROPS_SRC_COUNT + prop - UCHAR_INT_START];
    umtx_initOnce(entry.fInitOnce, &initIntPropInclusion, prop, errorCode);
    return entry.fSet;
}
259
260
U_NAMESPACE_END
261
262
namespace {
263
264
0
// Builds a frozen UnicodeSet containing the code points for which
// u_hasBinaryProperty(c, property) is true.
//
// Only the code points in the property's inclusion set are tested. A run
// opens at the first tested code point that has the property and closes just
// before the next tested code point that lacks it. A run still open after the
// last tested code point extends to U+10FFFF.
//
// property   the binary property to evaluate.
// errorCode  in/out ICU error code. Receives U_MEMORY_ALLOCATION_ERROR if
//            the set cannot be allocated or ends up bogus, or the error from
//            fetching the inclusion set.
// Returns a new set owned by the caller, or nullptr on failure.
UnicodeSet *makeSet(UProperty property, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    LocalPointer<UnicodeSet> set(new UnicodeSet());
    if (set.isNull()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    const UnicodeSet *inclusions =
        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
    if (U_FAILURE(errorCode)) { return nullptr; }
    int32_t numRanges = inclusions->getRangeCount();
    // Start of the currently open run, or -1 when no run is open.
    UChar32 startHasProperty = -1;

    for (int32_t i = 0; i < numRanges; ++i) {
        UChar32 rangeEnd = inclusions->getRangeEnd(i);
        for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) {
            // TODO: Get a UCharacterProperty.BinaryProperty to avoid the property dispatch.
            if (u_hasBinaryProperty(c, property)) {
                if (startHasProperty < 0) {
                    // Transition from false to true.
                    startHasProperty = c;
                }
            } else if (startHasProperty >= 0) {
                // Transition from true to false.
                set->add(startHasProperty, c - 1);
                startHasProperty = -1;
            }
        }
    }
    if (startHasProperty >= 0) {
        set->add(startHasProperty, 0x10FFFF);
    }
    // Same check as the inclusion builders in this file: do not hand out
    // (and let the caller cache) a set that went bogus while being filled.
    if (set->isBogus()) {
        errorCode = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    set->freeze();
    return set.orphan();
}
299
300
0
// Builds an immutable code point map of the values of an integer property.
//
// The mutable trie is opened with nullValue as both of its value arguments:
// USCRIPT_UNKNOWN for UCHAR_SCRIPT and 0 for every other property. Only the
// code points in the property's inclusion set are evaluated. Each run of
// equal values that differs from nullValue is written as one range.
//
// The trie type is FAST for UCHAR_BIDI_CLASS and UCHAR_GENERAL_CATEGORY and
// SMALL otherwise. The value width is the narrowest of 8/16/32 bits that
// holds u_getIntPropertyMaxValue(property).
//
// property   the integer property to map.
// errorCode  in/out ICU error code.
// Returns the result of umutablecptrie_buildImmutable() cast to UCPMap *, or
// nullptr if errorCode indicates failure before the build step.
UCPMap *makeMap(UProperty property, UErrorCode &errorCode) {
    if (U_FAILURE(errorCode)) { return nullptr; }
    uint32_t nullValue = property == UCHAR_SCRIPT ? USCRIPT_UNKNOWN : 0;
    icu::LocalUMutableCPTriePointer mutableTrie(
        umutablecptrie_open(nullValue, nullValue, &errorCode));
    const UnicodeSet *inclusions =
        icu::CharacterProperties::getInclusionsForProperty(property, errorCode);
    if (U_FAILURE(errorCode)) { return nullptr; }
    int32_t numRanges = inclusions->getRangeCount();
    UChar32 start = 0;
    uint32_t value = nullValue;

    for (int32_t i = 0; i < numRanges; ++i) {
        UChar32 rangeEnd = inclusions->getRangeEnd(i);
        for (UChar32 c = inclusions->getRangeStart(i); c <= rangeEnd; ++c) {
            // TODO: Get a UCharacterProperty.IntProperty to avoid the property dispatch.
            uint32_t nextValue = u_getIntPropertyValue(c, property);
            if (value != nextValue) {
                if (value != nullValue) {
                    umutablecptrie_setRange(mutableTrie.getAlias(), start, c - 1, value, &errorCode);
                }
                start = c;
                value = nextValue;
            }
        }
    }
    // Flush the final run. Compare against nullValue, as the loop does, not
    // against 0: when nullValue is non-zero, a trailing run whose value is 0
    // must still be written or it would read back as nullValue.
    if (value != nullValue) {
        umutablecptrie_setRange(mutableTrie.getAlias(), start, 0x10FFFF, value, &errorCode);
    }

    UCPTrieType type;
    if (property == UCHAR_BIDI_CLASS || property == UCHAR_GENERAL_CATEGORY) {
        type = UCPTRIE_TYPE_FAST;
    } else {
        type = UCPTRIE_TYPE_SMALL;
    }
    UCPTrieValueWidth valueWidth;
    // TODO: UCharacterProperty.IntProperty
    int32_t max = u_getIntPropertyMaxValue(property);
    if (max <= 0xff) {
        valueWidth = UCPTRIE_VALUE_BITS_8;
    } else if (max <= 0xffff) {
        valueWidth = UCPTRIE_VALUE_BITS_16;
    } else {
        valueWidth = UCPTRIE_VALUE_BITS_32;
    }
    return reinterpret_cast<UCPMap *>(
        umutablecptrie_buildImmutable(mutableTrie.getAlias(), type, valueWidth, &errorCode));
}
349
350
}  // namespace
351
352
U_NAMESPACE_USE
353
354
// Returns the cached set for a binary property, building it with makeSet()
// on first request. The cache is read and filled while holding cpMutex.
//
// Returns nullptr if *pErrorCode already indicates failure on entry, if
// property is outside [0, UCHAR_BINARY_LIMIT) (U_ILLEGAL_ARGUMENT_ERROR), or
// if building the set fails.
U_CAPI const USet * U_EXPORT2
u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) { return nullptr; }
    const bool isBinaryProperty = 0 <= property && property < UCHAR_BINARY_LIMIT;
    if (!isBinaryProperty) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex lock(&cpMutex);
    UnicodeSet *&slot = sets[property];
    if (slot == nullptr) {
        slot = makeSet(property, *pErrorCode);
    }
    if (U_FAILURE(*pErrorCode)) { return nullptr; }
    return slot->toUSet();
}
369
370
// Returns the cached code point map for an integer property, building it
// with makeMap() on first request. The cache is read and filled while
// holding cpMutex.
//
// Returns nullptr if *pErrorCode already indicates failure on entry or if
// property is outside [UCHAR_INT_START, UCHAR_INT_LIMIT)
// (U_ILLEGAL_ARGUMENT_ERROR). Otherwise returns whatever the cache slot holds
// after the build attempt.
U_CAPI const UCPMap * U_EXPORT2
u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode) {
    if (U_FAILURE(*pErrorCode)) { return nullptr; }
    const bool isIntProperty = UCHAR_INT_START <= property && property < UCHAR_INT_LIMIT;
    if (!isIntProperty) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }
    Mutex lock(&cpMutex);
    UCPMap *&slot = maps[property - UCHAR_INT_START];
    if (slot == nullptr) {
        slot = makeMap(property, *pErrorCode);
    }
    return slot;
}