Coverage Report

Created: 2025-07-11 06:23

/src/icu/source/common/loadednormalizer2impl.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2014, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* loadednormalizer2impl.cpp
9
*
10
* created on: 2014sep03
11
* created by: Markus W. Scherer
12
*/
13
14
#include "unicode/utypes.h"
15
16
#if !UCONFIG_NO_NORMALIZATION
17
18
#include "unicode/udata.h"
19
#include "unicode/localpointer.h"
20
#include "unicode/normalizer2.h"
21
#include "unicode/unistr.h"
22
#include "unicode/unorm.h"
23
#include "cstring.h"
24
#include "mutex.h"
25
#include "norm2allmodes.h"
26
#include "normalizer2impl.h"
27
#include "uassert.h"
28
#include "ucln_cmn.h"
29
#include "uhash.h"
30
31
U_NAMESPACE_BEGIN
32
33
// Normalizer2Impl subclass whose data comes from a loaded data item.
// load() assigns `memory` (from udata_openChoice) and `ownedTrie`
// (from utrie2_openFromSerialized); the destructor closes both.
class LoadedNormalizer2Impl : public Normalizer2Impl {
34
public:
35
2
    // Both handles start out NULL; they are only assigned inside load().
    LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
36
    virtual ~LoadedNormalizer2Impl();
37
38
    // Opens the "nrm" data item `name` and passes its parts to init().
    void load(const char *packageName, const char *name, UErrorCode &errorCode);
39
40
private:
41
    // Acceptance callback handed to udata_openChoice() by load().
    static UBool U_CALLCONV
42
    isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
43
44
    UDataMemory *memory;  // data item handle; released with udata_close()
45
    UTrie2 *ownedTrie;  // trie opened from the data item; released with utrie2_close()
46
};
47
48
0
// Releases the two resources that load() may have acquired.
// Both members are NULL if load() was never called (see the constructor);
// presumably udata_close()/utrie2_close() accept NULL -- confirm in their docs.
LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
49
0
    udata_close(memory);
50
0
    utrie2_close(ownedTrie);
51
0
}
52
53
// Acceptance callback passed to udata_openChoice() by load().
// Returns TRUE only if the UDataInfo header has size>=20, the platform's
// endianness and charset family, dataFormat bytes 0x4e 0x72 0x6d 0x32 ("Nrm2")
// and formatVersion[0]==2; returns FALSE otherwise.
// The context, type and name arguments are not used.
UBool U_CALLCONV
54
LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
55
                                    const char * /* type */, const char * /*name*/,
56
2
                                    const UDataInfo *pInfo) {
57
2
    if(
58
2
        pInfo->size>=20 &&
59
2
        pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
60
2
        pInfo->charsetFamily==U_CHARSET_FAMILY &&
61
2
        pInfo->dataFormat[0]==0x4e &&    /* dataFormat="Nrm2" */
62
2
        pInfo->dataFormat[1]==0x72 &&
63
2
        pInfo->dataFormat[2]==0x6d &&
64
2
        pInfo->dataFormat[3]==0x32 &&
65
2
        pInfo->formatVersion[0]==2
66
2
    ) {
67
        // Normalizer2Impl *me=(Normalizer2Impl *)context;
68
        // uprv_memcpy(me->dataVersion, pInfo->dataVersion, 4);
69
2
        return TRUE;
70
2
    } else {
71
0
        return FALSE;
72
0
    }
73
2
}
74
75
// Opens the "nrm" data item `name` from `packageName` with udata_openChoice()
// (filtered by isAcceptable), then passes the index array, the normalization
// trie, the extra-data array and the smallFCD bytes to init().
//
// errorCode: returns immediately if it already indicates failure; receives
//   any failure from udata_openChoice()/utrie2_openFromSerialized(), or
//   U_INVALID_FORMAT_ERROR when the index array is too short.
// Side effects: assigns `memory` and `ownedTrie`, which the destructor closes.
void
76
2
LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
77
2
    if(U_FAILURE(errorCode)) {
78
0
        return;
79
0
    }
80
2
    memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
81
2
    if(U_FAILURE(errorCode)) {
82
0
        return;
83
0
    }
84
2
    const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
85
2
    // The data starts with an array of int32_t indexes.
    const int32_t *inIndexes=(const int32_t *)inBytes;
86
2
    // The trie's byte offset divided by 4 is used as the number of indexes;
    // presumably the trie immediately follows the index array -- confirm
    // against the data format description.
    int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
87
2
    if(indexesLength<=IX_MIN_MAYBE_YES) {
88
0
        errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
89
0
        return;
90
0
    }
91
92
2
    // The serialized trie occupies bytes [offset, nextOffset).
    int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
93
2
    int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
94
2
    ownedTrie=utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS,
95
2
                                        inBytes+offset, nextOffset-offset, NULL,
96
2
                                        &errorCode);
97
2
    if(U_FAILURE(errorCode)) {
98
0
        return;
99
0
    }
100
101
2
    // The extra data begins at the offset where the trie ends.
    offset=nextOffset;
102
2
    nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
103
2
    const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
104
105
    // smallFCD: new in formatVersion 2
106
2
    offset=nextOffset;
107
2
    const uint8_t *inSmallFCD=inBytes+offset;
108
109
2
    init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
110
2
}
111
112
// instance cache ---------------------------------------------------------- ***
113
114
// Allocates a LoadedNormalizer2Impl, loads the data item `name` from
// `packageName` into it, and forwards it to the
// createInstance(impl, errorCode) overload (declared outside this file).
// Returns NULL if errorCode already indicates failure on entry, and sets
// U_MEMORY_ALLOCATION_ERROR when the allocation yields NULL.
// NOTE(review): when load() fails, `impl` is still passed on together with the
// failing errorCode; presumably that overload deletes it -- confirm there.
Norm2AllModes *
115
Norm2AllModes::createInstance(const char *packageName,
116
                              const char *name,
117
2
                              UErrorCode &errorCode) {
118
2
    if(U_FAILURE(errorCode)) {
119
0
        return NULL;
120
0
    }
121
2
    LoadedNormalizer2Impl *impl=new LoadedNormalizer2Impl;
122
2
    if(impl==NULL) {
123
0
        errorCode=U_MEMORY_ALLOCATION_ERROR;
124
0
        return NULL;
125
0
    }
126
2
    impl->load(packageName, name, errorCode);
127
2
    return createInstance(impl, errorCode);
128
2
}
129
130
// Forward declaration of the cleanup function defined further down; it is
// registered with ucln_common_registerCleanup() before its definition.
U_CDECL_BEGIN
131
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
132
U_CDECL_END
133
134
// Singletons created by initSingletons() and deleted by the cleanup function.
static Norm2AllModes *nfkcSingleton;
135
static Norm2AllModes *nfkc_cfSingleton;
136
// Maps a data file name (char * key) to its Norm2AllModes; created on first
// use in Normalizer2::getInstance() and accessed there under a Mutex.
static UHashtable    *cache=NULL;
137
138
// One init-once guard per singleton; both are reset by the cleanup function.
static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
139
static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
140
141
// UInitOnce singleton initialization function
142
0
// Creates the singleton selected by `what`:
//   "nfkc"    -> nfkcSingleton
//   "nfkc_cf" -> nfkc_cfSingleton
// Any other value trips U_ASSERT. The cleanup function is registered in every
// case, including when createInstance() reported a failure in errorCode.
static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
143
0
    if (uprv_strcmp(what, "nfkc") == 0) {
144
0
        nfkcSingleton    = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
145
0
    } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
146
0
        nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
147
0
    } else {
148
0
        U_ASSERT(FALSE);   // Unknown singleton
149
0
    }
150
0
    ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
151
0
}
152
153
U_CDECL_BEGIN
154
155
0
// Value deleter installed on `cache` with uhash_setValueDeleter():
// deletes the stored pointer as a Norm2AllModes.
static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
156
0
    delete (Norm2AllModes *)allModes;
157
0
}
158
159
0
// Cleanup function registered under UCLN_COMMON_LOADED_NORMALIZER2.
// Deletes both singletons, closes the cache, and resets all file-static
// pointers and init-once guards to their initial state. Always returns TRUE.
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
160
0
    delete nfkcSingleton;
161
0
    nfkcSingleton = NULL;
162
0
    delete nfkc_cfSingleton;
163
0
    nfkc_cfSingleton = NULL;
164
0
    // `cache` may still be NULL here if getInstance() never created it;
    // presumably uhash_close() accepts NULL -- confirm.
    uhash_close(cache);
165
0
    cache=NULL;
166
0
    nfkcInitOnce.reset(); 
167
0
    nfkc_cfInitOnce.reset(); 
168
0
    return TRUE;
169
0
}
170
171
U_CDECL_END
172
173
// Returns nfkcSingleton after running initSingletons("nfkc") through
// umtx_initOnce() with the nfkcInitOnce guard.
// Returns NULL if errorCode already indicates failure on entry.
const Norm2AllModes *
174
0
Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
175
0
    if(U_FAILURE(errorCode)) { return NULL; }
176
0
    umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
177
0
    return nfkcSingleton;
178
0
}
179
180
// Returns nfkc_cfSingleton after running initSingletons("nfkc_cf") through
// umtx_initOnce() with the nfkc_cfInitOnce guard.
// Returns NULL if errorCode already indicates failure on entry.
const Norm2AllModes *
181
0
Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
182
0
    if(U_FAILURE(errorCode)) { return NULL; }
183
0
    umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
184
0
    return nfkc_cfSingleton;
185
0
}
186
187
// Returns the `comp` member of the NFKC Norm2AllModes singleton,
// or NULL when that singleton is not available.
const Normalizer2 *
188
0
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
189
0
    const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
190
0
    return allModes!=NULL ? &allModes->comp : NULL;
191
0
}
192
193
// Returns the `decomp` member of the NFKC Norm2AllModes singleton (NFKD uses
// the same "nfkc" singleton as NFKC), or NULL when it is not available.
const Normalizer2 *
194
0
Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
195
0
    const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
196
0
    return allModes!=NULL ? &allModes->decomp : NULL;
197
0
}
198
199
// Returns the `comp` member of the "nfkc_cf" Norm2AllModes singleton,
// or NULL when that singleton is not available.
const Normalizer2 *
200
0
Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
201
0
    const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
202
0
    return allModes!=NULL ? &allModes->comp : NULL;
203
0
}
204
205
// Returns the Normalizer2 for data file `name` and normalization `mode`.
//
// - Sets U_ILLEGAL_ARGUMENT_ERROR and returns NULL if `name` is NULL or empty.
// - With packageName==NULL, the names "nfc", "nfkc" and "nfkc_cf" are served
//   by the corresponding Norm2AllModes singleton getters.
// - In all other cases the Norm2AllModes is looked up in the file-static
//   `cache`; on a miss it is created with Norm2AllModes::createInstance() and
//   inserted under a copy of `name`.
//
// Returns NULL on failure. Also returns NULL, without touching errorCode,
// when `mode` is not one of the four cases handled by the switch below.
//
// NOTE(review): the cache key is `name` alone; `packageName` takes no part in
// the look-up, so the same name from two packages maps to one cache entry.
const Normalizer2 *
206
Normalizer2::getInstance(const char *packageName,
207
                         const char *name,
208
                         UNormalization2Mode mode,
209
4.00k
                         UErrorCode &errorCode) {
210
4.00k
    if(U_FAILURE(errorCode)) {
211
0
        return NULL;
212
0
    }
213
4.00k
    if(name==NULL || *name==0) {
214
0
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
215
0
        return NULL;
216
0
    }
217
4.00k
    const Norm2AllModes *allModes=NULL;
218
4.00k
    if(packageName==NULL) {
219
4.00k
        if(0==uprv_strcmp(name, "nfc")) {
220
0
            allModes=Norm2AllModes::getNFCInstance(errorCode);
221
4.00k
        } else if(0==uprv_strcmp(name, "nfkc")) {
222
0
            allModes=Norm2AllModes::getNFKCInstance(errorCode);
223
4.00k
        } else if(0==uprv_strcmp(name, "nfkc_cf")) {
224
0
            allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
225
0
        }
226
4.00k
    }
227
4.00k
    if(allModes==NULL && U_SUCCESS(errorCode)) {
228
4.00k
        {
229
4.00k
            // First look-up; this Mutex object lives only inside this brace scope.
            Mutex lock;
230
4.00k
            if(cache!=NULL) {
231
4.00k
                allModes=(Norm2AllModes *)uhash_get(cache, name);
232
4.00k
            }
233
4.00k
        }
234
4.00k
        if(allModes==NULL) {
235
2
            // Cache miss: the new instance is created before the second Mutex is taken.
            ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
236
2
            LocalPointer<Norm2AllModes> localAllModes(
237
2
                Norm2AllModes::createInstance(packageName, name, errorCode));
238
2
            if(U_SUCCESS(errorCode)) {
239
2
                Mutex lock;
240
2
                if(cache==NULL) {
241
2
                    cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
242
2
                    if(U_FAILURE(errorCode)) {
243
0
                        return NULL;
244
0
                    }
245
2
                    // Keys are uprv_malloc'ed copies of `name` (see below), values are Norm2AllModes.
                    uhash_setKeyDeleter(cache, uprv_free);
246
2
                    uhash_setValueDeleter(cache, deleteNorm2AllModes);
247
2
                }
248
2
                // Look up again: an entry may have been added since the first look-up.
                void *temp=uhash_get(cache, name);
249
2
                if(temp==NULL) {
250
2
                    int32_t keyLength=uprv_strlen(name)+1;
251
2
                    char *nameCopy=(char *)uprv_malloc(keyLength);
252
2
                    if(nameCopy==NULL) {
253
0
                        errorCode=U_MEMORY_ALLOCATION_ERROR;
254
0
                        return NULL;
255
0
                    }
256
2
                    uprv_memcpy(nameCopy, name, keyLength);
257
2
                    // Keep a raw alias for the return below, then hand the object to the cache.
                    allModes=localAllModes.getAlias();
258
2
                    uhash_put(cache, nameCopy, localAllModes.orphan(), &errorCode);
259
2
                } else {
260
                    // race condition
261
0
                    // Use the existing entry; localAllModes still holds the instance created above.
                    allModes=(Norm2AllModes *)temp;
262
0
                }
263
2
            }
264
2
        }
265
4.00k
    }
266
4.00k
    // allModes is dereferenced only when errorCode still indicates success.
    if(allModes!=NULL && U_SUCCESS(errorCode)) {
267
4.00k
        switch(mode) {
268
4.00k
        case UNORM2_COMPOSE:
269
4.00k
            return &allModes->comp;
270
0
        case UNORM2_DECOMPOSE:
271
0
            return &allModes->decomp;
272
0
        case UNORM2_FCD:
273
0
            return &allModes->fcd;
274
0
        case UNORM2_COMPOSE_CONTIGUOUS:
275
0
            return &allModes->fcc;
276
0
        default:
277
0
            break;  // do nothing
278
4.00k
        }
279
4.00k
    }
280
0
    return NULL;
281
4.00k
}
282
283
// Maps a UNormalizationMode value to a Normalizer2 instance:
//   UNORM_NFD / UNORM_NFKD / UNORM_NFC / UNORM_NFKC -> the matching
//   Normalizer2::getXxxInstance(); UNORM_FCD -> getFCDInstance();
//   any other value (including UNORM_NONE) -> getNoopInstance().
// Returns NULL if errorCode already indicates failure on entry.
const Normalizer2 *
284
0
Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
285
0
    if(U_FAILURE(errorCode)) {
286
0
        return NULL;
287
0
    }
288
0
    switch(mode) {
289
0
    case UNORM_NFD:
290
0
        return Normalizer2::getNFDInstance(errorCode);
291
0
    case UNORM_NFKD:
292
0
        return Normalizer2::getNFKDInstance(errorCode);
293
0
    case UNORM_NFC:
294
0
        return Normalizer2::getNFCInstance(errorCode);
295
0
    case UNORM_NFKC:
296
0
        return Normalizer2::getNFKCInstance(errorCode);
297
0
    case UNORM_FCD:
298
0
        return getFCDInstance(errorCode);
299
0
    default:  // UNORM_NONE
300
0
        return getNoopInstance(errorCode);
301
0
    }
302
0
}
303
304
// Returns the `impl` member of the NFKC Norm2AllModes singleton,
// or NULL when that singleton is not available.
const Normalizer2Impl *
305
0
Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
306
0
    const Norm2AllModes *allModes=Norm2AllModes::getNFKCInstance(errorCode);
307
0
    return allModes!=NULL ? allModes->impl : NULL;
308
0
}
309
310
// Returns the `impl` member of the "nfkc_cf" Norm2AllModes singleton,
// or NULL when that singleton is not available.
const Normalizer2Impl *
311
0
Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
312
0
    const Norm2AllModes *allModes=Norm2AllModes::getNFKC_CFInstance(errorCode);
313
0
    return allModes!=NULL ? allModes->impl : NULL;
314
0
}
315
316
U_NAMESPACE_END
317
318
// C API ------------------------------------------------------------------- ***
319
320
U_NAMESPACE_USE
321
322
// C wrapper around Normalizer2::getNFKCInstance(); the result is cast to
// const UNormalizer2 *. pErrorCode is dereferenced unconditionally, so it
// must not be NULL.
U_CAPI const UNormalizer2 * U_EXPORT2
323
0
unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
324
0
    return (const UNormalizer2 *)Normalizer2::getNFKCInstance(*pErrorCode);
325
0
}
326
327
// C wrapper around Normalizer2::getNFKDInstance(); the result is cast to
// const UNormalizer2 *. pErrorCode is dereferenced unconditionally, so it
// must not be NULL.
U_CAPI const UNormalizer2 * U_EXPORT2
328
0
unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
329
0
    return (const UNormalizer2 *)Normalizer2::getNFKDInstance(*pErrorCode);
330
0
}
331
332
// C wrapper around Normalizer2::getNFKCCasefoldInstance(); the result is cast
// to const UNormalizer2 *. pErrorCode is dereferenced unconditionally, so it
// must not be NULL.
U_CAPI const UNormalizer2 * U_EXPORT2
333
0
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
334
0
    return (const UNormalizer2 *)Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
335
0
}
336
337
// C wrapper around Normalizer2::getInstance(packageName, name, mode, ...);
// the result is cast to const UNormalizer2 *. pErrorCode is dereferenced
// unconditionally, so it must not be NULL.
U_CAPI const UNormalizer2 * U_EXPORT2
338
unorm2_getInstance(const char *packageName,
339
                   const char *name,
340
                   UNormalization2Mode mode,
341
0
                   UErrorCode *pErrorCode) {
342
0
    return (const UNormalizer2 *)Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
343
0
}
344
345
// Returns the quick-check result for code point `c` under `mode`.
// - Modes at or below UNORM_NONE, or at or above UNORM_FCD, yield UNORM_YES
//   without loading any data.
// - Otherwise the Normalizer2 for `mode` is fetched with a local error code;
//   if that fails the function returns UNORM_MAYBE.
U_CFUNC UNormalizationCheckResult
346
0
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
347
0
    if(mode<=UNORM_NONE || UNORM_FCD<=mode) {
348
0
        return UNORM_YES;
349
0
    }
350
0
    UErrorCode errorCode=U_ZERO_ERROR;
351
0
    const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, errorCode);
352
0
    if(U_SUCCESS(errorCode)) {
353
0
        // Assumes the instance returned for these modes is a Normalizer2WithImpl
        // -- not verifiable from this file; confirm against norm2allmodes.h.
        return ((const Normalizer2WithImpl *)norm2)->getQuickCheck(c);
354
0
    } else {
355
0
        return UNORM_MAYBE;
356
0
    }
357
0
}
358
359
#endif  // !UCONFIG_NO_NORMALIZATION