Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/loadednormalizer2impl.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2014, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* loadednormalizer2impl.cpp
9
*
10
* created on: 2014sep03
11
* created by: Markus W. Scherer
12
*/
13
14
#include "unicode/utypes.h"
15
16
#if !UCONFIG_NO_NORMALIZATION
17
18
#include "unicode/udata.h"
19
#include "unicode/localpointer.h"
20
#include "unicode/normalizer2.h"
21
#include "unicode/ucptrie.h"
22
#include "unicode/unistr.h"
23
#include "unicode/unorm.h"
24
#include "cstring.h"
25
#include "mutex.h"
26
#include "norm2allmodes.h"
27
#include "normalizer2impl.h"
28
#include "uassert.h"
29
#include "ucln_cmn.h"
30
#include "uhash.h"
31
32
U_NAMESPACE_BEGIN
33
34
class LoadedNormalizer2Impl : public Normalizer2Impl {
35
public:
36
0
    LoadedNormalizer2Impl() : memory(NULL), ownedTrie(NULL) {}
37
    virtual ~LoadedNormalizer2Impl();
38
39
    void load(const char *packageName, const char *name, UErrorCode &errorCode);
40
41
private:
42
    static UBool U_CALLCONV
43
    isAcceptable(void *context, const char *type, const char *name, const UDataInfo *pInfo);
44
45
    UDataMemory *memory;
46
    UCPTrie *ownedTrie;
47
};
48
49
0
// Closes whatever load() opened: first the data memory, then the trie.
LoadedNormalizer2Impl::~LoadedNormalizer2Impl() {
    udata_close(memory);
    ucptrie_close(ownedTrie);
}
53
54
// Callback passed to udata_openChoice(): decides whether a data item header
// describes normalization data that this implementation can read.
// The context, type and name arguments are ignored; in particular the
// dataVersion from pInfo is not copied anywhere.
UBool U_CALLCONV
LoadedNormalizer2Impl::isAcceptable(void * /*context*/,
                                    const char * /* type */, const char * /*name*/,
                                    const UDataInfo *pInfo) {
    // Header must be large enough before the remaining fields are examined.
    if(pInfo->size<20) {
        return FALSE;
    }
    // Data must match this platform's byte order and charset family.
    if(pInfo->isBigEndian!=U_IS_BIG_ENDIAN) {
        return FALSE;
    }
    if(pInfo->charsetFamily!=U_CHARSET_FAMILY) {
        return FALSE;
    }
    // dataFormat="Nrm2"
    if(pInfo->dataFormat[0]!=0x4e) {
        return FALSE;
    }
    if(pInfo->dataFormat[1]!=0x72) {
        return FALSE;
    }
    if(pInfo->dataFormat[2]!=0x6d) {
        return FALSE;
    }
    if(pInfo->dataFormat[3]!=0x32) {
        return FALSE;
    }
    // Only formatVersion 4 is supported.
    if(pInfo->formatVersion[0]!=4) {
        return FALSE;
    }
    return TRUE;
}
75
76
void
77
0
LoadedNormalizer2Impl::load(const char *packageName, const char *name, UErrorCode &errorCode) {
78
0
    if(U_FAILURE(errorCode)) {
79
0
        return;
80
0
    }
81
0
    memory=udata_openChoice(packageName, "nrm", name, isAcceptable, this, &errorCode);
82
0
    if(U_FAILURE(errorCode)) {
83
0
        return;
84
0
    }
85
0
    const uint8_t *inBytes=(const uint8_t *)udata_getMemory(memory);
86
0
    const int32_t *inIndexes=(const int32_t *)inBytes;
87
0
    int32_t indexesLength=inIndexes[IX_NORM_TRIE_OFFSET]/4;
88
0
    if(indexesLength<=IX_MIN_LCCC_CP) {
89
0
        errorCode=U_INVALID_FORMAT_ERROR;  // Not enough indexes.
90
0
        return;
91
0
    }
92
93
0
    int32_t offset=inIndexes[IX_NORM_TRIE_OFFSET];
94
0
    int32_t nextOffset=inIndexes[IX_EXTRA_DATA_OFFSET];
95
0
    ownedTrie=ucptrie_openFromBinary(UCPTRIE_TYPE_FAST, UCPTRIE_VALUE_BITS_16,
96
0
                                     inBytes+offset, nextOffset-offset, NULL,
97
0
                                     &errorCode);
98
0
    if(U_FAILURE(errorCode)) {
99
0
        return;
100
0
    }
101
102
0
    offset=nextOffset;
103
0
    nextOffset=inIndexes[IX_SMALL_FCD_OFFSET];
104
0
    const uint16_t *inExtraData=(const uint16_t *)(inBytes+offset);
105
106
    // smallFCD: new in formatVersion 2
107
0
    offset=nextOffset;
108
0
    const uint8_t *inSmallFCD=inBytes+offset;
109
110
0
    init(inIndexes, ownedTrie, inExtraData, inSmallFCD);
111
0
}
112
113
// instance cache ---------------------------------------------------------- ***
114
115
// Creates a Norm2AllModes from a data item: allocates a LoadedNormalizer2Impl,
// loads it, and passes it on to createInstance(impl, errorCode).
// The load() result is not checked here; errorCode carries any failure into
// that overload. NOTE(review): presumably that overload disposes of impl on
// failure -- confirm against its definition.
Norm2AllModes *
Norm2AllModes::createInstance(const char *packageName,
                              const char *name,
                              UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    LoadedNormalizer2Impl *loaded=new LoadedNormalizer2Impl;
    if(loaded!=NULL) {
        loaded->load(packageName, name, errorCode);
        return createInstance(loaded, errorCode);
    }
    errorCode=U_MEMORY_ALLOCATION_ERROR;
    return NULL;
}
130
131
U_CDECL_BEGIN
132
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup();
133
U_CDECL_END
134
135
#if !NORM2_HARDCODE_NFC_DATA
136
static Norm2AllModes *nfcSingleton;
137
static icu::UInitOnce nfcInitOnce = U_INITONCE_INITIALIZER;
138
#endif
139
140
static Norm2AllModes *nfkcSingleton;
141
static icu::UInitOnce nfkcInitOnce = U_INITONCE_INITIALIZER;
142
143
static Norm2AllModes *nfkc_cfSingleton;
144
static icu::UInitOnce nfkc_cfInitOnce = U_INITONCE_INITIALIZER;
145
146
static UHashtable    *cache=NULL;
147
148
// UInitOnce singleton initialization function
149
0
static void U_CALLCONV initSingletons(const char *what, UErrorCode &errorCode) {
150
#if !NORM2_HARDCODE_NFC_DATA
151
    if (uprv_strcmp(what, "nfc") == 0) {
152
        nfcSingleton    = Norm2AllModes::createInstance(NULL, "nfc", errorCode);
153
    } else
154
#endif
155
0
    if (uprv_strcmp(what, "nfkc") == 0) {
156
0
        nfkcSingleton    = Norm2AllModes::createInstance(NULL, "nfkc", errorCode);
157
0
    } else if (uprv_strcmp(what, "nfkc_cf") == 0) {
158
0
        nfkc_cfSingleton = Norm2AllModes::createInstance(NULL, "nfkc_cf", errorCode);
159
0
    } else {
160
0
        UPRV_UNREACHABLE;   // Unknown singleton
161
0
    }
162
0
    ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
163
0
}
164
165
U_CDECL_BEGIN
166
167
0
// Value deleter installed on the cache hashtable: the stored values are
// Norm2AllModes objects.
static void U_CALLCONV deleteNorm2AllModes(void *allModes) {
    delete static_cast<Norm2AllModes *>(allModes);
}
170
171
0
// Cleanup function registered with ucln_common_registerCleanup():
// deletes every singleton, resets its init-once guard so it can be created
// again, and closes the instance cache. Always returns TRUE.
static UBool U_CALLCONV uprv_loaded_normalizer2_cleanup() {
#if !NORM2_HARDCODE_NFC_DATA
    // nfc
    delete nfcSingleton;
    nfcSingleton = NULL;
    nfcInitOnce.reset();
#endif

    // nfkc
    delete nfkcSingleton;
    nfkcSingleton = NULL;
    nfkcInitOnce.reset();

    // nfkc_cf
    delete nfkc_cfSingleton;
    nfkc_cfSingleton = NULL;
    nfkc_cfInitOnce.reset();

    // Instances loaded by name.
    uhash_close(cache);
    cache = NULL;

    return TRUE;
}
190
191
U_CDECL_END
192
193
#if !NORM2_HARDCODE_NFC_DATA
194
// Returns the "nfc" singleton, running initSingletons("nfc") through
// umtx_initOnce() first. Returns NULL if errorCode is already a failure.
const Norm2AllModes *
Norm2AllModes::getNFCInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(nfcInitOnce, &initSingletons, "nfc", errorCode);
        return nfcSingleton;
    }
    return NULL;
}
200
#endif
201
202
// Returns the "nfkc" singleton, running initSingletons("nfkc") through
// umtx_initOnce() first. Returns NULL if errorCode is already a failure.
const Norm2AllModes *
Norm2AllModes::getNFKCInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(nfkcInitOnce, &initSingletons, "nfkc", errorCode);
        return nfkcSingleton;
    }
    return NULL;
}
208
209
// Returns the "nfkc_cf" singleton, running initSingletons("nfkc_cf") through
// umtx_initOnce() first. Returns NULL if errorCode is already a failure.
const Norm2AllModes *
Norm2AllModes::getNFKC_CFInstance(UErrorCode &errorCode) {
    if(U_SUCCESS(errorCode)) {
        umtx_initOnce(nfkc_cfInitOnce, &initSingletons, "nfkc_cf", errorCode);
        return nfkc_cfSingleton;
    }
    return NULL;
}
215
216
#if !NORM2_HARDCODE_NFC_DATA
217
// Returns the composing normalizer (comp) of the "nfc" data,
// or NULL if that data is not available.
const Normalizer2 *
Normalizer2::getNFCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfc=Norm2AllModes::getNFCInstance(errorCode);
    if(nfc==NULL) {
        return NULL;
    }
    return &nfc->comp;
}
222
223
// Returns the decomposing normalizer (decomp) of the "nfc" data,
// or NULL if that data is not available.
const Normalizer2 *
Normalizer2::getNFDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfc=Norm2AllModes::getNFCInstance(errorCode);
    if(nfc==NULL) {
        return NULL;
    }
    return &nfc->decomp;
}
228
229
// Returns the fcd normalizer of the "nfc" data, or NULL if that data is not available.
const Normalizer2 *Normalizer2Factory::getFCDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfc=Norm2AllModes::getNFCInstance(errorCode);
    if(nfc==NULL) {
        return NULL;
    }
    return &nfc->fcd;
}
233
234
// Returns the fcc normalizer of the "nfc" data, or NULL if that data is not available.
const Normalizer2 *Normalizer2Factory::getFCCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfc=Norm2AllModes::getNFCInstance(errorCode);
    if(nfc==NULL) {
        return NULL;
    }
    return &nfc->fcc;
}
238
239
// Returns the Normalizer2Impl behind the "nfc" data, or NULL if that data is not available.
const Normalizer2Impl *
Normalizer2Factory::getNFCImpl(UErrorCode &errorCode) {
    const Norm2AllModes *nfc=Norm2AllModes::getNFCInstance(errorCode);
    if(nfc==NULL) {
        return NULL;
    }
    return nfc->impl;
}
244
#endif
245
246
// Returns the composing normalizer (comp) of the "nfkc" data,
// or NULL if that data is not available.
const Normalizer2 *
Normalizer2::getNFKCInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfkc=Norm2AllModes::getNFKCInstance(errorCode);
    if(nfkc==NULL) {
        return NULL;
    }
    return &nfkc->comp;
}
251
252
// Returns the decomposing normalizer (decomp) of the "nfkc" data,
// or NULL if that data is not available.
const Normalizer2 *
Normalizer2::getNFKDInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfkc=Norm2AllModes::getNFKCInstance(errorCode);
    if(nfkc==NULL) {
        return NULL;
    }
    return &nfkc->decomp;
}
257
258
// Returns the composing normalizer (comp) of the "nfkc_cf" data,
// or NULL if that data is not available.
const Normalizer2 *
Normalizer2::getNFKCCasefoldInstance(UErrorCode &errorCode) {
    const Norm2AllModes *nfkcCf=Norm2AllModes::getNFKC_CFInstance(errorCode);
    if(nfkcCf==NULL) {
        return NULL;
    }
    return &nfkcCf->comp;
}
263
264
// Returns the normalizer for data file `name` in `packageName` and the given mode.
//
// - A NULL or empty name sets U_ILLEGAL_ARGUMENT_ERROR.
// - With packageName==NULL, the names "nfc", "nfkc" and "nfkc_cf" resolve to
//   the corresponding singletons.
// - Anything else is looked up in, and if necessary loaded into, the cache.
//   The cache is keyed by `name` only; packageName is not part of the key.
// - Returns NULL on failure, or when `mode` is not one of the four
//   handled UNormalization2Mode values.
const Normalizer2 *
Normalizer2::getInstance(const char *packageName,
                         const char *name,
                         UNormalization2Mode mode,
                         UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    if(name==NULL || *name==0) {
        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    const Norm2AllModes *modes=NULL;

    // Built-in data files are served from the singletons.
    if(packageName==NULL) {
        if(uprv_strcmp(name, "nfc")==0) {
            modes=Norm2AllModes::getNFCInstance(errorCode);
        } else if(uprv_strcmp(name, "nfkc")==0) {
            modes=Norm2AllModes::getNFKCInstance(errorCode);
        } else if(uprv_strcmp(name, "nfkc_cf")==0) {
            modes=Norm2AllModes::getNFKC_CFInstance(errorCode);
        }
    }

    if(modes==NULL && U_SUCCESS(errorCode)) {
        // Step 1: look for an already cached instance, holding the mutex
        // only for the lookup itself.
        {
            Mutex lock;
            if(cache!=NULL) {
                modes=static_cast<Norm2AllModes *>(uhash_get(cache, name));
            }
        }
        if(modes==NULL) {
            ucln_common_registerCleanup(UCLN_COMMON_LOADED_NORMALIZER2, uprv_loaded_normalizer2_cleanup);
            // Step 2: load the data without holding the mutex.
            // `fresh` deletes the new instance unless it is handed to the cache below.
            LocalPointer<Norm2AllModes> fresh(
                Norm2AllModes::createInstance(packageName, name, errorCode));
            if(U_SUCCESS(errorCode)) {
                // Step 3: publish the new instance under the mutex.
                Mutex lock;
                if(cache==NULL) {
                    cache=uhash_open(uhash_hashChars, uhash_compareChars, NULL, &errorCode);
                    if(U_FAILURE(errorCode)) {
                        return NULL;
                    }
                    uhash_setKeyDeleter(cache, uprv_free);
                    uhash_setValueDeleter(cache, deleteNorm2AllModes);
                }
                void *existing=uhash_get(cache, name);
                if(existing!=NULL) {
                    // race condition: another caller cached this name between
                    // step 1 and step 3; use its instance.
                    modes=static_cast<Norm2AllModes *>(existing);
                } else {
                    // The cache keeps its own copy of the name, including the NUL.
                    const int32_t keyLength=static_cast<int32_t>(uprv_strlen(name)+1);
                    char *key=static_cast<char *>(uprv_malloc(keyLength));
                    if(key==NULL) {
                        errorCode=U_MEMORY_ALLOCATION_ERROR;
                        return NULL;
                    }
                    uprv_memcpy(key, name, keyLength);
                    // Remember the pointer before giving it up to the cache.
                    modes=fresh.getAlias();
                    uhash_put(cache, key, fresh.orphan(), &errorCode);
                }
            }
        }
    }

    if(modes==NULL || U_FAILURE(errorCode)) {
        return NULL;
    }
    switch(mode) {
    case UNORM2_COMPOSE:
        return &modes->comp;
    case UNORM2_DECOMPOSE:
        return &modes->decomp;
    case UNORM2_FCD:
        return &modes->fcd;
    case UNORM2_COMPOSE_CONTIGUOUS:
        return &modes->fcc;
    default:
        break;  // do nothing
    }
    return NULL;
}
341
342
// Maps a UNormalizationMode to the matching Normalizer2 instance.
// Any mode without its own case (e.g. UNORM_NONE) yields the no-op instance.
const Normalizer2 *
Normalizer2Factory::getInstance(UNormalizationMode mode, UErrorCode &errorCode) {
    if(U_FAILURE(errorCode)) {
        return NULL;
    }
    switch(mode) {
    case UNORM_NFC:
        return Normalizer2::getNFCInstance(errorCode);
    case UNORM_NFD:
        return Normalizer2::getNFDInstance(errorCode);
    case UNORM_NFKC:
        return Normalizer2::getNFKCInstance(errorCode);
    case UNORM_NFKD:
        return Normalizer2::getNFKDInstance(errorCode);
    case UNORM_FCD:
        return getFCDInstance(errorCode);
    default:  // UNORM_NONE
        return getNoopInstance(errorCode);
    }
}
362
363
// Returns the Normalizer2Impl behind the "nfkc" data, or NULL if that data is not available.
const Normalizer2Impl *
Normalizer2Factory::getNFKCImpl(UErrorCode &errorCode) {
    const Norm2AllModes *nfkc=Norm2AllModes::getNFKCInstance(errorCode);
    if(nfkc==NULL) {
        return NULL;
    }
    return nfkc->impl;
}
368
369
// Returns the Normalizer2Impl behind the "nfkc_cf" data, or NULL if that data is not available.
const Normalizer2Impl *
Normalizer2Factory::getNFKC_CFImpl(UErrorCode &errorCode) {
    const Norm2AllModes *nfkcCf=Norm2AllModes::getNFKC_CFInstance(errorCode);
    if(nfkcCf==NULL) {
        return NULL;
    }
    return nfkcCf->impl;
}
374
375
U_NAMESPACE_END
376
377
// C API ------------------------------------------------------------------- ***
378
379
U_NAMESPACE_USE
380
381
// C wrapper for Normalizer2::getNFKCInstance(). pErrorCode is dereferenced unconditionally.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *nfkc=Normalizer2::getNFKCInstance(*pErrorCode);
    return reinterpret_cast<const UNormalizer2 *>(nfkc);
}
385
386
// C wrapper for Normalizer2::getNFKDInstance(). pErrorCode is dereferenced unconditionally.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKDInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *nfkd=Normalizer2::getNFKDInstance(*pErrorCode);
    return reinterpret_cast<const UNormalizer2 *>(nfkd);
}
390
391
// C wrapper for Normalizer2::getNFKCCasefoldInstance(). pErrorCode is dereferenced unconditionally.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode) {
    const Normalizer2 *nfkcCf=Normalizer2::getNFKCCasefoldInstance(*pErrorCode);
    return reinterpret_cast<const UNormalizer2 *>(nfkcCf);
}
395
396
// C wrapper for Normalizer2::getInstance(); all arguments are passed through.
// pErrorCode is dereferenced unconditionally.
U_CAPI const UNormalizer2 * U_EXPORT2
unorm2_getInstance(const char *packageName,
                   const char *name,
                   UNormalization2Mode mode,
                   UErrorCode *pErrorCode) {
    const Normalizer2 *instance=Normalizer2::getInstance(packageName, name, mode, *pErrorCode);
    return reinterpret_cast<const UNormalizer2 *>(instance);
}
403
404
// Returns the quick-check value of code point c for the given mode.
// Modes at or below UNORM_NONE, or at or above UNORM_FCD, always yield UNORM_YES.
// If the normalizer for the mode cannot be obtained, the result is UNORM_MAYBE.
U_CFUNC UNormalizationCheckResult
unorm_getQuickCheck(UChar32 c, UNormalizationMode mode) {
    // Only modes strictly between UNORM_NONE and UNORM_FCD are looked up.
    if(!(UNORM_NONE<mode && mode<UNORM_FCD)) {
        return UNORM_YES;
    }
    UErrorCode status=U_ZERO_ERROR;
    const Normalizer2 *norm2=Normalizer2Factory::getInstance(mode, status);
    if(U_FAILURE(status)) {
        return UNORM_MAYBE;
    }
    return static_cast<const Normalizer2WithImpl *>(norm2)->getQuickCheck(c);
}
417
418
#endif  // !UCONFIG_NO_NORMALIZATION