Coverage Report

Created: 2025-07-11 06:23

/src/icu/source/common/norm2allmodes.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2014, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* norm2allmodes.h
9
*
10
* created on: 2014sep07
11
* created by: Markus W. Scherer
12
*/
13
14
#ifndef __NORM2ALLMODES_H__
15
#define __NORM2ALLMODES_H__
16
17
#include "unicode/utypes.h"
18
19
#if !UCONFIG_NO_NORMALIZATION
20
21
#include "unicode/normalizer2.h"
22
#include "unicode/unistr.h"
23
#include "cpputils.h"
24
#include "normalizer2impl.h"
25
26
U_NAMESPACE_BEGIN
27
28
// Intermediate class:
29
// Has Normalizer2Impl and does boilerplate argument checking and setup.
30
class Normalizer2WithImpl : public Normalizer2 {
31
public:
32
8
    Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
33
    virtual ~Normalizer2WithImpl();
34
35
    // normalize
36
    virtual UnicodeString &
37
    normalize(const UnicodeString &src,
38
              UnicodeString &dest,
39
5.96M
              UErrorCode &errorCode) const {
40
5.96M
        if(U_FAILURE(errorCode)) {
41
0
            dest.setToBogus();
42
0
            return dest;
43
0
        }
44
5.96M
        const UChar *sArray=src.getBuffer();
45
5.96M
        if(&dest==&src || sArray==NULL) {
46
0
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
47
0
            dest.setToBogus();
48
0
            return dest;
49
0
        }
50
5.96M
        dest.remove();
51
5.96M
        ReorderingBuffer buffer(impl, dest);
52
5.96M
        if(buffer.init(src.length(), errorCode)) {
53
5.96M
            normalize(sArray, sArray+src.length(), buffer, errorCode);
54
5.96M
        }
55
5.96M
        return dest;
56
5.96M
    }
57
    virtual void
58
    normalize(const UChar *src, const UChar *limit,
59
              ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
60
61
    // normalize and append
62
    virtual UnicodeString &
63
    normalizeSecondAndAppend(UnicodeString &first,
64
                             const UnicodeString &second,
65
559k
                             UErrorCode &errorCode) const {
66
559k
        return normalizeSecondAndAppend(first, second, TRUE, errorCode);
67
559k
    }
68
    virtual UnicodeString &
69
    append(UnicodeString &first,
70
           const UnicodeString &second,
71
0
           UErrorCode &errorCode) const {
72
0
        return normalizeSecondAndAppend(first, second, FALSE, errorCode);
73
0
    }
74
    UnicodeString &
75
    normalizeSecondAndAppend(UnicodeString &first,
76
                             const UnicodeString &second,
77
                             UBool doNormalize,
78
559k
                             UErrorCode &errorCode) const {
79
559k
        uprv_checkCanGetBuffer(first, errorCode);
80
559k
        if(U_FAILURE(errorCode)) {
81
0
            return first;
82
0
        }
83
559k
        const UChar *secondArray=second.getBuffer();
84
559k
        if(&first==&second || secondArray==NULL) {
85
0
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
86
0
            return first;
87
0
        }
88
559k
        int32_t firstLength=first.length();
89
559k
        UnicodeString safeMiddle;
90
559k
        {
91
559k
            ReorderingBuffer buffer(impl, first);
92
559k
            if(buffer.init(firstLength+second.length(), errorCode)) {
93
559k
                normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
94
559k
                                   safeMiddle, buffer, errorCode);
95
559k
            }
96
559k
        }  // The ReorderingBuffer destructor finalizes the first string.
97
559k
        if(U_FAILURE(errorCode)) {
98
            // Restore the modified suffix of the first string.
99
0
            first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
100
0
        }
101
559k
        return first;
102
559k
    }
103
    virtual void
104
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
105
                       UnicodeString &safeMiddle,
106
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
107
    virtual UBool
108
0
    getDecomposition(UChar32 c, UnicodeString &decomposition) const {
109
0
        UChar buffer[4];
110
0
        int32_t length;
111
0
        const UChar *d=impl.getDecomposition(c, buffer, length);
112
0
        if(d==NULL) {
113
0
            return FALSE;
114
0
        }
115
0
        if(d==buffer) {
116
0
            decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
117
0
        } else {
118
0
            decomposition.setTo(FALSE, d, length);  // read-only alias
119
0
        }
120
0
        return TRUE;
121
0
    }
122
    virtual UBool
123
0
    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const {
124
0
        UChar buffer[30];
125
0
        int32_t length;
126
0
        const UChar *d=impl.getRawDecomposition(c, buffer, length);
127
0
        if(d==NULL) {
128
0
            return FALSE;
129
0
        }
130
0
        if(d==buffer) {
131
0
            decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
132
0
        } else {
133
0
            decomposition.setTo(FALSE, d, length);  // read-only alias
134
0
        }
135
0
        return TRUE;
136
0
    }
137
    virtual UChar32
138
0
    composePair(UChar32 a, UChar32 b) const {
139
0
        return impl.composePair(a, b);
140
0
    }
141
142
    virtual uint8_t
143
0
    getCombiningClass(UChar32 c) const {
144
0
        return impl.getCC(impl.getNorm16(c));
145
0
    }
146
147
    // quick checks
148
    virtual UBool
149
0
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
150
0
        if(U_FAILURE(errorCode)) {
151
0
            return FALSE;
152
0
        }
153
0
        const UChar *sArray=s.getBuffer();
154
0
        if(sArray==NULL) {
155
0
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
156
0
            return FALSE;
157
0
        }
158
0
        const UChar *sLimit=sArray+s.length();
159
0
        return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
160
0
    }
161
    virtual UNormalizationCheckResult
162
0
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
163
0
        return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
164
0
    }
165
    virtual int32_t
166
0
    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const {
167
0
        if(U_FAILURE(errorCode)) {
168
0
            return 0;
169
0
        }
170
0
        const UChar *sArray=s.getBuffer();
171
0
        if(sArray==NULL) {
172
0
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
173
0
            return 0;
174
0
        }
175
0
        return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
176
0
    }
177
    virtual const UChar *
178
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
179
180
0
    virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
181
0
        return UNORM_YES;
182
0
    }
183
184
    const Normalizer2Impl &impl;
185
};
186
187
class DecomposeNormalizer2 : public Normalizer2WithImpl {
188
public:
189
2
    DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
190
    virtual ~DecomposeNormalizer2();
191
192
private:
193
    virtual void
194
    normalize(const UChar *src, const UChar *limit,
195
0
              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
196
0
        impl.decompose(src, limit, &buffer, errorCode);
197
0
    }
198
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
199
    virtual void
200
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
201
                       UnicodeString &safeMiddle,
202
0
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
203
0
        impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
204
0
    }
205
    virtual const UChar *
206
0
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
207
0
        return impl.decompose(src, limit, NULL, errorCode);
208
0
    }
209
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
210
0
    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
211
0
        return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
212
0
    }
213
0
    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundary(c, TRUE); }
214
0
    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundary(c, FALSE); }
215
0
    virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); }
216
};
217
218
class ComposeNormalizer2 : public Normalizer2WithImpl {
219
public:
220
    ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
221
4
        Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
222
    virtual ~ComposeNormalizer2();
223
224
private:
225
    virtual void
226
    normalize(const UChar *src, const UChar *limit,
227
5.96M
              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
228
5.96M
        impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode);
229
5.96M
    }
230
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
231
    virtual void
232
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
233
                       UnicodeString &safeMiddle,
234
559k
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
235
559k
        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
236
559k
    }
237
238
    virtual UBool
239
5.69k
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const {
240
5.69k
        if(U_FAILURE(errorCode)) {
241
0
            return FALSE;
242
0
        }
243
5.69k
        const UChar *sArray=s.getBuffer();
244
5.69k
        if(sArray==NULL) {
245
0
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
246
0
            return FALSE;
247
0
        }
248
5.69k
        UnicodeString temp;
249
5.69k
        ReorderingBuffer buffer(impl, temp);
250
5.69k
        if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
251
0
            return FALSE;
252
0
        }
253
5.69k
        return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode);
254
5.69k
    }
255
    virtual UNormalizationCheckResult
256
0
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const {
257
0
        if(U_FAILURE(errorCode)) {
258
0
            return UNORM_MAYBE;
259
0
        }
260
0
        const UChar *sArray=s.getBuffer();
261
0
        if(sArray==NULL) {
262
0
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
263
0
            return UNORM_MAYBE;
264
0
        }
265
0
        UNormalizationCheckResult qcResult=UNORM_YES;
266
0
        impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
267
0
        return qcResult;
268
0
    }
269
    virtual const UChar *
270
0
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const {
271
0
        return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
272
0
    }
273
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
274
0
    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const {
275
0
        return impl.getCompQuickCheck(impl.getNorm16(c));
276
0
    }
277
0
    virtual UBool hasBoundaryBefore(UChar32 c) const {
278
0
        return impl.hasCompBoundaryBefore(c);
279
0
    }
280
0
    virtual UBool hasBoundaryAfter(UChar32 c) const {
281
0
        return impl.hasCompBoundaryAfter(c, onlyContiguous, FALSE);
282
0
    }
283
0
    virtual UBool isInert(UChar32 c) const {
284
0
        return impl.hasCompBoundaryAfter(c, onlyContiguous, TRUE);
285
0
    }
286
287
    const UBool onlyContiguous;
288
};
289
290
class FCDNormalizer2 : public Normalizer2WithImpl {
291
public:
292
2
    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
293
    virtual ~FCDNormalizer2();
294
295
private:
296
    virtual void
297
    normalize(const UChar *src, const UChar *limit,
298
0
              ReorderingBuffer &buffer, UErrorCode &errorCode) const {
299
0
        impl.makeFCD(src, limit, &buffer, errorCode);
300
0
    }
301
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
302
    virtual void
303
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
304
                       UnicodeString &safeMiddle,
305
0
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const {
306
0
        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
307
0
    }
308
    virtual const UChar *
309
0
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const {
310
0
        return impl.makeFCD(src, limit, NULL, errorCode);
311
0
    }
312
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
313
0
    virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); }
314
0
    virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); }
315
0
    virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); }
316
};
317
318
struct Norm2AllModes : public UMemory {
319
    Norm2AllModes(Normalizer2Impl *i)
320
2
            : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {}
321
    ~Norm2AllModes();
322
323
    static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
324
    static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
325
    static Norm2AllModes *createInstance(const char *packageName,
326
                                         const char *name,
327
                                         UErrorCode &errorCode);
328
329
    static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
330
    static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
331
    static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
332
333
    Normalizer2Impl *impl;
334
    ComposeNormalizer2 comp;
335
    DecomposeNormalizer2 decomp;
336
    FCDNormalizer2 fcd;
337
    ComposeNormalizer2 fcc;
338
};
339
340
U_NAMESPACE_END
341
342
#endif  // !UCONFIG_NO_NORMALIZATION
343
#endif  // __NORM2ALLMODES_H__