Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/norm2allmodes.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
* Copyright (C) 2014, International Business Machines
6
* Corporation and others.  All Rights Reserved.
7
*******************************************************************************
8
* norm2allmodes.h
9
*
10
* created on: 2014sep07
11
* created by: Markus W. Scherer
12
*/
13
14
#ifndef __NORM2ALLMODES_H__
15
#define __NORM2ALLMODES_H__
16
17
#include "unicode/utypes.h"
18
19
#if !UCONFIG_NO_NORMALIZATION
20
21
#include "unicode/edits.h"
22
#include "unicode/normalizer2.h"
23
#include "unicode/stringoptions.h"
24
#include "unicode/unistr.h"
25
#include "cpputils.h"
26
#include "normalizer2impl.h"
27
28
U_NAMESPACE_BEGIN
29
30
// Intermediate class:
31
// Has Normalizer2Impl and does boilerplate argument checking and setup.
32
class Normalizer2WithImpl : public Normalizer2 {
33
public:
34
0
    Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
35
    virtual ~Normalizer2WithImpl();
36
37
    // normalize
38
    virtual UnicodeString &
39
    normalize(const UnicodeString &src,
40
              UnicodeString &dest,
41
0
              UErrorCode &errorCode) const U_OVERRIDE {
42
0
        if(U_FAILURE(errorCode)) {
43
0
            dest.setToBogus();
44
0
            return dest;
45
0
        }
46
0
        const UChar *sArray=src.getBuffer();
47
0
        if(&dest==&src || sArray==NULL) {
48
0
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
49
0
            dest.setToBogus();
50
0
            return dest;
51
0
        }
52
0
        dest.remove();
53
0
        ReorderingBuffer buffer(impl, dest);
54
0
        if(buffer.init(src.length(), errorCode)) {
55
0
            normalize(sArray, sArray+src.length(), buffer, errorCode);
56
0
        }
57
0
        return dest;
58
0
    }
59
    virtual void
60
    normalize(const UChar *src, const UChar *limit,
61
              ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
62
63
    // normalize and append
64
    virtual UnicodeString &
65
    normalizeSecondAndAppend(UnicodeString &first,
66
                             const UnicodeString &second,
67
0
                             UErrorCode &errorCode) const U_OVERRIDE {
68
0
        return normalizeSecondAndAppend(first, second, true, errorCode);
69
0
    }
70
    virtual UnicodeString &
71
    append(UnicodeString &first,
72
           const UnicodeString &second,
73
0
           UErrorCode &errorCode) const U_OVERRIDE {
74
0
        return normalizeSecondAndAppend(first, second, false, errorCode);
75
0
    }
76
    UnicodeString &
77
    normalizeSecondAndAppend(UnicodeString &first,
78
                             const UnicodeString &second,
79
                             UBool doNormalize,
80
0
                             UErrorCode &errorCode) const {
81
0
        uprv_checkCanGetBuffer(first, errorCode);
82
0
        if(U_FAILURE(errorCode)) {
83
0
            return first;
84
0
        }
85
0
        const UChar *secondArray=second.getBuffer();
86
0
        if(&first==&second || secondArray==NULL) {
87
0
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
88
0
            return first;
89
0
        }
90
0
        int32_t firstLength=first.length();
91
0
        UnicodeString safeMiddle;
92
0
        {
93
0
            ReorderingBuffer buffer(impl, first);
94
0
            if(buffer.init(firstLength+second.length(), errorCode)) {
95
0
                normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
96
0
                                   safeMiddle, buffer, errorCode);
97
0
            }
98
0
        }  // The ReorderingBuffer destructor finalizes the first string.
99
0
        if(U_FAILURE(errorCode)) {
100
            // Restore the modified suffix of the first string.
101
0
            first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
102
0
        }
103
0
        return first;
104
0
    }
105
    virtual void
106
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
107
                       UnicodeString &safeMiddle,
108
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
109
    virtual UBool
110
0
    getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
111
0
        UChar buffer[4];
112
0
        int32_t length;
113
0
        const UChar *d=impl.getDecomposition(c, buffer, length);
114
0
        if(d==NULL) {
115
0
            return false;
116
0
        }
117
0
        if(d==buffer) {
118
0
            decomposition.setTo(buffer, length);  // copy the string (Jamos from Hangul syllable c)
119
0
        } else {
120
0
            decomposition.setTo(false, d, length);  // read-only alias
121
0
        }
122
0
        return true;
123
0
    }
124
    virtual UBool
125
0
    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE {
126
0
        UChar buffer[30];
127
0
        int32_t length;
128
0
        const UChar *d=impl.getRawDecomposition(c, buffer, length);
129
0
        if(d==NULL) {
130
0
            return false;
131
0
        }
132
0
        if(d==buffer) {
133
0
            decomposition.setTo(buffer, length);  // copy the string (algorithmic decomposition)
134
0
        } else {
135
0
            decomposition.setTo(false, d, length);  // read-only alias
136
0
        }
137
0
        return true;
138
0
    }
139
    virtual UChar32
140
0
    composePair(UChar32 a, UChar32 b) const U_OVERRIDE {
141
0
        return impl.composePair(a, b);
142
0
    }
143
144
    virtual uint8_t
145
0
    getCombiningClass(UChar32 c) const U_OVERRIDE {
146
0
        return impl.getCC(impl.getNorm16(c));
147
0
    }
148
149
    // quick checks
150
    virtual UBool
151
0
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
152
0
        if(U_FAILURE(errorCode)) {
153
0
            return false;
154
0
        }
155
0
        const UChar *sArray=s.getBuffer();
156
0
        if(sArray==NULL) {
157
0
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
158
0
            return false;
159
0
        }
160
0
        const UChar *sLimit=sArray+s.length();
161
0
        return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
162
0
    }
163
    virtual UNormalizationCheckResult
164
0
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
165
0
        return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
166
0
    }
167
    virtual int32_t
168
0
    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
169
0
        if(U_FAILURE(errorCode)) {
170
0
            return 0;
171
0
        }
172
0
        const UChar *sArray=s.getBuffer();
173
0
        if(sArray==NULL) {
174
0
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
175
0
            return 0;
176
0
        }
177
0
        return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
178
0
    }
179
    virtual const UChar *
180
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0;
181
182
0
    virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
183
0
        return UNORM_YES;
184
0
    }
185
186
    const Normalizer2Impl &impl;
187
};
188
189
class DecomposeNormalizer2 : public Normalizer2WithImpl {
190
public:
191
0
    DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
192
    virtual ~DecomposeNormalizer2();
193
194
private:
195
    virtual void
196
    normalize(const UChar *src, const UChar *limit,
197
0
              ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
198
0
        impl.decompose(src, limit, &buffer, errorCode);
199
0
    }
200
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
201
    virtual void
202
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
203
                       UnicodeString &safeMiddle,
204
0
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
205
0
        impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
206
0
    }
207
208
    void
209
    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
210
0
                  Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
211
0
        if (U_FAILURE(errorCode)) {
212
0
            return;
213
0
        }
214
0
        if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
215
0
            edits->reset();
216
0
        }
217
0
        const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
218
0
        impl.decomposeUTF8(options, s, s + src.length(), &sink, edits, errorCode);
219
0
        sink.Flush();
220
0
    }
221
    virtual UBool
222
0
    isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
223
0
        if(U_FAILURE(errorCode)) {
224
0
            return false;
225
0
        }
226
0
        const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
227
0
        const uint8_t *sLimit = s + sp.length();
228
0
        return sLimit == impl.decomposeUTF8(0, s, sLimit, nullptr, nullptr, errorCode);
229
0
    }
230
231
    virtual const UChar *
232
0
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
233
0
        return impl.decompose(src, limit, NULL, errorCode);
234
0
    }
235
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
236
0
    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
237
0
        return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
238
0
    }
239
0
    virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
240
0
        return impl.hasDecompBoundaryBefore(c);
241
0
    }
242
0
    virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
243
0
        return impl.hasDecompBoundaryAfter(c);
244
0
    }
245
0
    virtual UBool isInert(UChar32 c) const U_OVERRIDE {
246
0
        return impl.isDecompInert(c);
247
0
    }
248
};
249
250
class ComposeNormalizer2 : public Normalizer2WithImpl {
251
public:
252
    ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
253
0
        Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
254
    virtual ~ComposeNormalizer2();
255
256
private:
257
    virtual void
258
    normalize(const UChar *src, const UChar *limit,
259
0
              ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
260
0
        impl.compose(src, limit, onlyContiguous, true, buffer, errorCode);
261
0
    }
262
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
263
264
    void
265
    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
266
0
                  Edits *edits, UErrorCode &errorCode) const U_OVERRIDE {
267
0
        if (U_FAILURE(errorCode)) {
268
0
            return;
269
0
        }
270
0
        if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
271
0
            edits->reset();
272
0
        }
273
0
        const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
274
0
        impl.composeUTF8(options, onlyContiguous, s, s + src.length(),
275
0
                         &sink, edits, errorCode);
276
0
        sink.Flush();
277
0
    }
278
279
    virtual void
280
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
281
                       UnicodeString &safeMiddle,
282
0
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
283
0
        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
284
0
    }
285
286
    virtual UBool
287
0
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
288
0
        if(U_FAILURE(errorCode)) {
289
0
            return false;
290
0
        }
291
0
        const UChar *sArray=s.getBuffer();
292
0
        if(sArray==NULL) {
293
0
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
294
0
            return false;
295
0
        }
296
0
        UnicodeString temp;
297
0
        ReorderingBuffer buffer(impl, temp);
298
0
        if(!buffer.init(5, errorCode)) {  // small destCapacity for substring normalization
299
0
            return false;
300
0
        }
301
0
        return impl.compose(sArray, sArray+s.length(), onlyContiguous, false, buffer, errorCode);
302
0
    }
303
    virtual UBool
304
0
    isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE {
305
0
        if(U_FAILURE(errorCode)) {
306
0
            return false;
307
0
        }
308
0
        const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
309
0
        return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode);
310
0
    }
311
    virtual UNormalizationCheckResult
312
0
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE {
313
0
        if(U_FAILURE(errorCode)) {
314
0
            return UNORM_MAYBE;
315
0
        }
316
0
        const UChar *sArray=s.getBuffer();
317
0
        if(sArray==NULL) {
318
0
            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
319
0
            return UNORM_MAYBE;
320
0
        }
321
0
        UNormalizationCheckResult qcResult=UNORM_YES;
322
0
        impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
323
0
        return qcResult;
324
0
    }
325
    virtual const UChar *
326
0
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE {
327
0
        return impl.composeQuickCheck(src, limit, onlyContiguous, NULL);
328
0
    }
329
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
330
0
    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE {
331
0
        return impl.getCompQuickCheck(impl.getNorm16(c));
332
0
    }
333
0
    virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
334
0
        return impl.hasCompBoundaryBefore(c);
335
0
    }
336
0
    virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
337
0
        return impl.hasCompBoundaryAfter(c, onlyContiguous);
338
0
    }
339
0
    virtual UBool isInert(UChar32 c) const U_OVERRIDE {
340
0
        return impl.isCompInert(c, onlyContiguous);
341
0
    }
342
343
    const UBool onlyContiguous;
344
};
345
346
class FCDNormalizer2 : public Normalizer2WithImpl {
347
public:
348
0
    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
349
    virtual ~FCDNormalizer2();
350
351
private:
352
    virtual void
353
    normalize(const UChar *src, const UChar *limit,
354
0
              ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
355
0
        impl.makeFCD(src, limit, &buffer, errorCode);
356
0
    }
357
    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
358
    virtual void
359
    normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize,
360
                       UnicodeString &safeMiddle,
361
0
                       ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE {
362
0
        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
363
0
    }
364
    virtual const UChar *
365
0
    spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const U_OVERRIDE {
366
0
        return impl.makeFCD(src, limit, NULL, errorCode);
367
0
    }
368
    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
369
0
    virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE {
370
0
        return impl.hasFCDBoundaryBefore(c);
371
0
    }
372
0
    virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE {
373
0
        return impl.hasFCDBoundaryAfter(c);
374
0
    }
375
0
    virtual UBool isInert(UChar32 c) const U_OVERRIDE {
376
0
        return impl.isFCDInert(c);
377
0
    }
378
};
379
380
struct Norm2AllModes : public UMemory {
381
    Norm2AllModes(Normalizer2Impl *i)
382
0
            : impl(i), comp(*i, false), decomp(*i), fcd(*i), fcc(*i, true) {}
383
    ~Norm2AllModes();
384
385
    static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
386
    static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
387
    static Norm2AllModes *createInstance(const char *packageName,
388
                                         const char *name,
389
                                         UErrorCode &errorCode);
390
391
    static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
392
    static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
393
    static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
394
395
    Normalizer2Impl *impl;
396
    ComposeNormalizer2 comp;
397
    DecomposeNormalizer2 decomp;
398
    FCDNormalizer2 fcd;
399
    ComposeNormalizer2 fcc;
400
};
401
402
U_NAMESPACE_END
403
404
#endif  // !UCONFIG_NO_NORMALIZATION
405
#endif  // __NORM2ALLMODES_H__