Coverage Report

Created: 2025-07-11 06:23

/src/icu/source/common/unicode/normalizer2.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 2009-2013, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  normalizer2.h
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2009nov22
16
*   created by: Markus W. Scherer
17
*/
18
19
#ifndef __NORMALIZER2_H__
20
#define __NORMALIZER2_H__
21
22
/**
23
 * \file
24
 * \brief C++ API: New API for Unicode Normalization.
25
 */
26
27
#include "unicode/utypes.h"
28
29
#if !UCONFIG_NO_NORMALIZATION
30
31
#include "unicode/uniset.h"
32
#include "unicode/unistr.h"
33
#include "unicode/unorm2.h"
34
35
U_NAMESPACE_BEGIN
36
37
/**
38
 * Unicode normalization functionality for standard Unicode normalization or
39
 * for using custom mapping tables.
40
 * All instances of this class are unmodifiable/immutable.
41
 * Instances returned by getInstance() are singletons that must not be deleted by the caller.
42
 * The Normalizer2 class is not intended for public subclassing.
43
 *
44
 * The primary functions are to produce a normalized string and to detect whether
45
 * a string is already normalized.
46
 * The most commonly used normalization forms are those defined in
47
 * http://www.unicode.org/unicode/reports/tr15/
48
 * However, this API supports additional normalization forms for specialized purposes.
49
 * For example, NFKC_Casefold is provided via getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode)
50
 * and can be used in implementations of UTS #46.
51
 *
52
 * Not only are the standard compose and decompose modes supplied,
53
 * but additional modes are provided as documented in the UNormalization2Mode enum.
54
 *
55
 * Some of the functions in this class identify normalization boundaries.
56
 * At a normalization boundary, the portions of the string
57
 * before it and starting from it do not interact and can be handled independently.
58
 *
59
 * The spanQuickCheckYes() function stops at a normalization boundary.
60
 * When the goal is a normalized string, then the text before the boundary
61
 * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
62
 *
63
 * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
64
 * a character is guaranteed to be at a normalization boundary,
65
 * regardless of context.
66
 * This is used for moving from one normalization boundary to the next
67
 * or preceding boundary, and for performing iterative normalization.
68
 *
69
 * Iterative normalization is useful when only a small portion of a
70
 * longer string needs to be processed.
71
 * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
72
 * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
73
 * (to process only the substring for which sort key bytes are computed).
74
 *
75
 * The set of normalization boundaries returned by these functions may not be
76
 * complete: There may be more boundaries that could be returned.
77
 * Different functions may return different boundaries.
78
 * @stable ICU 4.4
79
 */
80
class U_COMMON_API Normalizer2 : public UObject {
81
public:
82
    /**
83
     * Destructor.
84
     * @stable ICU 4.4
85
     */
86
    ~Normalizer2();
87
88
    /**
89
     * Returns a Normalizer2 instance for Unicode NFC normalization.
90
     * Same as getInstance(NULL, "nfc", UNORM2_COMPOSE, errorCode).
91
     * Returns an unmodifiable singleton instance. Do not delete it.
92
     * @param errorCode Standard ICU error code. Its input value must
93
     *                  pass the U_SUCCESS() test, or else the function returns
94
     *                  immediately. Check for U_FAILURE() on output or use with
95
     *                  function chaining. (See User Guide for details.)
96
     * @return the requested Normalizer2, if successful
97
     * @stable ICU 49
98
     */
99
    static const Normalizer2 *
100
    getNFCInstance(UErrorCode &errorCode);
101
102
    /**
103
     * Returns a Normalizer2 instance for Unicode NFD normalization.
104
     * Same as getInstance(NULL, "nfc", UNORM2_DECOMPOSE, errorCode).
105
     * Returns an unmodifiable singleton instance. Do not delete it.
106
     * @param errorCode Standard ICU error code. Its input value must
107
     *                  pass the U_SUCCESS() test, or else the function returns
108
     *                  immediately. Check for U_FAILURE() on output or use with
109
     *                  function chaining. (See User Guide for details.)
110
     * @return the requested Normalizer2, if successful
111
     * @stable ICU 49
112
     */
113
    static const Normalizer2 *
114
    getNFDInstance(UErrorCode &errorCode);
115
116
    /**
117
     * Returns a Normalizer2 instance for Unicode NFKC normalization.
118
     * Same as getInstance(NULL, "nfkc", UNORM2_COMPOSE, errorCode).
119
     * Returns an unmodifiable singleton instance. Do not delete it.
120
     * @param errorCode Standard ICU error code. Its input value must
121
     *                  pass the U_SUCCESS() test, or else the function returns
122
     *                  immediately. Check for U_FAILURE() on output or use with
123
     *                  function chaining. (See User Guide for details.)
124
     * @return the requested Normalizer2, if successful
125
     * @stable ICU 49
126
     */
127
    static const Normalizer2 *
128
    getNFKCInstance(UErrorCode &errorCode);
129
130
    /**
131
     * Returns a Normalizer2 instance for Unicode NFKD normalization.
132
     * Same as getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, errorCode).
133
     * Returns an unmodifiable singleton instance. Do not delete it.
134
     * @param errorCode Standard ICU error code. Its input value must
135
     *                  pass the U_SUCCESS() test, or else the function returns
136
     *                  immediately. Check for U_FAILURE() on output or use with
137
     *                  function chaining. (See User Guide for details.)
138
     * @return the requested Normalizer2, if successful
139
     * @stable ICU 49
140
     */
141
    static const Normalizer2 *
142
    getNFKDInstance(UErrorCode &errorCode);
143
144
    /**
145
     * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
146
     * Same as getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, errorCode).
147
     * Returns an unmodifiable singleton instance. Do not delete it.
148
     * @param errorCode Standard ICU error code. Its input value must
149
     *                  pass the U_SUCCESS() test, or else the function returns
150
     *                  immediately. Check for U_FAILURE() on output or use with
151
     *                  function chaining. (See User Guide for details.)
152
     * @return the requested Normalizer2, if successful
153
     * @stable ICU 49
154
     */
155
    static const Normalizer2 *
156
    getNFKCCasefoldInstance(UErrorCode &errorCode);
157
158
    /**
159
     * Returns a Normalizer2 instance which uses the specified data file
160
     * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
161
     * and which composes or decomposes text according to the specified mode.
162
     * Returns an unmodifiable singleton instance. Do not delete it.
163
     *
164
     * Use packageName=NULL for data files that are part of ICU's own data.
165
     * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
166
     * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
167
     * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
168
     *
169
     * @param packageName NULL for ICU built-in data, otherwise application data package name
170
     * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
171
     * @param mode normalization mode (compose or decompose etc.)
172
     * @param errorCode Standard ICU error code. Its input value must
173
     *                  pass the U_SUCCESS() test, or else the function returns
174
     *                  immediately. Check for U_FAILURE() on output or use with
175
     *                  function chaining. (See User Guide for details.)
176
     * @return the requested Normalizer2, if successful
177
     * @stable ICU 4.4
178
     */
179
    static const Normalizer2 *
180
    getInstance(const char *packageName,
181
                const char *name,
182
                UNormalization2Mode mode,
183
                UErrorCode &errorCode);
184
185
    /**
186
     * Returns the normalized form of the source string.
187
     * @param src source string
188
     * @param errorCode Standard ICU error code. Its input value must
189
     *                  pass the U_SUCCESS() test, or else the function returns
190
     *                  immediately. Check for U_FAILURE() on output or use with
191
     *                  function chaining. (See User Guide for details.)
192
     * @return normalized src
193
     * @stable ICU 4.4
194
     */
195
    UnicodeString
196
0
    normalize(const UnicodeString &src, UErrorCode &errorCode) const {  // convenience overload: returns the result by value
197
0
        UnicodeString result;  // fresh destination object, necessarily distinct from src
198
0
        normalize(src, result, errorCode);  // delegates to the pure-virtual normalize(src, dest, errorCode) overload
199
0
        return result;  // returned unconditionally; callers must inspect errorCode themselves
200
0
    }
201
    /**
202
     * Writes the normalized form of the source string to the destination string
203
     * (replacing its contents) and returns the destination string.
204
     * The source and destination strings must be different objects.
205
     * @param src source string
206
     * @param dest destination string; its contents are replaced with normalized src
207
     * @param errorCode Standard ICU error code. Its input value must
208
     *                  pass the U_SUCCESS() test, or else the function returns
209
     *                  immediately. Check for U_FAILURE() on output or use with
210
     *                  function chaining. (See User Guide for details.)
211
     * @return dest
212
     * @stable ICU 4.4
213
     */
214
    virtual UnicodeString &
215
    normalize(const UnicodeString &src,
216
              UnicodeString &dest,
217
              UErrorCode &errorCode) const = 0;
218
    /**
219
     * Appends the normalized form of the second string to the first string
220
     * (merging them at the boundary) and returns the first string.
221
     * The result is normalized if the first string was normalized.
222
     * The first and second strings must be different objects.
223
     * @param first string, should be normalized
224
     * @param second string, will be normalized
225
     * @param errorCode Standard ICU error code. Its input value must
226
     *                  pass the U_SUCCESS() test, or else the function returns
227
     *                  immediately. Check for U_FAILURE() on output or use with
228
     *                  function chaining. (See User Guide for details.)
229
     * @return first
230
     * @stable ICU 4.4
231
     */
232
    virtual UnicodeString &
233
    normalizeSecondAndAppend(UnicodeString &first,
234
                             const UnicodeString &second,
235
                             UErrorCode &errorCode) const = 0;
236
    /**
237
     * Appends the second string to the first string
238
     * (merging them at the boundary) and returns the first string.
239
     * The result is normalized if both the strings were normalized.
240
     * The first and second strings must be different objects.
241
     * @param first string, should be normalized
242
     * @param second string, should be normalized
243
     * @param errorCode Standard ICU error code. Its input value must
244
     *                  pass the U_SUCCESS() test, or else the function returns
245
     *                  immediately. Check for U_FAILURE() on output or use with
246
     *                  function chaining. (See User Guide for details.)
247
     * @return first
248
     * @stable ICU 4.4
249
     */
250
    virtual UnicodeString &
251
    append(UnicodeString &first,
252
           const UnicodeString &second,
253
           UErrorCode &errorCode) const = 0;
254
255
    /**
256
     * Gets the decomposition mapping of c.
257
     * Roughly equivalent to normalizing the String form of c
258
     * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, except that this function
259
     * returns FALSE and does not write a string
260
     * if c does not have a decomposition mapping in this instance's data.
261
     * This function is independent of the mode of the Normalizer2.
262
     * @param c code point
263
     * @param decomposition String object which will be set to c's
264
     *                      decomposition mapping, if there is one.
265
     * @return TRUE if c has a decomposition, otherwise FALSE
266
     * @stable ICU 4.6
267
     */
268
    virtual UBool
269
    getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
270
271
    /**
272
     * Gets the raw decomposition mapping of c.
273
     *
274
     * This is similar to the getDecomposition() method but returns the
275
     * raw decomposition mapping as specified in UnicodeData.txt or
276
     * (for custom data) in the mapping files processed by the gennorm2 tool.
277
     * By contrast, getDecomposition() returns the processed,
278
     * recursively-decomposed version of this mapping.
279
     *
280
     * When used on a standard NFKC Normalizer2 instance,
281
     * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
282
     *
283
     * When used on a standard NFC Normalizer2 instance,
284
     * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
285
     * in this case, the result contains either one or two code points (=1..4 char16_ts).
286
     *
287
     * This function is independent of the mode of the Normalizer2.
288
     * The default implementation returns FALSE.
289
     * @param c code point
290
     * @param decomposition String object which will be set to c's
291
     *                      raw decomposition mapping, if there is one.
292
     * @return TRUE if c has a decomposition, otherwise FALSE
293
     * @stable ICU 49
294
     */
295
    virtual UBool
296
    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
297
298
    /**
299
     * Performs pairwise composition of a & b and returns the composite if there is one.
300
     *
301
     * Returns a composite code point c only if c has a two-way mapping to a+b.
302
     * In standard Unicode normalization, this means that
303
     * c has a canonical decomposition to a+b
304
     * and c does not have the Full_Composition_Exclusion property.
305
     *
306
     * This function is independent of the mode of the Normalizer2.
307
     * The default implementation returns a negative value.
308
     * @param a A (normalization starter) code point.
309
     * @param b Another code point.
310
     * @return The non-negative composite code point if there is one; otherwise a negative value.
311
     * @stable ICU 49
312
     */
313
    virtual UChar32
314
    composePair(UChar32 a, UChar32 b) const;
315
316
    /**
317
     * Gets the combining class of c.
318
     * The default implementation returns 0
319
     * but all standard implementations return the Unicode Canonical_Combining_Class value.
320
     * @param c code point
321
     * @return c's combining class
322
     * @stable ICU 49
323
     */
324
    virtual uint8_t
325
    getCombiningClass(UChar32 c) const;
326
327
    /**
328
     * Tests if the string is normalized.
329
     * Internally, in cases where the quickCheck() method would return "maybe"
330
     * (which is only possible for the two COMPOSE modes) this method
331
     * resolves to "yes" or "no" to provide a definitive result,
332
     * at the cost of doing more work in those cases.
333
     * @param s input string
334
     * @param errorCode Standard ICU error code. Its input value must
335
     *                  pass the U_SUCCESS() test, or else the function returns
336
     *                  immediately. Check for U_FAILURE() on output or use with
337
     *                  function chaining. (See User Guide for details.)
338
     * @return TRUE if s is normalized
339
     * @stable ICU 4.4
340
     */
341
    virtual UBool
342
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
343
344
    /**
345
     * Tests if the string is normalized.
346
     * For the two COMPOSE modes, the result could be "maybe" in cases that
347
     * would take a little more work to resolve definitively.
348
     * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
349
     * combination of quick check + normalization, to avoid
350
     * re-checking the "yes" prefix.
351
     * @param s input string
352
     * @param errorCode Standard ICU error code. Its input value must
353
     *                  pass the U_SUCCESS() test, or else the function returns
354
     *                  immediately. Check for U_FAILURE() on output or use with
355
     *                  function chaining. (See User Guide for details.)
356
     * @return UNormalizationCheckResult
357
     * @stable ICU 4.4
358
     */
359
    virtual UNormalizationCheckResult
360
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
361
362
    /**
363
     * Returns the end of the normalized substring of the input string.
364
     * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
365
     * the substring <code>UnicodeString(s, 0, end)</code>
366
     * will pass the quick check with a "yes" result.
367
     *
368
     * The returned end index is usually one or more characters before the
369
     * "no" or "maybe" character: The end index is at a normalization boundary.
370
     * (See the class documentation for more about normalization boundaries.)
371
     *
372
     * When the goal is a normalized string and most input strings are expected
373
     * to be normalized already, then call this method,
374
     * and if it returns a prefix shorter than the input string,
375
     * copy that prefix and use normalizeSecondAndAppend() for the remainder.
376
     * @param s input string
377
     * @param errorCode Standard ICU error code. Its input value must
378
     *                  pass the U_SUCCESS() test, or else the function returns
379
     *                  immediately. Check for U_FAILURE() on output or use with
380
     *                  function chaining. (See User Guide for details.)
381
     * @return "yes" span end index
382
     * @stable ICU 4.4
383
     */
384
    virtual int32_t
385
    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
386
387
    /**
388
     * Tests if the character always has a normalization boundary before it,
389
     * regardless of context.
390
     * If true, then the character does not normalization-interact with
391
     * preceding characters.
392
     * In other words, a string containing this character can be normalized
393
     * by processing portions before this character and starting from this
394
     * character independently.
395
     * This is used for iterative normalization. See the class documentation for details.
396
     * @param c character to test
397
     * @return TRUE if c has a normalization boundary before it
398
     * @stable ICU 4.4
399
     */
400
    virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
401
402
    /**
403
     * Tests if the character always has a normalization boundary after it,
404
     * regardless of context.
405
     * If true, then the character does not normalization-interact with
406
     * following characters.
407
     * In other words, a string containing this character can be normalized
408
     * by processing portions up to this character and after this
409
     * character independently.
410
     * This is used for iterative normalization. See the class documentation for details.
411
     * Note that this operation may be significantly slower than hasBoundaryBefore().
412
     * @param c character to test
413
     * @return TRUE if c has a normalization boundary after it
414
     * @stable ICU 4.4
415
     */
416
    virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
417
418
    /**
419
     * Tests if the character is normalization-inert.
420
     * If true, then the character does not change, nor normalization-interact with
421
     * preceding or following characters.
422
     * In other words, a string containing this character can be normalized
423
     * by processing portions before this character and after this
424
     * character independently.
425
     * This is used for iterative normalization. See the class documentation for details.
426
     * Note that this operation may be significantly slower than hasBoundaryBefore().
427
     * @param c character to test
428
     * @return TRUE if c is normalization-inert
429
     * @stable ICU 4.4
430
     */
431
    virtual UBool isInert(UChar32 c) const = 0;
432
};
433
434
/**
435
 * Normalization filtered by a UnicodeSet.
436
 * Normalizes portions of the text contained in the filter set and leaves
437
 * portions not contained in the filter set unchanged.
438
 * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
439
 * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
440
 * This class implements all of (and only) the Normalizer2 API.
441
 * An instance of this class is unmodifiable/immutable but is constructed and
442
 * must be destructed by the owner.
443
 * @stable ICU 4.4
444
 */
445
class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
446
public:
447
    /**
448
     * Constructs a filtered normalizer wrapping any Normalizer2 instance
449
     * and a filter set.
450
     * Both are aliased and must not be modified or deleted while this object
451
     * is used.
452
     * The filter set should be frozen; otherwise the performance will suffer greatly.
453
     * @param n2 wrapped Normalizer2 instance
454
     * @param filterSet UnicodeSet which determines the characters to be normalized
455
     * @stable ICU 4.4
456
     */
457
    FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
458
0
            norm2(n2), set(filterSet) {}  // binds the reference members norm2 and set; neither argument is copied
459
460
    /**
461
     * Destructor.
462
     * @stable ICU 4.4
463
     */
464
    ~FilteredNormalizer2();
465
466
    /**
467
     * Writes the normalized form of the source string to the destination string
468
     * (replacing its contents) and returns the destination string.
469
     * The source and destination strings must be different objects.
470
     * @param src source string
471
     * @param dest destination string; its contents are replaced with normalized src
472
     * @param errorCode Standard ICU error code. Its input value must
473
     *                  pass the U_SUCCESS() test, or else the function returns
474
     *                  immediately. Check for U_FAILURE() on output or use with
475
     *                  function chaining. (See User Guide for details.)
476
     * @return dest
477
     * @stable ICU 4.4
478
     */
479
    virtual UnicodeString &
480
    normalize(const UnicodeString &src,
481
              UnicodeString &dest,
482
              UErrorCode &errorCode) const;
483
    /**
484
     * Appends the normalized form of the second string to the first string
485
     * (merging them at the boundary) and returns the first string.
486
     * The result is normalized if the first string was normalized.
487
     * The first and second strings must be different objects.
488
     * @param first string, should be normalized
489
     * @param second string, will be normalized
490
     * @param errorCode Standard ICU error code. Its input value must
491
     *                  pass the U_SUCCESS() test, or else the function returns
492
     *                  immediately. Check for U_FAILURE() on output or use with
493
     *                  function chaining. (See User Guide for details.)
494
     * @return first
495
     * @stable ICU 4.4
496
     */
497
    virtual UnicodeString &
498
    normalizeSecondAndAppend(UnicodeString &first,
499
                             const UnicodeString &second,
500
                             UErrorCode &errorCode) const;
501
    /**
502
     * Appends the second string to the first string
503
     * (merging them at the boundary) and returns the first string.
504
     * The result is normalized if both the strings were normalized.
505
     * The first and second strings must be different objects.
506
     * @param first string, should be normalized
507
     * @param second string, should be normalized
508
     * @param errorCode Standard ICU error code. Its input value must
509
     *                  pass the U_SUCCESS() test, or else the function returns
510
     *                  immediately. Check for U_FAILURE() on output or use with
511
     *                  function chaining. (See User Guide for details.)
512
     * @return first
513
     * @stable ICU 4.4
514
     */
515
    virtual UnicodeString &
516
    append(UnicodeString &first,
517
           const UnicodeString &second,
518
           UErrorCode &errorCode) const;
519
520
    /**
521
     * Gets the decomposition mapping of c.
522
     * For details see the base class documentation.
523
     *
524
     * This function is independent of the mode of the Normalizer2.
525
     * @param c code point
526
     * @param decomposition String object which will be set to c's
527
     *                      decomposition mapping, if there is one.
528
     * @return TRUE if c has a decomposition, otherwise FALSE
529
     * @stable ICU 4.6
530
     */
531
    virtual UBool
532
    getDecomposition(UChar32 c, UnicodeString &decomposition) const;
533
534
    /**
535
     * Gets the raw decomposition mapping of c.
536
     * For details see the base class documentation.
537
     *
538
     * This function is independent of the mode of the Normalizer2.
539
     * @param c code point
540
     * @param decomposition String object which will be set to c's
541
     *                      raw decomposition mapping, if there is one.
542
     * @return TRUE if c has a decomposition, otherwise FALSE
543
     * @stable ICU 49
544
     */
545
    virtual UBool
546
    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
547
548
    /**
549
     * Performs pairwise composition of a & b and returns the composite if there is one.
550
     * For details see the base class documentation.
551
     *
552
     * This function is independent of the mode of the Normalizer2.
553
     * @param a A (normalization starter) code point.
554
     * @param b Another code point.
555
     * @return The non-negative composite code point if there is one; otherwise a negative value.
556
     * @stable ICU 49
557
     */
558
    virtual UChar32
559
    composePair(UChar32 a, UChar32 b) const;
560
561
    /**
562
     * Gets the combining class of c.
563
     * The default implementation returns 0
564
     * but all standard implementations return the Unicode Canonical_Combining_Class value.
565
     * @param c code point
566
     * @return c's combining class
567
     * @stable ICU 49
568
     */
569
    virtual uint8_t
570
    getCombiningClass(UChar32 c) const;
571
572
    /**
573
     * Tests if the string is normalized.
574
     * For details see the Normalizer2 base class documentation.
575
     * @param s input string
576
     * @param errorCode Standard ICU error code. Its input value must
577
     *                  pass the U_SUCCESS() test, or else the function returns
578
     *                  immediately. Check for U_FAILURE() on output or use with
579
     *                  function chaining. (See User Guide for details.)
580
     * @return TRUE if s is normalized
581
     * @stable ICU 4.4
582
     */
583
    virtual UBool
584
    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
585
    /**
586
     * Tests if the string is normalized.
587
     * For details see the Normalizer2 base class documentation.
588
     * @param s input string
589
     * @param errorCode Standard ICU error code. Its input value must
590
     *                  pass the U_SUCCESS() test, or else the function returns
591
     *                  immediately. Check for U_FAILURE() on output or use with
592
     *                  function chaining. (See User Guide for details.)
593
     * @return UNormalizationCheckResult
594
     * @stable ICU 4.4
595
     */
596
    virtual UNormalizationCheckResult
597
    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
598
    /**
599
     * Returns the end of the normalized substring of the input string.
600
     * For details see the Normalizer2 base class documentation.
601
     * @param s input string
602
     * @param errorCode Standard ICU error code. Its input value must
603
     *                  pass the U_SUCCESS() test, or else the function returns
604
     *                  immediately. Check for U_FAILURE() on output or use with
605
     *                  function chaining. (See User Guide for details.)
606
     * @return "yes" span end index
607
     * @stable ICU 4.4
608
     */
609
    virtual int32_t
610
    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
611
612
    /**
613
     * Tests if the character always has a normalization boundary before it,
614
     * regardless of context.
615
     * For details see the Normalizer2 base class documentation.
616
     * @param c character to test
617
     * @return TRUE if c has a normalization boundary before it
618
     * @stable ICU 4.4
619
     */
620
    virtual UBool hasBoundaryBefore(UChar32 c) const;
621
622
    /**
623
     * Tests if the character always has a normalization boundary after it,
624
     * regardless of context.
625
     * For details see the Normalizer2 base class documentation.
626
     * @param c character to test
627
     * @return TRUE if c has a normalization boundary after it
628
     * @stable ICU 4.4
629
     */
630
    virtual UBool hasBoundaryAfter(UChar32 c) const;
631
632
    /**
633
     * Tests if the character is normalization-inert.
634
     * For details see the Normalizer2 base class documentation.
635
     * @param c character to test
636
     * @return TRUE if c is normalization-inert
637
     * @stable ICU 4.4
638
     */
639
    virtual UBool isInert(UChar32 c) const;
640
private:
641
    UnicodeString &
642
    normalize(const UnicodeString &src,
643
              UnicodeString &dest,
644
              USetSpanCondition spanCondition,
645
              UErrorCode &errorCode) const;
646
647
    UnicodeString &
648
    normalizeSecondAndAppend(UnicodeString &first,
649
                             const UnicodeString &second,
650
                             UBool doNormalize,
651
                             UErrorCode &errorCode) const;
652
653
    const Normalizer2 &norm2;
654
    const UnicodeSet &set;
655
};
656
657
U_NAMESPACE_END
658
659
#endif  // !UCONFIG_NO_NORMALIZATION
660
#endif  // __NORMALIZER2_H__