Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/common/unormcmp.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 2001-2014, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  unormcmp.cpp
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2004sep13
16
*   created by: Markus W. Scherer
17
*
18
*   unorm_compare() function moved here from unorm.cpp for better modularization.
19
*   Depends on both normalization and case folding.
20
*   Allows unorm.cpp to not depend on any character properties code.
21
*/
22
23
#include "unicode/utypes.h"
24
25
#if !UCONFIG_NO_NORMALIZATION
26
27
#include "unicode/unorm.h"
28
#include "unicode/ustring.h"
29
#include "cmemory.h"
30
#include "normalizer2impl.h"
31
#include "ucase.h"
32
#include "uprops.h"
33
#include "ustr_imp.h"
34
35
U_NAMESPACE_USE
36
37
/* compare canonically equivalent ------------------------------------------- */
38
39
/*
40
 * Compare two strings for canonical equivalence.
41
 * Further options include case-insensitive comparison and
42
 * code point order (as opposed to code unit order).
43
 *
44
 * In this function, canonical equivalence is optional as well.
45
 * If canonical equivalence is tested, then both strings must fulfill
46
 * the FCD check.
47
 *
48
 * Semantically, this is equivalent to
49
 *   strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
50
 * where code point order, NFD and foldCase are all optional.
51
 *
52
 * String comparisons almost always yield results before processing both strings
53
 * completely.
54
 * They are generally more efficient working incrementally instead of
55
 * performing the sub-processing (strlen, normalization, case-folding)
56
 * on the entire strings first.
57
 *
58
 * It is also unnecessary to normalize identical characters.
59
 *
60
 * This function works in principle as follows:
61
 *
62
 * loop {
63
 *   get one code unit c1 from s1 (-1 if end of source)
64
 *   get one code unit c2 from s2 (-1 if end of source)
65
 *
66
 *   if(either string finished) {
67
 *     return result;
68
 *   }
69
 *   if(c1==c2) {
70
 *     continue;
71
 *   }
72
 *
73
 *   // c1!=c2
74
 *   try to decompose/case-fold c1/c2, and continue if one does;
75
 *
76
 *   // still c1!=c2 and neither decomposes/case-folds, return result
77
 *   return c1-c2;
78
 * }
79
 *
80
 * When a character decomposes, then the pointer for that source changes to
81
 * the decomposition, pushing the previous pointer onto a stack.
82
 * When the end of the decomposition is reached, then the code unit reader
83
 * pops the previous source from the stack.
84
 * (Same for case-folding.)
85
 *
86
 * This is complicated further by operating on variable-width UTF-16.
87
 * The top part of the loop works on code units, while lookups for decomposition
88
 * and case-folding need code points.
89
 * Code points are assembled after the equality/end-of-source part.
90
 * The source pointer is only advanced beyond all code units when the code point
91
 * actually decomposes/case-folds.
92
 *
93
 * If we were on a trail surrogate unit when assembling a code point,
94
 * and the code point decomposes/case-folds, then the decomposition/folding
95
 * result must be compared with the part of the other string that corresponds to
96
 * this string's lead surrogate.
97
 * Since we only assemble a code point when hitting a trail unit when the
98
 * preceding lead units were identical, we back up the other string by one unit
99
 * in such a case.
100
 *
101
 * The optional code point order comparison at the end works with
102
 * the same fix-up as the other code point order comparison functions.
103
 * See ustring.c and the comment near the end of this function.
104
 *
105
 * Assumption: A decomposition or case-folding result string never contains
106
 * a single surrogate. This is a safe assumption in the Unicode Standard.
107
 * Therefore, we do not need to check for surrogate pairs across
108
 * decomposition/case-folding boundaries.
109
 *
110
 * Further assumptions (see the verifications in tstnorm.cpp):
111
 * The API function checks for FCD first, while the core function
112
 * first case-folds and then decomposes. This requires that case-folding does not
113
 * un-FCD any strings.
114
 *
115
 * The API function may also NFD the input and turn off decomposition.
116
 * This requires that case-folding does not un-NFD strings either.
117
 *
118
 * TODO If any of the above two assumptions is violated,
119
 * then this entire code must be re-thought.
120
 * If this happens, then a simple solution is to case-fold both strings up front
121
 * and to turn off UNORM_INPUT_IS_FCD.
122
 * We already do this when not both strings are in FCD because makeFCD
123
 * would be a partial NFD before the case folding, which does not work.
124
 * Note that all of this is only a problem when case-folding _and_
125
 * canonical equivalence come together.
126
 * (Comments in unorm_compare() are more up to date than this TODO.)
127
 */
128
129
/* stack element for previous-level source/decomposition pointers */
130
struct CmpEquivLevel {
131
    const UChar *start, *s, *limit;
132
};
133
typedef struct CmpEquivLevel CmpEquivLevel;
134
135
/**
136
 * Internal option for unorm_cmpEquivFold() for decomposing.
137
 * If not set, just do strcasecmp().
138
 */
139
0
/* Option bit 0x80000: unorm_compare() ORs it into the options word before calling unorm_cmpEquivFold(), which tests it to decide whether to decompose. */
#define _COMPARE_EQUIV 0x80000
140
141
/* internal function */
142
static int32_t
143
unorm_cmpEquivFold(const UChar *s1, int32_t length1,
144
                   const UChar *s2, int32_t length2,
145
                   uint32_t options,
146
0
                   UErrorCode *pErrorCode) {
147
0
    const Normalizer2Impl *nfcImpl;
148
0
149
0
    /* current-level start/limit - s1/s2 as current */
150
0
    const UChar *start1, *start2, *limit1, *limit2;
151
0
152
0
    /* decomposition and case folding variables */
153
0
    const UChar *p;
154
0
    int32_t length;
155
0
156
0
    /* stacks of previous-level start/current/limit */
157
0
    CmpEquivLevel stack1[2], stack2[2];
158
0
159
0
    /* buffers for algorithmic decompositions */
160
0
    UChar decomp1[4], decomp2[4];
161
0
162
0
    /* case folding buffers, only use current-level start/limit */
163
0
    UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
164
0
165
0
    /* track which is the current level per string */
166
0
    int32_t level1, level2;
167
0
168
0
    /* current code units, and code points for lookups */
169
0
    UChar32 c1, c2, cp1, cp2;
170
0
171
0
    /* no argument error checking because this itself is not an API */
172
0
173
0
    /*
174
0
     * assume that at least one of the options _COMPARE_EQUIV and U_COMPARE_IGNORE_CASE is set
175
0
     * otherwise this function must behave exactly as uprv_strCompare()
176
0
     * not checking for that here makes testing this function easier
177
0
     */
178
0
179
0
    /* normalization/properties data loaded? */
180
0
    if((options&_COMPARE_EQUIV)!=0) {
181
0
        nfcImpl=Normalizer2Factory::getNFCImpl(*pErrorCode);
182
0
    } else {
183
0
        nfcImpl=NULL;
184
0
    }
185
0
    if(U_FAILURE(*pErrorCode)) {
186
0
        return 0;
187
0
    }
188
0
189
0
    /* initialize */
190
0
    start1=s1;
191
0
    if(length1==-1) {
192
0
        limit1=NULL;
193
0
    } else {
194
0
        limit1=s1+length1;
195
0
    }
196
0
197
0
    start2=s2;
198
0
    if(length2==-1) {
199
0
        limit2=NULL;
200
0
    } else {
201
0
        limit2=s2+length2;
202
0
    }
203
0
204
0
    level1=level2=0;
205
0
    c1=c2=-1;
206
0
207
0
    /* comparison loop */
208
0
    for(;;) {
209
0
        /*
210
0
         * here a code unit value of -1 means "get another code unit"
211
0
         * below it will mean "this source is finished"
212
0
         */
213
0
214
0
        if(c1<0) {
215
0
            /* get next code unit from string 1, post-increment */
216
0
            for(;;) {
217
0
                if(s1==limit1 || ((c1=*s1)==0 && (limit1==NULL || (options&_STRNCMP_STYLE)))) {
218
0
                    if(level1==0) {
219
0
                        c1=-1;
220
0
                        break;
221
0
                    }
222
0
                } else {
223
0
                    ++s1;
224
0
                    break;
225
0
                }
226
0
227
0
                /* reached end of level buffer, pop one level */
228
0
                do {
229
0
                    --level1;
230
0
                    start1=stack1[level1].start;    /*Not uninitialized*/
231
0
                } while(start1==NULL);
232
0
                s1=stack1[level1].s;                /*Not uninitialized*/
233
0
                limit1=stack1[level1].limit;        /*Not uninitialized*/
234
0
            }
235
0
        }
236
0
237
0
        if(c2<0) {
238
0
            /* get next code unit from string 2, post-increment */
239
0
            for(;;) {
240
0
                if(s2==limit2 || ((c2=*s2)==0 && (limit2==NULL || (options&_STRNCMP_STYLE)))) {
241
0
                    if(level2==0) {
242
0
                        c2=-1;
243
0
                        break;
244
0
                    }
245
0
                } else {
246
0
                    ++s2;
247
0
                    break;
248
0
                }
249
0
250
0
                /* reached end of level buffer, pop one level */
251
0
                do {
252
0
                    --level2;
253
0
                    start2=stack2[level2].start;    /*Not uninitialized*/
254
0
                } while(start2==NULL);
255
0
                s2=stack2[level2].s;                /*Not uninitialized*/
256
0
                limit2=stack2[level2].limit;        /*Not uninitialized*/
257
0
            }
258
0
        }
259
0
260
0
        /*
261
0
         * compare c1 and c2
262
0
         * either variable c1, c2 is -1 only if the corresponding string is finished
263
0
         */
264
0
        if(c1==c2) {
265
0
            if(c1<0) {
266
0
                return 0;   /* c1==c2==-1 indicating end of strings */
267
0
            }
268
0
            c1=c2=-1;       /* make us fetch new code units */
269
0
            continue;
270
0
        } else if(c1<0) {
271
0
            return -1;      /* string 1 ends before string 2 */
272
0
        } else if(c2<0) {
273
0
            return 1;       /* string 2 ends before string 1 */
274
0
        }
275
0
        /* c1!=c2 && c1>=0 && c2>=0 */
276
0
277
0
        /* get complete code points for c1, c2 for lookups if either is a surrogate */
278
0
        cp1=c1;
279
0
        if(U_IS_SURROGATE(c1)) {
280
0
            UChar c;
281
0
282
0
            if(U_IS_SURROGATE_LEAD(c1)) {
283
0
                if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
284
0
                    /* advance ++s1; only below if cp1 decomposes/case-folds */
285
0
                    cp1=U16_GET_SUPPLEMENTARY(c1, c);
286
0
                }
287
0
            } else /* isTrail(c1) */ {
288
0
                if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) {
289
0
                    cp1=U16_GET_SUPPLEMENTARY(c, c1);
290
0
                }
291
0
            }
292
0
        }
293
0
294
0
        cp2=c2;
295
0
        if(U_IS_SURROGATE(c2)) {
296
0
            UChar c;
297
0
298
0
            if(U_IS_SURROGATE_LEAD(c2)) {
299
0
                if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
300
0
                    /* advance ++s2; only below if cp2 decomposes/case-folds */
301
0
                    cp2=U16_GET_SUPPLEMENTARY(c2, c);
302
0
                }
303
0
            } else /* isTrail(c2) */ {
304
0
                if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) {
305
0
                    cp2=U16_GET_SUPPLEMENTARY(c, c2);
306
0
                }
307
0
            }
308
0
        }
309
0
310
0
        /*
311
0
         * go down one level for each string
312
0
         * continue with the main loop as soon as there is a real change
313
0
         */
314
0
315
0
        if( level1==0 && (options&U_COMPARE_IGNORE_CASE) &&
316
0
            (length=ucase_toFullFolding((UChar32)cp1, &p, options))>=0
317
0
        ) {
318
0
            /* cp1 case-folds to the code point "length" or to p[length] */
319
0
            if(U_IS_SURROGATE(c1)) {
320
0
                if(U_IS_SURROGATE_LEAD(c1)) {
321
0
                    /* advance beyond source surrogate pair if it case-folds */
322
0
                    ++s1;
323
0
                } else /* isTrail(c1) */ {
324
0
                    /*
325
0
                     * we got a supplementary code point when hitting its trail surrogate,
326
0
                     * therefore the lead surrogate must have been the same as in the other string;
327
0
                     * compare this decomposition with the lead surrogate in the other string
328
0
                     * remember that this simulates bulk text replacement:
329
0
                     * the decomposition would replace the entire code point
330
0
                     */
331
0
                    --s2;
332
0
                    c2=*(s2-1);
333
0
                }
334
0
            }
335
0
336
0
            /* push current level pointers */
337
0
            stack1[0].start=start1;
338
0
            stack1[0].s=s1;
339
0
            stack1[0].limit=limit1;
340
0
            ++level1;
341
0
342
0
            /* copy the folding result to fold1[] */
343
0
            if(length<=UCASE_MAX_STRING_LENGTH) {
344
0
                u_memcpy(fold1, p, length);
345
0
            } else {
346
0
                int32_t i=0;
347
0
                U16_APPEND_UNSAFE(fold1, i, length);
348
0
                length=i;
349
0
            }
350
0
351
0
            /* set next level pointers to case folding */
352
0
            start1=s1=fold1;
353
0
            limit1=fold1+length;
354
0
355
0
            /* get ready to read from decomposition, continue with loop */
356
0
            c1=-1;
357
0
            continue;
358
0
        }
359
0
360
0
        if( level2==0 && (options&U_COMPARE_IGNORE_CASE) &&
361
0
            (length=ucase_toFullFolding((UChar32)cp2, &p, options))>=0
362
0
        ) {
363
0
            /* cp2 case-folds to the code point "length" or to p[length] */
364
0
            if(U_IS_SURROGATE(c2)) {
365
0
                if(U_IS_SURROGATE_LEAD(c2)) {
366
0
                    /* advance beyond source surrogate pair if it case-folds */
367
0
                    ++s2;
368
0
                } else /* isTrail(c2) */ {
369
0
                    /*
370
0
                     * we got a supplementary code point when hitting its trail surrogate,
371
0
                     * therefore the lead surrogate must have been the same as in the other string;
372
0
                     * compare this decomposition with the lead surrogate in the other string
373
0
                     * remember that this simulates bulk text replacement:
374
0
                     * the decomposition would replace the entire code point
375
0
                     */
376
0
                    --s1;
377
0
                    c1=*(s1-1);
378
0
                }
379
0
            }
380
0
381
0
            /* push current level pointers */
382
0
            stack2[0].start=start2;
383
0
            stack2[0].s=s2;
384
0
            stack2[0].limit=limit2;
385
0
            ++level2;
386
0
387
0
            /* copy the folding result to fold2[] */
388
0
            if(length<=UCASE_MAX_STRING_LENGTH) {
389
0
                u_memcpy(fold2, p, length);
390
0
            } else {
391
0
                int32_t i=0;
392
0
                U16_APPEND_UNSAFE(fold2, i, length);
393
0
                length=i;
394
0
            }
395
0
396
0
            /* set next level pointers to case folding */
397
0
            start2=s2=fold2;
398
0
            limit2=fold2+length;
399
0
400
0
            /* get ready to read from decomposition, continue with loop */
401
0
            c2=-1;
402
0
            continue;
403
0
        }
404
0
405
0
        if( level1<2 && (options&_COMPARE_EQUIV) &&
406
0
            0!=(p=nfcImpl->getDecomposition((UChar32)cp1, decomp1, length))
407
0
        ) {
408
0
            /* cp1 decomposes into p[length] */
409
0
            if(U_IS_SURROGATE(c1)) {
410
0
                if(U_IS_SURROGATE_LEAD(c1)) {
411
0
                    /* advance beyond source surrogate pair if it decomposes */
412
0
                    ++s1;
413
0
                } else /* isTrail(c1) */ {
414
0
                    /*
415
0
                     * we got a supplementary code point when hitting its trail surrogate,
416
0
                     * therefore the lead surrogate must have been the same as in the other string;
417
0
                     * compare this decomposition with the lead surrogate in the other string
418
0
                     * remember that this simulates bulk text replacement:
419
0
                     * the decomposition would replace the entire code point
420
0
                     */
421
0
                    --s2;
422
0
                    c2=*(s2-1);
423
0
                }
424
0
            }
425
0
426
0
            /* push current level pointers */
427
0
            stack1[level1].start=start1;
428
0
            stack1[level1].s=s1;
429
0
            stack1[level1].limit=limit1;
430
0
            ++level1;
431
0
432
0
            /* set empty intermediate level if skipped */
433
0
            if(level1<2) {
434
0
                stack1[level1++].start=NULL;
435
0
            }
436
0
437
0
            /* set next level pointers to decomposition */
438
0
            start1=s1=p;
439
0
            limit1=p+length;
440
0
441
0
            /* get ready to read from decomposition, continue with loop */
442
0
            c1=-1;
443
0
            continue;
444
0
        }
445
0
446
0
        if( level2<2 && (options&_COMPARE_EQUIV) &&
447
0
            0!=(p=nfcImpl->getDecomposition((UChar32)cp2, decomp2, length))
448
0
        ) {
449
0
            /* cp2 decomposes into p[length] */
450
0
            if(U_IS_SURROGATE(c2)) {
451
0
                if(U_IS_SURROGATE_LEAD(c2)) {
452
0
                    /* advance beyond source surrogate pair if it decomposes */
453
0
                    ++s2;
454
0
                } else /* isTrail(c2) */ {
455
0
                    /*
456
0
                     * we got a supplementary code point when hitting its trail surrogate,
457
0
                     * therefore the lead surrogate must have been the same as in the other string;
458
0
                     * compare this decomposition with the lead surrogate in the other string
459
0
                     * remember that this simulates bulk text replacement:
460
0
                     * the decomposition would replace the entire code point
461
0
                     */
462
0
                    --s1;
463
0
                    c1=*(s1-1);
464
0
                }
465
0
            }
466
0
467
0
            /* push current level pointers */
468
0
            stack2[level2].start=start2;
469
0
            stack2[level2].s=s2;
470
0
            stack2[level2].limit=limit2;
471
0
            ++level2;
472
0
473
0
            /* set empty intermediate level if skipped */
474
0
            if(level2<2) {
475
0
                stack2[level2++].start=NULL;
476
0
            }
477
0
478
0
            /* set next level pointers to decomposition */
479
0
            start2=s2=p;
480
0
            limit2=p+length;
481
0
482
0
            /* get ready to read from decomposition, continue with loop */
483
0
            c2=-1;
484
0
            continue;
485
0
        }
486
0
487
0
        /*
488
0
         * no decomposition/case folding, max level for both sides:
489
0
         * return difference result
490
0
         *
491
0
         * code point order comparison must not just return cp1-cp2
492
0
         * because when single surrogates are present then the surrogate pairs
493
0
         * that formed cp1 and cp2 may be from different string indexes
494
0
         *
495
0
         * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
496
0
         * c1=d800 cp1=10001 c2=dc00 cp2=10000
497
0
         * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
498
0
         *
499
0
         * therefore, use same fix-up as in ustring.c/uprv_strCompare()
500
0
         * except: uprv_strCompare() fetches c=*s while this functions fetches c=*s++
501
0
         * so we have slightly different pointer/start/limit comparisons here
502
0
         */
503
0
504
0
        if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
505
0
            /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
506
0
            if(
507
0
                (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
508
0
                (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
509
0
            ) {
510
0
                /* part of a surrogate pair, leave >=d800 */
511
0
            } else {
512
0
                /* BMP code point - may be surrogate code point - make <d800 */
513
0
                c1-=0x2800;
514
0
            }
515
0
516
0
            if(
517
0
                (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
518
0
                (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
519
0
            ) {
520
0
                /* part of a surrogate pair, leave >=d800 */
521
0
            } else {
522
0
                /* BMP code point - may be surrogate code point - make <d800 */
523
0
                c2-=0x2800;
524
0
            }
525
0
        }
526
0
527
0
        return c1-c2;
528
0
    }
529
0
}
530
531
static
532
UBool _normalize(const Normalizer2 *n2, const UChar *s, int32_t length,
533
0
                UnicodeString &normalized, UErrorCode *pErrorCode) {
534
0
    UnicodeString str(length<0, s, length);
535
0
536
0
    // check if s fulfill the conditions
537
0
    int32_t spanQCYes=n2->spanQuickCheckYes(str, *pErrorCode);
538
0
    if (U_FAILURE(*pErrorCode)) {
539
0
        return FALSE;
540
0
    }
541
0
    /*
542
0
     * ICU 2.4 had a further optimization:
543
0
     * If both strings were not in FCD, then they were both NFD'ed,
544
0
     * and the _COMPARE_EQUIV option was turned off.
545
0
     * It is not entirely clear that this is valid with the current
546
0
     * definition of the canonical caseless match.
547
0
     * Therefore, ICU 2.6 removes that optimization.
548
0
     */
549
0
    if(spanQCYes<str.length()) {
550
0
        UnicodeString unnormalized=str.tempSubString(spanQCYes);
551
0
        normalized.setTo(FALSE, str.getBuffer(), spanQCYes);
552
0
        n2->normalizeSecondAndAppend(normalized, unnormalized, *pErrorCode);
553
0
        if (U_SUCCESS(*pErrorCode)) {
554
0
            return TRUE;
555
0
        }
556
0
    }
557
0
    return FALSE;
558
0
}
559
560
/*
 * Compares two strings for canonical equivalence, optionally ignoring case
 * and/or in code point order, depending on the option bits.
 *
 * s1, length1   first string; length1==-1 means NUL-terminated
 * s2, length2   second string; length2==-1 means NUL-terminated
 * options       comparison option bits; the bits above
 *               UNORM_COMPARE_NORM_OPTIONS_SHIFT are normalization options
 * pErrorCode    in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR if a
 *               string pointer is NULL or a length is less than -1
 *
 * Returns 0 on error (including a NULL pErrorCode); otherwise the result of
 * unorm_cmpEquivFold() on the (possibly pre-normalized) inputs.
 */
U_CAPI int32_t U_EXPORT2
unorm_compare(const UChar *s1, int32_t length1,
              const UChar *s2, int32_t length2,
              uint32_t options,
              UErrorCode *pErrorCode) {
    /* argument checking */
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(s1==NULL || length1<-1 || s2==NULL || length2<-1) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* owners of the pre-normalized copies; must outlive the comparison below */
    UnicodeString fcd1, fcd2;
    int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT);
    options|=_COMPARE_EQUIV;

    /*
     * UAX #21 Case Mappings, as fixed for Unicode version 4
     * (see Jitterbug 2021), defines a canonical caseless match as
     *
     * A string X is a canonical caseless match
     * for a string Y if and only if
     * NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
     *
     * For better performance, we check for FCD (or let the caller tell us that
     * both strings are in FCD) for the inner normalization.
     * BasicNormalizerTest::FindFoldFCDExceptions() makes sure that
     * case-folding preserves the FCD-ness of a string.
     * The outer normalization is then only performed by unorm_cmpEquivFold()
     * when there is a difference.
     *
     * Exception: When using the Turkic case-folding option, we do perform
     * full NFD first. This is because in the Turkic case precomposed characters
     * with 0049 capital I or 0069 small i fold differently whether they
     * are first decomposed or not, so an FCD check - a check only for
     * canonical order - is not sufficient.
     */
    if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
        const Normalizer2 *n2;
        if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
            n2=Normalizer2::getNFDInstance(*pErrorCode);
        } else {
            n2=Normalizer2Factory::getFCDInstance(*pErrorCode);
        }
        if (U_FAILURE(*pErrorCode)) {
            return 0;
        }

        if(normOptions&UNORM_UNICODE_3_2) {
            const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode);
            /*
             * Do not dereference uni32 if the lookup reported an error;
             * presumably the pointer can be NULL in that case.
             */
            if (U_FAILURE(*pErrorCode)) {
                return 0;
            }
            FilteredNormalizer2 fn2(*n2, *uni32);
            if(_normalize(&fn2, s1, length1, fcd1, pErrorCode)) {
                s1=fcd1.getBuffer();
                length1=fcd1.length();
            }
            if(_normalize(&fn2, s2, length2, fcd2, pErrorCode)) {
                s2=fcd2.getBuffer();
                length2=fcd2.length();
            }
        } else {
            if(_normalize(n2, s1, length1, fcd1, pErrorCode)) {
                s1=fcd1.getBuffer();
                length1=fcd1.length();
            }
            if(_normalize(n2, s2, length2, fcd2, pErrorCode)) {
                s2=fcd2.getBuffer();
                length2=fcd2.length();
            }
        }
    }

    /* _normalize() returns FALSE on failure without changing s1/s2, so re-check here */
    if(U_SUCCESS(*pErrorCode)) {
        return unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode);
    } else {
        return 0;
    }
}
639
640
#endif /* #if !UCONFIG_NO_NORMALIZATION */