Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/ustring.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
******************************************************************************
5
*
6
*   Copyright (C) 1998-2016, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
******************************************************************************
10
*
11
* File ustring.cpp
12
*
13
* Modification History:
14
*
15
*   Date        Name        Description
16
*   12/07/98    bertrand    Creation.
17
******************************************************************************
18
*/
19
20
#include "unicode/utypes.h"
21
#include "unicode/putil.h"
22
#include "unicode/uchar.h"
23
#include "unicode/ustring.h"
24
#include "unicode/utf16.h"
25
#include "cstring.h"
26
#include "cwchar.h"
27
#include "cmemory.h"
28
#include "ustr_imp.h"
29
30
/* ANSI string.h - style functions ------------------------------------------ */
31
32
/* U+ffff is the highest BMP code point, the highest one that fits into a 16-bit UChar */
33
0
#define U_BMP_MAX 0xffff
34
35
/* Forward binary string search functions ----------------------------------- */
36
37
/*
38
 * Test if a substring match inside a string is at code point boundaries.
39
 * All pointers refer to the same buffer.
40
 * The limit pointer may be NULL, all others must be real pointers.
41
 */
42
static inline UBool
43
0
isMatchAtCPBoundary(const UChar *start, const UChar *match, const UChar *matchLimit, const UChar *limit) {
44
0
    if(U16_IS_TRAIL(*match) && start!=match && U16_IS_LEAD(*(match-1))) {
45
        /* the leading edge of the match is in the middle of a surrogate pair */
46
0
        return FALSE;
47
0
    }
48
0
    if(U16_IS_LEAD(*(matchLimit-1)) && matchLimit!=limit && U16_IS_TRAIL(*matchLimit)) {
49
        /* the trailing edge of the match is in the middle of a surrogate pair */
50
0
        return FALSE;
51
0
    }
52
0
    return TRUE;
53
0
}
54
55
/*
 * Find the first occurrence of sub in s such that the match does not split
 * a surrogate pair (see isMatchAtCPBoundary()).
 *
 * A negative length (-1) means that the corresponding string is NUL-terminated.
 * Returns s itself if sub is NULL, subLength<-1, or sub is empty.
 * Returns NULL if s is NULL, length<-1, or there is no match.
 */
U_CAPI UChar * U_EXPORT2
u_strFindFirst(const UChar *s, int32_t length,
               const UChar *sub, int32_t subLength) {
    if(sub==NULL || subLength<-1) {
        return (UChar *)s;
    }
    if(s==NULL || length<-1) {
        return NULL;
    }

    const UChar *const textStart=s;

    if(length<0 && subLength<0) {
        /* neither length is known: scan both strings up to their NULs */
        const UChar first=*sub++;
        if(first==0) {
            return (UChar *)s; /* empty substring */
        }
        if(*sub==0 && !U16_IS_SURROGATE(first)) {
            /* single non-surrogate code unit: plain character search */
            return u_strchr(s, first);
        }

        for(UChar unit=*s++; unit!=0; unit=*s++) {
            if(unit!=first) {
                continue;
            }
            /* first unit matched at s-1; compare the remainder */
            const UChar *textPos=s;
            const UChar *subPos=sub;
            for(;;) {
                if(*subPos==0) {
                    if(isMatchAtCPBoundary(textStart, s-1, textPos, NULL)) {
                        return (UChar *)(s-1);
                    }
                    break; /* the match would split a surrogate pair */
                }
                if(*textPos==0) {
                    return NULL; /* text exhausted: no later match is possible */
                }
                if(*textPos!=*subPos) {
                    break;
                }
                ++textPos;
                ++subPos;
            }
        }
        return NULL;
    }

    if(subLength<0) {
        subLength=u_strlen(sub);
    }
    if(subLength==0) {
        return (UChar *)s;
    }

    /* split sub into its first unit and the rest [sub, subLimit) */
    const UChar first=*sub++;
    --subLength;
    const UChar *const subLimit=sub+subLength;

    if(subLength==0 && !U16_IS_SURROGATE(first)) {
        /* single non-surrogate code unit: plain character search */
        return length<0 ? u_strchr(s, first) : u_memchr(s, first, length);
    }

    if(length<0) {
        /* s is NUL-terminated, sub has a known length */
        for(UChar unit=*s++; unit!=0; unit=*s++) {
            if(unit!=first) {
                continue;
            }
            const UChar *textPos=s;
            const UChar *subPos=sub;
            for(;;) {
                if(subPos==subLimit) {
                    if(isMatchAtCPBoundary(textStart, s-1, textPos, NULL)) {
                        return (UChar *)(s-1);
                    }
                    break; /* the match would split a surrogate pair */
                }
                if(*textPos==0) {
                    return NULL; /* text exhausted: no later match is possible */
                }
                if(*textPos!=*subPos) {
                    break;
                }
                ++textPos;
                ++subPos;
            }
        }
        return NULL;
    }

    /* both lengths are known; subLength no longer counts the first unit */
    if(length<=subLength) {
        return NULL; /* s is shorter than sub */
    }

    const UChar *const limit=s+length;
    /* a match must begin before preLimit so that the rest of sub still fits */
    const UChar *const preLimit=limit-subLength;

    while(s!=preLimit) {
        if(*s++!=first) {
            continue;
        }
        const UChar *textPos=s;
        const UChar *subPos=sub;
        for(;;) {
            if(subPos==subLimit) {
                if(isMatchAtCPBoundary(textStart, s-1, textPos, limit)) {
                    return (UChar *)(s-1);
                }
                break; /* the match would split a surrogate pair */
            }
            if(*textPos!=*subPos) {
                break;
            }
            ++textPos;
            ++subPos;
        }
    }

    return NULL;
}
192
193
/*
 * Find the first occurrence of a NUL-terminated substring in a
 * NUL-terminated string; forwards to u_strFindFirst().
 */
U_CAPI UChar * U_EXPORT2
u_strstr(const UChar *s, const UChar *substring) {
    /* a length of -1 tells u_strFindFirst() that the string is NUL-terminated */
    const int32_t nulTerminated=-1;
    return u_strFindFirst(s, nulTerminated, substring, nulTerminated);
}
197
198
/*
 * Find the first occurrence of the code unit c in the NUL-terminated string s.
 * A surrogate c is searched via u_strFindFirst() so that half of a
 * surrogate pair is never reported.
 * Because the equality test comes before the terminator test, c==0 yields
 * a pointer to the terminating NUL.
 */
U_CAPI UChar * U_EXPORT2
u_strchr(const UChar *s, UChar c) {
    if(U16_IS_SURROGATE(c)) {
        return u_strFindFirst(s, -1, &c, 1);
    }

    /* simple linear scan for a non-surrogate code unit */
    for(const UChar *p=s;; ++p) {
        const UChar unit=*p;
        if(unit==c) {
            return (UChar *)p;
        }
        if(unit==0) {
            return NULL;
        }
    }
}
218
219
/*
 * Find the first occurrence of the code point c in the NUL-terminated string s.
 * BMP values are delegated to u_strchr(); supplementary code points are
 * searched as a lead/trail surrogate pair; values above UCHAR_MAX_VALUE
 * (including negative ones, via the unsigned cast) are never found.
 */
U_CAPI UChar * U_EXPORT2
u_strchr32(const UChar *s, UChar32 c) {
    if((uint32_t)c<=U_BMP_MAX) {
        return u_strchr(s, (UChar)c);
    }
    if((uint32_t)c>UCHAR_MAX_VALUE) {
        return NULL; /* not a Unicode code point */
    }

    const UChar lead=U16_LEAD(c);
    const UChar trail=U16_TRAIL(c);

    /* s[1] is at worst the terminating NUL, so no extra bounds check is needed */
    for(; *s!=0; ++s) {
        if(*s==lead && s[1]==trail) {
            return (UChar *)s;
        }
    }
    return NULL;
}
239
240
/*
 * Find the first occurrence of the code unit c within the first count
 * code units of s. Returns NULL for count<=0 or if c is not found.
 * A surrogate c is searched via u_strFindFirst() so that half of a
 * surrogate pair is never reported.
 */
U_CAPI UChar * U_EXPORT2
u_memchr(const UChar *s, UChar c, int32_t count) {
    if(count<=0) {
        return NULL; /* nothing to search */
    }
    if(U16_IS_SURROGATE(c)) {
        return u_strFindFirst(s, count, &c, 1);
    }

    /* simple forward scan for a non-surrogate code unit */
    for(int32_t i=0; i<count; ++i) {
        if(s[i]==c) {
            return (UChar *)(s+i);
        }
    }
    return NULL;
}
258
259
/*
 * Find the first occurrence of the code point c within the first count
 * code units of s.
 * BMP values are delegated to u_memchr(); supplementary code points are
 * searched as a lead/trail surrogate pair; values above UCHAR_MAX_VALUE
 * are never found.
 */
U_CAPI UChar * U_EXPORT2
u_memchr32(const UChar *s, UChar32 c, int32_t count) {
    if((uint32_t)c<=U_BMP_MAX) {
        return u_memchr(s, (UChar)c, count);
    }
    if(count<2) {
        return NULL; /* no room for a surrogate pair */
    }
    if((uint32_t)c>UCHAR_MAX_VALUE) {
        return NULL; /* not a Unicode code point */
    }

    const UChar lead=U16_LEAD(c);
    const UChar trail=U16_TRAIL(c);

    /* the lead unit can sit at index count-2 at the latest */
    for(int32_t i=0; i<=count-2; ++i) {
        if(s[i]==lead && s[i+1]==trail) {
            return (UChar *)(s+i);
        }
    }
    return NULL;
}
283
284
/* Backward binary string search functions ---------------------------------- */
285
286
/*
 * Find the last occurrence of sub in s such that the match does not split
 * a surrogate pair (see isMatchAtCPBoundary()).
 *
 * A negative length (-1) means that the corresponding string is NUL-terminated.
 * Returns s itself if sub is NULL, subLength<-1, or sub is empty.
 * Returns NULL if s is NULL, length<-1, or there is no match.
 *
 * Unlike u_strFindFirst() there is no dedicated code path for
 * NUL-terminated input: missing lengths are computed with u_strlen()
 * and the text is then scanned backward.
 */
U_CAPI UChar * U_EXPORT2
u_strFindLast(const UChar *s, int32_t length,
              const UChar *sub, int32_t subLength) {
    if(sub==NULL || subLength<-1) {
        return (UChar *)s;
    }
    if(s==NULL || length<-1) {
        return NULL;
    }

    if(subLength<0) {
        subLength=u_strlen(sub);
    }
    if(subLength==0) {
        return (UChar *)s;
    }

    /* split sub into [sub, subLast) and its final unit, which is searched for first */
    const UChar *const subLast=sub+subLength-1;
    const UChar last=*subLast;
    --subLength;

    if(subLength==0 && !U16_IS_SURROGATE(last)) {
        /* single non-surrogate code unit: plain character search */
        return length<0 ? u_strrchr(s, last) : u_memrchr(s, last, length);
    }

    if(length<0) {
        length=u_strlen(s);
    }

    /* subLength no longer counts the final unit */
    if(length<=subLength) {
        return NULL; /* s is shorter than sub */
    }

    const UChar *const textStart=s;
    const UChar *const textLimit=s+length;
    /* the final unit of a match cannot lie before this position */
    const UChar *const earliestEnd=s+subLength;

    for(const UChar *cand=textLimit; cand!=earliestEnd;) {
        --cand;
        if(*cand!=last) {
            continue;
        }
        /* final unit matched at cand; compare the rest going backward */
        const UChar *textPos=cand;
        const UChar *subPos=subLast;
        for(;;) {
            if(subPos==sub) {
                if(isMatchAtCPBoundary(textStart, textPos, cand+1, textLimit)) {
                    return (UChar *)textPos;
                }
                break; /* the match would split a surrogate pair */
            }
            if(*(--textPos)!=*(--subPos)) {
                break;
            }
        }
    }

    return NULL;
}
365
366
/*
 * Find the last occurrence of a NUL-terminated substring in a
 * NUL-terminated string; forwards to u_strFindLast().
 */
U_CAPI UChar * U_EXPORT2
u_strrstr(const UChar *s, const UChar *substring) {
    /* a length of -1 tells u_strFindLast() that the string is NUL-terminated */
    const int32_t nulTerminated=-1;
    return u_strFindLast(s, nulTerminated, substring, nulTerminated);
}
370
371
/*
 * Find the last occurrence of the code unit c in the NUL-terminated string s.
 * A surrogate c is searched via u_strFindLast() so that half of a
 * surrogate pair is never reported.
 * The terminator itself is compared too, so c==0 yields a pointer to it.
 */
U_CAPI UChar * U_EXPORT2
u_strrchr(const UChar *s, UChar c) {
    if(U16_IS_SURROGATE(c)) {
        return u_strFindLast(s, -1, &c, 1);
    }

    /* scan forward once and remember the most recent hit */
    const UChar *lastHit=NULL;
    for(;; ++s) {
        const UChar unit=*s;
        if(unit==c) {
            lastHit=s;
        }
        if(unit==0) {
            return (UChar *)lastHit;
        }
    }
}
392
393
/*
 * Find the last occurrence of the code point c in the NUL-terminated string s.
 * BMP values are delegated to u_strrchr(); supplementary code points are
 * searched as a lead/trail surrogate pair; values above UCHAR_MAX_VALUE
 * are never found.
 */
U_CAPI UChar * U_EXPORT2
u_strrchr32(const UChar *s, UChar32 c) {
    if((uint32_t)c<=U_BMP_MAX) {
        return u_strrchr(s, (UChar)c);
    }
    if((uint32_t)c>UCHAR_MAX_VALUE) {
        return NULL; /* not a Unicode code point */
    }

    const UChar lead=U16_LEAD(c);
    const UChar trail=U16_TRAIL(c);
    const UChar *lastHit=NULL;

    /* scan forward once and remember the most recent pair; s[1] is at worst the NUL */
    for(; *s!=0; ++s) {
        if(*s==lead && s[1]==trail) {
            lastHit=s;
        }
    }
    return (UChar *)lastHit;
}
414
415
/*
 * Find the last occurrence of the code unit c within the first count
 * code units of s. Returns NULL for count<=0 or if c is not found.
 * A surrogate c is searched via u_strFindLast() so that half of a
 * surrogate pair is never reported.
 */
U_CAPI UChar * U_EXPORT2
u_memrchr(const UChar *s, UChar c, int32_t count) {
    if(count<=0) {
        return NULL; /* nothing to search */
    }
    if(U16_IS_SURROGATE(c)) {
        return u_strFindLast(s, count, &c, 1);
    }

    /* simple backward scan for a non-surrogate code unit */
    for(int32_t i=count-1; i>=0; --i) {
        if(s[i]==c) {
            return (UChar *)(s+i);
        }
    }
    return NULL;
}
433
434
/*
 * Find the last occurrence of the code point c within the first count
 * code units of s.
 * BMP values are delegated to u_memrchr(); supplementary code points are
 * searched as a lead/trail surrogate pair; values above UCHAR_MAX_VALUE
 * are never found.
 */
U_CAPI UChar * U_EXPORT2
u_memrchr32(const UChar *s, UChar32 c, int32_t count) {
    if((uint32_t)c<=U_BMP_MAX) {
        return u_memrchr(s, (UChar)c, count);
    }
    if(count<2) {
        return NULL; /* no room for a surrogate pair */
    }
    if((uint32_t)c>UCHAR_MAX_VALUE) {
        return NULL; /* not a Unicode code point */
    }

    const UChar lead=U16_LEAD(c);
    const UChar trail=U16_TRAIL(c);

    /* i indexes the trail unit, so it never goes below 1 */
    for(int32_t i=count-1; i>=1; --i) {
        if(s[i]==trail && s[i-1]==lead) {
            return (UChar *)(s+i-1);
        }
    }
    return NULL;
}
458
459
/* Tokenization functions --------------------------------------------------- */
460
461
/*
462
 * Match each code point in a string against each code point in the matchSet.
463
 * Return the index of the first string code point that
464
 * is (polarity==TRUE) or is not (FALSE) contained in the matchSet.
465
 * Return -(string length)-1 if there is no such code point.
466
 */
467
static int32_t
468
0
_matchFromSet(const UChar *string, const UChar *matchSet, UBool polarity) {
469
0
    int32_t matchLen, matchBMPLen, strItr, matchItr;
470
0
    UChar32 stringCh, matchCh;
471
0
    UChar c, c2;
472
473
    /* first part of matchSet contains only BMP code points */
474
0
    matchBMPLen = 0;
475
0
    while((c = matchSet[matchBMPLen]) != 0 && U16_IS_SINGLE(c)) {
476
0
        ++matchBMPLen;
477
0
    }
478
479
    /* second part of matchSet contains BMP and supplementary code points */
480
0
    matchLen = matchBMPLen;
481
0
    while(matchSet[matchLen] != 0) {
482
0
        ++matchLen;
483
0
    }
484
485
0
    for(strItr = 0; (c = string[strItr]) != 0;) {
486
0
        ++strItr;
487
0
        if(U16_IS_SINGLE(c)) {
488
0
            if(polarity) {
489
0
                for(matchItr = 0; matchItr < matchLen; ++matchItr) {
490
0
                    if(c == matchSet[matchItr]) {
491
0
                        return strItr - 1; /* one matches */
492
0
                    }
493
0
                }
494
0
            } else {
495
0
                for(matchItr = 0; matchItr < matchLen; ++matchItr) {
496
0
                    if(c == matchSet[matchItr]) {
497
0
                        goto endloop;
498
0
                    }
499
0
                }
500
0
                return strItr - 1; /* none matches */
501
0
            }
502
0
        } else {
503
            /*
504
             * No need to check for string length before U16_IS_TRAIL
505
             * because c2 could at worst be the terminating NUL.
506
             */
507
0
            if(U16_IS_SURROGATE_LEAD(c) && U16_IS_TRAIL(c2 = string[strItr])) {
508
0
                ++strItr;
509
0
                stringCh = U16_GET_SUPPLEMENTARY(c, c2);
510
0
            } else {
511
0
                stringCh = c; /* unpaired trail surrogate */
512
0
            }
513
514
0
            if(polarity) {
515
0
                for(matchItr = matchBMPLen; matchItr < matchLen;) {
516
0
                    U16_NEXT(matchSet, matchItr, matchLen, matchCh);
517
0
                    if(stringCh == matchCh) {
518
0
                        return strItr - U16_LENGTH(stringCh); /* one matches */
519
0
                    }
520
0
                }
521
0
            } else {
522
0
                for(matchItr = matchBMPLen; matchItr < matchLen;) {
523
0
                    U16_NEXT(matchSet, matchItr, matchLen, matchCh);
524
0
                    if(stringCh == matchCh) {
525
0
                        goto endloop;
526
0
                    }
527
0
                }
528
0
                return strItr - U16_LENGTH(stringCh); /* none matches */
529
0
            }
530
0
        }
531
0
endloop:
532
0
        /* wish C had continue with labels like Java... */;
533
0
    }
534
535
    /* Didn't find it. */
536
0
    return -strItr-1;
537
0
}
538
539
/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
540
U_CAPI UChar * U_EXPORT2
541
u_strpbrk(const UChar *string, const UChar *matchSet)
542
0
{
543
0
    int32_t idx = _matchFromSet(string, matchSet, TRUE);
544
0
    if(idx >= 0) {
545
0
        return (UChar *)string + idx;
546
0
    } else {
547
0
        return NULL;
548
0
    }
549
0
}
550
551
/* Search for a codepoint in a string that matches one of the matchSet codepoints. */
552
U_CAPI int32_t U_EXPORT2
553
u_strcspn(const UChar *string, const UChar *matchSet)
554
0
{
555
0
    int32_t idx = _matchFromSet(string, matchSet, TRUE);
556
0
    if(idx >= 0) {
557
0
        return idx;
558
0
    } else {
559
0
        return -idx - 1; /* == u_strlen(string) */
560
0
    }
561
0
}
562
563
/* Search for a codepoint in a string that does not match one of the matchSet codepoints. */
564
U_CAPI int32_t U_EXPORT2
565
u_strspn(const UChar *string, const UChar *matchSet)
566
0
{
567
0
    int32_t idx = _matchFromSet(string, matchSet, FALSE);
568
0
    if(idx >= 0) {
569
0
        return idx;
570
0
    } else {
571
0
        return -idx - 1; /* == u_strlen(string) */
572
0
    }
573
0
}
574
575
/* ----- Text manipulation functions --- */
576
577
U_CAPI UChar* U_EXPORT2
578
u_strtok_r(UChar    *src, 
579
     const UChar    *delim,
580
           UChar   **saveState)
581
0
{
582
0
    UChar *tokSource;
583
0
    UChar *nextToken;
584
0
    uint32_t nonDelimIdx;
585
586
    /* If saveState is NULL, the user messed up. */
587
0
    if (src != NULL) {
588
0
        tokSource = src;
589
0
        *saveState = src; /* Set to "src" in case there are no delimiters */
590
0
    }
591
0
    else if (*saveState) {
592
0
        tokSource = *saveState;
593
0
    }
594
0
    else {
595
        /* src == NULL && *saveState == NULL */
596
        /* This shouldn't happen. We already finished tokenizing. */
597
0
        return NULL;
598
0
    }
599
600
    /* Skip initial delimiters */
601
0
    nonDelimIdx = u_strspn(tokSource, delim);
602
0
    tokSource = &tokSource[nonDelimIdx];
603
604
0
    if (*tokSource) {
605
0
        nextToken = u_strpbrk(tokSource, delim);
606
0
        if (nextToken != NULL) {
607
            /* Create a token */
608
0
            *(nextToken++) = 0;
609
0
            *saveState = nextToken;
610
0
            return tokSource;
611
0
        }
612
0
        else if (*saveState) {
613
            /* Return the last token */
614
0
            *saveState = NULL;
615
0
            return tokSource;
616
0
        }
617
0
    }
618
0
    else {
619
        /* No tokens were found. Only delimiters were left. */
620
0
        *saveState = NULL;
621
0
    }
622
0
    return NULL;
623
0
}
624
625
/* Miscellaneous functions -------------------------------------------------- */
626
627
/*
 * Append the NUL-terminated string src, including its terminator, to the
 * end of the NUL-terminated string dst. No bounds are checked.
 * Returns dst.
 */
U_CAPI UChar* U_EXPORT2
u_strcat(UChar     *dst, 
    const UChar     *src)
{
    UChar *tail = dst;

    /* locate the terminator of dst */
    while(*tail != 0) {
        ++tail;
    }

    /* copy src up to and including its terminator */
    do {
        *tail = *src++;
    } while(*tail++ != 0);

    return dst;
}
641
642
/*
 * Append at most n code units of the NUL-terminated string src to the
 * NUL-terminated string dst and terminate the result with NUL.
 * For n<=0 nothing is read or written. No bounds are checked.
 * Returns dst.
 */
U_CAPI UChar*  U_EXPORT2
u_strncat(UChar     *dst, 
     const UChar     *src, 
     int32_t     n ) 
{
    if(n <= 0) {
        return dst;
    }

    UChar *tail = dst;

    /* locate the terminator of dst */
    while(*tail != 0) {
        ++tail;
    }

    for(; n > 0; --n) {
        if((*tail = *src++) == 0) {
            return dst;     /* the terminator of src has been copied */
        }
        ++tail;
    }

    /* n units were copied without reaching the end of src: terminate here */
    *tail = 0;
    return dst;
}
667
668
/* ----- Text property functions --- */
669
670
/*
 * Compare two NUL-terminated strings in code unit order.
 * Returns the difference between the first pair of differing code units,
 * or 0 if the strings are equal.
 */
U_CAPI int32_t   U_EXPORT2
u_strcmp(const UChar *s1, 
    const UChar *s2) 
{
    for(;; ++s1, ++s2) {
        const UChar c1=*s1;
        const UChar c2=*s2;
        /* stop at the first difference or at the common terminator */
        if (c1 != c2 || c1 == 0) {
            return (int32_t)c1 - (int32_t)c2;
        }
    }
}
685
686
/*
 * Compare two strings in code unit order or, if codePointOrder is set,
 * in code point order.
 *
 * Three modes, selected by the arguments:
 * - length1<0 && length2<0: both strings are NUL-terminated (strcmp style).
 * - otherwise strncmpStyle: compare at most length1 code units of each
 *   string and also stop at a common NUL; length2 is not consulted.
 * - otherwise: both strings are length-specified (a remaining negative
 *   length is replaced via u_strlen()); if one string is a prefix of the
 *   other, the result is -1, 0 or 1 according to the lengths.
 *
 * Returns a negative, zero or positive value for s1<s2, s1==s2, s1>s2.
 */
U_CFUNC int32_t U_EXPORT2
uprv_strCompare(const UChar *s1, int32_t length1,
                const UChar *s2, int32_t length2,
                UBool strncmpStyle, UBool codePointOrder) {
    /* the string starts and limits are needed for the surrogate fix-up below */
    const UChar *const start1=s1;
    const UChar *const start2=s2;
    const UChar *limit1, *limit2;
    UChar c1, c2;

    /* skip the identical prefix; it never needs fixing up */
    if(length1<0 && length2<0) {
        /* strcmp style, both NUL-terminated */
        if(s1==s2) {
            return 0;
        }

        for(;; ++s1, ++s2) {
            c1=*s1;
            c2=*s2;
            if(c1!=c2) {
                break;
            }
            if(c1==0) {
                return 0;
            }
        }

        /* no explicit limits in this mode */
        limit1=limit2=NULL;
    } else if(strncmpStyle) {
        /* strncmp style: assume length1==length2>=0, but stop at NUL as well */
        if(s1==s2) {
            return 0;
        }

        limit1=start1+length1;

        for(;; ++s1, ++s2) {
            /* the lengths are assumed equal, so one limit check suffices */
            if(s1==limit1) {
                return 0;
            }

            c1=*s1;
            c2=*s2;
            if(c1!=c2) {
                break;
            }
            if(c1==0) {
                return 0;
            }
        }

        /* length1 is used on purpose to enforce the equal-length assumption */
        limit2=start2+length1;
    } else {
        /* memcmp/UnicodeString style, both length-specified */
        if(length1<0) {
            length1=u_strlen(s1);
        }
        if(length2<0) {
            length2=u_strlen(s2);
        }

        /* result if one string turns out to be a prefix of the other */
        int32_t lengthResult;
        int32_t commonLength;
        if(length1<length2) {
            lengthResult=-1;
            commonLength=length1;
        } else if(length1==length2) {
            lengthResult=0;
            commonLength=length1;
        } else /* length1>length2 */ {
            lengthResult=1;
            commonLength=length2;
        }

        if(s1==s2) {
            return lengthResult;
        }

        /* compare only up to the shorter of the two lengths */
        const UChar *const commonLimit=start1+commonLength;
        for(;; ++s1, ++s2) {
            if(s1==commonLimit) {
                return lengthResult;
            }

            c1=*s1;
            c2=*s2;
            if(c1!=c2) {
                break;
            }
        }

        /* real limits for the fix-up */
        limit1=start1+length1;
        limit2=start2+length2;
    }

    /*
     * Here c1!=c2, located at s1 and s2.
     * For code point order, a BMP unit at or above the surrogate range must
     * sort below supplementary code points: subtract 0x2800 from every unit
     * >=0xd800 that is not part of a surrogate pair.
     */
    if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
        if(!(
            (c1<=0xdbff && (s1+1)!=limit1 && U16_IS_TRAIL(*(s1+1))) ||
            (U16_IS_TRAIL(c1) && start1!=s1 && U16_IS_LEAD(*(s1-1)))
        )) {
            /* not part of a pair (possibly a lone surrogate): make <d800 */
            c1-=0x2800;
        }

        if(!(
            (c2<=0xdbff && (s2+1)!=limit2 && U16_IS_TRAIL(*(s2+1))) ||
            (U16_IS_TRAIL(c2) && start2!=s2 && U16_IS_LEAD(*(s2-1)))
        )) {
            /* not part of a pair (possibly a lone surrogate): make <d800 */
            c2-=0x2800;
        }
    }

    /* c1 and c2 are now in the requested (code unit or code point) order */
    return (int32_t)c1-(int32_t)c2;
}
821
822
/*
823
 * Compare two strings as presented by UCharIterators.
824
 * Use code unit or code point order.
825
 * When the function returns, it is undefined where the iterators
826
 * have stopped.
827
 */
828
U_CAPI int32_t U_EXPORT2
829
0
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder) {
830
0
    UChar32 c1, c2;
831
832
    /* argument checking */
833
0
    if(iter1==NULL || iter2==NULL) {
834
0
        return 0; /* bad arguments */
835
0
    }
836
0
    if(iter1==iter2) {
837
0
        return 0; /* identical iterators */
838
0
    }
839
840
    /* reset iterators to start? */
841
0
    iter1->move(iter1, 0, UITER_START);
842
0
    iter2->move(iter2, 0, UITER_START);
843
844
    /* compare identical prefixes - they do not need to be fixed up */
845
0
    for(;;) {
846
0
        c1=iter1->next(iter1);
847
0
        c2=iter2->next(iter2);
848
0
        if(c1!=c2) {
849
0
            break;
850
0
        }
851
0
        if(c1==-1) {
852
0
            return 0;
853
0
        }
854
0
    }
855
856
    /* if both values are in or above the surrogate range, fix them up */
857
0
    if(c1>=0xd800 && c2>=0xd800 && codePointOrder) {
858
        /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones */
859
0
        if(
860
0
            (c1<=0xdbff && U16_IS_TRAIL(iter1->current(iter1))) ||
861
0
            (U16_IS_TRAIL(c1) && (iter1->previous(iter1), U16_IS_LEAD(iter1->previous(iter1))))
862
0
        ) {
863
            /* part of a surrogate pair, leave >=d800 */
864
0
        } else {
865
            /* BMP code point - may be surrogate code point - make <d800 */
866
0
            c1-=0x2800;
867
0
        }
868
869
0
        if(
870
0
            (c2<=0xdbff && U16_IS_TRAIL(iter2->current(iter2))) ||
871
0
            (U16_IS_TRAIL(c2) && (iter2->previous(iter2), U16_IS_LEAD(iter2->previous(iter2))))
872
0
        ) {
873
            /* part of a surrogate pair, leave >=d800 */
874
0
        } else {
875
            /* BMP code point - may be surrogate code point - make <d800 */
876
0
            c2-=0x2800;
877
0
        }
878
0
    }
879
880
    /* now c1 and c2 are in the requested (code unit or code point) order */
881
0
    return (int32_t)c1-(int32_t)c2;
882
0
}
883
884
#if 0
885
/*
886
 * u_strCompareIter() does not leave the iterators _on_ the different units.
887
 * This is possible but would cost a few extra indirect function calls to back
888
 * up if the last unit (c1 or c2 respectively) was >=0.
889
 *
890
 * Consistently leaving them _behind_ the different units is not an option
891
 * because the current "unit" is the end of the string if that is reached,
892
 * and in such a case the iterator does not move.
893
 * For example, when comparing "ab" with "abc", both iterators rest _on_ the end
894
 * of their strings. Calling previous() on each does not move them to where
895
 * the comparison fails.
896
 *
897
 * So the simplest semantics is to not define where the iterators end up.
898
 *
899
 * The following fragment is part of what would need to be done for backing up.
900
 */
901
void fragment {
902
        /* iff a surrogate is part of a surrogate pair, leave >=d800 */
903
        if(c1<=0xdbff) {
904
            if(!U16_IS_TRAIL(iter1->current(iter1))) {
905
                /* lead surrogate code point - make <d800 */
906
                c1-=0x2800;
907
            }
908
        } else if(c1<=0xdfff) {
909
            int32_t idx=iter1->getIndex(iter1, UITER_CURRENT);
910
            iter1->previous(iter1); /* ==c1 */
911
            if(!U16_IS_LEAD(iter1->previous(iter1))) {
912
                /* trail surrogate code point - make <d800 */
913
                c1-=0x2800;
914
            }
915
            /* go back to behind where the difference is */
916
            iter1->move(iter1, idx, UITER_ZERO);
917
        } else /* 0xe000<=c1<=0xffff */ {
918
            /* BMP code point - make <d800 */
919
            c1-=0x2800;
920
        }
921
}
922
#endif
923
924
/*
 * Compare two UChar strings by delegating to uprv_strCompare().
 *
 * Returns 0 without comparing anything when either string pointer is NULL
 * or either length is smaller than -1. Otherwise returns whatever
 * uprv_strCompare() returns, with codePointOrder forwarded unchanged.
 */
U_CAPI int32_t U_EXPORT2
u_strCompare(const UChar *s1, int32_t length1,
             const UChar *s2, int32_t length2,
             UBool codePointOrder) {
    /* only compare when all four arguments pass the sanity check */
    if(s1!=NULL && s2!=NULL && length1>=-1 && length2>=-1) {
        return uprv_strCompare(s1, length1, s2, length2, FALSE, codePointOrder);
    }
    return 0;
}
934
935
/* String compare in code point order - u_strcmp() compares in code unit order. */
936
U_CAPI int32_t U_EXPORT2
937
0
u_strcmpCodePointOrder(const UChar *s1, const UChar *s2) {
938
0
    return uprv_strCompare(s1, -1, s2, -1, FALSE, TRUE);
939
0
}
940
941
/*
 * Compare at most n code units of s1 and s2 in code unit order.
 *
 * Returns the difference of the first pair of units that differ, or 0 when
 * the strings agree up to a NUL in s1 or up to n units. Returns 0 for n<=0.
 */
U_CAPI int32_t   U_EXPORT2
u_strncmp(const UChar     *s1, 
     const UChar     *s2, 
     int32_t     n) 
{
    for(int32_t i = 0; i < n; ++i) {
        const int32_t diff = (int32_t)s1[i] - (int32_t)s2[i];
        /* stop at the first difference, or at the end of (both) strings */
        if(diff != 0 || s1[i] == 0) {
            return diff;
        }
    }
    /* n<=0, or the first n units are all equal */
    return 0;
}
960
961
/*
 * Length-limited comparison in code point order.
 * Passes n as the length of both strings to uprv_strCompare(), with both
 * trailing flags set to TRUE.
 */
U_CAPI int32_t U_EXPORT2
u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n) {
    const int32_t order = uprv_strCompare(s1, n, s2, n, TRUE, TRUE);
    return order;
}
965
966
/*
 * Copy the NUL-terminated string src, including its terminator, into dst.
 * No bounds are checked. Returns dst.
 */
U_CAPI UChar* U_EXPORT2
u_strcpy(UChar     *dst, 
    const UChar     *src) 
{
    UChar *out = dst;
    UChar unit;

    /* copy unit by unit; the terminating NUL is copied before the loop ends */
    do {
        unit = *src++;
        *out++ = unit;
    } while(unit != 0);

    return dst;
}
977
978
/*
 * Copy at most n code units from src to dst, stopping after a NUL has been
 * copied. If no NUL occurs within the first n units, dst is not terminated.
 * Nothing is written when n<=0. Returns dst.
 */
U_CAPI UChar*  U_EXPORT2
u_strncpy(UChar     *dst, 
     const UChar     *src, 
     int32_t     n) 
{
    UChar *out = dst;

    for(; n > 0; --n) {
        const UChar unit = *src++;
        *out++ = unit;
        if(unit == 0) {
            /* terminator copied - done */
            break;
        }
    }

    return dst;
}
992
993
/*
 * Return the number of code units in the NUL-terminated string s,
 * not counting the terminator.
 *
 * When wchar_t has the same size as UChar the work is handed to
 * uprv_wcslen(); otherwise the string is scanned by hand.
 */
U_CAPI int32_t   U_EXPORT2
u_strlen(const UChar *s) 
{
#if U_SIZEOF_WCHAR_T == U_SIZEOF_UCHAR
    return (int32_t)uprv_wcslen((const wchar_t *)s);
#else
    const UChar *t = s;
    while(*t != 0) {
      ++t;
    }
    /* the pointer difference is wider than int32_t on 64-bit targets;
     * narrow explicitly, as the wcslen branch does */
    return (int32_t)(t - s);
#endif
}
1006
1007
/*
 * Count the code points in s.
 *
 * length>=0 gives the number of code units to examine; length==-1 means
 * s is scanned up to its terminating NUL. A lead surrogate immediately
 * followed by a trail surrogate counts as one code point; every other
 * code unit counts as one on its own.
 * Returns 0 when s is NULL or length<-1.
 */
U_CAPI int32_t U_EXPORT2
u_countChar32(const UChar *s, int32_t length) {
    int32_t total = 0;

    if(s==NULL || length<-1) {
        return 0;
    }

    if(length<0) /* length==-1 */ {
        while(*s!=0) {
            const UChar unit = *s++;
            /*
             * Looking one unit ahead is enough for UTF-16, and it is safe:
             * at worst that unit is the terminating NUL.
             */
            if(U16_IS_LEAD(unit) && U16_IS_TRAIL(*s)) {
                ++s;
            }
            ++total;
        }
        return total;
    }

    const UChar *limit = s+length;
    while(s<limit) {
        const UChar unit = *s++;
        /* consume the trail unit too, but only if it lies inside the buffer */
        if(U16_IS_LEAD(unit) && s<limit && U16_IS_TRAIL(*s)) {
            ++s;
        }
        ++total;
    }
    return total;
}
1047
1048
/*
 * Test whether s contains more than `number` code points, without
 * necessarily scanning the whole string.
 *
 * length==-1 means s is NUL-terminated; length>=0 is a code unit count.
 * Returns TRUE for any negative number, and FALSE when s is NULL or
 * length<-1 (the number check comes first).
 */
U_CAPI UBool U_EXPORT2
u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number) {

    if(number<0) {
        return TRUE;
    }
    if(s==NULL || length<-1) {
        return FALSE;
    }

    if(length==-1) {
        /* NUL-terminated: consume code points until the budget is used up */
        while(*s!=0) {
            if(number==0) {
                /* there is at least one more code point than asked for */
                return TRUE;
            }
            const UChar unit = *s++;
            if(U16_IS_LEAD(unit) && U16_IS_TRAIL(*s)) {
                ++s;
            }
            --number;
        }
        return FALSE;
    }

    /* length>=0 known */

    /* s contains at least (length+1)/2 code points: <=2 UChars per cp */
    if(((length+1)/2)>number) {
        return TRUE;
    }

    /*
     * If there are not even more UChars than requested code points the
     * answer is no. Otherwise pairsLeft is the surplus of UChars over
     * requested code points, i.e. how many surrogate pairs can still be
     * absorbed before the code points must run short.
     */
    int32_t pairsLeft = length-number;
    if(pairsLeft<=0) {
        return FALSE;
    }

    const UChar *limit = s+length;
    while(s!=limit) {
        if(number==0) {
            return TRUE;
        }
        const UChar unit = *s++;
        if(U16_IS_LEAD(unit) && s!=limit && U16_IS_TRAIL(*s)) {
            ++s;
            if(--pairsLeft<=0) {
                /* too many pairs - too few code points */
                return FALSE;
            }
        }
        --number;
    }
    return FALSE;
}
1115
1116
/*
 * Copy count UChars from src to dest with uprv_memcpy().
 * Does nothing when count<=0. Returns dest.
 */
U_CAPI UChar * U_EXPORT2
u_memcpy(UChar *dest, const UChar *src, int32_t count) {
    if(count <= 0) {
        return dest;
    }
    /* the byte count is computed in size_t */
    uprv_memcpy(dest, src, (size_t)count*U_SIZEOF_UCHAR);
    return dest;
}
1123
1124
/*
 * Move count UChars from src to dest with uprv_memmove().
 * Does nothing when count<=0. Returns dest.
 */
U_CAPI UChar * U_EXPORT2
u_memmove(UChar *dest, const UChar *src, int32_t count) {
    if(count <= 0) {
        return dest;
    }
    /* the byte count is computed in size_t */
    uprv_memmove(dest, src, (size_t)count*U_SIZEOF_UCHAR);
    return dest;
}
1131
1132
/*
 * Store the code unit c into the first count elements of dest.
 * Nothing is written when count<=0. Returns dest.
 */
U_CAPI UChar * U_EXPORT2
u_memset(UChar *dest, UChar c, int32_t count) {
    for(int32_t i = 0; i < count; ++i) {
        dest[i] = c;
    }
    return dest;
}
1144
1145
/*
 * Compare the first count code units of buf1 and buf2 in code unit order.
 *
 * Returns the difference of the first pair of units that differ (each unit
 * taken as an unsigned 16-bit value), or 0 when all count units are equal
 * or count<=0. A NUL unit does not end the comparison.
 */
U_CAPI int32_t U_EXPORT2
u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count) {
    while(count > 0) {
        const int32_t diff = (int32_t)(uint16_t)*buf1 - (int32_t)(uint16_t)*buf2;
        if(diff != 0) {
            return diff;
        }
        ++buf1;
        ++buf2;
        --count;
    }
    return 0;
}
1162
1163
/*
 * Compare count code units of s1 and s2 in code point order.
 * Passes count as the length of both strings to uprv_strCompare().
 */
U_CAPI int32_t U_EXPORT2
u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count) {
    const int32_t order = uprv_strCompare(s1, count, s2, count, FALSE, TRUE);
    return order;
}
1167
1168
/* u_unescape & support fns ------------------------------------------------- */
1169
1170
/* This map must be in ASCENDING ORDER OF THE ESCAPE CODE */
1171
static const UChar UNESCAPE_MAP[] = {
1172
    /*"   0x22, 0x22 */
1173
    /*'   0x27, 0x27 */
1174
    /*?   0x3F, 0x3F */
1175
    /*\   0x5C, 0x5C */
1176
    /*a*/ 0x61, 0x07,
1177
    /*b*/ 0x62, 0x08,
1178
    /*e*/ 0x65, 0x1b,
1179
    /*f*/ 0x66, 0x0c,
1180
    /*n*/ 0x6E, 0x0a,
1181
    /*r*/ 0x72, 0x0d,
1182
    /*t*/ 0x74, 0x09,
1183
    /*v*/ 0x76, 0x0b
1184
};
1185
enum { UNESCAPE_MAP_LENGTH = UPRV_LENGTHOF(UNESCAPE_MAP) };
1186
1187
/* Convert one octal digit to a numeric value 0..7, or -1 on failure */
1188
0
static int32_t _digit8(UChar c) {
1189
0
    if (c >= u'0' && c <= u'7') {
1190
0
        return c - u'0';
1191
0
    }
1192
0
    return -1;
1193
0
}
1194
1195
/* Convert one hex digit to a numeric value 0..F, or -1 on failure */
1196
0
static int32_t _digit16(UChar c) {
1197
0
    if (c >= u'0' && c <= u'9') {
1198
0
        return c - u'0';
1199
0
    }
1200
0
    if (c >= u'A' && c <= u'F') {
1201
0
        return c - (u'A' - 10);
1202
0
    }
1203
0
    if (c >= u'a' && c <= u'f') {
1204
0
        return c - (u'a' - 10);
1205
0
    }
1206
0
    return -1;
1207
0
}
1208
1209
/* Parse a single escape sequence.  Although this method deals in
1210
 * UChars, it does not use C++ or UnicodeString.  This allows it to
1211
 * be used from C contexts. */
1212
U_CAPI UChar32 U_EXPORT2
1213
u_unescapeAt(UNESCAPE_CHAR_AT charAt,
1214
             int32_t *offset,
1215
             int32_t length,
1216
0
             void *context) {
1217
1218
0
    int32_t start = *offset;
1219
0
    UChar32 c;
1220
0
    UChar32 result = 0;
1221
0
    int8_t n = 0;
1222
0
    int8_t minDig = 0;
1223
0
    int8_t maxDig = 0;
1224
0
    int8_t bitsPerDigit = 4; 
1225
0
    int32_t dig;
1226
0
    UBool braces = FALSE;
1227
1228
    /* Check that offset is in range */
1229
0
    if (*offset < 0 || *offset >= length) {
1230
0
        goto err;
1231
0
    }
1232
1233
    /* Fetch first UChar after '\\' */
1234
0
    c = charAt((*offset)++, context);
1235
1236
    /* Convert hexadecimal and octal escapes */
1237
0
    switch (c) {
1238
0
    case u'u':
1239
0
        minDig = maxDig = 4;
1240
0
        break;
1241
0
    case u'U':
1242
0
        minDig = maxDig = 8;
1243
0
        break;
1244
0
    case u'x':
1245
0
        minDig = 1;
1246
0
        if (*offset < length && charAt(*offset, context) == u'{') {
1247
0
            ++(*offset);
1248
0
            braces = TRUE;
1249
0
            maxDig = 8;
1250
0
        } else {
1251
0
            maxDig = 2;
1252
0
        }
1253
0
        break;
1254
0
    default:
1255
0
        dig = _digit8(c);
1256
0
        if (dig >= 0) {
1257
0
            minDig = 1;
1258
0
            maxDig = 3;
1259
0
            n = 1; /* Already have first octal digit */
1260
0
            bitsPerDigit = 3;
1261
0
            result = dig;
1262
0
        }
1263
0
        break;
1264
0
    }
1265
0
    if (minDig != 0) {
1266
0
        while (*offset < length && n < maxDig) {
1267
0
            c = charAt(*offset, context);
1268
0
            dig = (bitsPerDigit == 3) ? _digit8(c) : _digit16(c);
1269
0
            if (dig < 0) {
1270
0
                break;
1271
0
            }
1272
0
            result = (result << bitsPerDigit) | dig;
1273
0
            ++(*offset);
1274
0
            ++n;
1275
0
        }
1276
0
        if (n < minDig) {
1277
0
            goto err;
1278
0
        }
1279
0
        if (braces) {
1280
0
            if (c != u'}') {
1281
0
                goto err;
1282
0
            }
1283
0
            ++(*offset);
1284
0
        }
1285
0
        if (result < 0 || result >= 0x110000) {
1286
0
            goto err;
1287
0
        }
1288
        /* If an escape sequence specifies a lead surrogate, see if
1289
         * there is a trail surrogate after it, either as an escape or
1290
         * as a literal.  If so, join them up into a supplementary.
1291
         */
1292
0
        if (*offset < length && U16_IS_LEAD(result)) {
1293
0
            int32_t ahead = *offset + 1;
1294
0
            c = charAt(*offset, context);
1295
0
            if (c == u'\\' && ahead < length) {
1296
                // Calling ourselves recursively may cause a stack overflow if
1297
                // we have repeated escaped lead surrogates.
1298
                // Limit the length to 11 ("x{0000DFFF}") after ahead.
1299
0
                int32_t tailLimit = ahead + 11;
1300
0
                if (tailLimit > length) {
1301
0
                    tailLimit = length;
1302
0
                }
1303
0
                c = u_unescapeAt(charAt, &ahead, tailLimit, context);
1304
0
            }
1305
0
            if (U16_IS_TRAIL(c)) {
1306
0
                *offset = ahead;
1307
0
                result = U16_GET_SUPPLEMENTARY(result, c);
1308
0
            }
1309
0
        }
1310
0
        return result;
1311
0
    }
1312
1313
    /* Convert C-style escapes in table */
1314
0
    for (int32_t i=0; i<UNESCAPE_MAP_LENGTH; i+=2) {
1315
0
        if (c == UNESCAPE_MAP[i]) {
1316
0
            return UNESCAPE_MAP[i+1];
1317
0
        } else if (c < UNESCAPE_MAP[i]) {
1318
0
            break;
1319
0
        }
1320
0
    }
1321
1322
    /* Map \cX to control-X: X & 0x1F */
1323
0
    if (c == u'c' && *offset < length) {
1324
0
        c = charAt((*offset)++, context);
1325
0
        if (U16_IS_LEAD(c) && *offset < length) {
1326
0
            UChar c2 = charAt(*offset, context);
1327
0
            if (U16_IS_TRAIL(c2)) {
1328
0
                ++(*offset);
1329
0
                c = U16_GET_SUPPLEMENTARY(c, c2);
1330
0
            }
1331
0
        }
1332
0
        return 0x1F & c;
1333
0
    }
1334
1335
    /* If no special forms are recognized, then consider
1336
     * the backslash to generically escape the next character.
1337
     * Deal with surrogate pairs. */
1338
0
    if (U16_IS_LEAD(c) && *offset < length) {
1339
0
        UChar c2 = charAt(*offset, context);
1340
0
        if (U16_IS_TRAIL(c2)) {
1341
0
            ++(*offset);
1342
0
            return U16_GET_SUPPLEMENTARY(c, c2);
1343
0
        }
1344
0
    }
1345
0
    return c;
1346
1347
0
 err:
1348
    /* Invalid escape sequence */
1349
0
    *offset = start; /* Reset to initial value */
1350
0
    return (UChar32)0xFFFFFFFF;
1351
0
}
1352
1353
/* u_unescapeAt() callback to return a UChar from a char* */
1354
static UChar U_CALLCONV
1355
0
_charPtr_charAt(int32_t offset, void *context) {
1356
0
    UChar c16;
1357
    /* It would be more efficient to access the invariant tables
1358
     * directly but there is no API for that. */
1359
0
    u_charsToUChars(((char*) context) + offset, &c16, 1);
1360
0
    return c16;
1361
0
}
1362
1363
/* Append an escape-free segment of the text; used by u_unescape() */
1364
static void _appendUChars(UChar *dest, int32_t destCapacity,
1365
0
                          const char *src, int32_t srcLen) {
1366
0
    if (destCapacity < 0) {
1367
0
        destCapacity = 0;
1368
0
    }
1369
0
    if (srcLen > destCapacity) {
1370
0
        srcLen = destCapacity;
1371
0
    }
1372
0
    u_charsToUChars(src, dest, srcLen);
1373
0
}
1374
1375
/* Do an invariant conversion of char* -> UChar*, with escape parsing */
1376
U_CAPI int32_t U_EXPORT2
1377
0
u_unescape(const char *src, UChar *dest, int32_t destCapacity) {
1378
0
    const char *segment = src;
1379
0
    int32_t i = 0;
1380
0
    char c;
1381
1382
0
    while ((c=*src) != 0) {
1383
        /* '\\' intentionally written as compiler-specific
1384
         * character constant to correspond to compiler-specific
1385
         * char* constants. */
1386
0
        if (c == '\\') {
1387
0
            int32_t lenParsed = 0;
1388
0
            UChar32 c32;
1389
0
            if (src != segment) {
1390
0
                if (dest != NULL) {
1391
0
                    _appendUChars(dest + i, destCapacity - i,
1392
0
                                  segment, (int32_t)(src - segment));
1393
0
                }
1394
0
                i += (int32_t)(src - segment);
1395
0
            }
1396
0
            ++src; /* advance past '\\' */
1397
0
            c32 = (UChar32)u_unescapeAt(_charPtr_charAt, &lenParsed, (int32_t)uprv_strlen(src), (void*)src);
1398
0
            if (lenParsed == 0) {
1399
0
                goto err;
1400
0
            }
1401
0
            src += lenParsed; /* advance past escape seq. */
1402
0
            if (dest != NULL && U16_LENGTH(c32) <= (destCapacity - i)) {
1403
0
                U16_APPEND_UNSAFE(dest, i, c32);
1404
0
            } else {
1405
0
                i += U16_LENGTH(c32);
1406
0
            }
1407
0
            segment = src;
1408
0
        } else {
1409
0
            ++src;
1410
0
        }
1411
0
    }
1412
0
    if (src != segment) {
1413
0
        if (dest != NULL) {
1414
0
            _appendUChars(dest + i, destCapacity - i,
1415
0
                          segment, (int32_t)(src - segment));
1416
0
        }
1417
0
        i += (int32_t)(src - segment);
1418
0
    }
1419
0
    if (dest != NULL && i < destCapacity) {
1420
0
        dest[i] = 0;
1421
0
    }
1422
0
    return i;
1423
1424
0
 err:
1425
0
    if (dest != NULL && destCapacity > 0) {
1426
0
        *dest = 0;
1427
0
    }
1428
0
    return 0;
1429
0
}
1430
1431
/* NUL-termination of strings ----------------------------------------------- */
1432
1433
/**
 * NUL-terminate a string no matter what its type.
 * Set warning and error codes accordingly.
 *
 * Does nothing if pErrorCode is NULL or already holds a failure, or if
 * length is negative. Otherwise:
 *   length <  destCapacity: writes the NUL and clears a pending
 *                           U_STRING_NOT_TERMINATED_WARNING
 *   length == destCapacity: sets U_STRING_NOT_TERMINATED_WARNING
 *   length >  destCapacity: sets U_BUFFER_OVERFLOW_ERROR
 *
 * Every parameter is parenthesized in the expansion so that expression
 * arguments keep their meaning; arguments are evaluated more than once.
 */
#define __TERMINATE_STRING(dest, destCapacity, length, pErrorCode) UPRV_BLOCK_MACRO_BEGIN { \
    if((pErrorCode)!=NULL && U_SUCCESS(*(pErrorCode))) {                \
        /* not a public function, so no complete argument checking */   \
                                                                        \
        if((length)<0) {                                                \
            /* assume that the caller handles this */                   \
        } else if((length)<(destCapacity)) {                            \
            /* NUL-terminate the string, the NUL fits */                \
            (dest)[(length)]=0;                                         \
            /* unset the not-terminated warning but leave all others */ \
            if(*(pErrorCode)==U_STRING_NOT_TERMINATED_WARNING) {        \
                *(pErrorCode)=U_ZERO_ERROR;                             \
            }                                                           \
        } else if((length)==(destCapacity)) {                           \
            /* unable to NUL-terminate, but the string itself fit - set a warning code */ \
            *(pErrorCode)=U_STRING_NOT_TERMINATED_WARNING;              \
        } else /* length>destCapacity */ {                              \
            /* even the string itself did not fit - set an error code */ \
            *(pErrorCode)=U_BUFFER_OVERFLOW_ERROR;                      \
        }                                                               \
    } \
} UPRV_BLOCK_MACRO_END
1459
1460
/*
 * Map the ASCII lowercase letters a-z to A-Z;
 * every other code unit is returned unchanged.
 */
U_CAPI UChar U_EXPORT2
u_asciiToUpper(UChar c) {
    const UBool isAsciiLower = (u'a' <= c && c <= u'z');
    return isAsciiLower ? (UChar)(c - (u'a' - u'A')) : c;
}
1467
1468
/*
 * Apply __TERMINATE_STRING to a UChar buffer: terminate dest at index
 * length if that fits, updating *pErrorCode as the macro prescribes.
 * Always returns length unchanged.
 */
U_CAPI int32_t U_EXPORT2
u_terminateUChars(UChar *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
    return length;
}
1473
1474
/*
 * Apply __TERMINATE_STRING to a char buffer: terminate dest at index
 * length if that fits, updating *pErrorCode as the macro prescribes.
 * Always returns length unchanged.
 */
U_CAPI int32_t U_EXPORT2
u_terminateChars(char *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
    return length;
}
1479
1480
/*
 * Apply __TERMINATE_STRING to a UChar32 buffer: terminate dest at index
 * length if that fits, updating *pErrorCode as the macro prescribes.
 * Always returns length unchanged.
 */
U_CAPI int32_t U_EXPORT2
u_terminateUChar32s(UChar32 *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
    return length;
}
1485
1486
/*
 * Apply __TERMINATE_STRING to a wchar_t buffer: terminate dest at index
 * length if that fits, updating *pErrorCode as the macro prescribes.
 * Always returns length unchanged.
 */
U_CAPI int32_t U_EXPORT2
u_terminateWChars(wchar_t *dest, int32_t destCapacity, int32_t length, UErrorCode *pErrorCode) {
    __TERMINATE_STRING(dest, destCapacity, length, pErrorCode);
    return length;
}
1491
1492
// Compute the hash code for a string -------------------------------------- ***
1493
1494
// Moved here from uhash.c so that UnicodeString::hashCode() does not depend
1495
// on UHashtable code.
1496
1497
/*
  Compute the hash by iterating sparsely over about 32 (up to 63)
  characters spaced evenly through the string.  For each character,
  multiply the previous hash value by a prime number and add the new
  character in, like a linear congruential random number generator,
  producing a pseudorandom deterministic value well distributed over
  the output range. [LIU]

  Usage notes:
  - The expansion ends in a return statement, so the macro must be the
    body of a function returning int32_t.
  - DEREF is evaluated once per sampled element and must read the current
    element through the macro's local pointer p (for example *p).
  - A NULL STR, or a STRLEN of zero or less, hashes to 0.
*/

#define STRING_HASH(TYPE, STR, STRLEN, DEREF) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t hash = 0;                        \
    const TYPE *p = (const TYPE*) (STR);      \
    if (p != NULL) {                          \
        int32_t len = (int32_t)(STRLEN);      \
        /* step between samples; 1 for len<64 */ \
        int32_t inc = ((len - 32) / 32) + 1;  \
        const TYPE *limit = p + len;          \
        while (p<limit) {                     \
            hash = (hash * 37) + (DEREF);     \
            p += inc;                         \
        }                                     \
    }                                         \
    return static_cast<int32_t>(hash);        \
} UPRV_BLOCK_MACRO_END
1520
1521
/* Used by UnicodeString to compute its hashcode - Not public API. */
1522
U_CAPI int32_t U_EXPORT2
1523
0
ustr_hashUCharsN(const UChar *str, int32_t length) {
1524
0
    STRING_HASH(UChar, str, length, *p);
1525
0
}
1526
1527
/*
 * Hash `length` chars of str with STRING_HASH, reading each one as a
 * uint8_t. The macro expansion supplies the return statement.
 */
U_CAPI int32_t U_EXPORT2
ustr_hashCharsN(const char *str, int32_t length) {
    STRING_HASH(uint8_t, str, length, *p);
}
1531
1532
/*
 * Hash `length` chars of str with STRING_HASH, passing each char through
 * uprv_tolower() and a uint8_t cast before it is mixed in.
 * The macro expansion supplies the return statement.
 */
U_CAPI int32_t U_EXPORT2
ustr_hashICharsN(const char *str, int32_t length) {
    STRING_HASH(char, str, length, (uint8_t)uprv_tolower(*p));
}