Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/uiter.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 2002-2012, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  uiter.cpp
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2002jan18
16
*   created by: Markus W. Scherer
17
*/
18
19
#include "unicode/utypes.h"
20
#include "unicode/ustring.h"
21
#include "unicode/chariter.h"
22
#include "unicode/rep.h"
23
#include "unicode/uiter.h"
24
#include "unicode/utf.h"
25
#include "unicode/utf8.h"
26
#include "unicode/utf16.h"
27
#include "cstring.h"
28
29
U_NAMESPACE_USE
30
31
0
/* TRUE if the integer expression n has its lowest bit clear. */
#define IS_EVEN(n) (((n)&1)==0)
/*
 * TRUE if the address held by pointer expression p is even.
 * The argument is parenthesized so that the cast applies to the whole
 * expression (e.g. IS_POINTER_EVEN(base+1)) and not just its first operand.
 */
#define IS_POINTER_EVEN(p) IS_EVEN((size_t)(p))
33
34
U_CDECL_BEGIN
35
36
/* No-Op UCharIterator implementation for illegal input --------------------- */
37
38
/* No-op getIndex callback: reports 0 regardless of the requested origin. */
static int32_t U_CALLCONV
noopGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
    (void)iter;
    (void)origin;
    return 0;
}
42
43
/* No-op move callback: ignores delta and origin and always reports index 0. */
static int32_t U_CALLCONV
noopMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
    (void)iter;
    (void)delta;
    (void)origin;
    return 0;
}
47
48
/*
 * No-op hasNext/hasPrevious callback: there is never any text to iterate.
 * The noopIterator table uses this function for both directions.
 */
static UBool U_CALLCONV
noopHasNext(UCharIterator *iter) {
    (void)iter;
    return FALSE;
}
52
53
/*
 * No-op current/next/previous callback: always yields U_SENTINEL.
 * The noopIterator table uses this function for all three slots.
 */
static UChar32 U_CALLCONV
noopCurrent(UCharIterator *iter) {
    (void)iter;
    return U_SENTINEL;
}
57
58
/* No-op getState callback: always reports UITER_NO_STATE. */
static uint32_t U_CALLCONV
noopGetState(const UCharIterator *iter) {
    (void)iter;
    return UITER_NO_STATE;
}
62
63
static void U_CALLCONV
64
0
noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode *pErrorCode) {
65
0
    *pErrorCode=U_UNSUPPORTED_ERROR;
66
0
}
67
68
/*
 * Iterator template installed by the uiter_setXyz() functions when they are
 * given illegal input: every callback is one of the no-op functions above.
 */
static const UCharIterator noopIterator={
    /* six data fields, all zero */
    0,
    0,
    0,
    0,
    0,
    0,
    /* callbacks */
    noopGetIndex,
    noopMove,
    noopHasNext,    /* forward test */
    noopHasNext,    /* backward test shares the same function */
    noopCurrent,    /* current */
    noopCurrent,    /* next shares the same function */
    noopCurrent,    /* previous shares the same function */
    NULL,           /* reserved slot, unused */
    noopGetState,
    noopSetState
};
81
82
/* UCharIterator implementation for simple strings -------------------------- */
83
84
/*
85
 * This is an implementation of a code unit (UChar) iterator
86
 * for UChar * strings.
87
 *
88
 * The UCharIterator.context field holds a pointer to the string.
89
 */
90
91
/*
 * Report one of the iterator's stored indexes, selected by origin:
 * 0 for UITER_ZERO, otherwise the start, index, limit or length field.
 * Returns -1 for an origin value that is not one of the five known ones.
 */
static int32_t U_CALLCONV
stringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
    if(origin==UITER_ZERO) {
        return 0;
    }
    if(origin==UITER_START) {
        return iter->start;
    }
    if(origin==UITER_CURRENT) {
        return iter->index;
    }
    if(origin==UITER_LIMIT) {
        return iter->limit;
    }
    if(origin==UITER_LENGTH) {
        return iter->length;
    }
    /* not a valid origin; should never get here */
    return -1;
}
110
111
static int32_t U_CALLCONV
112
0
stringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
113
0
    int32_t pos;
114
115
0
    switch(origin) {
116
0
    case UITER_ZERO:
117
0
        pos=delta;
118
0
        break;
119
0
    case UITER_START:
120
0
        pos=iter->start+delta;
121
0
        break;
122
0
    case UITER_CURRENT:
123
0
        pos=iter->index+delta;
124
0
        break;
125
0
    case UITER_LIMIT:
126
0
        pos=iter->limit+delta;
127
0
        break;
128
0
    case UITER_LENGTH:
129
0
        pos=iter->length+delta;
130
0
        break;
131
0
    default:
132
0
        return -1;  /* Error */
133
0
    }
134
135
0
    if(pos<iter->start) {
136
0
        pos=iter->start;
137
0
    } else if(pos>iter->limit) {
138
0
        pos=iter->limit;
139
0
    }
140
141
0
    return iter->index=pos;
142
0
}
143
144
/* TRUE while the current index is still before the iteration limit. */
static UBool U_CALLCONV
stringIteratorHasNext(UCharIterator *iter) {
    if(iter->index<iter->limit) {
        return TRUE;
    }
    return FALSE;
}
148
149
/* TRUE while the current index is still after the iteration start. */
static UBool U_CALLCONV
stringIteratorHasPrevious(UCharIterator *iter) {
    if(iter->index>iter->start) {
        return TRUE;
    }
    return FALSE;
}
153
154
/*
 * Return the code unit at the current index without moving,
 * or U_SENTINEL if the index is at or beyond the limit.
 */
static UChar32 U_CALLCONV
stringIteratorCurrent(UCharIterator *iter) {
    if(iter->index>=iter->limit) {
        return U_SENTINEL;
    }
    const UChar *text=(const UChar *)(iter->context);
    return text[iter->index];
}
162
163
/*
 * Return the code unit at the current index and advance the index by one,
 * or return U_SENTINEL (without moving) if the index is at or beyond the limit.
 */
static UChar32 U_CALLCONV
stringIteratorNext(UCharIterator *iter) {
    if(iter->index>=iter->limit) {
        return U_SENTINEL;
    }
    const UChar *text=(const UChar *)(iter->context);
    const UChar unit=text[iter->index];
    ++iter->index;
    return unit;
}
171
172
/*
 * Step the index back by one and return the code unit found there,
 * or return U_SENTINEL (without moving) if the index is already at the start.
 */
static UChar32 U_CALLCONV
stringIteratorPrevious(UCharIterator *iter) {
    if(iter->index<=iter->start) {
        return U_SENTINEL;
    }
    const UChar *text=(const UChar *)(iter->context);
    --iter->index;
    return text[iter->index];
}
180
181
/* The saved state of a string iterator is simply its current index. */
static uint32_t U_CALLCONV
stringIteratorGetState(const UCharIterator *iter) {
    const uint32_t state=(uint32_t)iter->index;
    return state;
}
185
186
static void U_CALLCONV
187
0
stringIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
188
0
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
189
        /* do nothing */
190
0
    } else if(iter==NULL) {
191
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
192
0
    } else if((int32_t)state<iter->start || iter->limit<(int32_t)state) {
193
0
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
194
0
    } else {
195
0
        iter->index=(int32_t)state;
196
0
    }
197
0
}
198
199
/*
 * Iterator template for plain UChar strings. uiter_setString() copies it
 * into the caller's iterator and then fills in context, length and limit.
 */
static const UCharIterator stringIterator={
    /* six data fields, zero in the template */
    0,
    0,
    0,
    0,
    0,
    0,
    /* callbacks */
    stringIteratorGetIndex,
    stringIteratorMove,
    stringIteratorHasNext,
    stringIteratorHasPrevious,
    stringIteratorCurrent,
    stringIteratorNext,
    stringIteratorPrevious,
    NULL,   /* reserved slot, unused */
    stringIteratorGetState,
    stringIteratorSetState
};
212
213
/*
 * Set up iter to iterate over the UChar string s.
 *
 * length is the number of UChars, or -1 to have it determined with
 * u_strlen(). If s is NULL or length is less than -1, iter becomes the
 * no-op iterator instead. A NULL iter is ignored.
 */
U_CAPI void U_EXPORT2
uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) {
    if(iter==NULL) {
        return;
    }
    if(s==NULL || length<-1) {
        *iter=noopIterator;
        return;
    }

    *iter=stringIterator;
    iter->context=s;
    iter->length= length>=0 ? length : u_strlen(s);
    iter->limit=iter->length;
}
230
231
/* UCharIterator implementation for UTF-16BE strings ------------------------ */
232
233
/*
234
 * This is an implementation of a code unit (UChar) iterator
235
 * for UTF-16BE strings, i.e., strings in byte-vectors where
236
 * each UChar is stored as a big-endian pair of bytes.
237
 *
238
 * The UCharIterator.context field holds a pointer to the string.
239
 * Everything works just like with a normal UChar iterator (uiter_setString),
240
 * except that UChars are assembled from byte pairs.
241
 */
242
243
/* internal helper function */
244
/*
 * Assemble the code unit number index from the byte vector in iter->context:
 * the first byte of the pair is the high byte, the second one the low byte.
 * No bounds checking is done here.
 */
static inline UChar32
utf16BEIteratorGet(UCharIterator *iter, int32_t index) {
    const uint8_t *bytes=(const uint8_t *)iter->context;
    const uint8_t *pair=bytes+2*index;
    return ((UChar)pair[0]<<8)|(UChar)pair[1];
}
249
250
/*
 * Return the code unit at the current index without moving,
 * or U_SENTINEL if the index is at or beyond the limit.
 */
static UChar32 U_CALLCONV
utf16BEIteratorCurrent(UCharIterator *iter) {
    const int32_t pos=iter->index;

    if(pos>=iter->limit) {
        return U_SENTINEL;
    }
    return utf16BEIteratorGet(iter, pos);
}
260
261
/*
 * Return the code unit at the current index and advance the index by one,
 * or return U_SENTINEL (without moving) if the index is at or beyond the limit.
 */
static UChar32 U_CALLCONV
utf16BEIteratorNext(UCharIterator *iter) {
    const int32_t pos=iter->index;

    if(pos>=iter->limit) {
        return U_SENTINEL;
    }
    iter->index=pos+1;
    return utf16BEIteratorGet(iter, pos);
}
272
273
/*
 * Step the index back by one and return the code unit found there,
 * or return U_SENTINEL (without moving) if the index is already at the start.
 */
static UChar32 U_CALLCONV
utf16BEIteratorPrevious(UCharIterator *iter) {
    if(iter->index<=iter->start) {
        return U_SENTINEL;
    }

    const int32_t pos=iter->index-1;
    iter->index=pos;
    return utf16BEIteratorGet(iter, pos);
}
284
285
/*
 * Iterator template for UTF-16BE byte vectors. Index handling and state
 * save/restore reuse the plain-string functions; only the three functions
 * that read code units are specific to the byte-pair representation.
 */
static const UCharIterator utf16BEIterator={
    /* six data fields, zero in the template */
    0,
    0,
    0,
    0,
    0,
    0,
    /* callbacks */
    stringIteratorGetIndex,
    stringIteratorMove,
    stringIteratorHasNext,
    stringIteratorHasPrevious,
    utf16BEIteratorCurrent,
    utf16BEIteratorNext,
    utf16BEIteratorPrevious,
    NULL,   /* reserved slot, unused */
    stringIteratorGetState,
    stringIteratorSetState
};
298
299
/*
300
 * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL,
301
 * i.e., before a pair of 0 bytes where the first 0 byte is at an even
302
 * offset from s.
303
 */
304
static int32_t
305
0
utf16BE_strlen(const char *s) {
306
0
    if(IS_POINTER_EVEN(s)) {
307
        /*
308
         * even-aligned, call u_strlen(s)
309
         * we are probably on a little-endian machine, but searching for UChar NUL
310
         * does not care about endianness
311
         */
312
0
        return u_strlen((const UChar *)s);
313
0
    } else {
314
        /* odd-aligned, search for pair of 0 bytes */
315
0
        const char *p=s;
316
317
0
        while(!(*p==0 && p[1]==0)) {
318
0
            p+=2;
319
0
        }
320
0
        return (int32_t)((p-s)/2);
321
0
    }
322
0
}
323
324
/*
 * Set up iter to iterate over the UTF-16BE byte vector s.
 *
 * length counts bytes and must be even, or -1 for a string terminated by
 * a UChar NUL. If s is NULL or length is odd or less than -1, iter becomes
 * the no-op iterator. A NULL iter is ignored.
 *
 * On a big-endian machine with a 2-aligned s, the bytes already form a
 * normal UChar string and the plain string iterator is used instead.
 */
U_CAPI void U_EXPORT2
uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length) {
    if(iter==NULL) {
        return;
    }

    /* allow only even-length strings (the input length counts bytes) */
    if(s==NULL || !(length==-1 || (length>=0 && IS_EVEN(length)))) {
        *iter=noopIterator;
        return;
    }

    /*
     * Convert the byte count into a UChar count. The -1 marker is left
     * untouched instead of being right-shifted, because shifting a negative
     * value is not portable.
     */
    if(length>=0) {
        length/=2;
    }

    if(U_IS_BIG_ENDIAN && IS_POINTER_EVEN(s)) {
        /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */
        uiter_setString(iter, (const UChar *)s, length);
        return;
    }

    *iter=utf16BEIterator;
    iter->context=s;
    if(length>=0) {
        iter->length=length;
    } else {
        iter->length=utf16BE_strlen(s);
    }
    iter->limit=iter->length;
}
351
352
/* UCharIterator wrapper around CharacterIterator --------------------------- */
353
354
/*
355
 * This is wrapper code around a C++ CharacterIterator to
356
 * look like a C UCharIterator.
357
 *
358
 * The UCharIterator.context field holds a pointer to the CharacterIterator.
359
 */
360
361
/*
 * Report an index of the wrapped CharacterIterator, selected by origin:
 * 0 for UITER_ZERO, otherwise its startIndex(), getIndex(), endIndex()
 * or getLength(). Returns -1 for an origin that is not one of these.
 */
static int32_t U_CALLCONV
characterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);

    if(origin==UITER_ZERO) {
        return 0;
    }
    if(origin==UITER_START) {
        return wrapped->startIndex();
    }
    if(origin==UITER_CURRENT) {
        return wrapped->getIndex();
    }
    if(origin==UITER_LIMIT) {
        return wrapped->endIndex();
    }
    if(origin==UITER_LENGTH) {
        return wrapped->getLength();
    }
    /* not a valid origin; should never get here */
    return -1;
}
380
381
/*
 * Move the wrapped CharacterIterator by delta relative to origin and
 * return its resulting index.
 *
 * UITER_START, UITER_CURRENT and UITER_LIMIT are passed through to
 * CharacterIterator::move() with the origin value cast to EOrigin.
 * UITER_ZERO and UITER_LENGTH are handled with setIndex() on an absolute
 * position. Returns -1 for any other origin.
 */
static int32_t U_CALLCONV
characterIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);

    if(origin==UITER_START || origin==UITER_CURRENT || origin==UITER_LIMIT) {
        return wrapped->move(delta, (CharacterIterator::EOrigin)origin);
    }
    if(origin==UITER_ZERO) {
        wrapped->setIndex(delta);
        return wrapped->getIndex();
    }
    if(origin==UITER_LENGTH) {
        wrapped->setIndex(wrapped->getLength()+delta);
        return wrapped->getIndex();
    }
    /* not a valid origin; should never get here */
    return -1;
}
400
401
/* Forward to hasNext() of the wrapped CharacterIterator. */
static UBool U_CALLCONV
characterIteratorHasNext(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    return wrapped->hasNext();
}
405
406
/* Forward to hasPrevious() of the wrapped CharacterIterator. */
static UBool U_CALLCONV
characterIteratorHasPrevious(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    return wrapped->hasPrevious();
}
410
411
/*
 * Return the current code unit of the wrapped CharacterIterator.
 * A result of 0xffff is passed through only if the iterator still reports
 * hasNext(); otherwise it is mapped to U_SENTINEL.
 */
static UChar32 U_CALLCONV
characterIteratorCurrent(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    const UChar32 c=wrapped->current();

    if(c==0xffff && !wrapped->hasNext()) {
        return U_SENTINEL;
    }
    return c;
}
422
423
/*
 * Return nextPostInc() of the wrapped CharacterIterator,
 * or U_SENTINEL if it reports no next code unit.
 */
static UChar32 U_CALLCONV
characterIteratorNext(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);

    if(!wrapped->hasNext()) {
        return U_SENTINEL;
    }
    return wrapped->nextPostInc();
}
431
432
/*
 * Return previous() of the wrapped CharacterIterator,
 * or U_SENTINEL if it reports no previous code unit.
 */
static UChar32 U_CALLCONV
characterIteratorPrevious(UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);

    if(!wrapped->hasPrevious()) {
        return U_SENTINEL;
    }
    return wrapped->previous();
}
440
441
/* The saved state is the current index of the wrapped CharacterIterator. */
static uint32_t U_CALLCONV
characterIteratorGetState(const UCharIterator *iter) {
    CharacterIterator *wrapped=(CharacterIterator *)(iter->context);
    return wrapped->getIndex();
}
445
446
static void U_CALLCONV
447
0
characterIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
448
0
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
449
        /* do nothing */
450
0
    } else if(iter==NULL || iter->context==NULL) {
451
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
452
0
    } else if((int32_t)state<((CharacterIterator *)(iter->context))->startIndex() || ((CharacterIterator *)(iter->context))->endIndex()<(int32_t)state) {
453
0
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
454
0
    } else {
455
0
        ((CharacterIterator *)(iter->context))->setIndex((int32_t)state);
456
0
    }
457
0
}
458
459
/*
 * Iterator template that forwards every operation to a C++
 * CharacterIterator. uiter_setCharacterIterator() copies it into the
 * caller's iterator and stores the CharacterIterator pointer in context.
 */
static const UCharIterator characterIteratorWrapper={
    /* six data fields, zero in the template */
    0,
    0,
    0,
    0,
    0,
    0,
    /* callbacks */
    characterIteratorGetIndex,
    characterIteratorMove,
    characterIteratorHasNext,
    characterIteratorHasPrevious,
    characterIteratorCurrent,
    characterIteratorNext,
    characterIteratorPrevious,
    NULL,   /* reserved slot, unused */
    characterIteratorGetState,
    characterIteratorSetState
};
472
473
/*
 * Set up iter as a wrapper around charIter.
 * If charIter is NULL, iter becomes the no-op iterator.
 * A NULL iter is ignored.
 */
U_CAPI void U_EXPORT2
uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter) {
    if(iter==NULL) {
        return;
    }
    if(charIter==NULL) {
        *iter=noopIterator;
        return;
    }

    *iter=characterIteratorWrapper;
    iter->context=charIter;
}
484
485
/* UCharIterator wrapper around Replaceable --------------------------------- */
486
487
/*
488
 * This is an implementation of a code unit (UChar) iterator
489
 * based on a Replaceable object.
490
 *
491
 * The UCharIterator.context field holds a pointer to the Replaceable.
492
 * UCharIterator.length and UCharIterator.index hold Replaceable.length()
493
 * and the iteration index.
494
 */
495
496
/*
 * Return charAt(index) of the Replaceable without moving,
 * or U_SENTINEL if the index is at or beyond the limit.
 */
static UChar32 U_CALLCONV
replaceableIteratorCurrent(UCharIterator *iter) {
    if(iter->index>=iter->limit) {
        return U_SENTINEL;
    }
    Replaceable *rep=(Replaceable *)(iter->context);
    return rep->charAt(iter->index);
}
504
505
/*
 * Return charAt(index) of the Replaceable and advance the index by one,
 * or return U_SENTINEL (without moving) if the index is at or beyond the limit.
 */
static UChar32 U_CALLCONV
replaceableIteratorNext(UCharIterator *iter) {
    if(iter->index>=iter->limit) {
        return U_SENTINEL;
    }
    Replaceable *rep=(Replaceable *)(iter->context);
    const int32_t at=iter->index;
    iter->index=at+1;
    return rep->charAt(at);
}
513
514
/*
 * Step the index back by one and return charAt() of the Replaceable there,
 * or return U_SENTINEL (without moving) if the index is already at the start.
 */
static UChar32 U_CALLCONV
replaceableIteratorPrevious(UCharIterator *iter) {
    if(iter->index<=iter->start) {
        return U_SENTINEL;
    }
    Replaceable *rep=(Replaceable *)(iter->context);
    --iter->index;
    return rep->charAt(iter->index);
}
522
523
/*
 * Iterator template for Replaceable objects. Index handling and state
 * save/restore reuse the plain-string functions; only the three functions
 * that read code units go through Replaceable::charAt().
 */
static const UCharIterator replaceableIterator={
    /* six data fields, zero in the template */
    0,
    0,
    0,
    0,
    0,
    0,
    /* callbacks */
    stringIteratorGetIndex,
    stringIteratorMove,
    stringIteratorHasNext,
    stringIteratorHasPrevious,
    replaceableIteratorCurrent,
    replaceableIteratorNext,
    replaceableIteratorPrevious,
    NULL,   /* reserved slot, unused */
    stringIteratorGetState,
    stringIteratorSetState
};
536
537
/*
 * Set up iter to iterate over the Replaceable rep; length and limit are
 * both taken from rep->length() at the time of this call.
 * If rep is NULL, iter becomes the no-op iterator. A NULL iter is ignored.
 */
U_CAPI void U_EXPORT2
uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) {
    if(iter==NULL) {
        return;
    }
    if(rep==NULL) {
        *iter=noopIterator;
        return;
    }

    *iter=replaceableIterator;
    iter->context=rep;
    const int32_t units=rep->length();
    iter->length=units;
    iter->limit=units;
}
549
550
/* UCharIterator implementation for UTF-8 strings --------------------------- */
551
552
/*
553
 * Possible, probably necessary only for an implementation for arbitrary
554
 * converters:
555
 * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text.
556
 * This would require turning reservedFn into a close function and
557
 * to introduce a uiter_close(iter).
558
 */
559
560
/*
 * NOTE(review): not referenced in the visible part of this file; presumably
 * the capacity of the conversion buffer sketched in the comment above.
 * Confirm before relying on or removing it.
 */
#define UITER_CNV_CAPACITY 16
561
562
/*
563
 * Minimal implementation:
564
 * Maintain a single-UChar buffer for an additional surrogate.
565
 * The caller must not modify start and limit because they are used internally.
566
 *
567
 * Use UCharIterator fields as follows:
568
 *   context        pointer to UTF-8 string
569
 *   length         UTF-16 length of the string; -1 until lazy evaluation
570
 *   start          current UTF-8 index
571
 *   index          current UTF-16 index; may be -1="unknown" after setState()
572
 *   limit          UTF-8 length of the string
573
 *   reservedField  supplementary code point
574
 *
575
 * Since UCharIterator delivers 16-bit code units, the iteration can be
576
 * currently in the middle of the byte sequence for a supplementary code point.
577
 * In this case, reservedField will contain that code point and start will
578
 * point to after the corresponding byte sequence. The UTF-16 index will be
579
 * one less than what it would otherwise be corresponding to the UTF-8 index.
580
 * Otherwise, reservedField will be 0.
581
 */
582
583
/*
584
 * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings:
585
 * Add implementations that do not call strlen() for iteration but check for NUL.
586
 */
587
588
static int32_t U_CALLCONV
589
0
utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) {
590
0
    switch(origin) {
591
0
    case UITER_ZERO:
592
0
    case UITER_START:
593
0
        return 0;
594
0
    case UITER_CURRENT:
595
0
        if(iter->index<0) {
596
            /* the current UTF-16 index is unknown after setState(), count from the beginning */
597
0
            const uint8_t *s;
598
0
            UChar32 c;
599
0
            int32_t i, limit, index;
600
601
0
            s=(const uint8_t *)iter->context;
602
0
            i=index=0;
603
0
            limit=iter->start; /* count up to the UTF-8 index */
604
0
            while(i<limit) {
605
0
                U8_NEXT_OR_FFFD(s, i, limit, c);
606
0
                index+=U16_LENGTH(c);
607
0
            }
608
609
0
            iter->start=i; /* just in case setState() did not get us to a code point boundary */
610
0
            if(i==iter->limit) {
611
0
                iter->length=index; /* in case it was <0 or wrong */
612
0
            }
613
0
            if(iter->reservedField!=0) {
614
0
                --index; /* we are in the middle of a supplementary code point */
615
0
            }
616
0
            iter->index=index;
617
0
        }
618
0
        return iter->index;
619
0
    case UITER_LIMIT:
620
0
    case UITER_LENGTH:
621
0
        if(iter->length<0) {
622
0
            const uint8_t *s;
623
0
            UChar32 c;
624
0
            int32_t i, limit, length;
625
626
0
            s=(const uint8_t *)iter->context;
627
0
            if(iter->index<0) {
628
                /*
629
                 * the current UTF-16 index is unknown after setState(),
630
                 * we must first count from the beginning to here
631
                 */
632
0
                i=length=0;
633
0
                limit=iter->start;
634
635
                /* count from the beginning to the current index */
636
0
                while(i<limit) {
637
0
                    U8_NEXT_OR_FFFD(s, i, limit, c);
638
0
                    length+=U16_LENGTH(c);
639
0
                }
640
641
                /* assume i==limit==iter->start, set the UTF-16 index */
642
0
                iter->start=i; /* just in case setState() did not get us to a code point boundary */
643
0
                iter->index= iter->reservedField!=0 ? length-1 : length;
644
0
            } else {
645
0
                i=iter->start;
646
0
                length=iter->index;
647
0
                if(iter->reservedField!=0) {
648
0
                    ++length;
649
0
                }
650
0
            }
651
652
            /* count from the current index to the end */
653
0
            limit=iter->limit;
654
0
            while(i<limit) {
655
0
                U8_NEXT_OR_FFFD(s, i, limit, c);
656
0
                length+=U16_LENGTH(c);
657
0
            }
658
0
            iter->length=length;
659
0
        }
660
0
        return iter->length;
661
0
    default:
662
        /* not a valid origin */
663
        /* Should never get here! */
664
0
        return -1;
665
0
    }
666
0
}
667
668
static int32_t U_CALLCONV
669
0
utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) {
670
0
    const uint8_t *s;
671
0
    UChar32 c;
672
0
    int32_t pos; /* requested UTF-16 index */
673
0
    int32_t i; /* UTF-8 index */
674
0
    UBool havePos;
675
676
    /* calculate the requested UTF-16 index */
677
0
    switch(origin) {
678
0
    case UITER_ZERO:
679
0
    case UITER_START:
680
0
        pos=delta;
681
0
        havePos=TRUE;
682
        /* iter->index<0 (unknown) is possible */
683
0
        break;
684
0
    case UITER_CURRENT:
685
0
        if(iter->index>=0) {
686
0
            pos=iter->index+delta;
687
0
            havePos=TRUE;
688
0
        } else {
689
            /* the current UTF-16 index is unknown after setState(), use only delta */
690
0
            pos=0;
691
0
            havePos=FALSE;
692
0
        }
693
0
        break;
694
0
    case UITER_LIMIT:
695
0
    case UITER_LENGTH:
696
0
        if(iter->length>=0) {
697
0
            pos=iter->length+delta;
698
0
            havePos=TRUE;
699
0
        } else {
700
            /* pin to the end, avoid counting the length */
701
0
            iter->index=-1;
702
0
            iter->start=iter->limit;
703
0
            iter->reservedField=0;
704
0
            if(delta>=0) {
705
0
                return UITER_UNKNOWN_INDEX;
706
0
            } else {
707
                /* the current UTF-16 index is unknown, use only delta */
708
0
                pos=0;
709
0
                havePos=FALSE;
710
0
            }
711
0
        }
712
0
        break;
713
0
    default:
714
0
        return -1;  /* Error */
715
0
    }
716
717
0
    if(havePos) {
718
        /* shortcuts: pinning to the edges of the string */
719
0
        if(pos<=0) {
720
0
            iter->index=iter->start=iter->reservedField=0;
721
0
            return 0;
722
0
        } else if(iter->length>=0 && pos>=iter->length) {
723
0
            iter->index=iter->length;
724
0
            iter->start=iter->limit;
725
0
            iter->reservedField=0;
726
0
            return iter->index;
727
0
        }
728
729
        /* minimize the number of U8_NEXT/PREV operations */
730
0
        if(iter->index<0 || pos<iter->index/2) {
731
            /* go forward from the start instead of backward from the current index */
732
0
            iter->index=iter->start=iter->reservedField=0;
733
0
        } else if(iter->length>=0 && (iter->length-pos)<(pos-iter->index)) {
734
            /*
735
             * if we have the UTF-16 index and length and the new position is
736
             * closer to the end than the current index,
737
             * then go backward from the end instead of forward from the current index
738
             */
739
0
            iter->index=iter->length;
740
0
            iter->start=iter->limit;
741
0
            iter->reservedField=0;
742
0
        }
743
744
0
        delta=pos-iter->index;
745
0
        if(delta==0) {
746
0
            return iter->index; /* nothing to do */
747
0
        }
748
0
    } else {
749
        /* move relative to unknown UTF-16 index */
750
0
        if(delta==0) {
751
0
            return UITER_UNKNOWN_INDEX; /* nothing to do */
752
0
        } else if(-delta>=iter->start) {
753
            /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */
754
0
            iter->index=iter->start=iter->reservedField=0;
755
0
            return 0;
756
0
        } else if(delta>=(iter->limit-iter->start)) {
757
            /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */
758
0
            iter->index=iter->length; /* may or may not be <0 (unknown) */
759
0
            iter->start=iter->limit;
760
0
            iter->reservedField=0;
761
0
            return iter->index>=0 ? iter->index : (int32_t)UITER_UNKNOWN_INDEX;
762
0
        }
763
0
    }
764
765
    /* delta!=0 */
766
767
    /* move towards the requested position, pin to the edges of the string */
768
0
    s=(const uint8_t *)iter->context;
769
0
    pos=iter->index; /* could be <0 (unknown) */
770
0
    i=iter->start;
771
0
    if(delta>0) {
772
        /* go forward */
773
0
        int32_t limit=iter->limit;
774
0
        if(iter->reservedField!=0) {
775
0
            iter->reservedField=0;
776
0
            ++pos;
777
0
            --delta;
778
0
        }
779
0
        while(delta>0 && i<limit) {
780
0
            U8_NEXT_OR_FFFD(s, i, limit, c);
781
0
            if(c<=0xffff) {
782
0
                ++pos;
783
0
                --delta;
784
0
            } else if(delta>=2) {
785
0
                pos+=2;
786
0
                delta-=2;
787
0
            } else /* delta==1 */ {
788
                /* stop in the middle of a supplementary code point */
789
0
                iter->reservedField=c;
790
0
                ++pos;
791
0
                break; /* delta=0; */
792
0
            }
793
0
        }
794
0
        if(i==limit) {
795
0
            if(iter->length<0 && iter->index>=0) {
796
0
                iter->length= iter->reservedField==0 ? pos : pos+1;
797
0
            } else if(iter->index<0 && iter->length>=0) {
798
0
                iter->index= iter->reservedField==0 ? iter->length : iter->length-1;
799
0
            }
800
0
        }
801
0
    } else /* delta<0 */ {
802
        /* go backward */
803
0
        if(iter->reservedField!=0) {
804
0
            iter->reservedField=0;
805
0
            i-=4; /* we stayed behind the supplementary code point; go before it now */
806
0
            --pos;
807
0
            ++delta;
808
0
        }
809
0
        while(delta<0 && i>0) {
810
0
            U8_PREV_OR_FFFD(s, 0, i, c);
811
0
            if(c<=0xffff) {
812
0
                --pos;
813
0
                ++delta;
814
0
            } else if(delta<=-2) {
815
0
                pos-=2;
816
0
                delta+=2;
817
0
            } else /* delta==-1 */ {
818
                /* stop in the middle of a supplementary code point */
819
0
                i+=4; /* back to behind this supplementary code point for consistent state */
820
0
                iter->reservedField=c;
821
0
                --pos;
822
0
                break; /* delta=0; */
823
0
            }
824
0
        }
825
0
    }
826
827
0
    iter->start=i;
828
0
    if(iter->index>=0) {
829
0
        return iter->index=pos;
830
0
    } else {
831
        /* we started with index<0 (unknown) so pos is bogus */
832
0
        if(i<=1) {
833
0
            return iter->index=i; /* reached the beginning */
834
0
        } else {
835
            /* we still don't know the UTF-16 index */
836
0
            return UITER_UNKNOWN_INDEX;
837
0
        }
838
0
    }
839
0
}
840
841
/*
 * TRUE if there are more UTF-8 bytes to read, or if the trail surrogate of
 * a pending supplementary code point (reservedField) is still to be delivered.
 */
static UBool U_CALLCONV
utf8IteratorHasNext(UCharIterator *iter) {
    if(iter->start<iter->limit) {
        return TRUE;
    }
    if(iter->reservedField!=0) {
        return TRUE;
    }
    return FALSE;
}
845
846
/* TRUE if the UTF-8 index is beyond the beginning of the text. */
static UBool U_CALLCONV
utf8IteratorHasPrevious(UCharIterator *iter) {
    if(iter->start>0) {
        return TRUE;
    }
    return FALSE;
}
850
851
/*
 * Return the UTF-16 code unit at the current position without moving.
 *
 * If the iterator is in the middle of a supplementary code point, that is
 * its trail surrogate. Otherwise the next code point is decoded from a copy
 * of the UTF-8 index and either returned directly (BMP) or as its lead
 * surrogate. Returns U_SENTINEL at the end of the text.
 */
static UChar32 U_CALLCONV
utf8IteratorCurrent(UCharIterator *iter) {
    if(iter->reservedField!=0) {
        return U16_TRAIL(iter->reservedField);
    }
    if(iter->start>=iter->limit) {
        return U_SENTINEL;
    }

    const uint8_t *text=(const uint8_t *)iter->context;
    int32_t bytePos=iter->start; /* local copy: the iterator itself does not move */
    UChar32 cp;

    U8_NEXT_OR_FFFD(text, bytePos, iter->limit, cp);
    if(cp>0xffff) {
        return U16_LEAD(cp);
    }
    return cp;
}
870
871
/*
 * Return the UTF-16 code unit at the current position and move past it.
 *
 * A pending supplementary code point delivers its trail surrogate first.
 * Otherwise the next code point is decoded, advancing the UTF-8 index; a
 * supplementary code point delivers its lead surrogate and is remembered in
 * reservedField. The UTF-16 index is incremented if it is known, and when
 * the end of the text is reached the unknown one of index/length is derived
 * from the other. Returns U_SENTINEL at the end of the text.
 */
static UChar32 U_CALLCONV
utf8IteratorNext(UCharIterator *iter) {
    if(iter->reservedField!=0) {
        const UChar trail=U16_TRAIL(iter->reservedField);
        iter->reservedField=0;
        if(iter->index>=0) {
            ++iter->index;
        }
        return trail;
    }

    if(iter->start>=iter->limit) {
        return U_SENTINEL;
    }

    const uint8_t *text=(const uint8_t *)iter->context;
    UChar32 cp;

    U8_NEXT_OR_FFFD(text, iter->start, iter->limit, cp);

    const UBool isBMP= cp<=0xffff;
    const UBool atEnd= iter->start==iter->limit;

    if(iter->index>=0) {
        ++iter->index;
        if(iter->length<0 && atEnd) {
            /* a supplementary code point still has its trail surrogate to go */
            iter->length= isBMP ? iter->index : iter->index+1;
        }
    } else if(atEnd && iter->length>=0) {
        iter->index= isBMP ? iter->length : iter->length-1;
    }

    if(isBMP) {
        return cp;
    }
    iter->reservedField=cp;
    return U16_LEAD(cp);
}
905
906
/*
 * Move back by one UTF-16 code unit and return it.
 *
 * In the middle of a supplementary code point, the lead surrogate is
 * returned and the UTF-8 index moves to before the 4-byte sequence.
 * Otherwise the previous code point is decoded, moving the UTF-8 index
 * back; for a supplementary code point the trail surrogate is returned, the
 * code point is remembered in reservedField and the UTF-8 index is put back
 * behind the sequence. A positive UTF-16 index is decremented; otherwise it
 * is set when the UTF-8 index has come down to 0 or 1. Returns U_SENTINEL
 * at the beginning of the text.
 */
static UChar32 U_CALLCONV
utf8IteratorPrevious(UCharIterator *iter) {
    if(iter->reservedField!=0) {
        const UChar lead=U16_LEAD(iter->reservedField);
        iter->reservedField=0;
        iter->start-=4; /* we stayed behind the supplementary code point; go before it now */
        if(iter->index>0) {
            --iter->index;
        }
        return lead;
    }

    if(iter->start<=0) {
        return U_SENTINEL;
    }

    const uint8_t *text=(const uint8_t *)iter->context;
    UChar32 cp;

    U8_PREV_OR_FFFD(text, 0, iter->start, cp);

    const UBool isBMP= cp<=0xffff;

    if(iter->index>0) {
        --iter->index;
    } else if(iter->start<=1) {
        iter->index= isBMP ? iter->start : iter->start+1;
    }

    if(isBMP) {
        return cp;
    }
    iter->start+=4; /* back to behind this supplementary code point for consistent state */
    iter->reservedField=cp;
    return U16_TRAIL(cp);
}
939
940
static uint32_t U_CALLCONV
941
0
utf8IteratorGetState(const UCharIterator *iter) {
942
0
    uint32_t state=(uint32_t)(iter->start<<1);
943
0
    if(iter->reservedField!=0) {
944
0
        state|=1;
945
0
    }
946
0
    return state;
947
0
}
948
949
static void U_CALLCONV
950
utf8IteratorSetState(UCharIterator *iter,
951
                     uint32_t state,
952
                     UErrorCode *pErrorCode)
953
0
{
954
0
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
955
        /* do nothing */
956
0
    } else if(iter==NULL) {
957
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
958
0
    } else if(state==utf8IteratorGetState(iter)) {
959
        /* setting to the current state: no-op */
960
0
    } else {
961
0
        int32_t index=(int32_t)(state>>1); /* UTF-8 index */
962
0
        state&=1; /* 1 if in surrogate pair, must be index>=4 */
963
964
0
        if((state==0 ? index<0 : index<4) || iter->limit<index) {
965
0
            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
966
0
        } else {
967
0
            iter->start=index; /* restore UTF-8 byte index */
968
0
            if(index<=1) {
969
0
                iter->index=index;
970
0
            } else {
971
0
                iter->index=-1; /* unknown UTF-16 index */
972
0
            }
973
0
            if(state==0) {
974
0
                iter->reservedField=0;
975
0
            } else {
976
                /* verified index>=4 above */
977
0
                UChar32 c;
978
0
                U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c);
979
0
                if(c<=0xffff) {
980
0
                    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
981
0
                } else {
982
0
                    iter->reservedField=c;
983
0
                }
984
0
            }
985
0
        }
986
0
    }
987
0
}
988
989
/*
 * Template for UTF-8 iterators. uiter_setUTF8() copies it into the caller's
 * UCharIterator and then fills in context, limit and length.
 */
static const UCharIterator utf8Iterator={
    /* six data fields, all zero in the template */
    0, 0, 0, 0, 0, 0,

    /* position queries and movement */
    utf8IteratorGetIndex,
    utf8IteratorMove,
    utf8IteratorHasNext,
    utf8IteratorHasPrevious,

    /* code unit access */
    utf8IteratorCurrent,
    utf8IteratorNext,
    utf8IteratorPrevious,

    /* slot not used by this implementation */
    NULL,

    /* saving and restoring the position as a state word */
    utf8IteratorGetState,
    utf8IteratorSetState
};
1002
1003
/*
 * Set up iter to iterate over the UTF-8 string s.
 *
 * @param iter   iterator structure to fill in; nothing happens if it is null
 * @param s      UTF-8 text
 * @param length number of bytes in s, or -1 to measure s with uprv_strlen()
 *
 * If s is null or length is less than -1, iter becomes the no-op iterator.
 */
U_CAPI void U_EXPORT2
uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length) {
    if(iter==NULL) {
        return;
    }
    if(s==NULL || length<-1) {
        /* illegal input */
        *iter=noopIterator;
        return;
    }

    *iter=utf8Iterator;
    iter->context=s;
    iter->limit= length>=0 ? length : (int32_t)uprv_strlen(s);
    /* with at most one byte the UTF-16 length equals the byte count; otherwise it is unknown (-1) */
    iter->length= iter->limit<=1 ? iter->limit : -1;
}
1020
1021
/* Helper functions --------------------------------------------------------- */
1022
1023
/*
 * Return the code point at the current position without (net) moving the
 * iterator. A surrogate code unit is combined with its neighbor when the
 * two form a pair; otherwise the single code unit is returned as is.
 */
U_CAPI UChar32 U_EXPORT2
uiter_current32(UCharIterator *iter) {
    UChar32 unit=iter->current(iter);
    if(!U16_IS_SURROGATE(unit)) {
        return unit;
    }

    UChar32 neighbor;
    if(U16_IS_SURROGATE_LEAD(unit)) {
        /*
         * look at the following code unit;
         * unit!=U_SENTINEL, so the iterator is not at the limit
         */
        iter->move(iter, 1, UITER_CURRENT);
        neighbor=iter->current(iter);
        if(U16_IS_TRAIL(neighbor)) {
            unit=U16_GET_SUPPLEMENTARY(unit, neighbor);
        }

        /* step back to where we started */
        iter->move(iter, -1, UITER_CURRENT);
    } else {
        /* trail surrogate: look at the preceding code unit */
        neighbor=iter->previous(iter);
        if(U16_IS_LEAD(neighbor)) {
            unit=U16_GET_SUPPLEMENTARY(neighbor, unit);
        }
        if(neighbor>=0) {
            /* previous() did move; step forward again */
            iter->move(iter, 1, UITER_CURRENT);
        }
    }
    return unit;
}
1053
1054
/*
 * Read the next code point and move past it. A lead surrogate followed by
 * a trail surrogate is returned as one supplementary code point; an
 * unpaired lead surrogate is returned by itself.
 */
U_CAPI UChar32 U_EXPORT2
uiter_next32(UCharIterator *iter) {
    UChar32 first=iter->next(iter);
    if(!U16_IS_LEAD(first)) {
        return first;
    }

    UChar32 second=iter->next(iter);
    if(U16_IS_TRAIL(second)) {
        return U16_GET_SUPPLEMENTARY(first, second);
    }
    if(second>=0) {
        /* unmatched first surrogate: un-read the code unit that followed it */
        iter->move(iter, -1, UITER_CURRENT);
    }
    return first;
}
1069
1070
/*
 * Move before the previous code point and return it. A trail surrogate
 * preceded by a lead surrogate is returned as one supplementary code
 * point; an unpaired trail surrogate is returned by itself.
 */
U_CAPI UChar32 U_EXPORT2
uiter_previous32(UCharIterator *iter) {
    UChar32 last=iter->previous(iter);
    if(!U16_IS_TRAIL(last)) {
        return last;
    }

    UChar32 before=iter->previous(iter);
    if(U16_IS_LEAD(before)) {
        return U16_GET_SUPPLEMENTARY(before, last);
    }
    if(before>=0) {
        /* unmatched second surrogate: un-read the code unit that preceded it */
        iter->move(iter, 1, UITER_CURRENT);
    }
    return last;
}
1085
1086
/*
 * Return the iterator's state word via its getState function.
 * Yields UITER_NO_STATE if iter is NULL or has no getState function.
 */
U_CAPI uint32_t U_EXPORT2
uiter_getState(const UCharIterator *iter) {
    if(iter!=NULL && iter->getState!=NULL) {
        return iter->getState(iter);
    }
    return UITER_NO_STATE;
}
1094
1095
/*
 * Restore an iterator position from a state word via the iterator's
 * setState function.
 *
 * Does nothing if pErrorCode is NULL or already indicates a failure.
 * Sets U_ILLEGAL_ARGUMENT_ERROR if iter is NULL and U_UNSUPPORTED_ERROR if
 * the iterator has no setState function.
 */
U_CAPI void U_EXPORT2
uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) {
    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
        /* do nothing */
        return;
    }
    if(iter==NULL) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }
    if(iter->setState==NULL) {
        *pErrorCode=U_UNSUPPORTED_ERROR;
        return;
    }
    iter->setState(iter, state, pErrorCode);
}
1107
1108
U_CDECL_END