/src/icu/source/common/uiter.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | *******************************************************************************  | 
5  |  | *  | 
6  |  | *   Copyright (C) 2002-2012, International Business Machines  | 
7  |  | *   Corporation and others.  All Rights Reserved.  | 
8  |  | *  | 
9  |  | *******************************************************************************  | 
10  |  | *   file name:  uiter.cpp  | 
11  |  | *   encoding:   UTF-8  | 
12  |  | *   tab size:   8 (not used)  | 
13  |  | *   indentation:4  | 
14  |  | *  | 
15  |  | *   created on: 2002jan18  | 
16  |  | *   created by: Markus W. Scherer  | 
17  |  | */  | 
18  |  |  | 
19  |  | #include "unicode/utypes.h"  | 
20  |  | #include "unicode/ustring.h"  | 
21  |  | #include "unicode/chariter.h"  | 
22  |  | #include "unicode/rep.h"  | 
23  |  | #include "unicode/uiter.h"  | 
24  |  | #include "unicode/utf.h"  | 
25  |  | #include "unicode/utf8.h"  | 
26  |  | #include "unicode/utf16.h"  | 
27  |  | #include "cstring.h"  | 
28  |  |  | 
29  |  | U_NAMESPACE_USE  | 
30  |  |  | 
/* Evaluates to true when the integer expression n has its lowest bit clear. */
#define IS_EVEN(n) (((n)&1)==0)
/*
 * Evaluates to true when the address in p is a multiple of 2.
 * p is parenthesized before the cast so that an expression argument
 * (for example s+1) is converted as a whole, not just its first operand.
 */
#define IS_POINTER_EVEN(p) IS_EVEN((size_t)(p))
33  |  |  | 
34  |  | U_CDECL_BEGIN  | 
35  |  |  | 
36  |  | /* No-Op UCharIterator implementation for illegal input --------------------- */  | 
37  |  |  | 
38  |  | static int32_t U_CALLCONV  | 
39  | 0  | noopGetIndex(UCharIterator * /*iter*/, UCharIteratorOrigin /*origin*/) { | 
40  | 0  |     return 0;  | 
41  | 0  | }  | 
42  |  |  | 
43  |  | static int32_t U_CALLCONV  | 
44  | 0  | noopMove(UCharIterator * /*iter*/, int32_t /*delta*/, UCharIteratorOrigin /*origin*/) { | 
45  | 0  |     return 0;  | 
46  | 0  | }  | 
47  |  |  | 
48  |  | static UBool U_CALLCONV  | 
49  | 0  | noopHasNext(UCharIterator * /*iter*/) { | 
50  | 0  |     return FALSE;  | 
51  | 0  | }  | 
52  |  |  | 
53  |  | static UChar32 U_CALLCONV  | 
54  | 0  | noopCurrent(UCharIterator * /*iter*/) { | 
55  | 0  |     return U_SENTINEL;  | 
56  | 0  | }  | 
57  |  |  | 
58  |  | static uint32_t U_CALLCONV  | 
59  | 0  | noopGetState(const UCharIterator * /*iter*/) { | 
60  | 0  |     return UITER_NO_STATE;  | 
61  | 0  | }  | 
62  |  |  | 
63  |  | static void U_CALLCONV  | 
64  | 0  | noopSetState(UCharIterator * /*iter*/, uint32_t /*state*/, UErrorCode *pErrorCode) { | 
65  | 0  |     *pErrorCode=U_UNSUPPORTED_ERROR;  | 
66  | 0  | }  | 
67  |  |  | 
68  |  | static const UCharIterator noopIterator={ | 
69  |  |     0, 0, 0, 0, 0, 0,  | 
70  |  |     noopGetIndex,  | 
71  |  |     noopMove,  | 
72  |  |     noopHasNext,  | 
73  |  |     noopHasNext,  | 
74  |  |     noopCurrent,  | 
75  |  |     noopCurrent,  | 
76  |  |     noopCurrent,  | 
77  |  |     NULL,  | 
78  |  |     noopGetState,  | 
79  |  |     noopSetState  | 
80  |  | };  | 
81  |  |  | 
82  |  | /* UCharIterator implementation for simple strings -------------------------- */  | 
83  |  |  | 
84  |  | /*  | 
85  |  |  * This is an implementation of a code unit (UChar) iterator  | 
86  |  |  * for UChar * strings.  | 
87  |  |  *  | 
88  |  |  * The UCharIterator.context field holds a pointer to the string.  | 
89  |  |  */  | 
90  |  |  | 
91  |  | static int32_t U_CALLCONV  | 
92  | 0  | stringIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { | 
93  | 0  |     switch(origin) { | 
94  | 0  |     case UITER_ZERO:  | 
95  | 0  |         return 0;  | 
96  | 0  |     case UITER_START:  | 
97  | 0  |         return iter->start;  | 
98  | 0  |     case UITER_CURRENT:  | 
99  | 0  |         return iter->index;  | 
100  | 0  |     case UITER_LIMIT:  | 
101  | 0  |         return iter->limit;  | 
102  | 0  |     case UITER_LENGTH:  | 
103  | 0  |         return iter->length;  | 
104  | 0  |     default:  | 
105  |  |         /* not a valid origin */  | 
106  |  |         /* Should never get here! */  | 
107  | 0  |         return -1;  | 
108  | 0  |     }  | 
109  | 0  | }  | 
110  |  |  | 
111  |  | static int32_t U_CALLCONV  | 
112  | 0  | stringIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) { | 
113  | 0  |     int32_t pos;  | 
114  |  | 
  | 
115  | 0  |     switch(origin) { | 
116  | 0  |     case UITER_ZERO:  | 
117  | 0  |         pos=delta;  | 
118  | 0  |         break;  | 
119  | 0  |     case UITER_START:  | 
120  | 0  |         pos=iter->start+delta;  | 
121  | 0  |         break;  | 
122  | 0  |     case UITER_CURRENT:  | 
123  | 0  |         pos=iter->index+delta;  | 
124  | 0  |         break;  | 
125  | 0  |     case UITER_LIMIT:  | 
126  | 0  |         pos=iter->limit+delta;  | 
127  | 0  |         break;  | 
128  | 0  |     case UITER_LENGTH:  | 
129  | 0  |         pos=iter->length+delta;  | 
130  | 0  |         break;  | 
131  | 0  |     default:  | 
132  | 0  |         return -1;  /* Error */  | 
133  | 0  |     }  | 
134  |  |  | 
135  | 0  |     if(pos<iter->start) { | 
136  | 0  |         pos=iter->start;  | 
137  | 0  |     } else if(pos>iter->limit) { | 
138  | 0  |         pos=iter->limit;  | 
139  | 0  |     }  | 
140  |  | 
  | 
141  | 0  |     return iter->index=pos;  | 
142  | 0  | }  | 
143  |  |  | 
144  |  | static UBool U_CALLCONV  | 
145  | 0  | stringIteratorHasNext(UCharIterator *iter) { | 
146  | 0  |     return iter->index<iter->limit;  | 
147  | 0  | }  | 
148  |  |  | 
149  |  | static UBool U_CALLCONV  | 
150  | 0  | stringIteratorHasPrevious(UCharIterator *iter) { | 
151  | 0  |     return iter->index>iter->start;  | 
152  | 0  | }  | 
153  |  |  | 
154  |  | static UChar32 U_CALLCONV  | 
155  | 0  | stringIteratorCurrent(UCharIterator *iter) { | 
156  | 0  |     if(iter->index<iter->limit) { | 
157  | 0  |         return ((const UChar *)(iter->context))[iter->index];  | 
158  | 0  |     } else { | 
159  | 0  |         return U_SENTINEL;  | 
160  | 0  |     }  | 
161  | 0  | }  | 
162  |  |  | 
163  |  | static UChar32 U_CALLCONV  | 
164  | 0  | stringIteratorNext(UCharIterator *iter) { | 
165  | 0  |     if(iter->index<iter->limit) { | 
166  | 0  |         return ((const UChar *)(iter->context))[iter->index++];  | 
167  | 0  |     } else { | 
168  | 0  |         return U_SENTINEL;  | 
169  | 0  |     }  | 
170  | 0  | }  | 
171  |  |  | 
172  |  | static UChar32 U_CALLCONV  | 
173  | 0  | stringIteratorPrevious(UCharIterator *iter) { | 
174  | 0  |     if(iter->index>iter->start) { | 
175  | 0  |         return ((const UChar *)(iter->context))[--iter->index];  | 
176  | 0  |     } else { | 
177  | 0  |         return U_SENTINEL;  | 
178  | 0  |     }  | 
179  | 0  | }  | 
180  |  |  | 
181  |  | static uint32_t U_CALLCONV  | 
182  | 0  | stringIteratorGetState(const UCharIterator *iter) { | 
183  | 0  |     return (uint32_t)iter->index;  | 
184  | 0  | }  | 
185  |  |  | 
186  |  | static void U_CALLCONV  | 
187  | 0  | stringIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) { | 
188  | 0  |     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | 
189  |  |         /* do nothing */  | 
190  | 0  |     } else if(iter==NULL) { | 
191  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
192  | 0  |     } else if((int32_t)state<iter->start || iter->limit<(int32_t)state) { | 
193  | 0  |         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
194  | 0  |     } else { | 
195  | 0  |         iter->index=(int32_t)state;  | 
196  | 0  |     }  | 
197  | 0  | }  | 
198  |  |  | 
199  |  | static const UCharIterator stringIterator={ | 
200  |  |     0, 0, 0, 0, 0, 0,  | 
201  |  |     stringIteratorGetIndex,  | 
202  |  |     stringIteratorMove,  | 
203  |  |     stringIteratorHasNext,  | 
204  |  |     stringIteratorHasPrevious,  | 
205  |  |     stringIteratorCurrent,  | 
206  |  |     stringIteratorNext,  | 
207  |  |     stringIteratorPrevious,  | 
208  |  |     NULL,  | 
209  |  |     stringIteratorGetState,  | 
210  |  |     stringIteratorSetState  | 
211  |  | };  | 
212  |  |  | 
213  |  | U_CAPI void U_EXPORT2  | 
214  | 0  | uiter_setString(UCharIterator *iter, const UChar *s, int32_t length) { | 
215  | 0  |     if(iter!=0) { | 
216  | 0  |         if(s!=0 && length>=-1) { | 
217  | 0  |             *iter=stringIterator;  | 
218  | 0  |             iter->context=s;  | 
219  | 0  |             if(length>=0) { | 
220  | 0  |                 iter->length=length;  | 
221  | 0  |             } else { | 
222  | 0  |                 iter->length=u_strlen(s);  | 
223  | 0  |             }  | 
224  | 0  |             iter->limit=iter->length;  | 
225  | 0  |         } else { | 
226  | 0  |             *iter=noopIterator;  | 
227  | 0  |         }  | 
228  | 0  |     }  | 
229  | 0  | }  | 
230  |  |  | 
231  |  | /* UCharIterator implementation for UTF-16BE strings ------------------------ */  | 
232  |  |  | 
233  |  | /*  | 
234  |  |  * This is an implementation of a code unit (UChar) iterator  | 
235  |  |  * for UTF-16BE strings, i.e., strings in byte-vectors where  | 
236  |  |  * each UChar is stored as a big-endian pair of bytes.  | 
237  |  |  *  | 
238  |  |  * The UCharIterator.context field holds a pointer to the string.  | 
239  |  |  * Everything works just like with a normal UChar iterator (uiter_setString),  | 
240  |  |  * except that UChars are assembled from byte pairs.  | 
241  |  |  */  | 
242  |  |  | 
243  |  | /* internal helper function */  | 
244  |  | static inline UChar32  | 
245  | 0  | utf16BEIteratorGet(UCharIterator *iter, int32_t index) { | 
246  | 0  |     const uint8_t *p=(const uint8_t *)iter->context;  | 
247  | 0  |     return ((UChar)p[2*index]<<8)|(UChar)p[2*index+1];  | 
248  | 0  | }  | 
249  |  |  | 
250  |  | static UChar32 U_CALLCONV  | 
251  | 0  | utf16BEIteratorCurrent(UCharIterator *iter) { | 
252  | 0  |     int32_t index;  | 
253  |  | 
  | 
254  | 0  |     if((index=iter->index)<iter->limit) { | 
255  | 0  |         return utf16BEIteratorGet(iter, index);  | 
256  | 0  |     } else { | 
257  | 0  |         return U_SENTINEL;  | 
258  | 0  |     }  | 
259  | 0  | }  | 
260  |  |  | 
261  |  | static UChar32 U_CALLCONV  | 
262  | 0  | utf16BEIteratorNext(UCharIterator *iter) { | 
263  | 0  |     int32_t index;  | 
264  |  | 
  | 
265  | 0  |     if((index=iter->index)<iter->limit) { | 
266  | 0  |         iter->index=index+1;  | 
267  | 0  |         return utf16BEIteratorGet(iter, index);  | 
268  | 0  |     } else { | 
269  | 0  |         return U_SENTINEL;  | 
270  | 0  |     }  | 
271  | 0  | }  | 
272  |  |  | 
273  |  | static UChar32 U_CALLCONV  | 
274  | 0  | utf16BEIteratorPrevious(UCharIterator *iter) { | 
275  | 0  |     int32_t index;  | 
276  |  | 
  | 
277  | 0  |     if((index=iter->index)>iter->start) { | 
278  | 0  |         iter->index=--index;  | 
279  | 0  |         return utf16BEIteratorGet(iter, index);  | 
280  | 0  |     } else { | 
281  | 0  |         return U_SENTINEL;  | 
282  | 0  |     }  | 
283  | 0  | }  | 
284  |  |  | 
285  |  | static const UCharIterator utf16BEIterator={ | 
286  |  |     0, 0, 0, 0, 0, 0,  | 
287  |  |     stringIteratorGetIndex,  | 
288  |  |     stringIteratorMove,  | 
289  |  |     stringIteratorHasNext,  | 
290  |  |     stringIteratorHasPrevious,  | 
291  |  |     utf16BEIteratorCurrent,  | 
292  |  |     utf16BEIteratorNext,  | 
293  |  |     utf16BEIteratorPrevious,  | 
294  |  |     NULL,  | 
295  |  |     stringIteratorGetState,  | 
296  |  |     stringIteratorSetState  | 
297  |  | };  | 
298  |  |  | 
299  |  | /*  | 
300  |  |  * Count the number of UChars in a UTF-16BE string before a terminating UChar NUL,  | 
301  |  |  * i.e., before a pair of 0 bytes where the first 0 byte is at an even  | 
302  |  |  * offset from s.  | 
303  |  |  */  | 
304  |  | static int32_t  | 
305  | 0  | utf16BE_strlen(const char *s) { | 
306  | 0  |     if(IS_POINTER_EVEN(s)) { | 
307  |  |         /*  | 
308  |  |          * even-aligned, call u_strlen(s)  | 
309  |  |          * we are probably on a little-endian machine, but searching for UChar NUL  | 
310  |  |          * does not care about endianness  | 
311  |  |          */  | 
312  | 0  |         return u_strlen((const UChar *)s);  | 
313  | 0  |     } else { | 
314  |  |         /* odd-aligned, search for pair of 0 bytes */  | 
315  | 0  |         const char *p=s;  | 
316  |  | 
  | 
317  | 0  |         while(!(*p==0 && p[1]==0)) { | 
318  | 0  |             p+=2;  | 
319  | 0  |         }  | 
320  | 0  |         return (int32_t)((p-s)/2);  | 
321  | 0  |     }  | 
322  | 0  | }  | 
323  |  |  | 
324  |  | U_CAPI void U_EXPORT2  | 
325  | 0  | uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length) { | 
326  | 0  |     if(iter!=NULL) { | 
327  |  |         /* allow only even-length strings (the input length counts bytes) */  | 
328  | 0  |         if(s!=NULL && (length==-1 || (length>=0 && IS_EVEN(length)))) { | 
329  |  |             /* length/=2, except that >>=1 also works for -1 (-1/2==0, -1>>1==-1) */  | 
330  | 0  |             length>>=1;  | 
331  |  | 
  | 
332  | 0  |             if(U_IS_BIG_ENDIAN && IS_POINTER_EVEN(s)) { | 
333  |  |                 /* big-endian machine and 2-aligned UTF-16BE string: use normal UChar iterator */  | 
334  | 0  |                 uiter_setString(iter, (const UChar *)s, length);  | 
335  | 0  |                 return;  | 
336  | 0  |             }  | 
337  |  |  | 
338  | 0  |             *iter=utf16BEIterator;  | 
339  | 0  |             iter->context=s;  | 
340  | 0  |             if(length>=0) { | 
341  | 0  |                 iter->length=length;  | 
342  | 0  |             } else { | 
343  | 0  |                 iter->length=utf16BE_strlen(s);  | 
344  | 0  |             }  | 
345  | 0  |             iter->limit=iter->length;  | 
346  | 0  |         } else { | 
347  | 0  |             *iter=noopIterator;  | 
348  | 0  |         }  | 
349  | 0  |     }  | 
350  | 0  | }  | 
351  |  |  | 
352  |  | /* UCharIterator wrapper around CharacterIterator --------------------------- */  | 
353  |  |  | 
354  |  | /*  | 
355  |  |  * This is wrapper code around a C++ CharacterIterator to  | 
356  |  |  * look like a C UCharIterator.  | 
357  |  |  *  | 
358  |  |  * The UCharIterator.context field holds a pointer to the CharacterIterator.  | 
359  |  |  */  | 
360  |  |  | 
361  |  | static int32_t U_CALLCONV  | 
362  | 0  | characterIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { | 
363  | 0  |     switch(origin) { | 
364  | 0  |     case UITER_ZERO:  | 
365  | 0  |         return 0;  | 
366  | 0  |     case UITER_START:  | 
367  | 0  |         return ((CharacterIterator *)(iter->context))->startIndex();  | 
368  | 0  |     case UITER_CURRENT:  | 
369  | 0  |         return ((CharacterIterator *)(iter->context))->getIndex();  | 
370  | 0  |     case UITER_LIMIT:  | 
371  | 0  |         return ((CharacterIterator *)(iter->context))->endIndex();  | 
372  | 0  |     case UITER_LENGTH:  | 
373  | 0  |         return ((CharacterIterator *)(iter->context))->getLength();  | 
374  | 0  |     default:  | 
375  |  |         /* not a valid origin */  | 
376  |  |         /* Should never get here! */  | 
377  | 0  |         return -1;  | 
378  | 0  |     }  | 
379  | 0  | }  | 
380  |  |  | 
381  |  | static int32_t U_CALLCONV  | 
382  | 0  | characterIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) { | 
383  | 0  |     switch(origin) { | 
384  | 0  |     case UITER_ZERO:  | 
385  | 0  |         ((CharacterIterator *)(iter->context))->setIndex(delta);  | 
386  | 0  |         return ((CharacterIterator *)(iter->context))->getIndex();  | 
387  | 0  |     case UITER_START:  | 
388  | 0  |     case UITER_CURRENT:  | 
389  | 0  |     case UITER_LIMIT:  | 
390  | 0  |         return ((CharacterIterator *)(iter->context))->move(delta, (CharacterIterator::EOrigin)origin);  | 
391  | 0  |     case UITER_LENGTH:  | 
392  | 0  |         ((CharacterIterator *)(iter->context))->setIndex(((CharacterIterator *)(iter->context))->getLength()+delta);  | 
393  | 0  |         return ((CharacterIterator *)(iter->context))->getIndex();  | 
394  | 0  |     default:  | 
395  |  |         /* not a valid origin */  | 
396  |  |         /* Should never get here! */  | 
397  | 0  |         return -1;  | 
398  | 0  |     }  | 
399  | 0  | }  | 
400  |  |  | 
401  |  | static UBool U_CALLCONV  | 
402  | 0  | characterIteratorHasNext(UCharIterator *iter) { | 
403  | 0  |     return ((CharacterIterator *)(iter->context))->hasNext();  | 
404  | 0  | }  | 
405  |  |  | 
406  |  | static UBool U_CALLCONV  | 
407  | 0  | characterIteratorHasPrevious(UCharIterator *iter) { | 
408  | 0  |     return ((CharacterIterator *)(iter->context))->hasPrevious();  | 
409  | 0  | }  | 
410  |  |  | 
411  |  | static UChar32 U_CALLCONV  | 
412  | 0  | characterIteratorCurrent(UCharIterator *iter) { | 
413  | 0  |     UChar32 c;  | 
414  |  | 
  | 
415  | 0  |     c=((CharacterIterator *)(iter->context))->current();  | 
416  | 0  |     if(c!=0xffff || ((CharacterIterator *)(iter->context))->hasNext()) { | 
417  | 0  |         return c;  | 
418  | 0  |     } else { | 
419  | 0  |         return U_SENTINEL;  | 
420  | 0  |     }  | 
421  | 0  | }  | 
422  |  |  | 
423  |  | static UChar32 U_CALLCONV  | 
424  | 0  | characterIteratorNext(UCharIterator *iter) { | 
425  | 0  |     if(((CharacterIterator *)(iter->context))->hasNext()) { | 
426  | 0  |         return ((CharacterIterator *)(iter->context))->nextPostInc();  | 
427  | 0  |     } else { | 
428  | 0  |         return U_SENTINEL;  | 
429  | 0  |     }  | 
430  | 0  | }  | 
431  |  |  | 
432  |  | static UChar32 U_CALLCONV  | 
433  | 0  | characterIteratorPrevious(UCharIterator *iter) { | 
434  | 0  |     if(((CharacterIterator *)(iter->context))->hasPrevious()) { | 
435  | 0  |         return ((CharacterIterator *)(iter->context))->previous();  | 
436  | 0  |     } else { | 
437  | 0  |         return U_SENTINEL;  | 
438  | 0  |     }  | 
439  | 0  | }  | 
440  |  |  | 
441  |  | static uint32_t U_CALLCONV  | 
442  | 0  | characterIteratorGetState(const UCharIterator *iter) { | 
443  | 0  |     return ((CharacterIterator *)(iter->context))->getIndex();  | 
444  | 0  | }  | 
445  |  |  | 
446  |  | static void U_CALLCONV  | 
447  | 0  | characterIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) { | 
448  | 0  |     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | 
449  |  |         /* do nothing */  | 
450  | 0  |     } else if(iter==NULL || iter->context==NULL) { | 
451  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
452  | 0  |     } else if((int32_t)state<((CharacterIterator *)(iter->context))->startIndex() || ((CharacterIterator *)(iter->context))->endIndex()<(int32_t)state) { | 
453  | 0  |         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
454  | 0  |     } else { | 
455  | 0  |         ((CharacterIterator *)(iter->context))->setIndex((int32_t)state);  | 
456  | 0  |     }  | 
457  | 0  | }  | 
458  |  |  | 
459  |  | static const UCharIterator characterIteratorWrapper={ | 
460  |  |     0, 0, 0, 0, 0, 0,  | 
461  |  |     characterIteratorGetIndex,  | 
462  |  |     characterIteratorMove,  | 
463  |  |     characterIteratorHasNext,  | 
464  |  |     characterIteratorHasPrevious,  | 
465  |  |     characterIteratorCurrent,  | 
466  |  |     characterIteratorNext,  | 
467  |  |     characterIteratorPrevious,  | 
468  |  |     NULL,  | 
469  |  |     characterIteratorGetState,  | 
470  |  |     characterIteratorSetState  | 
471  |  | };  | 
472  |  |  | 
473  |  | U_CAPI void U_EXPORT2  | 
474  | 0  | uiter_setCharacterIterator(UCharIterator *iter, CharacterIterator *charIter) { | 
475  | 0  |     if(iter!=0) { | 
476  | 0  |         if(charIter!=0) { | 
477  | 0  |             *iter=characterIteratorWrapper;  | 
478  | 0  |             iter->context=charIter;  | 
479  | 0  |         } else { | 
480  | 0  |             *iter=noopIterator;  | 
481  | 0  |         }  | 
482  | 0  |     }  | 
483  | 0  | }  | 
484  |  |  | 
485  |  | /* UCharIterator wrapper around Replaceable --------------------------------- */  | 
486  |  |  | 
487  |  | /*  | 
488  |  |  * This is an implementation of a code unit (UChar) iterator  | 
489  |  |  * based on a Replaceable object.  | 
490  |  |  *  | 
491  |  |  * The UCharIterator.context field holds a pointer to the Replaceable.  | 
492  |  |  * UCharIterator.length and UCharIterator.index hold Replaceable.length()  | 
493  |  |  * and the iteration index.  | 
494  |  |  */  | 
495  |  |  | 
496  |  | static UChar32 U_CALLCONV  | 
497  | 0  | replaceableIteratorCurrent(UCharIterator *iter) { | 
498  | 0  |     if(iter->index<iter->limit) { | 
499  | 0  |         return ((Replaceable *)(iter->context))->charAt(iter->index);  | 
500  | 0  |     } else { | 
501  | 0  |         return U_SENTINEL;  | 
502  | 0  |     }  | 
503  | 0  | }  | 
504  |  |  | 
505  |  | static UChar32 U_CALLCONV  | 
506  | 0  | replaceableIteratorNext(UCharIterator *iter) { | 
507  | 0  |     if(iter->index<iter->limit) { | 
508  | 0  |         return ((Replaceable *)(iter->context))->charAt(iter->index++);  | 
509  | 0  |     } else { | 
510  | 0  |         return U_SENTINEL;  | 
511  | 0  |     }  | 
512  | 0  | }  | 
513  |  |  | 
514  |  | static UChar32 U_CALLCONV  | 
515  | 0  | replaceableIteratorPrevious(UCharIterator *iter) { | 
516  | 0  |     if(iter->index>iter->start) { | 
517  | 0  |         return ((Replaceable *)(iter->context))->charAt(--iter->index);  | 
518  | 0  |     } else { | 
519  | 0  |         return U_SENTINEL;  | 
520  | 0  |     }  | 
521  | 0  | }  | 
522  |  |  | 
523  |  | static const UCharIterator replaceableIterator={ | 
524  |  |     0, 0, 0, 0, 0, 0,  | 
525  |  |     stringIteratorGetIndex,  | 
526  |  |     stringIteratorMove,  | 
527  |  |     stringIteratorHasNext,  | 
528  |  |     stringIteratorHasPrevious,  | 
529  |  |     replaceableIteratorCurrent,  | 
530  |  |     replaceableIteratorNext,  | 
531  |  |     replaceableIteratorPrevious,  | 
532  |  |     NULL,  | 
533  |  |     stringIteratorGetState,  | 
534  |  |     stringIteratorSetState  | 
535  |  | };  | 
536  |  |  | 
537  |  | U_CAPI void U_EXPORT2  | 
538  | 0  | uiter_setReplaceable(UCharIterator *iter, const Replaceable *rep) { | 
539  | 0  |     if(iter!=0) { | 
540  | 0  |         if(rep!=0) { | 
541  | 0  |             *iter=replaceableIterator;  | 
542  | 0  |             iter->context=rep;  | 
543  | 0  |             iter->limit=iter->length=rep->length();  | 
544  | 0  |         } else { | 
545  | 0  |             *iter=noopIterator;  | 
546  | 0  |         }  | 
547  | 0  |     }  | 
548  | 0  | }  | 
549  |  |  | 
550  |  | /* UCharIterator implementation for UTF-8 strings --------------------------- */  | 
551  |  |  | 
552  |  | /*  | 
553  |  |  * Possible, probably necessary only for an implementation for arbitrary  | 
554  |  |  * converters:  | 
555  |  |  * Maintain a buffer (ring buffer?) for a piece of converted 16-bit text.  | 
556  |  |  * This would require turning reservedFn into a close function and  | 
557  |  |  * to introduce a uiter_close(iter).  | 
558  |  |  */  | 
559  |  |  | 
560  |  | #define UITER_CNV_CAPACITY 16  | 
561  |  |  | 
562  |  | /*  | 
563  |  |  * Minimal implementation:  | 
564  |  |  * Maintain a single-UChar buffer for an additional surrogate.  | 
565  |  |  * The caller must not modify start and limit because they are used internally.  | 
566  |  |  *  | 
567  |  |  * Use UCharIterator fields as follows:  | 
568  |  |  *   context        pointer to UTF-8 string  | 
569  |  |  *   length         UTF-16 length of the string; -1 until lazy evaluation  | 
570  |  |  *   start          current UTF-8 index  | 
571  |  |  *   index          current UTF-16 index; may be -1="unknown" after setState()  | 
572  |  |  *   limit          UTF-8 length of the string  | 
573  |  |  *   reservedField  supplementary code point  | 
574  |  |  *  | 
575  |  |  * Since UCharIterator delivers 16-bit code units, the iteration can be  | 
576  |  |  * currently in the middle of the byte sequence for a supplementary code point.  | 
577  |  |  * In this case, reservedField will contain that code point and start will  | 
578  |  |  * point to after the corresponding byte sequence. The UTF-16 index will be  | 
579  |  |  * one less than what it would otherwise be corresponding to the UTF-8 index.  | 
580  |  |  * Otherwise, reservedField will be 0.  | 
581  |  |  */  | 
582  |  |  | 
583  |  | /*  | 
584  |  |  * Possible optimization for NUL-terminated UTF-8 and UTF-16 strings:  | 
585  |  |  * Add implementations that do not call strlen() for iteration but check for NUL.  | 
586  |  |  */  | 
587  |  |  | 
588  |  | static int32_t U_CALLCONV  | 
589  | 0  | utf8IteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin) { | 
590  | 0  |     switch(origin) { | 
591  | 0  |     case UITER_ZERO:  | 
592  | 0  |     case UITER_START:  | 
593  | 0  |         return 0;  | 
594  | 0  |     case UITER_CURRENT:  | 
595  | 0  |         if(iter->index<0) { | 
596  |  |             /* the current UTF-16 index is unknown after setState(), count from the beginning */  | 
597  | 0  |             const uint8_t *s;  | 
598  | 0  |             UChar32 c;  | 
599  | 0  |             int32_t i, limit, index;  | 
600  |  | 
  | 
601  | 0  |             s=(const uint8_t *)iter->context;  | 
602  | 0  |             i=index=0;  | 
603  | 0  |             limit=iter->start; /* count up to the UTF-8 index */  | 
604  | 0  |             while(i<limit) { | 
605  | 0  |                 U8_NEXT_OR_FFFD(s, i, limit, c);  | 
606  | 0  |                 index+=U16_LENGTH(c);  | 
607  | 0  |             }  | 
608  |  | 
  | 
609  | 0  |             iter->start=i; /* just in case setState() did not get us to a code point boundary */  | 
610  | 0  |             if(i==iter->limit) { | 
611  | 0  |                 iter->length=index; /* in case it was <0 or wrong */  | 
612  | 0  |             }  | 
613  | 0  |             if(iter->reservedField!=0) { | 
614  | 0  |                 --index; /* we are in the middle of a supplementary code point */  | 
615  | 0  |             }  | 
616  | 0  |             iter->index=index;  | 
617  | 0  |         }  | 
618  | 0  |         return iter->index;  | 
619  | 0  |     case UITER_LIMIT:  | 
620  | 0  |     case UITER_LENGTH:  | 
621  | 0  |         if(iter->length<0) { | 
622  | 0  |             const uint8_t *s;  | 
623  | 0  |             UChar32 c;  | 
624  | 0  |             int32_t i, limit, length;  | 
625  |  | 
  | 
626  | 0  |             s=(const uint8_t *)iter->context;  | 
627  | 0  |             if(iter->index<0) { | 
628  |  |                 /*  | 
629  |  |                  * the current UTF-16 index is unknown after setState(),  | 
630  |  |                  * we must first count from the beginning to here  | 
631  |  |                  */  | 
632  | 0  |                 i=length=0;  | 
633  | 0  |                 limit=iter->start;  | 
634  |  |  | 
635  |  |                 /* count from the beginning to the current index */  | 
636  | 0  |                 while(i<limit) { | 
637  | 0  |                     U8_NEXT_OR_FFFD(s, i, limit, c);  | 
638  | 0  |                     length+=U16_LENGTH(c);  | 
639  | 0  |                 }  | 
640  |  |  | 
641  |  |                 /* assume i==limit==iter->start, set the UTF-16 index */  | 
642  | 0  |                 iter->start=i; /* just in case setState() did not get us to a code point boundary */  | 
643  | 0  |                 iter->index= iter->reservedField!=0 ? length-1 : length;  | 
644  | 0  |             } else { | 
645  | 0  |                 i=iter->start;  | 
646  | 0  |                 length=iter->index;  | 
647  | 0  |                 if(iter->reservedField!=0) { | 
648  | 0  |                     ++length;  | 
649  | 0  |                 }  | 
650  | 0  |             }  | 
651  |  |  | 
652  |  |             /* count from the current index to the end */  | 
653  | 0  |             limit=iter->limit;  | 
654  | 0  |             while(i<limit) { | 
655  | 0  |                 U8_NEXT_OR_FFFD(s, i, limit, c);  | 
656  | 0  |                 length+=U16_LENGTH(c);  | 
657  | 0  |             }  | 
658  | 0  |             iter->length=length;  | 
659  | 0  |         }  | 
660  | 0  |         return iter->length;  | 
661  | 0  |     default:  | 
662  |  |         /* not a valid origin */  | 
663  |  |         /* Should never get here! */  | 
664  | 0  |         return -1;  | 
665  | 0  |     }  | 
666  | 0  | }  | 
667  |  |  | 
668  |  | static int32_t U_CALLCONV  | 
669  | 0  | utf8IteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin) { | 
670  | 0  |     const uint8_t *s;  | 
671  | 0  |     UChar32 c;  | 
672  | 0  |     int32_t pos; /* requested UTF-16 index */  | 
673  | 0  |     int32_t i; /* UTF-8 index */  | 
674  | 0  |     UBool havePos;  | 
675  |  |  | 
676  |  |     /* calculate the requested UTF-16 index */  | 
677  | 0  |     switch(origin) { | 
678  | 0  |     case UITER_ZERO:  | 
679  | 0  |     case UITER_START:  | 
680  | 0  |         pos=delta;  | 
681  | 0  |         havePos=TRUE;  | 
682  |  |         /* iter->index<0 (unknown) is possible */  | 
683  | 0  |         break;  | 
684  | 0  |     case UITER_CURRENT:  | 
685  | 0  |         if(iter->index>=0) { | 
686  | 0  |             pos=iter->index+delta;  | 
687  | 0  |             havePos=TRUE;  | 
688  | 0  |         } else { | 
689  |  |             /* the current UTF-16 index is unknown after setState(), use only delta */  | 
690  | 0  |             pos=0;  | 
691  | 0  |             havePos=FALSE;  | 
692  | 0  |         }  | 
693  | 0  |         break;  | 
694  | 0  |     case UITER_LIMIT:  | 
695  | 0  |     case UITER_LENGTH:  | 
696  | 0  |         if(iter->length>=0) { | 
697  | 0  |             pos=iter->length+delta;  | 
698  | 0  |             havePos=TRUE;  | 
699  | 0  |         } else { | 
700  |  |             /* pin to the end, avoid counting the length */  | 
701  | 0  |             iter->index=-1;  | 
702  | 0  |             iter->start=iter->limit;  | 
703  | 0  |             iter->reservedField=0;  | 
704  | 0  |             if(delta>=0) { | 
705  | 0  |                 return UITER_UNKNOWN_INDEX;  | 
706  | 0  |             } else { | 
707  |  |                 /* the current UTF-16 index is unknown, use only delta */  | 
708  | 0  |                 pos=0;  | 
709  | 0  |                 havePos=FALSE;  | 
710  | 0  |             }  | 
711  | 0  |         }  | 
712  | 0  |         break;  | 
713  | 0  |     default:  | 
714  | 0  |         return -1;  /* Error */  | 
715  | 0  |     }  | 
716  |  |  | 
717  | 0  |     if(havePos) { | 
718  |  |         /* shortcuts: pinning to the edges of the string */  | 
719  | 0  |         if(pos<=0) { | 
720  | 0  |             iter->index=iter->start=iter->reservedField=0;  | 
721  | 0  |             return 0;  | 
722  | 0  |         } else if(iter->length>=0 && pos>=iter->length) { | 
723  | 0  |             iter->index=iter->length;  | 
724  | 0  |             iter->start=iter->limit;  | 
725  | 0  |             iter->reservedField=0;  | 
726  | 0  |             return iter->index;  | 
727  | 0  |         }  | 
728  |  |  | 
729  |  |         /* minimize the number of U8_NEXT/PREV operations */  | 
730  | 0  |         if(iter->index<0 || pos<iter->index/2) { | 
731  |  |             /* go forward from the start instead of backward from the current index */  | 
732  | 0  |             iter->index=iter->start=iter->reservedField=0;  | 
733  | 0  |         } else if(iter->length>=0 && (iter->length-pos)<(pos-iter->index)) { | 
734  |  |             /*  | 
735  |  |              * if we have the UTF-16 index and length and the new position is  | 
736  |  |              * closer to the end than the current index,  | 
737  |  |              * then go backward from the end instead of forward from the current index  | 
738  |  |              */  | 
739  | 0  |             iter->index=iter->length;  | 
740  | 0  |             iter->start=iter->limit;  | 
741  | 0  |             iter->reservedField=0;  | 
742  | 0  |         }  | 
743  |  | 
  | 
744  | 0  |         delta=pos-iter->index;  | 
745  | 0  |         if(delta==0) { | 
746  | 0  |             return iter->index; /* nothing to do */  | 
747  | 0  |         }  | 
748  | 0  |     } else { | 
749  |  |         /* move relative to unknown UTF-16 index */  | 
750  | 0  |         if(delta==0) { | 
751  | 0  |             return UITER_UNKNOWN_INDEX; /* nothing to do */  | 
752  | 0  |         } else if(-delta>=iter->start) { | 
753  |  |             /* moving backwards by more UChars than there are UTF-8 bytes, pin to 0 */  | 
754  | 0  |             iter->index=iter->start=iter->reservedField=0;  | 
755  | 0  |             return 0;  | 
756  | 0  |         } else if(delta>=(iter->limit-iter->start)) { | 
757  |  |             /* moving forward by more UChars than the remaining UTF-8 bytes, pin to the end */  | 
758  | 0  |             iter->index=iter->length; /* may or may not be <0 (unknown) */  | 
759  | 0  |             iter->start=iter->limit;  | 
760  | 0  |             iter->reservedField=0;  | 
761  | 0  |             return iter->index>=0 ? iter->index : (int32_t)UITER_UNKNOWN_INDEX;  | 
762  | 0  |         }  | 
763  | 0  |     }  | 
764  |  |  | 
765  |  |     /* delta!=0 */  | 
766  |  |  | 
767  |  |     /* move towards the requested position, pin to the edges of the string */  | 
768  | 0  |     s=(const uint8_t *)iter->context;  | 
769  | 0  |     pos=iter->index; /* could be <0 (unknown) */  | 
770  | 0  |     i=iter->start;  | 
771  | 0  |     if(delta>0) { | 
772  |  |         /* go forward */  | 
773  | 0  |         int32_t limit=iter->limit;  | 
774  | 0  |         if(iter->reservedField!=0) { | 
775  | 0  |             iter->reservedField=0;  | 
776  | 0  |             ++pos;  | 
777  | 0  |             --delta;  | 
778  | 0  |         }  | 
779  | 0  |         while(delta>0 && i<limit) { | 
780  | 0  |             U8_NEXT_OR_FFFD(s, i, limit, c);  | 
781  | 0  |             if(c<=0xffff) { | 
782  | 0  |                 ++pos;  | 
783  | 0  |                 --delta;  | 
784  | 0  |             } else if(delta>=2) { | 
785  | 0  |                 pos+=2;  | 
786  | 0  |                 delta-=2;  | 
787  | 0  |             } else /* delta==1 */ { | 
788  |  |                 /* stop in the middle of a supplementary code point */  | 
789  | 0  |                 iter->reservedField=c;  | 
790  | 0  |                 ++pos;  | 
791  | 0  |                 break; /* delta=0; */  | 
792  | 0  |             }  | 
793  | 0  |         }  | 
794  | 0  |         if(i==limit) { | 
795  | 0  |             if(iter->length<0 && iter->index>=0) { | 
796  | 0  |                 iter->length= iter->reservedField==0 ? pos : pos+1;  | 
797  | 0  |             } else if(iter->index<0 && iter->length>=0) { | 
798  | 0  |                 iter->index= iter->reservedField==0 ? iter->length : iter->length-1;  | 
799  | 0  |             }  | 
800  | 0  |         }  | 
801  | 0  |     } else /* delta<0 */ { | 
802  |  |         /* go backward */  | 
803  | 0  |         if(iter->reservedField!=0) { | 
804  | 0  |             iter->reservedField=0;  | 
805  | 0  |             i-=4; /* we stayed behind the supplementary code point; go before it now */  | 
806  | 0  |             --pos;  | 
807  | 0  |             ++delta;  | 
808  | 0  |         }  | 
809  | 0  |         while(delta<0 && i>0) { | 
810  | 0  |             U8_PREV_OR_FFFD(s, 0, i, c);  | 
811  | 0  |             if(c<=0xffff) { | 
812  | 0  |                 --pos;  | 
813  | 0  |                 ++delta;  | 
814  | 0  |             } else if(delta<=-2) { | 
815  | 0  |                 pos-=2;  | 
816  | 0  |                 delta+=2;  | 
817  | 0  |             } else /* delta==-1 */ { | 
818  |  |                 /* stop in the middle of a supplementary code point */  | 
819  | 0  |                 i+=4; /* back to behind this supplementary code point for consistent state */  | 
820  | 0  |                 iter->reservedField=c;  | 
821  | 0  |                 --pos;  | 
822  | 0  |                 break; /* delta=0; */  | 
823  | 0  |             }  | 
824  | 0  |         }  | 
825  | 0  |     }  | 
826  |  | 
  | 
827  | 0  |     iter->start=i;  | 
828  | 0  |     if(iter->index>=0) { | 
829  | 0  |         return iter->index=pos;  | 
830  | 0  |     } else { | 
831  |  |         /* we started with index<0 (unknown) so pos is bogus */  | 
832  | 0  |         if(i<=1) { | 
833  | 0  |             return iter->index=i; /* reached the beginning */  | 
834  | 0  |         } else { | 
835  |  |             /* we still don't know the UTF-16 index */  | 
836  | 0  |             return UITER_UNKNOWN_INDEX;  | 
837  | 0  |         }  | 
838  | 0  |     }  | 
839  | 0  | }  | 
840  |  |  | 
841  |  | static UBool U_CALLCONV  | 
842  | 0  | utf8IteratorHasNext(UCharIterator *iter) { | 
843  | 0  |     return iter->start<iter->limit || iter->reservedField!=0;  | 
844  | 0  | }  | 
845  |  |  | 
846  |  | static UBool U_CALLCONV  | 
847  | 0  | utf8IteratorHasPrevious(UCharIterator *iter) { | 
848  | 0  |     return iter->start>0;  | 
849  | 0  | }  | 
850  |  |  | 
851  |  | static UChar32 U_CALLCONV  | 
852  | 0  | utf8IteratorCurrent(UCharIterator *iter) { | 
853  | 0  |     if(iter->reservedField!=0) { | 
854  | 0  |         return U16_TRAIL(iter->reservedField);  | 
855  | 0  |     } else if(iter->start<iter->limit) { | 
856  | 0  |         const uint8_t *s=(const uint8_t *)iter->context;  | 
857  | 0  |         UChar32 c;  | 
858  | 0  |         int32_t i=iter->start;  | 
859  |  | 
  | 
860  | 0  |         U8_NEXT_OR_FFFD(s, i, iter->limit, c);  | 
861  | 0  |         if(c<=0xffff) { | 
862  | 0  |             return c;  | 
863  | 0  |         } else { | 
864  | 0  |             return U16_LEAD(c);  | 
865  | 0  |         }  | 
866  | 0  |     } else { | 
867  | 0  |         return U_SENTINEL;  | 
868  | 0  |     }  | 
869  | 0  | }  | 
870  |  |  | 
871  |  | static UChar32 U_CALLCONV  | 
872  | 0  | utf8IteratorNext(UCharIterator *iter) { | 
873  | 0  |     int32_t index;  | 
874  |  | 
  | 
875  | 0  |     if(iter->reservedField!=0) { | 
876  | 0  |         UChar trail=U16_TRAIL(iter->reservedField);  | 
877  | 0  |         iter->reservedField=0;  | 
878  | 0  |         if((index=iter->index)>=0) { | 
879  | 0  |             iter->index=index+1;  | 
880  | 0  |         }  | 
881  | 0  |         return trail;  | 
882  | 0  |     } else if(iter->start<iter->limit) { | 
883  | 0  |         const uint8_t *s=(const uint8_t *)iter->context;  | 
884  | 0  |         UChar32 c;  | 
885  |  | 
  | 
886  | 0  |         U8_NEXT_OR_FFFD(s, iter->start, iter->limit, c);  | 
887  | 0  |         if((index=iter->index)>=0) { | 
888  | 0  |             iter->index=++index;  | 
889  | 0  |             if(iter->length<0 && iter->start==iter->limit) { | 
890  | 0  |                 iter->length= c<=0xffff ? index : index+1;  | 
891  | 0  |             }  | 
892  | 0  |         } else if(iter->start==iter->limit && iter->length>=0) { | 
893  | 0  |             iter->index= c<=0xffff ? iter->length : iter->length-1;  | 
894  | 0  |         }  | 
895  | 0  |         if(c<=0xffff) { | 
896  | 0  |             return c;  | 
897  | 0  |         } else { | 
898  | 0  |             iter->reservedField=c;  | 
899  | 0  |             return U16_LEAD(c);  | 
900  | 0  |         }  | 
901  | 0  |     } else { | 
902  | 0  |         return U_SENTINEL;  | 
903  | 0  |     }  | 
904  | 0  | }  | 
905  |  |  | 
906  |  | static UChar32 U_CALLCONV  | 
907  | 0  | utf8IteratorPrevious(UCharIterator *iter) { | 
908  | 0  |     int32_t index;  | 
909  |  | 
  | 
910  | 0  |     if(iter->reservedField!=0) { | 
911  | 0  |         UChar lead=U16_LEAD(iter->reservedField);  | 
912  | 0  |         iter->reservedField=0;  | 
913  | 0  |         iter->start-=4; /* we stayed behind the supplementary code point; go before it now */  | 
914  | 0  |         if((index=iter->index)>0) { | 
915  | 0  |             iter->index=index-1;  | 
916  | 0  |         }  | 
917  | 0  |         return lead;  | 
918  | 0  |     } else if(iter->start>0) { | 
919  | 0  |         const uint8_t *s=(const uint8_t *)iter->context;  | 
920  | 0  |         UChar32 c;  | 
921  |  | 
  | 
922  | 0  |         U8_PREV_OR_FFFD(s, 0, iter->start, c);  | 
923  | 0  |         if((index=iter->index)>0) { | 
924  | 0  |             iter->index=index-1;  | 
925  | 0  |         } else if(iter->start<=1) { | 
926  | 0  |             iter->index= c<=0xffff ? iter->start : iter->start+1;  | 
927  | 0  |         }  | 
928  | 0  |         if(c<=0xffff) { | 
929  | 0  |             return c;  | 
930  | 0  |         } else { | 
931  | 0  |             iter->start+=4; /* back to behind this supplementary code point for consistent state */  | 
932  | 0  |             iter->reservedField=c;  | 
933  | 0  |             return U16_TRAIL(c);  | 
934  | 0  |         }  | 
935  | 0  |     } else { | 
936  | 0  |         return U_SENTINEL;  | 
937  | 0  |     }  | 
938  | 0  | }  | 
939  |  |  | 
940  |  | static uint32_t U_CALLCONV  | 
941  | 0  | utf8IteratorGetState(const UCharIterator *iter) { | 
942  | 0  |     uint32_t state=(uint32_t)(iter->start<<1);  | 
943  | 0  |     if(iter->reservedField!=0) { | 
944  | 0  |         state|=1;  | 
945  | 0  |     }  | 
946  | 0  |     return state;  | 
947  | 0  | }  | 
948  |  |  | 
949  |  | static void U_CALLCONV  | 
950  |  | utf8IteratorSetState(UCharIterator *iter,  | 
951  |  |                      uint32_t state,  | 
952  |  |                      UErrorCode *pErrorCode)  | 
953  | 0  | { | 
954  | 0  |     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | 
955  |  |         /* do nothing */  | 
956  | 0  |     } else if(iter==NULL) { | 
957  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
958  | 0  |     } else if(state==utf8IteratorGetState(iter)) { | 
959  |  |         /* setting to the current state: no-op */  | 
960  | 0  |     } else { | 
961  | 0  |         int32_t index=(int32_t)(state>>1); /* UTF-8 index */  | 
962  | 0  |         state&=1; /* 1 if in surrogate pair, must be index>=4 */  | 
963  |  | 
  | 
964  | 0  |         if((state==0 ? index<0 : index<4) || iter->limit<index) { | 
965  | 0  |             *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
966  | 0  |         } else { | 
967  | 0  |             iter->start=index; /* restore UTF-8 byte index */  | 
968  | 0  |             if(index<=1) { | 
969  | 0  |                 iter->index=index;  | 
970  | 0  |             } else { | 
971  | 0  |                 iter->index=-1; /* unknown UTF-16 index */  | 
972  | 0  |             }  | 
973  | 0  |             if(state==0) { | 
974  | 0  |                 iter->reservedField=0;  | 
975  | 0  |             } else { | 
976  |  |                 /* verified index>=4 above */  | 
977  | 0  |                 UChar32 c;  | 
978  | 0  |                 U8_PREV_OR_FFFD((const uint8_t *)iter->context, 0, index, c);  | 
979  | 0  |                 if(c<=0xffff) { | 
980  | 0  |                     *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
981  | 0  |                 } else { | 
982  | 0  |                     iter->reservedField=c;  | 
983  | 0  |                 }  | 
984  | 0  |             }  | 
985  | 0  |         }  | 
986  | 0  |     }  | 
987  | 0  | }  | 
988  |  |  | 
989  |  | static const UCharIterator utf8Iterator={ | 
990  |  |     0, 0, 0, 0, 0, 0,  | 
991  |  |     utf8IteratorGetIndex,  | 
992  |  |     utf8IteratorMove,  | 
993  |  |     utf8IteratorHasNext,  | 
994  |  |     utf8IteratorHasPrevious,  | 
995  |  |     utf8IteratorCurrent,  | 
996  |  |     utf8IteratorNext,  | 
997  |  |     utf8IteratorPrevious,  | 
998  |  |     NULL,  | 
999  |  |     utf8IteratorGetState,  | 
1000  |  |     utf8IteratorSetState  | 
1001  |  | };  | 
1002  |  |  | 
1003  |  | U_CAPI void U_EXPORT2  | 
1004  | 0  | uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length) { | 
1005  | 0  |     if(iter!=0) { | 
1006  | 0  |         if(s!=0 && length>=-1) { | 
1007  | 0  |             *iter=utf8Iterator;  | 
1008  | 0  |             iter->context=s;  | 
1009  | 0  |             if(length>=0) { | 
1010  | 0  |                 iter->limit=length;  | 
1011  | 0  |             } else { | 
1012  | 0  |                 iter->limit=(int32_t)uprv_strlen(s);  | 
1013  | 0  |             }  | 
1014  | 0  |             iter->length= iter->limit<=1 ? iter->limit : -1;  | 
1015  | 0  |         } else { | 
1016  | 0  |             *iter=noopIterator;  | 
1017  | 0  |         }  | 
1018  | 0  |     }  | 
1019  | 0  | }  | 
1020  |  |  | 
1021  |  | /* Helper functions --------------------------------------------------------- */  | 
1022  |  |  | 
1023  |  | U_CAPI UChar32 U_EXPORT2  | 
1024  | 0  | uiter_current32(UCharIterator *iter) { | 
1025  | 0  |     UChar32 c, c2;  | 
1026  |  | 
  | 
1027  | 0  |     c=iter->current(iter);  | 
1028  | 0  |     if(U16_IS_SURROGATE(c)) { | 
1029  | 0  |         if(U16_IS_SURROGATE_LEAD(c)) { | 
1030  |  |             /*  | 
1031  |  |              * go to the next code unit  | 
1032  |  |              * we know that we are not at the limit because c!=U_SENTINEL  | 
1033  |  |              */  | 
1034  | 0  |             iter->move(iter, 1, UITER_CURRENT);  | 
1035  | 0  |             if(U16_IS_TRAIL(c2=iter->current(iter))) { | 
1036  | 0  |                 c=U16_GET_SUPPLEMENTARY(c, c2);  | 
1037  | 0  |             }  | 
1038  |  |  | 
1039  |  |             /* undo index movement */  | 
1040  | 0  |             iter->move(iter, -1, UITER_CURRENT);  | 
1041  | 0  |         } else { | 
1042  | 0  |             if(U16_IS_LEAD(c2=iter->previous(iter))) { | 
1043  | 0  |                 c=U16_GET_SUPPLEMENTARY(c2, c);  | 
1044  | 0  |             }  | 
1045  | 0  |             if(c2>=0) { | 
1046  |  |                 /* undo index movement */  | 
1047  | 0  |                 iter->move(iter, 1, UITER_CURRENT);  | 
1048  | 0  |             }  | 
1049  | 0  |         }  | 
1050  | 0  |     }  | 
1051  | 0  |     return c;  | 
1052  | 0  | }  | 
1053  |  |  | 
1054  |  | U_CAPI UChar32 U_EXPORT2  | 
1055  | 0  | uiter_next32(UCharIterator *iter) { | 
1056  | 0  |     UChar32 c, c2;  | 
1057  |  | 
  | 
1058  | 0  |     c=iter->next(iter);  | 
1059  | 0  |     if(U16_IS_LEAD(c)) { | 
1060  | 0  |         if(U16_IS_TRAIL(c2=iter->next(iter))) { | 
1061  | 0  |             c=U16_GET_SUPPLEMENTARY(c, c2);  | 
1062  | 0  |         } else if(c2>=0) { | 
1063  |  |             /* unmatched first surrogate, undo index movement */  | 
1064  | 0  |             iter->move(iter, -1, UITER_CURRENT);  | 
1065  | 0  |         }  | 
1066  | 0  |     }  | 
1067  | 0  |     return c;  | 
1068  | 0  | }  | 
1069  |  |  | 
1070  |  | U_CAPI UChar32 U_EXPORT2  | 
1071  | 0  | uiter_previous32(UCharIterator *iter) { | 
1072  | 0  |     UChar32 c, c2;  | 
1073  |  | 
  | 
1074  | 0  |     c=iter->previous(iter);  | 
1075  | 0  |     if(U16_IS_TRAIL(c)) { | 
1076  | 0  |         if(U16_IS_LEAD(c2=iter->previous(iter))) { | 
1077  | 0  |             c=U16_GET_SUPPLEMENTARY(c2, c);  | 
1078  | 0  |         } else if(c2>=0) { | 
1079  |  |             /* unmatched second surrogate, undo index movement */  | 
1080  | 0  |             iter->move(iter, 1, UITER_CURRENT);  | 
1081  | 0  |         }  | 
1082  | 0  |     }  | 
1083  | 0  |     return c;  | 
1084  | 0  | }  | 
1085  |  |  | 
1086  |  | U_CAPI uint32_t U_EXPORT2  | 
1087  | 0  | uiter_getState(const UCharIterator *iter) { | 
1088  | 0  |     if(iter==NULL || iter->getState==NULL) { | 
1089  | 0  |         return UITER_NO_STATE;  | 
1090  | 0  |     } else { | 
1091  | 0  |         return iter->getState(iter);  | 
1092  | 0  |     }  | 
1093  | 0  | }  | 
1094  |  |  | 
1095  |  | U_CAPI void U_EXPORT2  | 
1096  | 0  | uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode) { | 
1097  | 0  |     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { | 
1098  |  |         /* do nothing */  | 
1099  | 0  |     } else if(iter==NULL) { | 
1100  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
1101  | 0  |     } else if(iter->setState==NULL) { | 
1102  | 0  |         *pErrorCode=U_UNSUPPORTED_ERROR;  | 
1103  | 0  |     } else { | 
1104  | 0  |         iter->setState(iter, state, pErrorCode);  | 
1105  | 0  |     }  | 
1106  | 0  | }  | 
1107  |  |  | 
1108  |  | U_CDECL_END  |