/src/icu/source/common/utext.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | *******************************************************************************  | 
5  |  | *  | 
6  |  | *   Copyright (C) 2005-2016, International Business Machines  | 
7  |  | *   Corporation and others.  All Rights Reserved.  | 
8  |  | *  | 
9  |  | *******************************************************************************  | 
10  |  | *   file name:  utext.cpp  | 
11  |  | *   encoding:   UTF-8  | 
12  |  | *   tab size:   8 (not used)  | 
13  |  | *   indentation:4  | 
14  |  | *  | 
15  |  | *   created on: 2005apr12  | 
16  |  | *   created by: Markus W. Scherer  | 
17  |  | */  | 
18  |  |  | 
19  |  | #include <cstddef>  | 
20  |  |  | 
21  |  | #include "unicode/utypes.h"  | 
22  |  | #include "unicode/ustring.h"  | 
23  |  | #include "unicode/unistr.h"  | 
24  |  | #include "unicode/chariter.h"  | 
25  |  | #include "unicode/utext.h"  | 
26  |  | #include "unicode/utf.h"  | 
27  |  | #include "unicode/utf8.h"  | 
28  |  | #include "unicode/utf16.h"  | 
29  |  | #include "ustr_imp.h"  | 
30  |  | #include "cmemory.h"  | 
31  |  | #include "cstring.h"  | 
32  |  | #include "uassert.h"  | 
33  |  | #include "putilimp.h"  | 
34  |  |  | 
35  |  | U_NAMESPACE_USE  | 
36  |  |  | 
// Build an int32_t mask in which only bit number `bitIndex` is set.
#define I32_FLAG(bitIndex) (((int32_t)1) << (bitIndex))
38  |  |  | 
39  |  |  | 
40  |  | static UBool  | 
41  | 0  | utext_access(UText *ut, int64_t index, UBool forward) { | 
42  | 0  |     return ut->pFuncs->access(ut, index, forward);  | 
43  | 0  | }  | 
44  |  |  | 
45  |  |  | 
46  |  |  | 
47  |  | U_CAPI UBool U_EXPORT2  | 
48  | 0  | utext_moveIndex32(UText *ut, int32_t delta) { | 
49  | 0  |     UChar32  c;  | 
50  | 0  |     if (delta > 0) { | 
51  | 0  |         do { | 
52  | 0  |             if(ut->chunkOffset>=ut->chunkLength && !utext_access(ut, ut->chunkNativeLimit, TRUE)) { | 
53  | 0  |                 return FALSE;  | 
54  | 0  |             }  | 
55  | 0  |             c = ut->chunkContents[ut->chunkOffset];  | 
56  | 0  |             if (U16_IS_SURROGATE(c)) { | 
57  | 0  |                 c = utext_next32(ut);  | 
58  | 0  |                 if (c == U_SENTINEL) { | 
59  | 0  |                     return FALSE;  | 
60  | 0  |                 }  | 
61  | 0  |             } else { | 
62  | 0  |                 ut->chunkOffset++;  | 
63  | 0  |             }  | 
64  | 0  |         } while(--delta>0);  | 
65  |  | 
  | 
66  | 0  |     } else if (delta<0) { | 
67  | 0  |         do { | 
68  | 0  |             if(ut->chunkOffset<=0 && !utext_access(ut, ut->chunkNativeStart, FALSE)) { | 
69  | 0  |                 return FALSE;  | 
70  | 0  |             }  | 
71  | 0  |             c = ut->chunkContents[ut->chunkOffset-1];  | 
72  | 0  |             if (U16_IS_SURROGATE(c)) { | 
73  | 0  |                 c = utext_previous32(ut);  | 
74  | 0  |                 if (c == U_SENTINEL) { | 
75  | 0  |                     return FALSE;  | 
76  | 0  |                 }  | 
77  | 0  |             } else { | 
78  | 0  |                 ut->chunkOffset--;  | 
79  | 0  |             }  | 
80  | 0  |         } while(++delta<0);  | 
81  | 0  |     }  | 
82  |  |  | 
83  | 0  |     return TRUE;  | 
84  | 0  | }  | 
85  |  |  | 
86  |  |  | 
87  |  | U_CAPI int64_t U_EXPORT2  | 
88  | 0  | utext_nativeLength(UText *ut) { | 
89  | 0  |     return ut->pFuncs->nativeLength(ut);  | 
90  | 0  | }  | 
91  |  |  | 
92  |  |  | 
93  |  | U_CAPI UBool U_EXPORT2  | 
94  | 0  | utext_isLengthExpensive(const UText *ut) { | 
95  | 0  |     UBool r = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)) != 0;  | 
96  | 0  |     return r;  | 
97  | 0  | }  | 
98  |  |  | 
99  |  |  | 
100  |  | U_CAPI int64_t U_EXPORT2  | 
101  | 0  | utext_getNativeIndex(const UText *ut) { | 
102  | 0  |     if(ut->chunkOffset <= ut->nativeIndexingLimit) { | 
103  | 0  |         return ut->chunkNativeStart+ut->chunkOffset;  | 
104  | 0  |     } else { | 
105  | 0  |         return ut->pFuncs->mapOffsetToNative(ut);  | 
106  | 0  |     }  | 
107  | 0  | }  | 
108  |  |  | 
109  |  |  | 
110  |  | U_CAPI void U_EXPORT2  | 
111  | 0  | utext_setNativeIndex(UText *ut, int64_t index) { | 
112  | 0  |     if(index<ut->chunkNativeStart || index>=ut->chunkNativeLimit) { | 
113  |  |         // The desired position is outside of the current chunk.  | 
114  |  |         // Access the new position.  Assume a forward iteration from here,  | 
115  |  |         // which will also be optimimum for a single random access.  | 
116  |  |         // Reverse iterations may suffer slightly.  | 
117  | 0  |         ut->pFuncs->access(ut, index, TRUE);  | 
118  | 0  |     } else if((int32_t)(index - ut->chunkNativeStart) <= ut->nativeIndexingLimit) { | 
119  |  |         // utf-16 indexing.  | 
120  | 0  |         ut->chunkOffset=(int32_t)(index-ut->chunkNativeStart);  | 
121  | 0  |     } else { | 
122  | 0  |          ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);  | 
123  | 0  |     }  | 
124  |  |     // The convention is that the index must always be on a code point boundary.  | 
125  |  |     // Adjust the index position if it is in the middle of a surrogate pair.  | 
126  | 0  |     if (ut->chunkOffset<ut->chunkLength) { | 
127  | 0  |         UChar c= ut->chunkContents[ut->chunkOffset];  | 
128  | 0  |         if (U16_IS_TRAIL(c)) { | 
129  | 0  |             if (ut->chunkOffset==0) { | 
130  | 0  |                 ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE);  | 
131  | 0  |             }  | 
132  | 0  |             if (ut->chunkOffset>0) { | 
133  | 0  |                 UChar lead = ut->chunkContents[ut->chunkOffset-1];  | 
134  | 0  |                 if (U16_IS_LEAD(lead)) { | 
135  | 0  |                     ut->chunkOffset--;  | 
136  | 0  |                 }  | 
137  | 0  |             }  | 
138  | 0  |         }  | 
139  | 0  |     }  | 
140  | 0  | }  | 
141  |  |  | 
142  |  |  | 
143  |  |  | 
144  |  | U_CAPI int64_t U_EXPORT2  | 
145  | 0  | utext_getPreviousNativeIndex(UText *ut) { | 
146  |  |     //  | 
147  |  |     //  Fast-path the common case.  | 
148  |  |     //     Common means current position is not at the beginning of a chunk  | 
149  |  |     //     and the preceding character is not supplementary.  | 
150  |  |     //  | 
151  | 0  |     int32_t i = ut->chunkOffset - 1;  | 
152  | 0  |     int64_t result;  | 
153  | 0  |     if (i >= 0) { | 
154  | 0  |         UChar c = ut->chunkContents[i];  | 
155  | 0  |         if (U16_IS_TRAIL(c) == FALSE) { | 
156  | 0  |             if (i <= ut->nativeIndexingLimit) { | 
157  | 0  |                 result = ut->chunkNativeStart + i;  | 
158  | 0  |             } else { | 
159  | 0  |                 ut->chunkOffset = i;  | 
160  | 0  |                 result = ut->pFuncs->mapOffsetToNative(ut);  | 
161  | 0  |                 ut->chunkOffset++;  | 
162  | 0  |             }  | 
163  | 0  |             return result;  | 
164  | 0  |         }  | 
165  | 0  |     }  | 
166  |  |  | 
167  |  |     // If at the start of text, simply return 0.  | 
168  | 0  |     if (ut->chunkOffset==0 && ut->chunkNativeStart==0) { | 
169  | 0  |         return 0;  | 
170  | 0  |     }  | 
171  |  |  | 
172  |  |     // Harder, less common cases.  We are at a chunk boundary, or on a surrogate.  | 
173  |  |     //    Keep it simple, use other functions to handle the edges.  | 
174  |  |     //  | 
175  | 0  |     utext_previous32(ut);  | 
176  | 0  |     result = UTEXT_GETNATIVEINDEX(ut);  | 
177  | 0  |     utext_next32(ut);  | 
178  | 0  |     return result;  | 
179  | 0  | }  | 
180  |  |  | 
181  |  |  | 
182  |  | //  | 
183  |  | //  utext_current32.  Get the UChar32 at the current position.  | 
184  |  | //                    UText iteration position is always on a code point boundary,  | 
185  |  | //                    never on the trail half of a surrogate pair.  | 
186  |  | //  | 
187  |  | U_CAPI UChar32 U_EXPORT2  | 
188  | 0  | utext_current32(UText *ut) { | 
189  | 0  |     UChar32  c;  | 
190  | 0  |     if (ut->chunkOffset==ut->chunkLength) { | 
191  |  |         // Current position is just off the end of the chunk.  | 
192  | 0  |         if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) { | 
193  |  |             // Off the end of the text.  | 
194  | 0  |             return U_SENTINEL;  | 
195  | 0  |         }  | 
196  | 0  |     }  | 
197  |  |  | 
198  | 0  |     c = ut->chunkContents[ut->chunkOffset];  | 
199  | 0  |     if (U16_IS_LEAD(c) == FALSE) { | 
200  |  |         // Normal, non-supplementary case.  | 
201  | 0  |         return c;  | 
202  | 0  |     }  | 
203  |  |  | 
204  |  |     //  | 
205  |  |     //  Possible supplementary char.  | 
206  |  |     //  | 
207  | 0  |     UChar32   trail = 0;  | 
208  | 0  |     UChar32   supplementaryC = c;  | 
209  | 0  |     if ((ut->chunkOffset+1) < ut->chunkLength) { | 
210  |  |         // The trail surrogate is in the same chunk.  | 
211  | 0  |         trail = ut->chunkContents[ut->chunkOffset+1];  | 
212  | 0  |     } else { | 
213  |  |         //  The trail surrogate is in a different chunk.  | 
214  |  |         //     Because we must maintain the iteration position, we need to switch forward  | 
215  |  |         //     into the new chunk, get the trail surrogate, then revert the chunk back to the  | 
216  |  |         //     original one.  | 
217  |  |         //     An edge case to be careful of:  the entire text may end with an unpaired  | 
218  |  |         //        leading surrogate.  The attempt to access the trail will fail, but  | 
219  |  |         //        the original position before the unpaired lead still needs to be restored.  | 
220  | 0  |         int64_t  nativePosition = ut->chunkNativeLimit;  | 
221  | 0  |         int32_t  originalOffset = ut->chunkOffset;  | 
222  | 0  |         if (ut->pFuncs->access(ut, nativePosition, TRUE)) { | 
223  | 0  |             trail = ut->chunkContents[ut->chunkOffset];  | 
224  | 0  |         }  | 
225  | 0  |         UBool r = ut->pFuncs->access(ut, nativePosition, FALSE);  // reverse iteration flag loads preceding chunk  | 
226  | 0  |         U_ASSERT(r==TRUE);  | 
227  | 0  |         ut->chunkOffset = originalOffset;  | 
228  | 0  |         if(!r) { | 
229  | 0  |             return U_SENTINEL;  | 
230  | 0  |         }  | 
231  | 0  |     }  | 
232  |  |  | 
233  | 0  |     if (U16_IS_TRAIL(trail)) { | 
234  | 0  |         supplementaryC = U16_GET_SUPPLEMENTARY(c, trail);  | 
235  | 0  |     }  | 
236  | 0  |     return supplementaryC;  | 
237  |  | 
  | 
238  | 0  | }  | 
239  |  |  | 
240  |  |  | 
241  |  | U_CAPI UChar32 U_EXPORT2  | 
242  | 0  | utext_char32At(UText *ut, int64_t nativeIndex) { | 
243  | 0  |     UChar32 c = U_SENTINEL;  | 
244  |  |  | 
245  |  |     // Fast path the common case.  | 
246  | 0  |     if (nativeIndex>=ut->chunkNativeStart && nativeIndex < ut->chunkNativeStart + ut->nativeIndexingLimit) { | 
247  | 0  |         ut->chunkOffset = (int32_t)(nativeIndex - ut->chunkNativeStart);  | 
248  | 0  |         c = ut->chunkContents[ut->chunkOffset];  | 
249  | 0  |         if (U16_IS_SURROGATE(c) == FALSE) { | 
250  | 0  |             return c;  | 
251  | 0  |         }  | 
252  | 0  |     }  | 
253  |  |  | 
254  |  |  | 
255  | 0  |     utext_setNativeIndex(ut, nativeIndex);  | 
256  | 0  |     if (nativeIndex>=ut->chunkNativeStart && ut->chunkOffset<ut->chunkLength) { | 
257  | 0  |         c = ut->chunkContents[ut->chunkOffset];  | 
258  | 0  |         if (U16_IS_SURROGATE(c)) { | 
259  |  |             // For surrogates, let current32() deal with the complications  | 
260  |  |             //    of supplementaries that may span chunk boundaries.  | 
261  | 0  |             c = utext_current32(ut);  | 
262  | 0  |         }  | 
263  | 0  |     }  | 
264  | 0  |     return c;  | 
265  | 0  | }  | 
266  |  |  | 
267  |  |  | 
268  |  | U_CAPI UChar32 U_EXPORT2  | 
269  | 0  | utext_next32(UText *ut) { | 
270  | 0  |     UChar32       c;  | 
271  |  | 
  | 
272  | 0  |     if (ut->chunkOffset >= ut->chunkLength) { | 
273  | 0  |         if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) { | 
274  | 0  |             return U_SENTINEL;  | 
275  | 0  |         }  | 
276  | 0  |     }  | 
277  |  |  | 
278  | 0  |     c = ut->chunkContents[ut->chunkOffset++];  | 
279  | 0  |     if (U16_IS_LEAD(c) == FALSE) { | 
280  |  |         // Normal case, not supplementary.  | 
281  |  |         //   (A trail surrogate seen here is just returned as is, as a surrogate value.  | 
282  |  |         //    It cannot be part of a pair.)  | 
283  | 0  |         return c;  | 
284  | 0  |     }  | 
285  |  |  | 
286  | 0  |     if (ut->chunkOffset >= ut->chunkLength) { | 
287  | 0  |         if (ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) { | 
288  |  |             // c is an unpaired lead surrogate at the end of the text.  | 
289  |  |             // return it as it is.  | 
290  | 0  |             return c;  | 
291  | 0  |         }  | 
292  | 0  |     }  | 
293  | 0  |     UChar32 trail = ut->chunkContents[ut->chunkOffset];  | 
294  | 0  |     if (U16_IS_TRAIL(trail) == FALSE) { | 
295  |  |         // c was an unpaired lead surrogate, not at the end of the text.  | 
296  |  |         // return it as it is (unpaired).  Iteration position is on the  | 
297  |  |         // following character, possibly in the next chunk, where the  | 
298  |  |         //  trail surrogate would have been if it had existed.  | 
299  | 0  |         return c;  | 
300  | 0  |     }  | 
301  |  |  | 
302  | 0  |     UChar32 supplementary = U16_GET_SUPPLEMENTARY(c, trail);  | 
303  | 0  |     ut->chunkOffset++;   // move iteration position over the trail surrogate.  | 
304  | 0  |     return supplementary;  | 
305  | 0  |     }  | 
306  |  |  | 
307  |  |  | 
308  |  | U_CAPI UChar32 U_EXPORT2  | 
309  | 0  | utext_previous32(UText *ut) { | 
310  | 0  |     UChar32       c;  | 
311  |  | 
  | 
312  | 0  |     if (ut->chunkOffset <= 0) { | 
313  | 0  |         if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) { | 
314  | 0  |             return U_SENTINEL;  | 
315  | 0  |         }  | 
316  | 0  |     }  | 
317  | 0  |     ut->chunkOffset--;  | 
318  | 0  |     c = ut->chunkContents[ut->chunkOffset];  | 
319  | 0  |     if (U16_IS_TRAIL(c) == FALSE) { | 
320  |  |         // Normal case, not supplementary.  | 
321  |  |         //   (A lead surrogate seen here is just returned as is, as a surrogate value.  | 
322  |  |         //    It cannot be part of a pair.)  | 
323  | 0  |         return c;  | 
324  | 0  |     }  | 
325  |  |  | 
326  | 0  |     if (ut->chunkOffset <= 0) { | 
327  | 0  |         if (ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) { | 
328  |  |             // c is an unpaired trail surrogate at the start of the text.  | 
329  |  |             // return it as it is.  | 
330  | 0  |             return c;  | 
331  | 0  |         }  | 
332  | 0  |     }  | 
333  |  |  | 
334  | 0  |     UChar32 lead = ut->chunkContents[ut->chunkOffset-1];  | 
335  | 0  |     if (U16_IS_LEAD(lead) == FALSE) { | 
336  |  |         // c was an unpaired trail surrogate, not at the end of the text.  | 
337  |  |         // return it as it is (unpaired).  Iteration position is at c  | 
338  | 0  |         return c;  | 
339  | 0  |     }  | 
340  |  |  | 
341  | 0  |     UChar32 supplementary = U16_GET_SUPPLEMENTARY(lead, c);  | 
342  | 0  |     ut->chunkOffset--;   // move iteration position over the lead surrogate.  | 
343  | 0  |     return supplementary;  | 
344  | 0  | }  | 
345  |  |  | 
346  |  |  | 
347  |  |  | 
348  |  | U_CAPI UChar32 U_EXPORT2  | 
349  | 0  | utext_next32From(UText *ut, int64_t index) { | 
350  | 0  |     UChar32       c      = U_SENTINEL;  | 
351  |  | 
  | 
352  | 0  |     if(index<ut->chunkNativeStart || index>=ut->chunkNativeLimit) { | 
353  |  |         // Desired position is outside of the current chunk.  | 
354  | 0  |         if(!ut->pFuncs->access(ut, index, TRUE)) { | 
355  |  |             // no chunk available here  | 
356  | 0  |             return U_SENTINEL;  | 
357  | 0  |         }  | 
358  | 0  |     } else if (index - ut->chunkNativeStart  <= (int64_t)ut->nativeIndexingLimit) { | 
359  |  |         // Desired position is in chunk, with direct 1:1 native to UTF16 indexing  | 
360  | 0  |         ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);  | 
361  | 0  |     } else { | 
362  |  |         // Desired position is in chunk, with non-UTF16 indexing.  | 
363  | 0  |         ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index);  | 
364  | 0  |     }  | 
365  |  |  | 
366  | 0  |     c = ut->chunkContents[ut->chunkOffset++];  | 
367  | 0  |     if (U16_IS_SURROGATE(c)) { | 
368  |  |         // Surrogates.  Many edge cases.  Use other functions that already  | 
369  |  |         //              deal with the problems.  | 
370  | 0  |         utext_setNativeIndex(ut, index);  | 
371  | 0  |         c = utext_next32(ut);  | 
372  | 0  |     }  | 
373  | 0  |     return c;  | 
374  | 0  | }  | 
375  |  |  | 
376  |  |  | 
377  |  | U_CAPI UChar32 U_EXPORT2  | 
378  | 0  | utext_previous32From(UText *ut, int64_t index) { | 
379  |  |     //  | 
380  |  |     //  Return the character preceding the specified index.  | 
381  |  |     //  Leave the iteration position at the start of the character that was returned.  | 
382  |  |     //  | 
383  | 0  |     UChar32     cPrev;    // The character preceding cCurr, which is what we will return.  | 
384  |  |  | 
385  |  |     // Address the chunk containing the position preceding the incoming index  | 
386  |  |     // A tricky edge case:  | 
387  |  |     //   We try to test the requested native index against the chunkNativeStart to determine  | 
388  |  |     //    whether the character preceding the one at the index is in the current chunk.  | 
389  |  |     //    BUT, this test can fail with UTF-8 (or any other multibyte encoding), when the  | 
390  |  |     //    requested index is on something other than the first position of the first char.  | 
391  |  |     //  | 
392  | 0  |     if(index<=ut->chunkNativeStart || index>ut->chunkNativeLimit) { | 
393  |  |         // Requested native index is outside of the current chunk.  | 
394  | 0  |         if(!ut->pFuncs->access(ut, index, FALSE)) { | 
395  |  |             // no chunk available here  | 
396  | 0  |             return U_SENTINEL;  | 
397  | 0  |         }  | 
398  | 0  |     } else if(index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) { | 
399  |  |         // Direct UTF-16 indexing.  | 
400  | 0  |         ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);  | 
401  | 0  |     } else { | 
402  | 0  |         ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);  | 
403  | 0  |         if (ut->chunkOffset==0 && !ut->pFuncs->access(ut, index, FALSE)) { | 
404  |  |             // no chunk available here  | 
405  | 0  |             return U_SENTINEL;  | 
406  | 0  |         }  | 
407  | 0  |     }  | 
408  |  |  | 
409  |  |     //  | 
410  |  |     // Simple case with no surrogates.  | 
411  |  |     //  | 
412  | 0  |     ut->chunkOffset--;  | 
413  | 0  |     cPrev = ut->chunkContents[ut->chunkOffset];  | 
414  |  | 
  | 
415  | 0  |     if (U16_IS_SURROGATE(cPrev)) { | 
416  |  |         // Possible supplementary.  Many edge cases.  | 
417  |  |         // Let other functions do the heavy lifting.  | 
418  | 0  |         utext_setNativeIndex(ut, index);  | 
419  | 0  |         cPrev = utext_previous32(ut);  | 
420  | 0  |     }  | 
421  | 0  |     return cPrev;  | 
422  | 0  | }  | 
423  |  |  | 
424  |  |  | 
425  |  | U_CAPI int32_t U_EXPORT2  | 
426  |  | utext_extract(UText *ut,  | 
427  |  |              int64_t start, int64_t limit,  | 
428  |  |              UChar *dest, int32_t destCapacity,  | 
429  | 0  |              UErrorCode *status) { | 
430  | 0  |                  return ut->pFuncs->extract(ut, start, limit, dest, destCapacity, status);  | 
431  | 0  |              }  | 
432  |  |  | 
433  |  |  | 
434  |  |  | 
435  |  | U_CAPI UBool U_EXPORT2  | 
436  | 0  | utext_equals(const UText *a, const UText *b) { | 
437  | 0  |     if (a==NULL || b==NULL ||  | 
438  | 0  |         a->magic != UTEXT_MAGIC ||  | 
439  | 0  |         b->magic != UTEXT_MAGIC) { | 
440  |  |             // Null or invalid arguments don't compare equal to anything.  | 
441  | 0  |             return FALSE;  | 
442  | 0  |     }  | 
443  |  |  | 
444  | 0  |     if (a->pFuncs != b->pFuncs) { | 
445  |  |         // Different types of text providers.  | 
446  | 0  |         return FALSE;  | 
447  | 0  |     }  | 
448  |  |  | 
449  | 0  |     if (a->context != b->context) { | 
450  |  |         // Different sources (different strings)  | 
451  | 0  |         return FALSE;  | 
452  | 0  |     }  | 
453  | 0  |     if (utext_getNativeIndex(a) != utext_getNativeIndex(b)) { | 
454  |  |         // Different current position in the string.  | 
455  | 0  |         return FALSE;  | 
456  | 0  |     }  | 
457  |  |  | 
458  | 0  |     return TRUE;  | 
459  | 0  | }  | 
460  |  |  | 
461  |  | U_CAPI UBool U_EXPORT2  | 
462  |  | utext_isWritable(const UText *ut)  | 
463  | 0  | { | 
464  | 0  |     UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != 0;  | 
465  | 0  |     return b;  | 
466  | 0  | }  | 
467  |  |  | 
468  |  |  | 
469  |  | U_CAPI void U_EXPORT2  | 
470  | 0  | utext_freeze(UText *ut) { | 
471  |  |     // Zero out the WRITABLE flag.  | 
472  | 0  |     ut->providerProperties &= ~(I32_FLAG(UTEXT_PROVIDER_WRITABLE));  | 
473  | 0  | }  | 
474  |  |  | 
475  |  |  | 
476  |  | U_CAPI UBool U_EXPORT2  | 
477  |  | utext_hasMetaData(const UText *ut)  | 
478  | 0  | { | 
479  | 0  |     UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA)) != 0;  | 
480  | 0  |     return b;  | 
481  | 0  | }  | 
482  |  |  | 
483  |  |  | 
484  |  |  | 
485  |  | U_CAPI int32_t U_EXPORT2  | 
486  |  | utext_replace(UText *ut,  | 
487  |  |              int64_t nativeStart, int64_t nativeLimit,  | 
488  |  |              const UChar *replacementText, int32_t replacementLength,  | 
489  |  |              UErrorCode *status)  | 
490  | 0  | { | 
491  | 0  |     if (U_FAILURE(*status)) { | 
492  | 0  |         return 0;  | 
493  | 0  |     }  | 
494  | 0  |     if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) { | 
495  | 0  |         *status = U_NO_WRITE_PERMISSION;  | 
496  | 0  |         return 0;  | 
497  | 0  |     }  | 
498  | 0  |     int32_t i = ut->pFuncs->replace(ut, nativeStart, nativeLimit, replacementText, replacementLength, status);  | 
499  | 0  |     return i;  | 
500  | 0  | }  | 
501  |  |  | 
502  |  | U_CAPI void U_EXPORT2  | 
503  |  | utext_copy(UText *ut,  | 
504  |  |           int64_t nativeStart, int64_t nativeLimit,  | 
505  |  |           int64_t destIndex,  | 
506  |  |           UBool move,  | 
507  |  |           UErrorCode *status)  | 
508  | 0  | { | 
509  | 0  |     if (U_FAILURE(*status)) { | 
510  | 0  |         return;  | 
511  | 0  |     }  | 
512  | 0  |     if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == 0) { | 
513  | 0  |         *status = U_NO_WRITE_PERMISSION;  | 
514  | 0  |         return;  | 
515  | 0  |     }  | 
516  | 0  |     ut->pFuncs->copy(ut, nativeStart, nativeLimit, destIndex, move, status);  | 
517  | 0  | }  | 
518  |  |  | 
519  |  |  | 
520  |  |  | 
521  |  | U_CAPI UText * U_EXPORT2  | 
522  | 0  | utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status) { | 
523  | 0  |     if (U_FAILURE(*status)) { | 
524  | 0  |         return dest;  | 
525  | 0  |     }  | 
526  | 0  |     UText *result = src->pFuncs->clone(dest, src, deep, status);  | 
527  | 0  |     if (U_FAILURE(*status)) { | 
528  | 0  |         return result;  | 
529  | 0  |     }  | 
530  | 0  |     if (result == NULL) { | 
531  | 0  |         *status = U_MEMORY_ALLOCATION_ERROR;  | 
532  | 0  |         return result;  | 
533  | 0  |     }  | 
534  | 0  |     if (readOnly) { | 
535  | 0  |         utext_freeze(result);  | 
536  | 0  |     }  | 
537  | 0  |     return result;  | 
538  | 0  | }  | 
539  |  |  | 
540  |  |  | 
541  |  |  | 
542  |  | //------------------------------------------------------------------------------  | 
543  |  | //  | 
544  |  | //   UText common functions implementation  | 
545  |  | //  | 
546  |  | //------------------------------------------------------------------------------  | 
547  |  |  | 
//
//  Bit values stored in UText.flags
//
enum {
    // Set:   ICU allocated this UText struct on the heap.
    // Clear: the caller provided the storage for the UText.
    UTEXT_HEAP_ALLOCATED       = 1,

    // Set:   ICU allocated the extra storage as a separate heap block.
    // Clear: there is no separate allocation.  Either no extra storage was
    //        requested, or it is appended to the end of the main UText storage.
    UTEXT_EXTRA_HEAP_ALLOCATED = 2,

    // Set:   this UText is currently open.
    // Clear: this UText is not open.
    UTEXT_OPEN                 = 4
};
564  |  |  | 
565  |  |  | 
//
//  Extended form of a UText.  Its purpose is to aid in computing the total
//    size required when a provider asks for a UText to be allocated with
//    extra storage.  utext_setup() sizes a combined allocation from
//    sizeof(ExtendedUText) and points pExtra at the `extension` member.
//
struct ExtendedUText {
    UText               ut;         // The ordinary UText comes first.
    std::max_align_t    extension;  // Marks where the extra storage begins.
};
574  |  |  | 
// Initialized UText value that utext_setup() copies into a UText it has
// just heap-allocated.
static const UText emptyText = UTEXT_INITIALIZER;
576  |  |  | 
577  |  | U_CAPI UText * U_EXPORT2  | 
578  | 0  | utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status) { | 
579  | 0  |     if (U_FAILURE(*status)) { | 
580  | 0  |         return ut;  | 
581  | 0  |     }  | 
582  |  |  | 
583  | 0  |     if (ut == NULL) { | 
584  |  |         // We need to heap-allocate storage for the new UText  | 
585  | 0  |         int32_t spaceRequired = sizeof(UText);  | 
586  | 0  |         if (extraSpace > 0) { | 
587  | 0  |             spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(std::max_align_t);  | 
588  | 0  |         }  | 
589  | 0  |         ut = (UText *)uprv_malloc(spaceRequired);  | 
590  | 0  |         if (ut == NULL) { | 
591  | 0  |             *status = U_MEMORY_ALLOCATION_ERROR;  | 
592  | 0  |             return NULL;  | 
593  | 0  |         } else { | 
594  | 0  |             *ut = emptyText;  | 
595  | 0  |             ut->flags |= UTEXT_HEAP_ALLOCATED;  | 
596  | 0  |             if (spaceRequired>0) { | 
597  | 0  |                 ut->extraSize = extraSpace;  | 
598  | 0  |                 ut->pExtra    = &((ExtendedUText *)ut)->extension;  | 
599  | 0  |             }  | 
600  | 0  |         }  | 
601  | 0  |     } else { | 
602  |  |         // We have been supplied with an already existing UText.  | 
603  |  |         // Verify that it really appears to be a UText.  | 
604  | 0  |         if (ut->magic != UTEXT_MAGIC) { | 
605  | 0  |             *status = U_ILLEGAL_ARGUMENT_ERROR;  | 
606  | 0  |             return ut;  | 
607  | 0  |         }  | 
608  |  |         // If the ut is already open and there's a provider supplied close  | 
609  |  |         //   function, call it.  | 
610  | 0  |         if ((ut->flags & UTEXT_OPEN) && ut->pFuncs->close != NULL)  { | 
611  | 0  |             ut->pFuncs->close(ut);  | 
612  | 0  |         }  | 
613  | 0  |         ut->flags &= ~UTEXT_OPEN;  | 
614  |  |  | 
615  |  |         // If extra space was requested by our caller, check whether  | 
616  |  |         //   sufficient already exists, and allocate new if needed.  | 
617  | 0  |         if (extraSpace > ut->extraSize) { | 
618  |  |             // Need more space.  If there is existing separately allocated space,  | 
619  |  |             //   delete it first, then allocate new space.  | 
620  | 0  |             if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) { | 
621  | 0  |                 uprv_free(ut->pExtra);  | 
622  | 0  |                 ut->extraSize = 0;  | 
623  | 0  |             }  | 
624  | 0  |             ut->pExtra = uprv_malloc(extraSpace);  | 
625  | 0  |             if (ut->pExtra == NULL) { | 
626  | 0  |                 *status = U_MEMORY_ALLOCATION_ERROR;  | 
627  | 0  |             } else { | 
628  | 0  |                 ut->extraSize = extraSpace;  | 
629  | 0  |                 ut->flags |= UTEXT_EXTRA_HEAP_ALLOCATED;  | 
630  | 0  |             }  | 
631  | 0  |         }  | 
632  | 0  |     }  | 
633  | 0  |     if (U_SUCCESS(*status)) { | 
634  | 0  |         ut->flags |= UTEXT_OPEN;  | 
635  |  |  | 
636  |  |         // Initialize all remaining fields of the UText.  | 
637  |  |         //  | 
638  | 0  |         ut->context             = NULL;  | 
639  | 0  |         ut->chunkContents       = NULL;  | 
640  | 0  |         ut->p                   = NULL;  | 
641  | 0  |         ut->q                   = NULL;  | 
642  | 0  |         ut->r                   = NULL;  | 
643  | 0  |         ut->a                   = 0;  | 
644  | 0  |         ut->b                   = 0;  | 
645  | 0  |         ut->c                   = 0;  | 
646  | 0  |         ut->chunkOffset         = 0;  | 
647  | 0  |         ut->chunkLength         = 0;  | 
648  | 0  |         ut->chunkNativeStart    = 0;  | 
649  | 0  |         ut->chunkNativeLimit    = 0;  | 
650  | 0  |         ut->nativeIndexingLimit = 0;  | 
651  | 0  |         ut->providerProperties  = 0;  | 
652  | 0  |         ut->privA               = 0;  | 
653  | 0  |         ut->privB               = 0;  | 
654  | 0  |         ut->privC               = 0;  | 
655  | 0  |         ut->privP               = NULL;  | 
656  | 0  |         if (ut->pExtra!=NULL && ut->extraSize>0)  | 
657  | 0  |             uprv_memset(ut->pExtra, 0, ut->extraSize);  | 
658  |  | 
  | 
659  | 0  |     }  | 
660  | 0  |     return ut;  | 
661  | 0  | }  | 
662  |  |  | 
663  |  |  | 
664  |  | U_CAPI UText * U_EXPORT2  | 
665  | 0  | utext_close(UText *ut) { | 
666  | 0  |     if (ut==NULL ||  | 
667  | 0  |         ut->magic != UTEXT_MAGIC ||  | 
668  | 0  |         (ut->flags & UTEXT_OPEN) == 0)  | 
669  | 0  |     { | 
670  |  |         // ut is NULL, lacks the UTEXT_MAGIC signature, or is not flagged open.  | 
671  |  |         // There is nothing to close; hand the pointer back untouched.  | 
672  | 0  |         return ut;  | 
673  | 0  |     }  | 
674  |  |  | 
675  |  |     // If the provider gave us a close function, call it now, before the  | 
676  |  |     // framework releases anything.  It cleans up whatever the provider allocated.  | 
677  | 0  |     if (ut->pFuncs->close != NULL) { | 
678  | 0  |         ut->pFuncs->close(ut);  | 
679  | 0  |     }  | 
680  | 0  |     ut->flags &= ~UTEXT_OPEN;  | 
681  |  |  | 
682  |  |     // If we (the framework) heap-allocated the extra storage, free it and  | 
683  |  |     //   reset the pointer, the allocation flag and the recorded size.  | 
684  | 0  |     if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) { | 
685  | 0  |         uprv_free(ut->pExtra);  | 
686  | 0  |         ut->pExtra = NULL;  | 
687  | 0  |         ut->flags &= ~UTEXT_EXTRA_HEAP_ALLOCATED;  | 
688  | 0  |         ut->extraSize = 0;  | 
689  | 0  |     }  | 
690  |  |  | 
691  |  |     // Zero out function table of the closed UText.  This is a defensive move,  | 
692  |  |     //   intended to cause applications that inadvertently use a closed  | 
693  |  |     //   utext to crash with null pointer errors.  | 
694  | 0  |     ut->pFuncs        = NULL;  | 
695  |  | 
  | 
696  | 0  |     if (ut->flags & UTEXT_HEAP_ALLOCATED) { | 
697  |  |         // This UText was allocated by UText setup.  Free it and return NULL.  | 
698  |  |         // Clear magic first, so we can detect if the user messes up and immediately  | 
699  |  |         //  tries to reopen another UText using the deleted storage.  | 
700  | 0  |         ut->magic = 0;  | 
701  | 0  |         uprv_free(ut);  | 
702  | 0  |         ut = NULL;  | 
703  | 0  |     }  | 
704  | 0  |     return ut;  | 
705  | 0  | }  | 
706  |  |  | 
707  |  |  | 
708  |  |  | 
709  |  |  | 
710  |  | //  | 
711  |  | // invalidateChunk   Empty the UText's current chunk by zeroing its length, offset,  | 
712  |  | //                   native start/limit and native indexing limit, so that the next  | 
713  |  | //                   call to access will cause new data to load.  | 
714  |  | //                   This is needed when copy/move/replace operate directly on the  | 
715  |  | //                   backing text, potentially putting it out of sync with the chunk.  | 
716  |  | //  | 
717  |  | static void  | 
718  | 0  | invalidateChunk(UText *ut) { | 
719  | 0  |     ut->chunkLength = 0;  | 
720  | 0  |     ut->chunkNativeLimit = 0;  | 
721  | 0  |     ut->chunkNativeStart = 0;  | 
722  | 0  |     ut->chunkOffset = 0;  | 
723  | 0  |     ut->nativeIndexingLimit = 0;  | 
724  | 0  | }  | 
725  |  |  | 
726  |  | //  | 
727  |  | // pinIndex        Clamp a native index parameter into the range [0, limit].  | 
728  |  | //                 The 64 bit index is passed by reference and pinned in place.  | 
729  |  | //                 The pinned value, truncated to 32 bits, is returned as a  | 
730  |  | //                        convenience for providers that don't need 64 bits.  | 
731  |  | static int32_t  | 
732  | 0  | pinIndex(int64_t &index, int64_t limit) { | 
733  | 0  |     if (index<0) { | 
734  | 0  |         index = 0;  | 
735  | 0  |     } else if (index > limit) { | 
736  | 0  |         index = limit;  | 
737  | 0  |     }  | 
738  | 0  |     return (int32_t)index;  | 
739  | 0  | }  | 
740  |  |  | 
741  |  |  | 
742  |  | U_CDECL_BEGIN  | 
743  |  |  | 
744  |  | //  | 
745  |  | // Pointer relocation function,  | 
746  |  | //   a utility used by shallow clone.  | 
747  |  | //   Adjust a pointer that refers to something within one UText (the source)  | 
748  |  | //   to refer to the same relative offset within another UText (the target).  | 
749  |  | //   A pointer into neither the source's pExtra nor the source struct is left as is.  | 
750  | 0  | static void adjustPointer(UText *dest, const void **destPtr, const UText *src) { | 
751  |  |     // Convert all pointers to (char *) so that byte address arithmetic will work.  | 
752  | 0  |     char  *dptr = (char *)*destPtr;  | 
753  | 0  |     char  *dUText = (char *)dest;  | 
754  | 0  |     char  *sUText = (char *)src;  | 
755  |  | 
  | 
756  | 0  |     if (dptr >= (char *)src->pExtra && dptr < ((char*)src->pExtra)+src->extraSize) { | 
757  |  |         // The pointer referred to something within the src UText's pExtra storage.  | 
758  |  |         //   Relocate it to the same offset within the target UText's pExtra region.  | 
759  | 0  |         *destPtr = ((char *)dest->pExtra) + (dptr - (char *)src->pExtra);  | 
760  | 0  |     } else if (dptr>=sUText && dptr < sUText+src->sizeOfStruct) { | 
761  |  |         // The pointer referred to somewhere within the source UText struct itself.  | 
762  |  |         //   Move it to the same offset within the target UText.  | 
763  | 0  |         *destPtr = dUText + (dptr-sUText);  | 
764  | 0  |     }  | 
765  | 0  | }  | 
766  |  |  | 
767  |  |  | 
768  |  | //  | 
769  |  | //  Clone.  This is a generic copy-the-utext-by-value clone function that can be  | 
770  |  | //          used as-is with some utext types, and as a helper by other clones.  | 
771  |  | //          Returns NULL if status already indicates a failure on entry.  | 
772  |  | static UText * U_CALLCONV  | 
773  | 0  | shallowTextClone(UText * dest, const UText * src, UErrorCode * status) { | 
774  | 0  |     if (U_FAILURE(*status)) { | 
775  | 0  |         return NULL;  | 
776  | 0  |     }  | 
777  | 0  |     int32_t  srcExtraSize = src->extraSize;  | 
778  |  |  | 
779  |  |     //  | 
780  |  |     // Use the generic utext_setup to allocate storage if required, asking for  | 
781  |  |     //   as much extra storage as the source has.  | 
782  | 0  |     dest = utext_setup(dest, srcExtraSize, status);  | 
783  | 0  |     if (U_FAILURE(*status)) { | 
784  | 0  |         return dest;  | 
785  | 0  |     }  | 
786  |  |  | 
787  |  |     //  | 
788  |  |     //  flags (how the UText was allocated) and the pointer to the  | 
789  |  |     //   extra storage must retain the values in the cloned utext that  | 
790  |  |     //   were set up by utext_setup.  Save them separately before  | 
791  |  |     //   copying the whole struct, and restore them right after the copy.  | 
792  |  |     //  | 
793  | 0  |     void *destExtra = dest->pExtra;  | 
794  | 0  |     int32_t flags   = dest->flags;  | 
795  |  |  | 
796  |  |  | 
797  |  |     //  | 
798  |  |     //  Copy the UText struct by value, limited to the smaller of the two  | 
799  |  |     //  sizeOfStruct values.  Any "Extra" storage is copied also.  | 
800  |  |     //  | 
801  | 0  |     int sizeToCopy = src->sizeOfStruct;  | 
802  | 0  |     if (sizeToCopy > dest->sizeOfStruct) { | 
803  | 0  |         sizeToCopy = dest->sizeOfStruct;  | 
804  | 0  |     }  | 
805  | 0  |     uprv_memcpy(dest, src, sizeToCopy);  | 
806  | 0  |     dest->pExtra = destExtra;  | 
807  | 0  |     dest->flags  = flags;  | 
808  | 0  |     if (srcExtraSize > 0) { | 
809  | 0  |         uprv_memcpy(dest->pExtra, src->pExtra, srcExtraSize);  | 
810  | 0  |     }  | 
811  |  |  | 
812  |  |     //  | 
813  |  |     // Relocate the context, p, q, r and chunkContents pointers: any of them that  | 
814  |  |     //   refer to the source UText itself must point into the cloned copy instead.  | 
815  |  |     //  | 
816  | 0  |     adjustPointer(dest, &dest->context, src);  | 
817  | 0  |     adjustPointer(dest, &dest->p, src);  | 
818  | 0  |     adjustPointer(dest, &dest->q, src);  | 
819  | 0  |     adjustPointer(dest, &dest->r, src);  | 
820  | 0  |     adjustPointer(dest, (const void **)&dest->chunkContents, src);  | 
821  |  |  | 
822  |  |     // The newly shallow-cloned UText does _not_ own the underlying storage for the text,  | 
823  |  |     // so clear the owns-text property.  (The source may or may not have owned the text.)  | 
824  |  | 
  | 
825  | 0  |     dest->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);  | 
826  |  | 
  | 
827  | 0  |     return dest;  | 
828  | 0  | }  | 
829  |  |  | 
830  |  |  | 
831  |  | U_CDECL_END  | 
832  |  |  | 
833  |  |  | 
834  |  |  | 
835  |  | //------------------------------------------------------------------------------  | 
836  |  | //  | 
837  |  | //     UText implementation for UTF-8 char * strings (read-only)  | 
838  |  | //     Limitation:  string length must be <= 0x7fffffff.  | 
839  |  | //                  (length must fit in an int32_t variable)  | 
840  |  | //  | 
841  |  | //         Use of UText data members:  | 
842  |  | //              context    pointer to UTF-8 string  | 
843  |  | //              utext.b    is the input string length (bytes).  | 
844  |  | //              utext.c    Length scanned so far in string  | 
845  |  | //                           (for optimizing finding length of zero terminated strings.)  | 
846  |  | //              utext.p    pointer to the current buffer  | 
847  |  | //              utext.q    pointer to the other buffer.  | 
848  |  | //  | 
849  |  | //------------------------------------------------------------------------------  | 
850  |  |  | 
851  |  | // Chunk size.  | 
852  |  | //     Must be less than 85 (256/3), because of byte mapping from UChar indexes to native indexes.  | 
853  |  | //     Worst case is three native bytes to one UChar.  (Supplementaries are 4 native bytes  | 
854  |  | //     to two UChars.)  | 
855  |  | //     The longest illegal byte sequence treated as a single error (and converted to U+FFFD)  | 
856  |  | //     is a three-byte sequence (truncated four-byte sequence).  | 
857  |  | //  | 
858  |  | enum { UTF8_TEXT_CHUNK_SIZE=32 }; | 
859  |  |  | 
860  |  | //  | 
861  |  | // UTF8Buf  Two of these structs will be set up in the UText's extra allocated space.  | 
862  |  | //          Each contains the UChar chunk buffer, the to and from native maps, and  | 
863  |  | //          header info.  | 
864  |  | //  | 
865  |  | //     Because backwards iteration fills the buffers starting at the end and  | 
866  |  | //     working towards the front, the filled part of the buffers may not begin  | 
867  |  | //     at the start of the available storage for the buffers.  | 
868  |  | //  | 
869  |  | //     Buffer size is one bigger than the specified UTF8_TEXT_CHUNK_SIZE to allow for  | 
870  |  | //     the last character added being a supplementary, and thus requiring a surrogate  | 
871  |  | //     pair.  Doing this is simpler than checking for the edge case.  | 
872  |  | //  | 
873  |  |  | 
874  |  | struct UTF8Buf { | 
875  |  |     int32_t   bufNativeStart;                        // Native index of first char in UChar buf  | 
876  |  |     int32_t   bufNativeLimit;                        // Native index following last char in buf.  | 
877  |  |     int32_t   bufStartIdx;                           // First filled position in buf.  | 
878  |  |     int32_t   bufLimitIdx;                           // Limit of filled range in buf.  | 
879  |  |     int32_t   bufNILimit;                            // Limit of native indexing part of buf  | 
880  |  |     int32_t   toUCharsMapStart;                      // Native index corresponding to  | 
881  |  |                                                      //   mapToUChars[0].  | 
882  |  |                                                      //   Set to bufNativeStart when filling forwards.  | 
883  |  |                                                      //   Set to computed value when filling backwards.  | 
884  |  |  | 
885  |  |     UChar     buf[UTF8_TEXT_CHUNK_SIZE+4];           // The UChar buffer.  Requires one extra position beyond  | 
886  |  |                                                      //   the chunk size, to allow for surrogate at the end.  | 
887  |  |                                                      //   Length must be identical to mapToNative array, below,  | 
888  |  |                                                      //   because of the way indexing works when the array is  | 
889  |  |                                                      //   filled backwards during a reverse iteration.  Thus,  | 
890  |  |                                                      //   the additional extra size.  | 
891  |  |     uint8_t   mapToNative[UTF8_TEXT_CHUNK_SIZE+4];   // map UChar index in buf to  | 
892  |  |                                                      //  native offset from bufNativeStart.  | 
893  |  |                                                      //  Requires two extra slots,  | 
894  |  |                                                      //    one for a supplementary starting in the last normal position,  | 
895  |  |                                                      //    and one for an entry for the buffer limit position.  | 
896  |  |     uint8_t   mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from bufNativeStart to  | 
897  |  |                                                      //   corresponding offset in filled part of buf.  | 
898  |  |     int32_t   align;  | 
899  |  | };  | 
900  |  |  | 
901  |  | U_CDECL_BEGIN  | 
902  |  |  | 
903  |  | //  | 
904  |  | //   utf8TextLength  | 
905  |  | //  | 
906  |  | //        Get the length of the string, as held in ut->b.  A negative ut->b means the  | 
907  |  | //              length is not known yet; scan for the trailing nul and cache the result.  | 
908  |  | //        Finding the length also clears UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE.  | 
909  |  | static int64_t U_CALLCONV  | 
910  | 0  | utf8TextLength(UText *ut) { | 
911  | 0  |     if (ut->b < 0) { | 
912  |  |         // Zero terminated string, and we haven't scanned to the end yet.  | 
913  |  |         // Scan it now, starting at offset ut->c from the start of the string.  | 
914  | 0  |         const char *r = (const char *)ut->context + ut->c;  | 
915  | 0  |         while (*r != 0) { | 
916  | 0  |             r++;  | 
917  | 0  |         }  | 
918  | 0  |         if ((r - (const char *)ut->context) < 0x7fffffff) { | 
919  | 0  |             ut->b = (int32_t)(r - (const char *)ut->context);  | 
920  | 0  |         } else { | 
921  |  |             // Actual string was bigger (more than 2 gig) than we  | 
922  |  |             //   can handle.  Clip it to 2 GB.  | 
923  | 0  |             ut->b = 0x7fffffff;  | 
924  | 0  |         }  | 
925  | 0  |         ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);  | 
926  | 0  |     }  | 
927  | 0  |     return ut->b;  | 
928  | 0  | }  | 
929  |  |  | 
930  |  |  | 
931  |  |  | 
932  |  |  | 
933  |  |  | 
934  |  |  | 
935  |  | static UBool U_CALLCONV  | 
936  | 0  | utf8TextAccess(UText *ut, int64_t index, UBool forward) { | 
937  |  |     //  | 
938  |  |     //  Apologies to those who are allergic to goto statements.  | 
939  |  |     //    Consider each goto to a labelled block to be the equivalent of  | 
940  |  |     //         call the named block as if it were a function();  | 
941  |  |     //         return;  | 
942  |  |     //  | 
943  | 0  |     const uint8_t *s8=(const uint8_t *)ut->context;  | 
944  | 0  |     UTF8Buf *u8b = NULL;  | 
945  | 0  |     int32_t  length = ut->b;         // Length of original utf-8  | 
946  | 0  |     int32_t  ix= (int32_t)index;     // Requested index, trimmed to 32 bits.  | 
947  | 0  |     int32_t  mapIndex = 0;  | 
948  | 0  |     if (index<0) { | 
949  | 0  |         ix=0;  | 
950  | 0  |     } else if (index > 0x7fffffff) { | 
951  |  |         // Strings with 64 bit lengths not supported by this UTF-8 provider.  | 
952  | 0  |         ix = 0x7fffffff;  | 
953  | 0  |     }  | 
954  |  |  | 
955  |  |     // Pin requested index to the string length.  | 
956  | 0  |     if (ix>length) { | 
957  | 0  |         if (length>=0) { | 
958  | 0  |             ix=length;  | 
959  | 0  |         } else if (ix>=ut->c) { | 
960  |  |             // Zero terminated string, and requested index is beyond  | 
961  |  |             //   the region that has already been scanned.  | 
962  |  |             //   Scan up to either the end of the string or to the  | 
963  |  |             //   requested position, whichever comes first.  | 
964  | 0  |             while (ut->c<ix && s8[ut->c]!=0) { | 
965  | 0  |                 ut->c++;  | 
966  | 0  |             }  | 
967  |  |             //  TODO:  support for null terminated string length > 32 bits.  | 
968  | 0  |             if (s8[ut->c] == 0) { | 
969  |  |                 // We just found the actual length of the string.  | 
970  |  |                 //  Trim the requested index back to that.  | 
971  | 0  |                 ix     = ut->c;  | 
972  | 0  |                 ut->b  = ut->c;  | 
973  | 0  |                 length = ut->c;  | 
974  | 0  |                 ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);  | 
975  | 0  |             }  | 
976  | 0  |         }  | 
977  | 0  |     }  | 
978  |  |  | 
979  |  |     //  | 
980  |  |     // Dispatch to the appropriate action for a forward iteration request.  | 
981  |  |     //  | 
982  | 0  |     if (forward) { | 
983  | 0  |         if (ix==ut->chunkNativeLimit) { | 
984  |  |             // Check for normal sequential iteration cases first.  | 
985  | 0  |             if (ix==length) { | 
986  |  |                 // Just reached end of string  | 
987  |  |                 // Don't swap buffers, but do set the  | 
988  |  |                 //   current buffer position.  | 
989  | 0  |                 ut->chunkOffset = ut->chunkLength;  | 
990  | 0  |                 return FALSE;  | 
991  | 0  |             } else { | 
992  |  |                 // End of current buffer.  | 
993  |  |                 //   check whether other buffer already has what we need.  | 
994  | 0  |                 UTF8Buf *altB = (UTF8Buf *)ut->q;  | 
995  | 0  |                 if (ix>=altB->bufNativeStart && ix<altB->bufNativeLimit) { | 
996  | 0  |                     goto swapBuffers;  | 
997  | 0  |                 }  | 
998  | 0  |             }  | 
999  | 0  |         }  | 
1000  |  |  | 
1001  |  |         // A random access.  Desired index could be in either or neither buf.  | 
1002  |  |         // For optimizing the order of testing, first check for the index  | 
1003  |  |         //    being in the other buffer.  This will be the case for uses that  | 
1004  |  |         //    move back and forth over a fairly limited range  | 
1005  | 0  |         { | 
1006  | 0  |             u8b = (UTF8Buf *)ut->q;   // the alternate buffer  | 
1007  | 0  |             if (ix>=u8b->bufNativeStart && ix<u8b->bufNativeLimit) { | 
1008  |  |                 // Requested index is in the other buffer.  | 
1009  | 0  |                 goto swapBuffers;  | 
1010  | 0  |             }  | 
1011  | 0  |             if (ix == length) { | 
1012  |  |                 // Requested index is end-of-string.  | 
1013  |  |                 //   (this is the case of randomly seeking to the end.  | 
1014  |  |                 //    The case of iterating off the end is handled earlier.)  | 
1015  | 0  |                 if (ix == ut->chunkNativeLimit) { | 
1016  |  |                     // Current buffer extends up to the end of the string.  | 
1017  |  |                     //   Leave it as the current buffer.  | 
1018  | 0  |                     ut->chunkOffset = ut->chunkLength;  | 
1019  | 0  |                     return FALSE;  | 
1020  | 0  |                 }  | 
1021  | 0  |                 if (ix == u8b->bufNativeLimit) { | 
1022  |  |                     // Alternate buffer extends to the end of string.  | 
1023  |  |                     //   Swap it in as the current buffer.  | 
1024  | 0  |                     goto swapBuffersAndFail;  | 
1025  | 0  |                 }  | 
1026  |  |  | 
1027  |  |                 // Neither existing buffer extends to the end of the string.  | 
1028  | 0  |                 goto makeStubBuffer;  | 
1029  | 0  |             }  | 
1030  |  |  | 
1031  | 0  |             if (ix<ut->chunkNativeStart || ix>=ut->chunkNativeLimit) { | 
1032  |  |                 // Requested index is in neither buffer.  | 
1033  | 0  |                 goto fillForward;  | 
1034  | 0  |             }  | 
1035  |  |  | 
1036  |  |             // Requested index is in this buffer.  | 
1037  | 0  |             u8b = (UTF8Buf *)ut->p;   // the current buffer  | 
1038  | 0  |             mapIndex = ix - u8b->toUCharsMapStart;  | 
1039  | 0  |             U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));  | 
1040  | 0  |             ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;  | 
1041  | 0  |             return TRUE;  | 
1042  |  | 
  | 
1043  | 0  |         }  | 
1044  | 0  |     }  | 
1045  |  |  | 
1046  |  |  | 
1047  |  |     //  | 
1048  |  |     // Dispatch to the appropriate action for a  | 
1049  |  |     //   Backwards Direction iteration request.  | 
1050  |  |     //  | 
1051  | 0  |     if (ix==ut->chunkNativeStart) { | 
1052  |  |         // Check for normal sequential iteration cases first.  | 
1053  | 0  |         if (ix==0) { | 
1054  |  |             // Just reached the start of string  | 
1055  |  |             // Don't swap buffers, but do set the  | 
1056  |  |             //   current buffer position.  | 
1057  | 0  |             ut->chunkOffset = 0;  | 
1058  | 0  |             return FALSE;  | 
1059  | 0  |         } else { | 
1060  |  |             // Start of current buffer.  | 
1061  |  |             //   check whether other buffer already has what we need.  | 
1062  | 0  |             UTF8Buf *altB = (UTF8Buf *)ut->q;  | 
1063  | 0  |             if (ix>altB->bufNativeStart && ix<=altB->bufNativeLimit) { | 
1064  | 0  |                 goto swapBuffers;  | 
1065  | 0  |             }  | 
1066  | 0  |         }  | 
1067  | 0  |     }  | 
1068  |  |  | 
1069  |  |     // A random access.  Desired index could be in either or neither buf.  | 
1070  |  |     // For optimizing the order of testing,  | 
1071  |  |     //    Most likely case:  in the other buffer.  | 
1072  |  |     //    Second most likely: in neither buffer.  | 
1073  |  |     //    Unlikely, but must work:  in the current buffer.  | 
1074  | 0  |     u8b = (UTF8Buf *)ut->q;   // the alternate buffer  | 
1075  | 0  |     if (ix>u8b->bufNativeStart && ix<=u8b->bufNativeLimit) { | 
1076  |  |         // Requested index is in the other buffer.  | 
1077  | 0  |         goto swapBuffers;  | 
1078  | 0  |     }  | 
1079  |  |     // Requested index is start-of-string.  | 
1080  |  |     //   (this is the case of randomly seeking to the start.  | 
1081  |  |     //    The case of iterating off the start is handled earlier.)  | 
1082  | 0  |     if (ix==0) { | 
1083  | 0  |         if (u8b->bufNativeStart==0) { | 
1084  |  |             // Alternate buffer contains the data for the start string.  | 
1085  |  |             // Make it be the current buffer.  | 
1086  | 0  |             goto swapBuffersAndFail;  | 
1087  | 0  |         } else { | 
1088  |  |             // Request for data before the start of string,  | 
1089  |  |             //   neither buffer is usable.  | 
1090  |  |             //   set up a zero-length buffer.  | 
1091  | 0  |             goto makeStubBuffer;  | 
1092  | 0  |         }  | 
1093  | 0  |     }  | 
1094  |  |  | 
1095  | 0  |     if (ix<=ut->chunkNativeStart || ix>ut->chunkNativeLimit) { | 
1096  |  |         // Requested index is in neither buffer.  | 
1097  | 0  |         goto fillReverse;  | 
1098  | 0  |     }  | 
1099  |  |  | 
1100  |  |     // Requested index is in this buffer.  | 
1101  |  |     //   Set the utf16 buffer index.  | 
1102  | 0  |     u8b = (UTF8Buf *)ut->p;  | 
1103  | 0  |     mapIndex = ix - u8b->toUCharsMapStart;  | 
1104  | 0  |     ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;  | 
1105  | 0  |     if (ut->chunkOffset==0) { | 
1106  |  |         // This occurs when the first character in the text is  | 
1107  |  |         //   a multi-byte UTF-8 char, and the requested index is to  | 
1108  |  |         //   one of the trailing bytes.  Because there is no preceding  | 
1109  |  |         //   character, this access fails.  We can't pick up on the  | 
1110  |  |         //   situation sooner because the requested index is not zero.  | 
1111  | 0  |         return FALSE;  | 
1112  | 0  |     } else { | 
1113  | 0  |         return TRUE;  | 
1114  | 0  |     }  | 
1115  |  |  | 
1116  |  |  | 
1117  |  |  | 
1118  | 0  | swapBuffers:  | 
1119  |  |     //  The alternate buffer (ut->q) has the string data that was requested.  | 
1120  |  |     //  Swap the primary and alternate buffers, and set the  | 
1121  |  |     //   chunk index into the new primary buffer.  | 
1122  | 0  |     { | 
1123  | 0  |         u8b   = (UTF8Buf *)ut->q;  | 
1124  | 0  |         ut->q = ut->p;  | 
1125  | 0  |         ut->p = u8b;  | 
1126  | 0  |         ut->chunkContents       = &u8b->buf[u8b->bufStartIdx];  | 
1127  | 0  |         ut->chunkLength         = u8b->bufLimitIdx - u8b->bufStartIdx;  | 
1128  | 0  |         ut->chunkNativeStart    = u8b->bufNativeStart;  | 
1129  | 0  |         ut->chunkNativeLimit    = u8b->bufNativeLimit;  | 
1130  | 0  |         ut->nativeIndexingLimit = u8b->bufNILimit;  | 
1131  |  |  | 
1132  |  |         // Index into the (now current) chunk  | 
1133  |  |         // Use the map to set the chunk index.  It's more trouble than it's worth  | 
1134  |  |         //    to check whether native indexing can be used.  | 
1135  | 0  |         U_ASSERT(ix>=u8b->bufNativeStart);  | 
1136  | 0  |         U_ASSERT(ix<=u8b->bufNativeLimit);  | 
1137  | 0  |         mapIndex = ix - u8b->toUCharsMapStart;  | 
1138  | 0  |         U_ASSERT(mapIndex>=0);  | 
1139  | 0  |         U_ASSERT(mapIndex<(int32_t)sizeof(u8b->mapToUChars));  | 
1140  | 0  |         ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;  | 
1141  |  | 
  | 
1142  | 0  |         return TRUE;  | 
1143  | 0  |     }  | 
1144  |  |  | 
1145  |  |  | 
1146  | 0  |  swapBuffersAndFail:  | 
1147  |  |     // We got a request for either the start or end of the string,  | 
1148  |  |     //  with iteration continuing in the out-of-bounds direction.  | 
1149  |  |     // The alternate buffer already contains the data up to the  | 
1150  |  |     //  start/end.  | 
1151  |  |     // Swap the buffers, then return failure, indicating that we couldn't  | 
1152  |  |     //  make things correct for continuing the iteration in the requested  | 
1153  |  |     //  direction.  The position & buffer are correct should the  | 
1154  |  |     //  user decide to iterate in the opposite direction.  | 
1155  | 0  |     u8b   = (UTF8Buf *)ut->q;  | 
1156  | 0  |     ut->q = ut->p;  | 
1157  | 0  |     ut->p = u8b;  | 
1158  | 0  |     ut->chunkContents       = &u8b->buf[u8b->bufStartIdx];  | 
1159  | 0  |     ut->chunkLength         = u8b->bufLimitIdx - u8b->bufStartIdx;  | 
1160  | 0  |     ut->chunkNativeStart    = u8b->bufNativeStart;  | 
1161  | 0  |     ut->chunkNativeLimit    = u8b->bufNativeLimit;  | 
1162  | 0  |     ut->nativeIndexingLimit = u8b->bufNILimit;  | 
1163  |  |  | 
1164  |  |     // Index into the (now current) chunk  | 
1165  |  |     //  For this function  (swapBuffersAndFail), the requested index  | 
1166  |  |     //    will always be at either the start or end of the chunk.  | 
1167  | 0  |     if (ix==u8b->bufNativeLimit) { | 
1168  | 0  |         ut->chunkOffset = ut->chunkLength;  | 
1169  | 0  |     } else  { | 
1170  | 0  |         ut->chunkOffset = 0;  | 
1171  | 0  |         U_ASSERT(ix == u8b->bufNativeStart);  | 
1172  | 0  |     }  | 
1173  | 0  |     return FALSE;  | 
1174  |  |  | 
1175  | 0  | makeStubBuffer:  | 
1176  |  |     //   The user has done a seek/access past the start or end  | 
1177  |  |     //   of the string.  Rather than loading data that is likely  | 
1178  |  |     //   to never be used, just set up a zero-length buffer at  | 
1179  |  |     //   the position.  | 
1180  | 0  |     u8b = (UTF8Buf *)ut->q;  | 
1181  | 0  |     u8b->bufNativeStart   = ix;  | 
1182  | 0  |     u8b->bufNativeLimit   = ix;  | 
1183  | 0  |     u8b->bufStartIdx      = 0;  | 
1184  | 0  |     u8b->bufLimitIdx      = 0;  | 
1185  | 0  |     u8b->bufNILimit       = 0;  | 
1186  | 0  |     u8b->toUCharsMapStart = ix;  | 
1187  | 0  |     u8b->mapToNative[0]   = 0;  | 
1188  | 0  |     u8b->mapToUChars[0]   = 0;  | 
1189  | 0  |     goto swapBuffersAndFail;  | 
1190  |  |  | 
1191  |  |  | 
1192  |  |  | 
1193  | 0  | fillForward:  | 
1194  | 0  |     { | 
1195  |  |         // Move the incoming index to a code point boundary.  | 
1196  | 0  |         U8_SET_CP_START(s8, 0, ix);  | 
1197  |  |  | 
1198  |  |         // Swap the UText buffers.  | 
1199  |  |         //  We want to fill what was previously the alternate buffer,  | 
1200  |  |         //  and make what was the current buffer be the new alternate.  | 
1201  | 0  |         UTF8Buf *u8b_swap = (UTF8Buf *)ut->q;  | 
1202  | 0  |         ut->q = ut->p;  | 
1203  | 0  |         ut->p = u8b_swap;  | 
1204  |  | 
  | 
1205  | 0  |         int32_t strLen = ut->b;  | 
1206  | 0  |         UBool   nulTerminated = FALSE;  | 
1207  | 0  |         if (strLen < 0) { | 
1208  | 0  |             strLen = 0x7fffffff;  | 
1209  | 0  |             nulTerminated = TRUE;  | 
1210  | 0  |         }  | 
1211  |  | 
  | 
1212  | 0  |         UChar   *buf = u8b_swap->buf;  | 
1213  | 0  |         uint8_t *mapToNative  = u8b_swap->mapToNative;  | 
1214  | 0  |         uint8_t *mapToUChars  = u8b_swap->mapToUChars;  | 
1215  | 0  |         int32_t  destIx       = 0;  | 
1216  | 0  |         int32_t  srcIx        = ix;  | 
1217  | 0  |         UBool    seenNonAscii = FALSE;  | 
1218  | 0  |         UChar32  c = 0;  | 
1219  |  |  | 
1220  |  |         // Fill the chunk buffer and mapping arrays.  | 
1221  | 0  |         while (destIx<UTF8_TEXT_CHUNK_SIZE) { | 
1222  | 0  |             c = s8[srcIx];  | 
1223  | 0  |             if (c>0 && c<0x80) { | 
1224  |  |                 // Special case ASCII range for speed.  | 
1225  |  |                 //   zero is excluded to simplify bounds checking.  | 
1226  | 0  |                 buf[destIx] = (UChar)c;  | 
1227  | 0  |                 mapToNative[destIx]    = (uint8_t)(srcIx - ix);  | 
1228  | 0  |                 mapToUChars[srcIx-ix]  = (uint8_t)destIx;  | 
1229  | 0  |                 srcIx++;  | 
1230  | 0  |                 destIx++;  | 
1231  | 0  |             } else { | 
1232  |  |                 // General case, handle everything.  | 
1233  | 0  |                 if (seenNonAscii == FALSE) { | 
1234  | 0  |                     seenNonAscii = TRUE;  | 
1235  | 0  |                     u8b_swap->bufNILimit = destIx;  | 
1236  | 0  |                 }  | 
1237  |  | 
  | 
1238  | 0  |                 int32_t  cIx      = srcIx;  | 
1239  | 0  |                 int32_t  dIx      = destIx;  | 
1240  | 0  |                 int32_t  dIxSaved = destIx;  | 
1241  | 0  |                 U8_NEXT_OR_FFFD(s8, srcIx, strLen, c);  | 
1242  | 0  |                 if (c==0 && nulTerminated) { | 
1243  | 0  |                     srcIx--;  | 
1244  | 0  |                     break;  | 
1245  | 0  |                 }  | 
1246  |  |  | 
1247  | 0  |                 U16_APPEND_UNSAFE(buf, destIx, c);  | 
1248  | 0  |                 do { | 
1249  | 0  |                     mapToNative[dIx++] = (uint8_t)(cIx - ix);  | 
1250  | 0  |                 } while (dIx < destIx);  | 
1251  |  | 
  | 
1252  | 0  |                 do { | 
1253  | 0  |                     mapToUChars[cIx++ - ix] = (uint8_t)dIxSaved;  | 
1254  | 0  |                 } while (cIx < srcIx);  | 
1255  | 0  |             }  | 
1256  | 0  |             if (srcIx>=strLen) { | 
1257  | 0  |                 break;  | 
1258  | 0  |             }  | 
1259  |  | 
  | 
1260  | 0  |         }  | 
1261  |  |  | 
1262  |  |         //  store Native <--> Chunk Map entries for the end of the buffer.  | 
1263  |  |         //    There is no actual character here, but the index position is valid.  | 
1264  | 0  |         mapToNative[destIx]     = (uint8_t)(srcIx - ix);  | 
1265  | 0  |         mapToUChars[srcIx - ix] = (uint8_t)destIx;  | 
1266  |  |  | 
1267  |  |         //  fill in Buffer descriptor  | 
1268  | 0  |         u8b_swap->bufNativeStart     = ix;  | 
1269  | 0  |         u8b_swap->bufNativeLimit     = srcIx;  | 
1270  | 0  |         u8b_swap->bufStartIdx        = 0;  | 
1271  | 0  |         u8b_swap->bufLimitIdx        = destIx;  | 
1272  | 0  |         if (seenNonAscii == FALSE) { | 
1273  | 0  |             u8b_swap->bufNILimit     = destIx;  | 
1274  | 0  |         }  | 
1275  | 0  |         u8b_swap->toUCharsMapStart   = u8b_swap->bufNativeStart;  | 
1276  |  |  | 
1277  |  |         // Set UText chunk to refer to this buffer.  | 
1278  | 0  |         ut->chunkContents       = buf;  | 
1279  | 0  |         ut->chunkOffset         = 0;  | 
1280  | 0  |         ut->chunkLength         = u8b_swap->bufLimitIdx;  | 
1281  | 0  |         ut->chunkNativeStart    = u8b_swap->bufNativeStart;  | 
1282  | 0  |         ut->chunkNativeLimit    = u8b_swap->bufNativeLimit;  | 
1283  | 0  |         ut->nativeIndexingLimit = u8b_swap->bufNILimit;  | 
1284  |  |  | 
1285  |  |         // For zero terminated strings, keep track of the maximum point  | 
1286  |  |         //   scanned so far.  | 
1287  | 0  |         if (nulTerminated && srcIx>ut->c) { | 
1288  | 0  |             ut->c = srcIx;  | 
1289  | 0  |             if (c==0) { | 
1290  |  |                 // We scanned to the end.  | 
1291  |  |                 //   Remember the actual length.  | 
1292  | 0  |                 ut->b = srcIx;  | 
1293  | 0  |                 ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);  | 
1294  | 0  |             }  | 
1295  | 0  |         }  | 
1296  | 0  |         return TRUE;  | 
1297  | 0  |     }  | 
1298  |  |  | 
1299  |  |  | 
1300  | 0  | fillReverse:  | 
1301  | 0  |     { | 
1302  |  |         // Move the incoming index to a code point boundary.  | 
1303  |  |         // Can only do this if the incoming index is somewhere in the interior of the string.  | 
1304  |  |         //   If index is at the end, there is no character there to look at.  | 
1305  | 0  |         if (ix != ut->b) { | 
1306  |  |             // Note: this function will only move the index back if it is on a trail byte  | 
1307  |  |             //       and there is a preceding lead byte and the sequence from the lead   | 
1308  |  |             //       through this trail could be part of a valid UTF-8 sequence  | 
1309  |  |             //       Otherwise the index remains unchanged.  | 
1310  | 0  |             U8_SET_CP_START(s8, 0, ix);  | 
1311  | 0  |         }  | 
1312  |  |  | 
1313  |  |         // Swap the UText buffers.  | 
1314  |  |         //  We want to fill what was previously the alternate buffer,  | 
1315  |  |         //  and make what was the current buffer be the new alternate.  | 
1316  | 0  |         UTF8Buf *u8b_swap = (UTF8Buf *)ut->q;  | 
1317  | 0  |         ut->q = ut->p;  | 
1318  | 0  |         ut->p = u8b_swap;  | 
1319  |  | 
  | 
1320  | 0  |         UChar   *buf = u8b_swap->buf;  | 
1321  | 0  |         uint8_t *mapToNative = u8b_swap->mapToNative;  | 
1322  | 0  |         uint8_t *mapToUChars = u8b_swap->mapToUChars;  | 
1323  | 0  |         int32_t  toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1;  | 
1324  |  |         // Note that toUCharsMapStart can be negative. Happens when the remaining  | 
1325  |  |         // text from current position to the beginning is less than the buffer size.  | 
1326  |  |         // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.  | 
1327  | 0  |         int32_t  destIx = UTF8_TEXT_CHUNK_SIZE+2;   // Start in the overflow region  | 
1328  |  |                                                     //   at end of buffer to leave room  | 
1329  |  |                                                     //   for a surrogate pair at the  | 
1330  |  |                                                     //   buffer start.  | 
1331  | 0  |         int32_t  srcIx  = ix;  | 
1332  | 0  |         int32_t  bufNILimit = destIx;  | 
1333  | 0  |         UChar32   c;  | 
1334  |  |  | 
1335  |  |         // Map to/from Native Indexes, fill in for the position at the end of  | 
1336  |  |         //   the buffer.  | 
1337  |  |         //  | 
1338  | 0  |         mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);  | 
1339  | 0  |         mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;  | 
1340  |  |  | 
1341  |  |         // Fill the chunk buffer  | 
1342  |  |         // Work backwards, filling from the end of the buffer towards the front.  | 
1343  |  |         //  | 
1344  | 0  |         while (destIx>2 && (srcIx - toUCharsMapStart > 5) && (srcIx > 0)) { | 
1345  | 0  |             srcIx--;  | 
1346  | 0  |             destIx--;  | 
1347  |  |  | 
1348  |  |             // Get last byte of the UTF-8 character  | 
1349  | 0  |             c = s8[srcIx];  | 
1350  | 0  |             if (c<0x80) { | 
1351  |  |                 // Special case ASCII range for speed.  | 
1352  | 0  |                 buf[destIx] = (UChar)c;  | 
1353  | 0  |                 U_ASSERT(toUCharsMapStart <= srcIx);  | 
1354  | 0  |                 mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;  | 
1355  | 0  |                 mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);  | 
1356  | 0  |             } else { | 
1357  |  |                 // General case, handle everything non-ASCII.  | 
1358  |  | 
  | 
1359  | 0  |                 int32_t  sIx      = srcIx;  // ix of last byte of multi-byte u8 char  | 
1360  |  |  | 
1361  |  |                 // Get the full character from the UTF8 string.  | 
1362  |  |                 //   use code derived from the macros in utf8.h  | 
1363  |  |                 //   Leaves srcIx pointing at the first byte of the UTF-8 char.  | 
1364  |  |                 //  | 
1365  | 0  |                 c=utf8_prevCharSafeBody(s8, 0, &srcIx, c, -3);  | 
1366  |  |                 // leaves srcIx at first byte of the multi-byte char.  | 
1367  |  |  | 
1368  |  |                 // Store the character in UTF-16 buffer.  | 
1369  | 0  |                 if (c<0x10000) { | 
1370  | 0  |                     buf[destIx] = (UChar)c;  | 
1371  | 0  |                     mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);  | 
1372  | 0  |                 } else { | 
1373  | 0  |                     buf[destIx]         = U16_TRAIL(c);  | 
1374  | 0  |                     mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);  | 
1375  | 0  |                     buf[--destIx]       = U16_LEAD(c);  | 
1376  | 0  |                     mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);  | 
1377  | 0  |                 }  | 
1378  |  |  | 
1379  |  |                 // Fill in the map from native indexes to UChars buf index.  | 
1380  | 0  |                 do { | 
1381  | 0  |                     mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;  | 
1382  | 0  |                 } while (sIx >= srcIx);  | 
1383  | 0  |                 U_ASSERT(toUCharsMapStart <= (srcIx+1));  | 
1384  |  |  | 
1385  |  |                 // Set native indexing limit to be the current position.  | 
1386  |  |                 //   We are processing a non-ascii, non-native-indexing char now;  | 
1387  |  |                 //     the limit will be here if the rest of the chars to be  | 
1388  |  |                 //     added to this buffer are ascii.  | 
1389  | 0  |                 bufNILimit = destIx;  | 
1390  | 0  |             }  | 
1391  | 0  |         }  | 
1392  | 0  |         u8b_swap->bufNativeStart     = srcIx;  | 
1393  | 0  |         u8b_swap->bufNativeLimit     = ix;  | 
1394  | 0  |         u8b_swap->bufStartIdx        = destIx;  | 
1395  | 0  |         u8b_swap->bufLimitIdx        = UTF8_TEXT_CHUNK_SIZE+2;  | 
1396  | 0  |         u8b_swap->bufNILimit         = bufNILimit - u8b_swap->bufStartIdx;  | 
1397  | 0  |         u8b_swap->toUCharsMapStart   = toUCharsMapStart;  | 
1398  |  | 
  | 
1399  | 0  |         ut->chunkContents       = &buf[u8b_swap->bufStartIdx];  | 
1400  | 0  |         ut->chunkLength         = u8b_swap->bufLimitIdx - u8b_swap->bufStartIdx;  | 
1401  | 0  |         ut->chunkOffset         = ut->chunkLength;  | 
1402  | 0  |         ut->chunkNativeStart    = u8b_swap->bufNativeStart;  | 
1403  | 0  |         ut->chunkNativeLimit    = u8b_swap->bufNativeLimit;  | 
1404  | 0  |         ut->nativeIndexingLimit = u8b_swap->bufNILimit;  | 
1405  | 0  |         return TRUE;  | 
1406  | 0  |     }  | 
1407  |  | 
  | 
1408  | 0  | }  | 
1409  |  |  | 
1410  |  |  | 
1411  |  |  | 
1412  |  | //  | 
1413  |  | //  This is a slightly modified copy of u_strFromUTF8,  | 
1414  |  | //     Inserts a Replacement Char rather than failing on invalid UTF-8  | 
1415  |  | //     Removes unnecessary features.  | 
1416  |  | //  | 
1417  |  | static UChar*  | 
1418  |  | utext_strFromUTF8(UChar *dest,  | 
1419  |  |               int32_t destCapacity,  | 
1420  |  |               int32_t *pDestLength,  | 
1421  |  |               const char* src,  | 
1422  |  |               int32_t srcLength,        // required.  NUL terminated not supported.  | 
1423  |  |               UErrorCode *pErrorCode  | 
1424  |  |               )  | 
1425  | 0  | { | 
1426  |  | 
  | 
1427  | 0  |     UChar *pDest = dest;  | 
1428  | 0  |     UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL;  | 
1429  | 0  |     UChar32 ch=0;  | 
1430  | 0  |     int32_t index = 0;  | 
1431  | 0  |     int32_t reqLength = 0;  | 
1432  | 0  |     uint8_t* pSrc = (uint8_t*) src;  | 
1433  |  |  | 
1434  |  | 
  | 
1435  | 0  |     while((index < srcLength)&&(pDest<pDestLimit)){ | 
1436  | 0  |         ch = pSrc[index++];  | 
1437  | 0  |         if(ch <=0x7f){ | 
1438  | 0  |             *pDest++=(UChar)ch;  | 
1439  | 0  |         }else{ | 
1440  | 0  |             ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -3);  | 
1441  | 0  |             if(U_IS_BMP(ch)){ | 
1442  | 0  |                 *(pDest++)=(UChar)ch;  | 
1443  | 0  |             }else{ | 
1444  | 0  |                 *(pDest++)=U16_LEAD(ch);  | 
1445  | 0  |                 if(pDest<pDestLimit){ | 
1446  | 0  |                     *(pDest++)=U16_TRAIL(ch);  | 
1447  | 0  |                 }else{ | 
1448  | 0  |                     reqLength++;  | 
1449  | 0  |                     break;  | 
1450  | 0  |                 }  | 
1451  | 0  |             }  | 
1452  | 0  |         }  | 
1453  | 0  |     }  | 
1454  |  |     /* donot fill the dest buffer just count the UChars needed */  | 
1455  | 0  |     while(index < srcLength){ | 
1456  | 0  |         ch = pSrc[index++];  | 
1457  | 0  |         if(ch <= 0x7f){ | 
1458  | 0  |             reqLength++;  | 
1459  | 0  |         }else{ | 
1460  | 0  |             ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -3);  | 
1461  | 0  |             reqLength+=U16_LENGTH(ch);  | 
1462  | 0  |         }  | 
1463  | 0  |     }  | 
1464  |  | 
  | 
1465  | 0  |     reqLength+=(int32_t)(pDest - dest);  | 
1466  |  | 
  | 
1467  | 0  |     if(pDestLength){ | 
1468  | 0  |         *pDestLength = reqLength;  | 
1469  | 0  |     }  | 
1470  |  |  | 
1471  |  |     /* Terminate the buffer */  | 
1472  | 0  |     u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);  | 
1473  |  | 
  | 
1474  | 0  |     return dest;  | 
1475  | 0  | }  | 
1476  |  |  | 
1477  |  |  | 
1478  |  |  | 
1479  |  | static int32_t U_CALLCONV  | 
1480  |  | utf8TextExtract(UText *ut,  | 
1481  |  |                 int64_t start, int64_t limit,  | 
1482  |  |                 UChar *dest, int32_t destCapacity,  | 
1483  | 0  |                 UErrorCode *pErrorCode) { | 
1484  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
1485  | 0  |         return 0;  | 
1486  | 0  |     }  | 
1487  | 0  |     if(destCapacity<0 || (dest==NULL && destCapacity>0)) { | 
1488  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
1489  | 0  |         return 0;  | 
1490  | 0  |     }  | 
1491  | 0  |     int32_t  length  = ut->b;  | 
1492  | 0  |     int32_t  start32 = pinIndex(start, length);  | 
1493  | 0  |     int32_t  limit32 = pinIndex(limit, length);  | 
1494  |  | 
  | 
1495  | 0  |     if(start32>limit32) { | 
1496  | 0  |         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
1497  | 0  |         return 0;  | 
1498  | 0  |     }  | 
1499  |  |  | 
1500  |  |  | 
1501  |  |     // adjust the incoming indexes to land on code point boundaries if needed.  | 
1502  |  |     //    adjust by no more than three, because that is the largest number of trail bytes  | 
1503  |  |     //    in a well formed UTF8 character.  | 
1504  | 0  |     const uint8_t *buf = (const uint8_t *)ut->context;  | 
1505  | 0  |     int i;  | 
1506  | 0  |     if (start32 < ut->chunkNativeLimit) { | 
1507  | 0  |         for (i=0; i<3; i++) { | 
1508  | 0  |             if (U8_IS_SINGLE(buf[start32]) || U8_IS_LEAD(buf[start32]) || start32==0) { | 
1509  | 0  |                 break;  | 
1510  | 0  |             }  | 
1511  | 0  |             start32--;  | 
1512  | 0  |         }  | 
1513  | 0  |     }  | 
1514  |  | 
  | 
1515  | 0  |     if (limit32 < ut->chunkNativeLimit) { | 
1516  | 0  |         for (i=0; i<3; i++) { | 
1517  | 0  |             if (U8_IS_SINGLE(buf[limit32]) || U8_IS_LEAD(buf[limit32]) || limit32==0) { | 
1518  | 0  |                 break;  | 
1519  | 0  |             }  | 
1520  | 0  |             limit32--;  | 
1521  | 0  |         }  | 
1522  | 0  |     }  | 
1523  |  |  | 
1524  |  |     // Do the actual extract.  | 
1525  | 0  |     int32_t destLength=0;  | 
1526  | 0  |     utext_strFromUTF8(dest, destCapacity, &destLength,  | 
1527  | 0  |                     (const char *)ut->context+start32, limit32-start32,  | 
1528  | 0  |                     pErrorCode);  | 
1529  | 0  |     utf8TextAccess(ut, limit32, TRUE);  | 
1530  | 0  |     return destLength;  | 
1531  | 0  | }  | 
1532  |  |  | 
1533  |  | //  | 
1534  |  | // utf8TextMapOffsetToNative  | 
1535  |  | //  | 
1536  |  | // Map a chunk (UTF-16) offset to a native index.  | 
1537  |  | static int64_t U_CALLCONV  | 
1538  | 0  | utf8TextMapOffsetToNative(const UText *ut) { | 
1539  |  |     //  | 
1540  | 0  |     UTF8Buf *u8b = (UTF8Buf *)ut->p;  | 
1541  | 0  |     U_ASSERT(ut->chunkOffset>ut->nativeIndexingLimit && ut->chunkOffset<=ut->chunkLength);  | 
1542  | 0  |     int32_t nativeOffset = u8b->mapToNative[ut->chunkOffset + u8b->bufStartIdx] + u8b->toUCharsMapStart;  | 
1543  | 0  |     U_ASSERT(nativeOffset >= ut->chunkNativeStart && nativeOffset <= ut->chunkNativeLimit);  | 
1544  | 0  |     return nativeOffset;  | 
1545  | 0  | }  | 
1546  |  |  | 
1547  |  | //  | 
1548  |  | // Map a native index to the corresponding chunk offset  | 
1549  |  | //  | 
1550  |  | static int32_t U_CALLCONV  | 
1551  | 0  | utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) { | 
1552  | 0  |     U_ASSERT(index64 <= 0x7fffffff);  | 
1553  | 0  |     int32_t index = (int32_t)index64;  | 
1554  | 0  |     UTF8Buf *u8b = (UTF8Buf *)ut->p;  | 
1555  | 0  |     U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit);  | 
1556  | 0  |     U_ASSERT(index<=ut->chunkNativeLimit);  | 
1557  | 0  |     int32_t mapIndex = index - u8b->toUCharsMapStart;  | 
1558  | 0  |     U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));  | 
1559  | 0  |     int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;  | 
1560  | 0  |     U_ASSERT(offset>=0 && offset<=ut->chunkLength);  | 
1561  | 0  |     return offset;  | 
1562  | 0  | }  | 
1563  |  |  | 
1564  |  | static UText * U_CALLCONV  | 
1565  |  | utf8TextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)  | 
1566  | 0  | { | 
1567  |  |     // First do a generic shallow clone.  Does everything needed for the UText struct itself.  | 
1568  | 0  |     dest = shallowTextClone(dest, src, status);  | 
1569  |  |  | 
1570  |  |     // For deep clones, make a copy of the string.  | 
1571  |  |     //  The copied storage is owned by the newly created clone.  | 
1572  |  |     //  | 
1573  |  |     // TODO:  There is an issue with using utext_nativeLength().  | 
1574  |  |     //        That function is non-const in cases where the input was NUL terminated  | 
1575  |  |     //          and the length has not yet been determined.  | 
1576  |  |     //        This function (clone()) is const.  | 
1577  |  |     //        There potentially a thread safety issue lurking here.  | 
1578  |  |     //  | 
1579  | 0  |     if (deep && U_SUCCESS(*status)) { | 
1580  | 0  |         int32_t  len = (int32_t)utext_nativeLength((UText *)src);  | 
1581  | 0  |         char *copyStr = (char *)uprv_malloc(len+1);  | 
1582  | 0  |         if (copyStr == NULL) { | 
1583  | 0  |             *status = U_MEMORY_ALLOCATION_ERROR;  | 
1584  | 0  |         } else { | 
1585  | 0  |             uprv_memcpy(copyStr, src->context, len+1);  | 
1586  | 0  |             dest->context = copyStr;  | 
1587  | 0  |             dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);  | 
1588  | 0  |         }  | 
1589  | 0  |     }  | 
1590  | 0  |     return dest;  | 
1591  | 0  | }  | 
1592  |  |  | 
1593  |  |  | 
1594  |  | static void U_CALLCONV  | 
1595  | 0  | utf8TextClose(UText *ut) { | 
1596  |  |     // Most of the work of close is done by the generic UText framework close.  | 
1597  |  |     // All that needs to be done here is to delete the UTF8 string if the UText  | 
1598  |  |     //  owns it.  This occurs if the UText was created by cloning.  | 
1599  | 0  |     if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { | 
1600  | 0  |         char *s = (char *)ut->context;  | 
1601  | 0  |         uprv_free(s);  | 
1602  | 0  |         ut->context = NULL;  | 
1603  | 0  |     }  | 
1604  | 0  | }  | 
1605  |  |  | 
1606  |  | U_CDECL_END  | 
1607  |  |  | 
1608  |  |  | 
// Function table for the UTF-8 text provider.
//   utext_openUTF8() installs a pointer to this table in UText.pFuncs.
//   The replace and copy slots are NULL; no such functions are supplied
//   for UTF-8 text.
static const struct UTextFuncs utf8Funcs =
{
    sizeof(UTextFuncs),
    0, 0, 0,             // Reserved alignment padding
    utf8TextClone,
    utf8TextLength,
    utf8TextAccess,
    utf8TextExtract,
    NULL,                /* replace*/
    NULL,                /* copy   */
    utf8TextMapOffsetToNative,
    utf8TextMapIndexToUTF16,
    utf8TextClose,
    NULL,                // spare 1
    NULL,                // spare 2
    NULL                 // spare 3
};
1627  |  |  | 
1628  |  | static const char gEmptyString[] = {0}; | 
1629  |  |  | 
1630  |  | U_CAPI UText * U_EXPORT2  | 
1631  | 0  | utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status) { | 
1632  | 0  |     if(U_FAILURE(*status)) { | 
1633  | 0  |         return NULL;  | 
1634  | 0  |     }  | 
1635  | 0  |     if(s==NULL && length==0) { | 
1636  | 0  |         s = gEmptyString;  | 
1637  | 0  |     }  | 
1638  |  | 
  | 
1639  | 0  |     if(s==NULL || length<-1 || length>INT32_MAX) { | 
1640  | 0  |         *status=U_ILLEGAL_ARGUMENT_ERROR;  | 
1641  | 0  |         return NULL;  | 
1642  | 0  |     }  | 
1643  |  |  | 
1644  | 0  |     ut = utext_setup(ut, sizeof(UTF8Buf) * 2, status);  | 
1645  | 0  |     if (U_FAILURE(*status)) { | 
1646  | 0  |         return ut;  | 
1647  | 0  |     }  | 
1648  |  |  | 
1649  | 0  |     ut->pFuncs  = &utf8Funcs;  | 
1650  | 0  |     ut->context = s;  | 
1651  | 0  |     ut->b       = (int32_t)length;  | 
1652  | 0  |     ut->c       = (int32_t)length;  | 
1653  | 0  |     if (ut->c < 0) { | 
1654  | 0  |         ut->c = 0;  | 
1655  | 0  |         ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);  | 
1656  | 0  |     }  | 
1657  | 0  |     ut->p = ut->pExtra;  | 
1658  | 0  |     ut->q = (char *)ut->pExtra + sizeof(UTF8Buf);  | 
1659  | 0  |     return ut;  | 
1660  |  | 
  | 
1661  | 0  | }  | 
1662  |  |  | 
1663  |  |  | 
1664  |  |  | 
1665  |  |  | 
1666  |  |  | 
1667  |  |  | 
1668  |  |  | 
1669  |  |  | 
1670  |  | //------------------------------------------------------------------------------  | 
1671  |  | //  | 
1672  |  | //     UText implementation wrapper for Replaceable (read/write)  | 
1673  |  | //  | 
1674  |  | //         Use of UText data members:  | 
1675  |  | //            context    pointer to Replaceable.  | 
1676  |  | //            p          pointer to Replaceable if it is owned by the UText.  | 
1677  |  | //  | 
1678  |  | //------------------------------------------------------------------------------  | 
1679  |  |  | 
1680  |  |  | 
1681  |  |  | 
// Number of UChars requested from the Replaceable for each chunk.
// minimum chunk size for this implementation: 3
// to allow for possible trimming for code point boundaries
enum { REP_TEXT_CHUNK_SIZE=10 };

// Extra storage for a Replaceable UText; repTextAccess() uses UText.pExtra
// as a ReplExtra and points UText.chunkContents into s[].
struct ReplExtra {
    /*
     * Chunk UChars.
     * +1 to simplify filling with surrogate pair at the end.
     */
    UChar s[REP_TEXT_CHUNK_SIZE+1];
};
1693  |  |  | 
1694  |  |  | 
1695  |  | U_CDECL_BEGIN  | 
1696  |  |  | 
1697  |  | static UText * U_CALLCONV  | 
1698  | 0  | repTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) { | 
1699  |  |     // First do a generic shallow clone.  Does everything needed for the UText struct itself.  | 
1700  | 0  |     dest = shallowTextClone(dest, src, status);  | 
1701  |  |  | 
1702  |  |     // For deep clones, make a copy of the Replaceable.  | 
1703  |  |     //  The copied Replaceable storage is owned by the newly created UText clone.  | 
1704  |  |     //  A non-NULL pointer in UText.p is the signal to the close() function to delete  | 
1705  |  |     //    it.  | 
1706  |  |     //  | 
1707  | 0  |     if (deep && U_SUCCESS(*status)) { | 
1708  | 0  |         const Replaceable *replSrc = (const Replaceable *)src->context;  | 
1709  | 0  |         dest->context = replSrc->clone();  | 
1710  | 0  |         dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);  | 
1711  |  |  | 
1712  |  |         // with deep clone, the copy is writable, even when the source is not.  | 
1713  | 0  |         dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);  | 
1714  | 0  |     }  | 
1715  | 0  |     return dest;  | 
1716  | 0  | }  | 
1717  |  |  | 
1718  |  |  | 
1719  |  | static void U_CALLCONV  | 
1720  | 0  | repTextClose(UText *ut) { | 
1721  |  |     // Most of the work of close is done by the generic UText framework close.  | 
1722  |  |     // All that needs to be done here is delete the Replaceable if the UText  | 
1723  |  |     //  owns it.  This occurs if the UText was created by cloning.  | 
1724  | 0  |     if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { | 
1725  | 0  |         Replaceable *rep = (Replaceable *)ut->context;  | 
1726  | 0  |         delete rep;  | 
1727  | 0  |         ut->context = NULL;  | 
1728  | 0  |     }  | 
1729  | 0  | }  | 
1730  |  |  | 
1731  |  |  | 
1732  |  | static int64_t U_CALLCONV  | 
1733  | 0  | repTextLength(UText *ut) { | 
1734  | 0  |     const Replaceable *replSrc = (const Replaceable *)ut->context;  | 
1735  | 0  |     int32_t  len = replSrc->length();  | 
1736  | 0  |     return len;  | 
1737  | 0  | }  | 
1738  |  |  | 
1739  |  |  | 
1740  |  | static UBool U_CALLCONV  | 
1741  | 0  | repTextAccess(UText *ut, int64_t index, UBool forward) { | 
1742  | 0  |     const Replaceable *rep=(const Replaceable *)ut->context;  | 
1743  | 0  |     int32_t length=rep->length();   // Full length of the input text (bigger than a chunk)  | 
1744  |  |  | 
1745  |  |     // clip the requested index to the limits of the text.  | 
1746  | 0  |     int32_t index32 = pinIndex(index, length);  | 
1747  | 0  |     U_ASSERT(index<=INT32_MAX);  | 
1748  |  |  | 
1749  |  |  | 
1750  |  |     /*  | 
1751  |  |      * Compute start/limit boundaries around index, for a segment of text  | 
1752  |  |      * to be extracted.  | 
1753  |  |      * To allow for the possibility that our user gave an index to the trailing  | 
1754  |  |      * half of a surrogate pair, we must request one extra preceding UChar when  | 
1755  |  |      * going in the forward direction.  This will ensure that the buffer has the  | 
1756  |  |      * entire code point at the specified index.  | 
1757  |  |      */  | 
1758  | 0  |     if(forward) { | 
1759  |  | 
  | 
1760  | 0  |         if (index32>=ut->chunkNativeStart && index32<ut->chunkNativeLimit) { | 
1761  |  |             // Buffer already contains the requested position.  | 
1762  | 0  |             ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);  | 
1763  | 0  |             return TRUE;  | 
1764  | 0  |         }  | 
1765  | 0  |         if (index32>=length && ut->chunkNativeLimit==length) { | 
1766  |  |             // Request for end of string, and buffer already extends up to it.  | 
1767  |  |             // Can't get the data, but don't change the buffer.  | 
1768  | 0  |             ut->chunkOffset = length - (int32_t)ut->chunkNativeStart;  | 
1769  | 0  |             return FALSE;  | 
1770  | 0  |         }  | 
1771  |  |  | 
1772  | 0  |         ut->chunkNativeLimit = index + REP_TEXT_CHUNK_SIZE - 1;  | 
1773  |  |         // Going forward, so we want to have the buffer with stuff at and beyond  | 
1774  |  |         //   the requested index.  The -1 gets us one code point before the  | 
1775  |  |         //   requested index also, to handle the case of the index being on  | 
1776  |  |         //   a trail surrogate of a surrogate pair.  | 
1777  | 0  |         if(ut->chunkNativeLimit > length) { | 
1778  | 0  |             ut->chunkNativeLimit = length;  | 
1779  | 0  |         }  | 
1780  |  |         // unless buffer ran off end, start is index-1.  | 
1781  | 0  |         ut->chunkNativeStart = ut->chunkNativeLimit - REP_TEXT_CHUNK_SIZE;  | 
1782  | 0  |         if(ut->chunkNativeStart < 0) { | 
1783  | 0  |             ut->chunkNativeStart = 0;  | 
1784  | 0  |         }  | 
1785  | 0  |     } else { | 
1786  |  |         // Reverse iteration.  Fill buffer with data preceding the requested index.  | 
1787  | 0  |         if (index32>ut->chunkNativeStart && index32<=ut->chunkNativeLimit) { | 
1788  |  |             // Requested position already in buffer.  | 
1789  | 0  |             ut->chunkOffset = index32 - (int32_t)ut->chunkNativeStart;  | 
1790  | 0  |             return TRUE;  | 
1791  | 0  |         }  | 
1792  | 0  |         if (index32==0 && ut->chunkNativeStart==0) { | 
1793  |  |             // Request for start, buffer already begins at start.  | 
1794  |  |             //  No data, but keep the buffer as is.  | 
1795  | 0  |             ut->chunkOffset = 0;  | 
1796  | 0  |             return FALSE;  | 
1797  | 0  |         }  | 
1798  |  |  | 
1799  |  |         // Figure out the bounds of the chunk to extract for reverse iteration.  | 
1800  |  |         // Need to worry about chunk not splitting surrogate pairs, and while still  | 
1801  |  |         // containing the data we need.  | 
1802  |  |         // Fix by requesting a chunk that includes an extra UChar at the end.  | 
1803  |  |         // If this turns out to be a lead surrogate, we can lop it off and still have  | 
1804  |  |         //   the data we wanted.  | 
1805  | 0  |         ut->chunkNativeStart = index32 + 1 - REP_TEXT_CHUNK_SIZE;  | 
1806  | 0  |         if (ut->chunkNativeStart < 0) { | 
1807  | 0  |             ut->chunkNativeStart = 0;  | 
1808  | 0  |         }  | 
1809  |  | 
  | 
1810  | 0  |         ut->chunkNativeLimit = index32 + 1;  | 
1811  | 0  |         if (ut->chunkNativeLimit > length) { | 
1812  | 0  |             ut->chunkNativeLimit = length;  | 
1813  | 0  |         }  | 
1814  | 0  |     }  | 
1815  |  |  | 
1816  |  |     // Extract the new chunk of text from the Replaceable source.  | 
1817  | 0  |     ReplExtra *ex = (ReplExtra *)ut->pExtra;  | 
1818  |  |     // UnicodeString with its buffer a writable alias to the chunk buffer  | 
1819  | 0  |     UnicodeString buffer(ex->s, 0 /*buffer length*/, REP_TEXT_CHUNK_SIZE /*buffer capacity*/);  | 
1820  | 0  |     rep->extractBetween((int32_t)ut->chunkNativeStart, (int32_t)ut->chunkNativeLimit, buffer);  | 
1821  |  | 
  | 
1822  | 0  |     ut->chunkContents  = ex->s;  | 
1823  | 0  |     ut->chunkLength    = (int32_t)(ut->chunkNativeLimit - ut->chunkNativeStart);  | 
1824  | 0  |     ut->chunkOffset    = (int32_t)(index32 - ut->chunkNativeStart);  | 
1825  |  |  | 
1826  |  |     // Surrogate pairs from the input text must not span chunk boundaries.  | 
1827  |  |     // If end of chunk could be the start of a surrogate, trim it off.  | 
1828  | 0  |     if (ut->chunkNativeLimit < length &&  | 
1829  | 0  |         U16_IS_LEAD(ex->s[ut->chunkLength-1])) { | 
1830  | 0  |             ut->chunkLength--;  | 
1831  | 0  |             ut->chunkNativeLimit--;  | 
1832  | 0  |             if (ut->chunkOffset > ut->chunkLength) { | 
1833  | 0  |                 ut->chunkOffset = ut->chunkLength;  | 
1834  | 0  |             }  | 
1835  | 0  |         }  | 
1836  |  |  | 
1837  |  |     // if the first UChar in the chunk could be the trailing half of a surrogate pair,  | 
1838  |  |     // trim it off.  | 
1839  | 0  |     if(ut->chunkNativeStart>0 && U16_IS_TRAIL(ex->s[0])) { | 
1840  | 0  |         ++(ut->chunkContents);  | 
1841  | 0  |         ++(ut->chunkNativeStart);  | 
1842  | 0  |         --(ut->chunkLength);  | 
1843  | 0  |         --(ut->chunkOffset);  | 
1844  | 0  |     }  | 
1845  |  |  | 
1846  |  |     // adjust the index/chunkOffset to a code point boundary  | 
1847  | 0  |     U16_SET_CP_START(ut->chunkContents, 0, ut->chunkOffset);  | 
1848  |  |  | 
1849  |  |     // Use fast indexing for get/setNativeIndex()  | 
1850  | 0  |     ut->nativeIndexingLimit = ut->chunkLength;  | 
1851  |  | 
  | 
1852  | 0  |     return TRUE;  | 
1853  | 0  | }  | 
1854  |  |  | 
1855  |  |  | 
1856  |  |  | 
1857  |  | static int32_t U_CALLCONV  | 
1858  |  | repTextExtract(UText *ut,  | 
1859  |  |                int64_t start, int64_t limit,  | 
1860  |  |                UChar *dest, int32_t destCapacity,  | 
1861  | 0  |                UErrorCode *status) { | 
1862  | 0  |     const Replaceable *rep=(const Replaceable *)ut->context;  | 
1863  | 0  |     int32_t  length=rep->length();  | 
1864  |  | 
  | 
1865  | 0  |     if(U_FAILURE(*status)) { | 
1866  | 0  |         return 0;  | 
1867  | 0  |     }  | 
1868  | 0  |     if(destCapacity<0 || (dest==NULL && destCapacity>0)) { | 
1869  | 0  |         *status=U_ILLEGAL_ARGUMENT_ERROR;  | 
1870  | 0  |     }  | 
1871  | 0  |     if(start>limit) { | 
1872  | 0  |         *status=U_INDEX_OUTOFBOUNDS_ERROR;  | 
1873  | 0  |         return 0;  | 
1874  | 0  |     }  | 
1875  |  |  | 
1876  | 0  |     int32_t  start32 = pinIndex(start, length);  | 
1877  | 0  |     int32_t  limit32 = pinIndex(limit, length);  | 
1878  |  |  | 
1879  |  |     // adjust start, limit if they point to trail half of surrogates  | 
1880  | 0  |     if (start32<length && U16_IS_TRAIL(rep->charAt(start32)) &&  | 
1881  | 0  |         U_IS_SUPPLEMENTARY(rep->char32At(start32))){ | 
1882  | 0  |             start32--;  | 
1883  | 0  |     }  | 
1884  | 0  |     if (limit32<length && U16_IS_TRAIL(rep->charAt(limit32)) &&  | 
1885  | 0  |         U_IS_SUPPLEMENTARY(rep->char32At(limit32))){ | 
1886  | 0  |             limit32--;  | 
1887  | 0  |     }  | 
1888  |  | 
  | 
1889  | 0  |     length=limit32-start32;  | 
1890  | 0  |     if(length>destCapacity) { | 
1891  | 0  |         limit32 = start32 + destCapacity;  | 
1892  | 0  |     }  | 
1893  | 0  |     UnicodeString buffer(dest, 0, destCapacity); // writable alias  | 
1894  | 0  |     rep->extractBetween(start32, limit32, buffer);  | 
1895  | 0  |     repTextAccess(ut, limit32, TRUE);  | 
1896  |  | 
  | 
1897  | 0  |     return u_terminateUChars(dest, destCapacity, length, status);  | 
1898  | 0  | }  | 
1899  |  |  | 
1900  |  | static int32_t U_CALLCONV  | 
1901  |  | repTextReplace(UText *ut,  | 
1902  |  |                int64_t start, int64_t limit,  | 
1903  |  |                const UChar *src, int32_t length,  | 
1904  | 0  |                UErrorCode *status) { | 
1905  | 0  |     Replaceable *rep=(Replaceable *)ut->context;  | 
1906  | 0  |     int32_t oldLength;  | 
1907  |  | 
  | 
1908  | 0  |     if(U_FAILURE(*status)) { | 
1909  | 0  |         return 0;  | 
1910  | 0  |     }  | 
1911  | 0  |     if(src==NULL && length!=0) { | 
1912  | 0  |         *status=U_ILLEGAL_ARGUMENT_ERROR;  | 
1913  | 0  |         return 0;  | 
1914  | 0  |     }  | 
1915  | 0  |     oldLength=rep->length(); // will subtract from new length  | 
1916  | 0  |     if(start>limit ) { | 
1917  | 0  |         *status=U_INDEX_OUTOFBOUNDS_ERROR;  | 
1918  | 0  |         return 0;  | 
1919  | 0  |     }  | 
1920  |  |  | 
1921  | 0  |     int32_t start32 = pinIndex(start, oldLength);  | 
1922  | 0  |     int32_t limit32 = pinIndex(limit, oldLength);  | 
1923  |  |  | 
1924  |  |     // Snap start & limit to code point boundaries.  | 
1925  | 0  |     if (start32<oldLength && U16_IS_TRAIL(rep->charAt(start32)) &&  | 
1926  | 0  |         start32>0 && U16_IS_LEAD(rep->charAt(start32-1)))  | 
1927  | 0  |     { | 
1928  | 0  |             start32--;  | 
1929  | 0  |     }  | 
1930  | 0  |     if (limit32<oldLength && U16_IS_LEAD(rep->charAt(limit32-1)) &&  | 
1931  | 0  |         U16_IS_TRAIL(rep->charAt(limit32)))  | 
1932  | 0  |     { | 
1933  | 0  |             limit32++;  | 
1934  | 0  |     }  | 
1935  |  |  | 
1936  |  |     // Do the actual replace operation using methods of the Replaceable class  | 
1937  | 0  |     UnicodeString replStr((UBool)(length<0), src, length); // read-only alias  | 
1938  | 0  |     rep->handleReplaceBetween(start32, limit32, replStr);  | 
1939  | 0  |     int32_t newLength = rep->length();  | 
1940  | 0  |     int32_t lengthDelta = newLength - oldLength;  | 
1941  |  |  | 
1942  |  |     // Is the UText chunk buffer OK?  | 
1943  | 0  |     if (ut->chunkNativeLimit > start32) { | 
1944  |  |         // this replace operation may have impacted the current chunk.  | 
1945  |  |         // invalidate it, which will force a reload on the next access.  | 
1946  | 0  |         invalidateChunk(ut);  | 
1947  | 0  |     }  | 
1948  |  |  | 
1949  |  |     // set the iteration position to the end of the newly inserted replacement text.  | 
1950  | 0  |     int32_t newIndexPos = limit32 + lengthDelta;  | 
1951  | 0  |     repTextAccess(ut, newIndexPos, TRUE);  | 
1952  |  | 
  | 
1953  | 0  |     return lengthDelta;  | 
1954  | 0  | }  | 
1955  |  |  | 
1956  |  |  | 
1957  |  | static void U_CALLCONV  | 
1958  |  | repTextCopy(UText *ut,  | 
1959  |  |                 int64_t start, int64_t limit,  | 
1960  |  |                 int64_t destIndex,  | 
1961  |  |                 UBool move,  | 
1962  |  |                 UErrorCode *status)  | 
1963  | 0  | { | 
1964  | 0  |     Replaceable *rep=(Replaceable *)ut->context;  | 
1965  | 0  |     int32_t length=rep->length();  | 
1966  |  | 
  | 
1967  | 0  |     if(U_FAILURE(*status)) { | 
1968  | 0  |         return;  | 
1969  | 0  |     }  | 
1970  | 0  |     if (start>limit || (start<destIndex && destIndex<limit))  | 
1971  | 0  |     { | 
1972  | 0  |         *status=U_INDEX_OUTOFBOUNDS_ERROR;  | 
1973  | 0  |         return;  | 
1974  | 0  |     }  | 
1975  |  |  | 
1976  | 0  |     int32_t start32     = pinIndex(start, length);  | 
1977  | 0  |     int32_t limit32     = pinIndex(limit, length);  | 
1978  | 0  |     int32_t destIndex32 = pinIndex(destIndex, length);  | 
1979  |  |  | 
1980  |  |     // TODO:  snap input parameters to code point boundaries.  | 
1981  |  | 
  | 
1982  | 0  |     if(move) { | 
1983  |  |         // move: copy to destIndex, then replace original with nothing  | 
1984  | 0  |         int32_t segLength=limit32-start32;  | 
1985  | 0  |         rep->copy(start32, limit32, destIndex32);  | 
1986  | 0  |         if(destIndex32<start32) { | 
1987  | 0  |             start32+=segLength;  | 
1988  | 0  |             limit32+=segLength;  | 
1989  | 0  |         }  | 
1990  | 0  |         rep->handleReplaceBetween(start32, limit32, UnicodeString());  | 
1991  | 0  |     } else { | 
1992  |  |         // copy  | 
1993  | 0  |         rep->copy(start32, limit32, destIndex32);  | 
1994  | 0  |     }  | 
1995  |  |  | 
1996  |  |     // If the change to the text touched the region in the chunk buffer,  | 
1997  |  |     //  invalidate the buffer.  | 
1998  | 0  |     int32_t firstAffectedIndex = destIndex32;  | 
1999  | 0  |     if (move && start32<firstAffectedIndex) { | 
2000  | 0  |         firstAffectedIndex = start32;  | 
2001  | 0  |     }  | 
2002  | 0  |     if (firstAffectedIndex < ut->chunkNativeLimit) { | 
2003  |  |         // changes may have affected range covered by the chunk  | 
2004  | 0  |         invalidateChunk(ut);  | 
2005  | 0  |     }  | 
2006  |  |  | 
2007  |  |     // Put iteration position at the newly inserted (moved) block,  | 
2008  | 0  |     int32_t  nativeIterIndex = destIndex32 + limit32 - start32;  | 
2009  | 0  |     if (move && destIndex32>start32) { | 
2010  |  |         // moved a block of text towards the end of the string.  | 
2011  | 0  |         nativeIterIndex = destIndex32;  | 
2012  | 0  |     }  | 
2013  |  |  | 
2014  |  |     // Set position, reload chunk if needed.  | 
2015  | 0  |     repTextAccess(ut, nativeIterIndex, TRUE);  | 
2016  | 0  | }  | 
2017  |  |  | 
// Provider function table for UText objects that wrap a Replaceable.
// utext_openReplaceable() installs a pointer to this table in ut->pFuncs.
// Slots left NULL are not implemented by this provider.
static const struct UTextFuncs repFuncs =
{
    sizeof(UTextFuncs),
    0, 0, 0,           // Reserved alignment padding
    repTextClone,
    repTextLength,
    repTextAccess,
    repTextExtract,
    repTextReplace,
    repTextCopy,
    NULL,              // MapOffsetToNative,
    NULL,              // MapIndexToUTF16,
    repTextClose,
    NULL,              // spare 1
    NULL,              // spare 2
    NULL               // spare 3
};
2035  |  |  | 
2036  |  |  | 
2037  |  | U_CAPI UText * U_EXPORT2  | 
2038  |  | utext_openReplaceable(UText *ut, Replaceable *rep, UErrorCode *status)  | 
2039  | 0  | { | 
2040  | 0  |     if(U_FAILURE(*status)) { | 
2041  | 0  |         return NULL;  | 
2042  | 0  |     }  | 
2043  | 0  |     if(rep==NULL) { | 
2044  | 0  |         *status=U_ILLEGAL_ARGUMENT_ERROR;  | 
2045  | 0  |         return NULL;  | 
2046  | 0  |     }  | 
2047  | 0  |     ut = utext_setup(ut, sizeof(ReplExtra), status);  | 
2048  | 0  |     if(U_FAILURE(*status)) { | 
2049  | 0  |         return ut;  | 
2050  | 0  |     }  | 
2051  |  |  | 
2052  | 0  |     ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_WRITABLE);  | 
2053  | 0  |     if(rep->hasMetaData()) { | 
2054  | 0  |         ut->providerProperties |=I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA);  | 
2055  | 0  |     }  | 
2056  |  | 
  | 
2057  | 0  |     ut->pFuncs  = &repFuncs;  | 
2058  | 0  |     ut->context =  rep;  | 
2059  | 0  |     return ut;  | 
2060  | 0  | }  | 
2061  |  |  | 
2062  |  | U_CDECL_END  | 
2063  |  |  | 
2064  |  |  | 
2065  |  |  | 
2066  |  |  | 
2067  |  |  | 
2068  |  |  | 
2069  |  |  | 
2070  |  |  | 
2071  |  | //------------------------------------------------------------------------------  | 
2072  |  | //  | 
2073  |  | //     UText implementation for UnicodeString (read/write)  and  | 
2074  |  | //                    for const UnicodeString (read only)  | 
2075  |  | //             (same implementation, only the flags are different)  | 
2076  |  | //  | 
2077  |  | //         Use of UText data members:  | 
2078  |  | //            context    pointer to UnicodeString  | 
2079  |  | //            providerProperties  UTEXT_PROVIDER_OWNS_TEXT is set IF this UText owns the  | 
2080  |  | //                       UnicodeString (context) and must delete it on close().  | 
2081  |  | //  | 
2082  |  | //------------------------------------------------------------------------------  | 
2083  |  |  | 
2084  |  | U_CDECL_BEGIN  | 
2085  |  |  | 
2086  |  |  | 
2087  |  | static UText * U_CALLCONV  | 
2088  | 0  | unistrTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) { | 
2089  |  |     // First do a generic shallow clone.  Does everything needed for the UText struct itself.  | 
2090  | 0  |     dest = shallowTextClone(dest, src, status);  | 
2091  |  |  | 
2092  |  |     // For deep clones, make a copy of the UnicodeSring.  | 
2093  |  |     //  The copied UnicodeString storage is owned by the newly created UText clone.  | 
2094  |  |     //  A non-NULL pointer in UText.p is the signal to the close() function to delete  | 
2095  |  |     //    the UText.  | 
2096  |  |     //  | 
2097  | 0  |     if (deep && U_SUCCESS(*status)) { | 
2098  | 0  |         const UnicodeString *srcString = (const UnicodeString *)src->context;  | 
2099  | 0  |         dest->context = new UnicodeString(*srcString);  | 
2100  | 0  |         dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);  | 
2101  |  |  | 
2102  |  |         // with deep clone, the copy is writable, even when the source is not.  | 
2103  | 0  |         dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);  | 
2104  | 0  |     }  | 
2105  | 0  |     return dest;  | 
2106  | 0  | }  | 
2107  |  |  | 
2108  |  | static void U_CALLCONV  | 
2109  | 0  | unistrTextClose(UText *ut) { | 
2110  |  |     // Most of the work of close is done by the generic UText framework close.  | 
2111  |  |     // All that needs to be done here is delete the UnicodeString if the UText  | 
2112  |  |     //  owns it.  This occurs if the UText was created by cloning.  | 
2113  | 0  |     if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { | 
2114  | 0  |         UnicodeString *str = (UnicodeString *)ut->context;  | 
2115  | 0  |         delete str;  | 
2116  | 0  |         ut->context = NULL;  | 
2117  | 0  |     }  | 
2118  | 0  | }  | 
2119  |  |  | 
2120  |  |  | 
2121  |  | static int64_t U_CALLCONV  | 
2122  | 0  | unistrTextLength(UText *t) { | 
2123  | 0  |     return ((const UnicodeString *)t->context)->length();  | 
2124  | 0  | }  | 
2125  |  |  | 
2126  |  |  | 
2127  |  | static UBool U_CALLCONV  | 
2128  | 0  | unistrTextAccess(UText *ut, int64_t index, UBool  forward) { | 
2129  | 0  |     int32_t length  = ut->chunkLength;  | 
2130  | 0  |     ut->chunkOffset = pinIndex(index, length);  | 
2131  |  |  | 
2132  |  |     // Check whether request is at the start or end  | 
2133  | 0  |     UBool retVal = (forward && index<length) || (!forward && index>0);  | 
2134  | 0  |     return retVal;  | 
2135  | 0  | }  | 
2136  |  |  | 
2137  |  |  | 
2138  |  |  | 
2139  |  | static int32_t U_CALLCONV  | 
2140  |  | unistrTextExtract(UText *t,  | 
2141  |  |                   int64_t start, int64_t limit,  | 
2142  |  |                   UChar *dest, int32_t destCapacity,  | 
2143  | 0  |                   UErrorCode *pErrorCode) { | 
2144  | 0  |     const UnicodeString *us=(const UnicodeString *)t->context;  | 
2145  | 0  |     int32_t length=us->length();  | 
2146  |  | 
  | 
2147  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
2148  | 0  |         return 0;  | 
2149  | 0  |     }  | 
2150  | 0  |     if(destCapacity<0 || (dest==NULL && destCapacity>0)) { | 
2151  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
2152  | 0  |     }  | 
2153  | 0  |     if(start<0 || start>limit) { | 
2154  | 0  |         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
2155  | 0  |         return 0;  | 
2156  | 0  |     }  | 
2157  |  |  | 
2158  | 0  |     int32_t start32 = start<length ? us->getChar32Start((int32_t)start) : length;  | 
2159  | 0  |     int32_t limit32 = limit<length ? us->getChar32Start((int32_t)limit) : length;  | 
2160  |  | 
  | 
2161  | 0  |     length=limit32-start32;  | 
2162  | 0  |     if (destCapacity>0 && dest!=NULL) { | 
2163  | 0  |         int32_t trimmedLength = length;  | 
2164  | 0  |         if(trimmedLength>destCapacity) { | 
2165  | 0  |             trimmedLength=destCapacity;  | 
2166  | 0  |         }  | 
2167  | 0  |         us->extract(start32, trimmedLength, dest);  | 
2168  | 0  |         t->chunkOffset = start32+trimmedLength;  | 
2169  | 0  |     } else { | 
2170  | 0  |         t->chunkOffset = start32;  | 
2171  | 0  |     }  | 
2172  | 0  |     u_terminateUChars(dest, destCapacity, length, pErrorCode);  | 
2173  | 0  |     return length;  | 
2174  | 0  | }  | 
2175  |  |  | 
2176  |  | static int32_t U_CALLCONV  | 
2177  |  | unistrTextReplace(UText *ut,  | 
2178  |  |                   int64_t start, int64_t limit,  | 
2179  |  |                   const UChar *src, int32_t length,  | 
2180  | 0  |                   UErrorCode *pErrorCode) { | 
2181  | 0  |     UnicodeString *us=(UnicodeString *)ut->context;  | 
2182  | 0  |     int32_t oldLength;  | 
2183  |  | 
  | 
2184  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
2185  | 0  |         return 0;  | 
2186  | 0  |     }  | 
2187  | 0  |     if(src==NULL && length!=0) { | 
2188  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
2189  | 0  |     }  | 
2190  | 0  |     if(start>limit) { | 
2191  | 0  |         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
2192  | 0  |         return 0;  | 
2193  | 0  |     }  | 
2194  | 0  |     oldLength=us->length();  | 
2195  | 0  |     int32_t start32 = pinIndex(start, oldLength);  | 
2196  | 0  |     int32_t limit32 = pinIndex(limit, oldLength);  | 
2197  | 0  |     if (start32 < oldLength) { | 
2198  | 0  |         start32 = us->getChar32Start(start32);  | 
2199  | 0  |     }  | 
2200  | 0  |     if (limit32 < oldLength) { | 
2201  | 0  |         limit32 = us->getChar32Start(limit32);  | 
2202  | 0  |     }  | 
2203  |  |  | 
2204  |  |     // replace  | 
2205  | 0  |     us->replace(start32, limit32-start32, src, length);  | 
2206  | 0  |     int32_t newLength = us->length();  | 
2207  |  |  | 
2208  |  |     // Update the chunk description.  | 
2209  | 0  |     ut->chunkContents    = us->getBuffer();  | 
2210  | 0  |     ut->chunkLength      = newLength;  | 
2211  | 0  |     ut->chunkNativeLimit = newLength;  | 
2212  | 0  |     ut->nativeIndexingLimit = newLength;  | 
2213  |  |  | 
2214  |  |     // Set iteration position to the point just following the newly inserted text.  | 
2215  | 0  |     int32_t lengthDelta = newLength - oldLength;  | 
2216  | 0  |     ut->chunkOffset = limit32 + lengthDelta;  | 
2217  |  | 
  | 
2218  | 0  |     return lengthDelta;  | 
2219  | 0  | }  | 
2220  |  |  | 
2221  |  | static void U_CALLCONV  | 
2222  |  | unistrTextCopy(UText *ut,  | 
2223  |  |                int64_t start, int64_t limit,  | 
2224  |  |                int64_t destIndex,  | 
2225  |  |                UBool move,  | 
2226  | 0  |                UErrorCode *pErrorCode) { | 
2227  | 0  |     UnicodeString *us=(UnicodeString *)ut->context;  | 
2228  | 0  |     int32_t length=us->length();  | 
2229  |  | 
  | 
2230  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
2231  | 0  |         return;  | 
2232  | 0  |     }  | 
2233  | 0  |     int32_t start32 = pinIndex(start, length);  | 
2234  | 0  |     int32_t limit32 = pinIndex(limit, length);  | 
2235  | 0  |     int32_t destIndex32 = pinIndex(destIndex, length);  | 
2236  |  | 
  | 
2237  | 0  |     if( start32>limit32 || (start32<destIndex32 && destIndex32<limit32)) { | 
2238  | 0  |         *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;  | 
2239  | 0  |         return;  | 
2240  | 0  |     }  | 
2241  |  |  | 
2242  | 0  |     if(move) { | 
2243  |  |         // move: copy to destIndex, then remove original  | 
2244  | 0  |         int32_t segLength=limit32-start32;  | 
2245  | 0  |         us->copy(start32, limit32, destIndex32);  | 
2246  | 0  |         if(destIndex32<start32) { | 
2247  | 0  |             start32+=segLength;  | 
2248  | 0  |         }  | 
2249  | 0  |         us->remove(start32, segLength);  | 
2250  | 0  |     } else { | 
2251  |  |         // copy  | 
2252  | 0  |         us->copy(start32, limit32, destIndex32);  | 
2253  | 0  |     }  | 
2254  |  |  | 
2255  |  |     // update chunk description, set iteration position.  | 
2256  | 0  |     ut->chunkContents = us->getBuffer();  | 
2257  | 0  |     if (move==FALSE) { | 
2258  |  |         // copy operation, string length grows  | 
2259  | 0  |         ut->chunkLength += limit32-start32;  | 
2260  | 0  |         ut->chunkNativeLimit = ut->chunkLength;  | 
2261  | 0  |         ut->nativeIndexingLimit = ut->chunkLength;  | 
2262  | 0  |     }  | 
2263  |  |  | 
2264  |  |     // Iteration position to end of the newly inserted text.  | 
2265  | 0  |     ut->chunkOffset = destIndex32+limit32-start32;  | 
2266  | 0  |     if (move && destIndex32>start32) { | 
2267  | 0  |         ut->chunkOffset = destIndex32;  | 
2268  | 0  |     }  | 
2269  |  | 
  | 
2270  | 0  | }  | 
2271  |  |  | 
// Provider function table for UText objects that wrap a UnicodeString.
// utext_openConstUnicodeString() installs a pointer to this table in
// ut->pFuncs; the same table serves both read-only and writable UTexts.
// Slots left NULL are not implemented by this provider.
static const struct UTextFuncs unistrFuncs =
{
    sizeof(UTextFuncs),
    0, 0, 0,             // Reserved alignment padding
    unistrTextClone,
    unistrTextLength,
    unistrTextAccess,
    unistrTextExtract,
    unistrTextReplace,
    unistrTextCopy,
    NULL,                // MapOffsetToNative,
    NULL,                // MapIndexToUTF16,
    unistrTextClose,
    NULL,                // spare 1
    NULL,                // spare 2
    NULL                 // spare 3
};
2289  |  |  | 
2290  |  |  | 
2291  |  |  | 
2292  |  | U_CDECL_END  | 
2293  |  |  | 
2294  |  |  | 
2295  |  | U_CAPI UText * U_EXPORT2  | 
2296  | 0  | utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) { | 
2297  | 0  |     ut = utext_openConstUnicodeString(ut, s, status);  | 
2298  | 0  |     if (U_SUCCESS(*status)) { | 
2299  | 0  |         ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);  | 
2300  | 0  |     }  | 
2301  | 0  |     return ut;  | 
2302  | 0  | }  | 
2303  |  |  | 
2304  |  |  | 
2305  |  |  | 
2306  |  | U_CAPI UText * U_EXPORT2  | 
2307  | 0  | utext_openConstUnicodeString(UText *ut, const UnicodeString *s, UErrorCode *status) { | 
2308  | 0  |     if (U_SUCCESS(*status) && s->isBogus()) { | 
2309  |  |         // The UnicodeString is bogus, but we still need to detach the UText  | 
2310  |  |         //   from whatever it was hooked to before, if anything.  | 
2311  | 0  |         utext_openUChars(ut, NULL, 0, status);  | 
2312  | 0  |         *status = U_ILLEGAL_ARGUMENT_ERROR;  | 
2313  | 0  |         return ut;  | 
2314  | 0  |     }  | 
2315  | 0  |     ut = utext_setup(ut, 0, status);  | 
2316  |  |     //    note:  use the standard (writable) function table for UnicodeString.  | 
2317  |  |     //           The flag settings disable writing, so having the functions in  | 
2318  |  |     //           the table is harmless.  | 
2319  | 0  |     if (U_SUCCESS(*status)) { | 
2320  | 0  |         ut->pFuncs              = &unistrFuncs;  | 
2321  | 0  |         ut->context             = s;  | 
2322  | 0  |         ut->providerProperties  = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);  | 
2323  | 0  |         ut->chunkContents       = s->getBuffer();  | 
2324  | 0  |         ut->chunkLength         = s->length();  | 
2325  | 0  |         ut->chunkNativeStart    = 0;  | 
2326  | 0  |         ut->chunkNativeLimit    = ut->chunkLength;  | 
2327  | 0  |         ut->nativeIndexingLimit = ut->chunkLength;  | 
2328  | 0  |     }  | 
2329  | 0  |     return ut;  | 
2330  | 0  | }  | 
2331  |  |  | 
2332  |  | //------------------------------------------------------------------------------  | 
2333  |  | //  | 
2334  |  | //     UText implementation for const UChar * strings  | 
2335  |  | //  | 
2336  |  | //         Use of UText data members:  | 
2337  |  | //            context    pointer to the (const UChar *) string  | 
2338  |  | //            a          length.  -1 if not yet known.  | 
2339  |  | //  | 
2340  |  | //         TODO:  support 64 bit lengths.  | 
2341  |  | //  | 
2342  |  | //------------------------------------------------------------------------------  | 
2343  |  |  | 
2344  |  | U_CDECL_BEGIN  | 
2345  |  |  | 
2346  |  |  | 
2347  |  | static UText * U_CALLCONV  | 
2348  | 0  | ucstrTextClone(UText *dest, const UText * src, UBool deep, UErrorCode * status) { | 
2349  |  |     // First do a generic shallow clone.  | 
2350  | 0  |     dest = shallowTextClone(dest, src, status);  | 
2351  |  |  | 
2352  |  |     // For deep clones, make a copy of the string.  | 
2353  |  |     //  The copied storage is owned by the newly created clone.  | 
2354  |  |     //  A non-NULL pointer in UText.p is the signal to the close() function to delete  | 
2355  |  |     //    it.  | 
2356  |  |     //  | 
2357  | 0  |     if (deep && U_SUCCESS(*status)) { | 
2358  | 0  |         U_ASSERT(utext_nativeLength(dest) < INT32_MAX);  | 
2359  | 0  |         int32_t  len = (int32_t)utext_nativeLength(dest);  | 
2360  |  |  | 
2361  |  |         // The cloned string IS going to be NUL terminated, whether or not the original was.  | 
2362  | 0  |         const UChar *srcStr = (const UChar *)src->context;  | 
2363  | 0  |         UChar *copyStr = (UChar *)uprv_malloc((len+1) * sizeof(UChar));  | 
2364  | 0  |         if (copyStr == NULL) { | 
2365  | 0  |             *status = U_MEMORY_ALLOCATION_ERROR;  | 
2366  | 0  |         } else { | 
2367  | 0  |             int64_t i;  | 
2368  | 0  |             for (i=0; i<len; i++) { | 
2369  | 0  |                 copyStr[i] = srcStr[i];  | 
2370  | 0  |             }  | 
2371  | 0  |             copyStr[len] = 0;  | 
2372  | 0  |             dest->context = copyStr;  | 
2373  | 0  |             dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);  | 
2374  | 0  |         }  | 
2375  | 0  |     }  | 
2376  | 0  |     return dest;  | 
2377  | 0  | }  | 
2378  |  |  | 
2379  |  |  | 
2380  |  | static void U_CALLCONV  | 
2381  | 0  | ucstrTextClose(UText *ut) { | 
2382  |  |     // Most of the work of close is done by the generic UText framework close.  | 
2383  |  |     // All that needs to be done here is delete the string if the UText  | 
2384  |  |     //  owns it.  This occurs if the UText was created by cloning.  | 
2385  | 0  |     if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) { | 
2386  | 0  |         UChar *s = (UChar *)ut->context;  | 
2387  | 0  |         uprv_free(s);  | 
2388  | 0  |         ut->context = NULL;  | 
2389  | 0  |     }  | 
2390  | 0  | }  | 
2391  |  |  | 
2392  |  |  | 
2393  |  |  | 
2394  |  | static int64_t U_CALLCONV  | 
2395  | 0  | ucstrTextLength(UText *ut) { | 
2396  | 0  |     if (ut->a < 0) { | 
2397  |  |         // null terminated, we don't yet know the length. Scan for it.  | 
2398  |  |         //    Access is not convenient for doing this  | 
2399  |  |         //    because the current iteration position can't be changed.  | 
2400  | 0  |         const UChar  *str = (const UChar *)ut->context;  | 
2401  | 0  |         for (;;) { | 
2402  | 0  |             if (str[ut->chunkNativeLimit] == 0) { | 
2403  | 0  |                 break;  | 
2404  | 0  |             }  | 
2405  | 0  |             ut->chunkNativeLimit++;  | 
2406  | 0  |         }  | 
2407  | 0  |         ut->a = ut->chunkNativeLimit;  | 
2408  | 0  |         ut->chunkLength = (int32_t)ut->chunkNativeLimit;  | 
2409  | 0  |         ut->nativeIndexingLimit = ut->chunkLength;  | 
2410  | 0  |         ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);  | 
2411  | 0  |     }  | 
2412  | 0  |     return ut->a;  | 
2413  | 0  | }  | 
2414  |  |  | 
2415  |  |  | 
2416  |  | static UBool U_CALLCONV  | 
2417  | 0  | ucstrTextAccess(UText *ut, int64_t index, UBool  forward) { | 
2418  | 0  |     const UChar *str   = (const UChar *)ut->context;  | 
2419  |  |  | 
2420  |  |     // pin the requested index to the bounds of the string,  | 
2421  |  |     //  and set current iteration position.  | 
2422  | 0  |     if (index<0) { | 
2423  | 0  |         index = 0;  | 
2424  | 0  |     } else if (index < ut->chunkNativeLimit) { | 
2425  |  |         // The request data is within the chunk as it is known so far.  | 
2426  |  |         // Put index on a code point boundary.  | 
2427  | 0  |         U16_SET_CP_START(str, 0, index);  | 
2428  | 0  |     } else if (ut->a >= 0) { | 
2429  |  |         // We know the length of this string, and the user is requesting something  | 
2430  |  |         // at or beyond the length.  Pin the requested index to the length.  | 
2431  | 0  |         index = ut->a;  | 
2432  | 0  |     } else { | 
2433  |  |         // Null terminated string, length not yet known, and the requested index  | 
2434  |  |         //  is beyond where we have scanned so far.  | 
2435  |  |         //  Scan to 32 UChars beyond the requested index.  The strategy here is  | 
2436  |  |         //  to avoid fully scanning a long string when the caller only wants to  | 
2437  |  |         //  see a few characters at its beginning.  | 
2438  | 0  |         int32_t scanLimit = (int32_t)index + 32;  | 
2439  | 0  |         if ((index + 32)>INT32_MAX || (index + 32)<0 ) {   // note: int64 expression | 
2440  | 0  |             scanLimit = INT32_MAX;  | 
2441  | 0  |         }  | 
2442  |  | 
  | 
2443  | 0  |         int32_t chunkLimit = (int32_t)ut->chunkNativeLimit;  | 
2444  | 0  |         for (; chunkLimit<scanLimit; chunkLimit++) { | 
2445  | 0  |             if (str[chunkLimit] == 0) { | 
2446  |  |                 // We found the end of the string.  Remember it, pin the requested index to it,  | 
2447  |  |                 //  and bail out of here.  | 
2448  | 0  |                 ut->a = chunkLimit;  | 
2449  | 0  |                 ut->chunkLength = chunkLimit;  | 
2450  | 0  |                 ut->nativeIndexingLimit = chunkLimit;  | 
2451  | 0  |                 if (index >= chunkLimit) { | 
2452  | 0  |                     index = chunkLimit;  | 
2453  | 0  |                 } else { | 
2454  | 0  |                     U16_SET_CP_START(str, 0, index);  | 
2455  | 0  |                 }  | 
2456  |  | 
  | 
2457  | 0  |                 ut->chunkNativeLimit = chunkLimit;  | 
2458  | 0  |                 ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);  | 
2459  | 0  |                 goto breakout;  | 
2460  | 0  |             }  | 
2461  | 0  |         }  | 
2462  |  |         // We scanned through the next batch of UChars without finding the end.  | 
2463  | 0  |         U16_SET_CP_START(str, 0, index);  | 
2464  | 0  |         if (chunkLimit == INT32_MAX) { | 
2465  |  |             // Scanned to the limit of a 32 bit length.  | 
2466  |  |             // Forceably trim the overlength string back so length fits in int32  | 
2467  |  |             //  TODO:  add support for 64 bit strings.  | 
2468  | 0  |             ut->a = chunkLimit;  | 
2469  | 0  |             ut->chunkLength = chunkLimit;  | 
2470  | 0  |             ut->nativeIndexingLimit = chunkLimit;  | 
2471  | 0  |             if (index > chunkLimit) { | 
2472  | 0  |                 index = chunkLimit;  | 
2473  | 0  |             }  | 
2474  | 0  |             ut->chunkNativeLimit = chunkLimit;  | 
2475  | 0  |             ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);  | 
2476  | 0  |         } else { | 
2477  |  |             // The endpoint of a chunk must not be left in the middle of a surrogate pair.  | 
2478  |  |             // If the current end is on a lead surrogate, back the end up by one.  | 
2479  |  |             // It doesn't matter if the end char happens to be an unpaired surrogate,  | 
2480  |  |             //    and it's simpler not to worry about it.  | 
2481  | 0  |             if (U16_IS_LEAD(str[chunkLimit-1])) { | 
2482  | 0  |                 --chunkLimit;  | 
2483  | 0  |             }  | 
2484  |  |             // Null-terminated chunk with end still unknown.  | 
2485  |  |             // Update the chunk length to reflect what has been scanned thus far.  | 
2486  |  |             // That the full length is still unknown is (still) flagged by  | 
2487  |  |             //    ut->a being < 0.  | 
2488  | 0  |             ut->chunkNativeLimit = chunkLimit;  | 
2489  | 0  |             ut->nativeIndexingLimit = chunkLimit;  | 
2490  | 0  |             ut->chunkLength = chunkLimit;  | 
2491  | 0  |         }  | 
2492  |  | 
  | 
2493  | 0  |     }  | 
2494  | 0  | breakout:  | 
2495  | 0  |     U_ASSERT(index<=INT32_MAX);  | 
2496  | 0  |     ut->chunkOffset = (int32_t)index;  | 
2497  |  |  | 
2498  |  |     // Check whether request is at the start or end  | 
2499  | 0  |     UBool retVal = (forward && index<ut->chunkNativeLimit) || (!forward && index>0);  | 
2500  | 0  |     return retVal;  | 
2501  | 0  | }  | 
2502  |  |  | 
2503  |  |  | 
2504  |  |  | 
2505  |  | static int32_t U_CALLCONV  | 
2506  |  | ucstrTextExtract(UText *ut,  | 
2507  |  |                   int64_t start, int64_t limit,  | 
2508  |  |                   UChar *dest, int32_t destCapacity,  | 
2509  |  |                   UErrorCode *pErrorCode)  | 
2510  | 0  | { | 
2511  | 0  |     if(U_FAILURE(*pErrorCode)) { | 
2512  | 0  |         return 0;  | 
2513  | 0  |     }  | 
2514  | 0  |     if(destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) { | 
2515  | 0  |         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;  | 
2516  | 0  |         return 0;  | 
2517  | 0  |     }  | 
2518  |  |  | 
2519  |  |     //const UChar *s=(const UChar *)ut->context;  | 
2520  | 0  |     int32_t si, di;  | 
2521  |  | 
  | 
2522  | 0  |     int32_t start32;  | 
2523  | 0  |     int32_t limit32;  | 
2524  |  |  | 
2525  |  |     // Access the start.  Does two things we need:  | 
2526  |  |     //   Pins 'start' to the length of the string, if it came in out-of-bounds.  | 
2527  |  |     //   Snaps 'start' to the beginning of a code point.  | 
2528  | 0  |     ucstrTextAccess(ut, start, TRUE);  | 
2529  | 0  |     const UChar *s=ut->chunkContents;  | 
2530  | 0  |     start32 = ut->chunkOffset;  | 
2531  |  | 
  | 
2532  | 0  |     int32_t strLength=(int32_t)ut->a;  | 
2533  | 0  |     if (strLength >= 0) { | 
2534  | 0  |         limit32 = pinIndex(limit, strLength);  | 
2535  | 0  |     } else { | 
2536  | 0  |         limit32 = pinIndex(limit, INT32_MAX);  | 
2537  | 0  |     }  | 
2538  | 0  |     di = 0;  | 
2539  | 0  |     for (si=start32; si<limit32; si++) { | 
2540  | 0  |         if (strLength<0 && s[si]==0) { | 
2541  |  |             // Just hit the end of a null-terminated string.  | 
2542  | 0  |             ut->a = si;               // set string length for this UText  | 
2543  | 0  |             ut->chunkNativeLimit    = si;  | 
2544  | 0  |             ut->chunkLength         = si;  | 
2545  | 0  |             ut->nativeIndexingLimit = si;  | 
2546  | 0  |             strLength               = si;  | 
2547  | 0  |             limit32                 = si;  | 
2548  | 0  |             break;  | 
2549  | 0  |         }  | 
2550  | 0  |         U_ASSERT(di>=0); /* to ensure di never exceeds INT32_MAX, which must not happen logically */  | 
2551  | 0  |         if (di<destCapacity) { | 
2552  |  |             // only store if there is space.  | 
2553  | 0  |             dest[di] = s[si];  | 
2554  | 0  |         } else { | 
2555  | 0  |             if (strLength>=0) { | 
2556  |  |                 // We have filled the destination buffer, and the string length is known.  | 
2557  |  |                 //  Cut the loop short.  There is no need to scan string termination.  | 
2558  | 0  |                 di = limit32 - start32;  | 
2559  | 0  |                 si = limit32;  | 
2560  | 0  |                 break;  | 
2561  | 0  |             }  | 
2562  | 0  |         }  | 
2563  | 0  |         di++;  | 
2564  | 0  |     }  | 
2565  |  |  | 
2566  |  |     // If the limit index points to a lead surrogate of a pair,  | 
2567  |  |     //   add the corresponding trail surrogate to the destination.  | 
2568  | 0  |     if (si>0 && U16_IS_LEAD(s[si-1]) &&  | 
2569  | 0  |             ((si<strLength || strLength<0)  && U16_IS_TRAIL(s[si])))  | 
2570  | 0  |     { | 
2571  | 0  |         if (di<destCapacity) { | 
2572  |  |             // store only if there is space in the output buffer.  | 
2573  | 0  |             dest[di++] = s[si];  | 
2574  | 0  |         }  | 
2575  | 0  |         si++;  | 
2576  | 0  |     }  | 
2577  |  |  | 
2578  |  |     // Put iteration position at the point just following the extracted text  | 
2579  | 0  |     if (si <= ut->chunkNativeLimit) { | 
2580  | 0  |         ut->chunkOffset = si;  | 
2581  | 0  |     } else { | 
2582  | 0  |         ucstrTextAccess(ut, si, TRUE);  | 
2583  | 0  |     }  | 
2584  |  |  | 
2585  |  |     // Add a terminating NUL if space in the buffer permits,  | 
2586  |  |     // and set the error status as required.  | 
2587  | 0  |     u_terminateUChars(dest, destCapacity, di, pErrorCode);  | 
2588  | 0  |     return di;  | 
2589  | 0  | }  | 
2590  |  |  | 
// Function dispatch table for UText instances opened over a UChar string.
// utext_openUChars() installs it as ut->pFuncs.  Slots left NULL are
// operations that this provider does not supply.
static const struct UTextFuncs ucstrFuncs =
{
    sizeof(UTextFuncs),
    0, 0, 0,           // Reserved alignment padding
    ucstrTextClone,
    ucstrTextLength,
    ucstrTextAccess,
    ucstrTextExtract,
    NULL,              // Replace
    NULL,              // Copy
    NULL,              // MapOffsetToNative,
    NULL,              // MapIndexToUTF16,
    ucstrTextClose,
    NULL,              // spare 1
    NULL,              // spare 2
    NULL,              // spare 3
};
2608  |  |  | 
2609  |  | U_CDECL_END  | 
2610  |  |  | 
2611  |  | static const UChar gEmptyUString[] = {0}; | 
2612  |  |  | 
2613  |  | U_CAPI UText * U_EXPORT2  | 
2614  | 0  | utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status) { | 
2615  | 0  |     if (U_FAILURE(*status)) { | 
2616  | 0  |         return NULL;  | 
2617  | 0  |     }  | 
2618  | 0  |     if(s==NULL && length==0) { | 
2619  | 0  |         s = gEmptyUString;  | 
2620  | 0  |     }  | 
2621  | 0  |     if (s==NULL || length < -1 || length>INT32_MAX) { | 
2622  | 0  |         *status = U_ILLEGAL_ARGUMENT_ERROR;  | 
2623  | 0  |         return NULL;  | 
2624  | 0  |     }  | 
2625  | 0  |     ut = utext_setup(ut, 0, status);  | 
2626  | 0  |     if (U_SUCCESS(*status)) { | 
2627  | 0  |         ut->pFuncs               = &ucstrFuncs;  | 
2628  | 0  |         ut->context              = s;  | 
2629  | 0  |         ut->providerProperties   = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);  | 
2630  | 0  |         if (length==-1) { | 
2631  | 0  |             ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);  | 
2632  | 0  |         }  | 
2633  | 0  |         ut->a                    = length;  | 
2634  | 0  |         ut->chunkContents        = s;  | 
2635  | 0  |         ut->chunkNativeStart     = 0;  | 
2636  | 0  |         ut->chunkNativeLimit     = length>=0? length : 0;  | 
2637  | 0  |         ut->chunkLength          = (int32_t)ut->chunkNativeLimit;  | 
2638  | 0  |         ut->chunkOffset          = 0;  | 
2639  | 0  |         ut->nativeIndexingLimit  = ut->chunkLength;  | 
2640  | 0  |     }  | 
2641  | 0  |     return ut;  | 
2642  | 0  | }  | 
2643  |  |  | 
2644  |  |  | 
//------------------------------------------------------------------------------
//
//     UText implementation for text from ICU CharacterIterators
//
//         Use of UText data members:
//            context    pointer to the CharacterIterator
//            a          length of the full text.
//            p          pointer to buffer 1
//            b          start index of local buffer 1 contents
//            q          pointer to buffer 2
//            c          start index of local buffer 2 contents
//            r          pointer to the character iterator if the UText owns it.
//                       Null otherwise.
//
//------------------------------------------------------------------------------
// Capacity, in UChars, of each of the two local chunk buffers (p and q).
#define CIBufSize 16
2661  |  |  | 
2662  |  | U_CDECL_BEGIN  | 
2663  |  | static void U_CALLCONV  | 
2664  | 0  | charIterTextClose(UText *ut) { | 
2665  |  |     // Most of the work of close is done by the generic UText framework close.  | 
2666  |  |     // All that needs to be done here is delete the CharacterIterator if the UText  | 
2667  |  |     //  owns it.  This occurs if the UText was created by cloning.  | 
2668  | 0  |     CharacterIterator *ci = (CharacterIterator *)ut->r;  | 
2669  | 0  |     delete ci;  | 
2670  | 0  |     ut->r = NULL;  | 
2671  | 0  | }  | 
2672  |  |  | 
2673  |  | static int64_t U_CALLCONV  | 
2674  | 0  | charIterTextLength(UText *ut) { | 
2675  | 0  |     return (int32_t)ut->a;  | 
2676  | 0  | }  | 
2677  |  |  | 
2678  |  | static UBool U_CALLCONV  | 
2679  | 0  | charIterTextAccess(UText *ut, int64_t index, UBool  forward) { | 
2680  | 0  |     CharacterIterator *ci   = (CharacterIterator *)ut->context;  | 
2681  |  | 
  | 
2682  | 0  |     int32_t clippedIndex = (int32_t)index;  | 
2683  | 0  |     if (clippedIndex<0) { | 
2684  | 0  |         clippedIndex=0;  | 
2685  | 0  |     } else if (clippedIndex>=ut->a) { | 
2686  | 0  |         clippedIndex=(int32_t)ut->a;  | 
2687  | 0  |     }  | 
2688  | 0  |     int32_t neededIndex = clippedIndex;  | 
2689  | 0  |     if (!forward && neededIndex>0) { | 
2690  |  |         // reverse iteration, want the position just before what was asked for.  | 
2691  | 0  |         neededIndex--;  | 
2692  | 0  |     } else if (forward && neededIndex==ut->a && neededIndex>0) { | 
2693  |  |         // Forward iteration, don't ask for something past the end of the text.  | 
2694  | 0  |         neededIndex--;  | 
2695  | 0  |     }  | 
2696  |  |  | 
2697  |  |     // Find the native index of the start of the buffer containing what we want.  | 
2698  | 0  |     neededIndex -= neededIndex % CIBufSize;  | 
2699  |  | 
  | 
2700  | 0  |     UChar *buf = NULL;  | 
2701  | 0  |     UBool  needChunkSetup = TRUE;  | 
2702  | 0  |     int    i;  | 
2703  | 0  |     if (ut->chunkNativeStart == neededIndex) { | 
2704  |  |         // The buffer we want is already the current chunk.  | 
2705  | 0  |         needChunkSetup = FALSE;  | 
2706  | 0  |     } else if (ut->b == neededIndex) { | 
2707  |  |         // The first buffer (buffer p) has what we need.  | 
2708  | 0  |         buf = (UChar *)ut->p;  | 
2709  | 0  |     } else if (ut->c == neededIndex) { | 
2710  |  |         // The second buffer (buffer q) has what we need.  | 
2711  | 0  |         buf = (UChar *)ut->q;  | 
2712  | 0  |     } else { | 
2713  |  |         // Neither buffer already has what we need.  | 
2714  |  |         // Load new data from the character iterator.  | 
2715  |  |         // Use the buf that is not the current buffer.  | 
2716  | 0  |         buf = (UChar *)ut->p;  | 
2717  | 0  |         if (ut->p == ut->chunkContents) { | 
2718  | 0  |             buf = (UChar *)ut->q;  | 
2719  | 0  |         }  | 
2720  | 0  |         ci->setIndex(neededIndex);  | 
2721  | 0  |         for (i=0; i<CIBufSize; i++) { | 
2722  | 0  |             buf[i] = ci->nextPostInc();  | 
2723  | 0  |             if (i+neededIndex > ut->a) { | 
2724  | 0  |                 break;  | 
2725  | 0  |             }  | 
2726  | 0  |         }  | 
2727  | 0  |     }  | 
2728  |  |  | 
2729  |  |     // We have a buffer with the data we need.  | 
2730  |  |     // Set it up as the current chunk, if it wasn't already.  | 
2731  | 0  |     if (needChunkSetup) { | 
2732  | 0  |         ut->chunkContents = buf;  | 
2733  | 0  |         ut->chunkLength   = CIBufSize;  | 
2734  | 0  |         ut->chunkNativeStart = neededIndex;  | 
2735  | 0  |         ut->chunkNativeLimit = neededIndex + CIBufSize;  | 
2736  | 0  |         if (ut->chunkNativeLimit > ut->a) { | 
2737  | 0  |             ut->chunkNativeLimit = ut->a;  | 
2738  | 0  |             ut->chunkLength  = (int32_t)(ut->chunkNativeLimit)-(int32_t)(ut->chunkNativeStart);  | 
2739  | 0  |         }  | 
2740  | 0  |         ut->nativeIndexingLimit = ut->chunkLength;  | 
2741  | 0  |         U_ASSERT(ut->chunkOffset>=0 && ut->chunkOffset<=CIBufSize);  | 
2742  | 0  |     }  | 
2743  | 0  |     ut->chunkOffset = clippedIndex - (int32_t)ut->chunkNativeStart;  | 
2744  | 0  |     UBool success = (forward? ut->chunkOffset<ut->chunkLength : ut->chunkOffset>0);  | 
2745  | 0  |     return success;  | 
2746  | 0  | }  | 
2747  |  |  | 
2748  |  | static UText * U_CALLCONV  | 
2749  | 0  | charIterTextClone(UText *dest, const UText *src, UBool deep, UErrorCode * status) { | 
2750  | 0  |     if (U_FAILURE(*status)) { | 
2751  | 0  |         return NULL;  | 
2752  | 0  |     }  | 
2753  |  |  | 
2754  | 0  |     if (deep) { | 
2755  |  |         // There is no CharacterIterator API for cloning the underlying text storage.  | 
2756  | 0  |         *status = U_UNSUPPORTED_ERROR;  | 
2757  | 0  |         return NULL;  | 
2758  | 0  |     } else { | 
2759  | 0  |         CharacterIterator *srcCI =(CharacterIterator *)src->context;  | 
2760  | 0  |         srcCI = srcCI->clone();  | 
2761  | 0  |         dest = utext_openCharacterIterator(dest, srcCI, status);  | 
2762  | 0  |         if (U_FAILURE(*status)) { | 
2763  | 0  |             return dest;  | 
2764  | 0  |         }  | 
2765  |  |         // cast off const on getNativeIndex.  | 
2766  |  |         //   For CharacterIterator based UTexts, this is safe, the operation is const.  | 
2767  | 0  |         int64_t  ix = utext_getNativeIndex((UText *)src);  | 
2768  | 0  |         utext_setNativeIndex(dest, ix);  | 
2769  | 0  |         dest->r = srcCI;    // flags that this UText owns the CharacterIterator  | 
2770  | 0  |     }  | 
2771  | 0  |     return dest;  | 
2772  | 0  | }  | 
2773  |  |  | 
2774  |  | static int32_t U_CALLCONV  | 
2775  |  | charIterTextExtract(UText *ut,  | 
2776  |  |                   int64_t start, int64_t limit,  | 
2777  |  |                   UChar *dest, int32_t destCapacity,  | 
2778  |  |                   UErrorCode *status)  | 
2779  | 0  | { | 
2780  | 0  |     if(U_FAILURE(*status)) { | 
2781  | 0  |         return 0;  | 
2782  | 0  |     }  | 
2783  | 0  |     if(destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) { | 
2784  | 0  |         *status=U_ILLEGAL_ARGUMENT_ERROR;  | 
2785  | 0  |         return 0;  | 
2786  | 0  |     }  | 
2787  | 0  |     int32_t  length  = (int32_t)ut->a;  | 
2788  | 0  |     int32_t  start32 = pinIndex(start, length);  | 
2789  | 0  |     int32_t  limit32 = pinIndex(limit, length);  | 
2790  | 0  |     int32_t  desti   = 0;  | 
2791  | 0  |     int32_t  srci;  | 
2792  | 0  |     int32_t  copyLimit;  | 
2793  |  | 
  | 
2794  | 0  |     CharacterIterator *ci = (CharacterIterator *)ut->context;  | 
2795  | 0  |     ci->setIndex32(start32);   // Moves ix to lead of surrogate pair, if needed.  | 
2796  | 0  |     srci = ci->getIndex();  | 
2797  | 0  |     copyLimit = srci;  | 
2798  | 0  |     while (srci<limit32) { | 
2799  | 0  |         UChar32 c = ci->next32PostInc();  | 
2800  | 0  |         int32_t  len = U16_LENGTH(c);  | 
2801  | 0  |         U_ASSERT(desti+len>0); /* to ensure desti+len never exceeds MAX_INT32, which must not happen logically */  | 
2802  | 0  |         if (desti+len <= destCapacity) { | 
2803  | 0  |             U16_APPEND_UNSAFE(dest, desti, c);  | 
2804  | 0  |             copyLimit = srci+len;  | 
2805  | 0  |         } else { | 
2806  | 0  |             desti += len;  | 
2807  | 0  |             *status = U_BUFFER_OVERFLOW_ERROR;  | 
2808  | 0  |         }  | 
2809  | 0  |         srci += len;  | 
2810  | 0  |     }  | 
2811  |  | 
  | 
2812  | 0  |     charIterTextAccess(ut, copyLimit, TRUE);  | 
2813  |  | 
  | 
2814  | 0  |     u_terminateUChars(dest, destCapacity, desti, status);  | 
2815  | 0  |     return desti;  | 
2816  | 0  | }  | 
2817  |  |  | 
// Function dispatch table for UText instances opened over a CharacterIterator.
// utext_openCharacterIterator() installs it as ut->pFuncs.  Slots left NULL
// are operations that this provider does not supply.
static const struct UTextFuncs charIterFuncs =
{
    sizeof(UTextFuncs),
    0, 0, 0,             // Reserved alignment padding
    charIterTextClone,
    charIterTextLength,
    charIterTextAccess,
    charIterTextExtract,
    NULL,                // Replace
    NULL,                // Copy
    NULL,                // MapOffsetToNative,
    NULL,                // MapIndexToUTF16,
    charIterTextClose,
    NULL,                // spare 1
    NULL,                // spare 2
    NULL                 // spare 3
};
2835  |  | U_CDECL_END  | 
2836  |  |  | 
2837  |  |  | 
2838  |  | U_CAPI UText * U_EXPORT2  | 
2839  | 0  | utext_openCharacterIterator(UText *ut, CharacterIterator *ci, UErrorCode *status) { | 
2840  | 0  |     if (U_FAILURE(*status)) { | 
2841  | 0  |         return NULL;  | 
2842  | 0  |     }  | 
2843  |  |  | 
2844  | 0  |     if (ci->startIndex() > 0) { | 
2845  |  |         // No support for CharacterIterators that do not start indexing from zero.  | 
2846  | 0  |         *status = U_UNSUPPORTED_ERROR;  | 
2847  | 0  |         return NULL;  | 
2848  | 0  |     }  | 
2849  |  |  | 
2850  |  |     // Extra space in UText for 2 buffers of CIBufSize UChars each.  | 
2851  | 0  |     int32_t  extraSpace = 2 * CIBufSize * sizeof(UChar);  | 
2852  | 0  |     ut = utext_setup(ut, extraSpace, status);  | 
2853  | 0  |     if (U_SUCCESS(*status)) { | 
2854  | 0  |         ut->pFuncs                = &charIterFuncs;  | 
2855  | 0  |         ut->context              = ci;  | 
2856  | 0  |         ut->providerProperties   = 0;  | 
2857  | 0  |         ut->a                    = ci->endIndex();        // Length of text  | 
2858  | 0  |         ut->p                    = ut->pExtra;            // First buffer  | 
2859  | 0  |         ut->b                    = -1;                    // Native index of first buffer contents  | 
2860  | 0  |         ut->q                    = (UChar*)ut->pExtra+CIBufSize;  // Second buffer  | 
2861  | 0  |         ut->c                    = -1;                    // Native index of second buffer contents  | 
2862  |  |  | 
2863  |  |         // Initialize current chunk contents to be empty.  | 
2864  |  |         //   First access will fault something in.  | 
2865  |  |         //   Note:  The initial nativeStart and chunkOffset must sum to zero  | 
2866  |  |         //          so that getNativeIndex() will correctly compute to zero  | 
2867  |  |         //          if no call to Access() has ever been made.  They can't be both  | 
2868  |  |         //          zero without Access() thinking that the chunk is valid.  | 
2869  | 0  |         ut->chunkContents        = (UChar *)ut->p;  | 
2870  | 0  |         ut->chunkNativeStart     = -1;  | 
2871  | 0  |         ut->chunkOffset          = 1;  | 
2872  | 0  |         ut->chunkNativeLimit     = 0;  | 
2873  | 0  |         ut->chunkLength          = 0;  | 
2874  | 0  |         ut->nativeIndexingLimit  = ut->chunkOffset;  // enables native indexing  | 
2875  | 0  |     }  | 
2876  | 0  |     return ut;  | 
2877  | 0  | }  |