Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/utext.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
*   Copyright (C) 2005-2016, International Business Machines
7
*   Corporation and others.  All Rights Reserved.
8
*
9
*******************************************************************************
10
*   file name:  utext.cpp
11
*   encoding:   UTF-8
12
*   tab size:   8 (not used)
13
*   indentation:4
14
*
15
*   created on: 2005apr12
16
*   created by: Markus W. Scherer
17
*/
18
19
#include <cstddef>
20
21
#include "unicode/utypes.h"
22
#include "unicode/ustring.h"
23
#include "unicode/unistr.h"
24
#include "unicode/chariter.h"
25
#include "unicode/utext.h"
26
#include "unicode/utf.h"
27
#include "unicode/utf8.h"
28
#include "unicode/utf16.h"
29
#include "ustr_imp.h"
30
#include "cmemory.h"
31
#include "cstring.h"
32
#include "uassert.h"
33
#include "putilimp.h"
34
35
U_NAMESPACE_USE
36
37
0
// I32_FLAG: expands to an int32_t value in which only bit number bitIndex is set.
#define I32_FLAG(bitIndex) ((int32_t)1<<(bitIndex))
38
39
40
//
// utext_access   File-local convenience wrapper around the provider's access
//                function.  Passes ut, index and forward straight through and
//                returns the provider's result unchanged.
//
static UBool
utext_access(UText *ut, int64_t index, UBool forward) {
    UBool accessible = ut->pFuncs->access(ut, index, forward);
    return accessible;
}
44
45
46
47
//
// utext_moveIndex32   Move the iteration position by delta code points.
//                     A positive delta moves forward, a negative delta moves
//                     backward, and zero leaves the position untouched.
//                     Returns FALSE as soon as a needed chunk cannot be accessed
//                     or a surrogate step yields U_SENTINEL; TRUE otherwise.
//
U_CAPI UBool U_EXPORT2
utext_moveIndex32(UText *ut, int32_t delta) {
    // Forward steps.
    for (; delta > 0; --delta) {
        if (ut->chunkOffset >= ut->chunkLength &&
                !utext_access(ut, ut->chunkNativeLimit, TRUE)) {
            return FALSE;
        }
        UChar32 unit = ut->chunkContents[ut->chunkOffset];
        if (!U16_IS_SURROGATE(unit)) {
            // Plain BMP code unit: a single step within the chunk.
            ut->chunkOffset++;
            continue;
        }
        // Surrogates may pair up across a chunk boundary; let next32 handle it.
        if (utext_next32(ut) == U_SENTINEL) {
            return FALSE;
        }
    }

    // Backward steps.
    for (; delta < 0; ++delta) {
        if (ut->chunkOffset <= 0 &&
                !utext_access(ut, ut->chunkNativeStart, FALSE)) {
            return FALSE;
        }
        UChar32 unit = ut->chunkContents[ut->chunkOffset - 1];
        if (!U16_IS_SURROGATE(unit)) {
            ut->chunkOffset--;
            continue;
        }
        // Surrogates may pair up across a chunk boundary; let previous32 handle it.
        if (utext_previous32(ut) == U_SENTINEL) {
            return FALSE;
        }
    }

    return TRUE;
}
85
86
87
//
// utext_nativeLength   Return whatever the provider's nativeLength function
//                      reports for this UText.
//
U_CAPI int64_t U_EXPORT2
utext_nativeLength(UText *ut) {
    int64_t length = ut->pFuncs->nativeLength(ut);
    return length;
}
91
92
93
//
// utext_isLengthExpensive   TRUE when the UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE bit
//                           is set in the UText's providerProperties.
//
U_CAPI UBool U_EXPORT2
utext_isLengthExpensive(const UText *ut) {
    const int32_t expensiveBit = I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
    UBool isExpensive = (ut->providerProperties & expensiveBit) != 0;
    return isExpensive;
}
98
99
100
//
// utext_getNativeIndex   Return the native index of the current iteration position.
//                        Offsets up to and including nativeIndexingLimit map 1:1
//                        onto native indexes relative to chunkNativeStart; any
//                        larger offset is translated by the provider.
//
U_CAPI int64_t U_EXPORT2
utext_getNativeIndex(const UText *ut) {
    if (ut->chunkOffset > ut->nativeIndexingLimit) {
        return ut->pFuncs->mapOffsetToNative(ut);
    }
    return ut->chunkNativeStart + ut->chunkOffset;
}
108
109
110
//
// utext_setNativeIndex   Position the UText at the given native index.
//                        If the resulting position lands on a trail surrogate
//                        that is preceded by a lead surrogate, the position is
//                        backed up onto the lead so that it sits on a code point
//                        boundary.
//
U_CAPI void U_EXPORT2
utext_setNativeIndex(UText *ut, int64_t index) {
    if (index < ut->chunkNativeStart || index >= ut->chunkNativeLimit) {
        // The desired position is outside of the current chunk.
        // Access the new position.  Assume a forward iteration from here,
        // which will also be optimum for a single random access.
        // Reverse iterations may suffer slightly.
        ut->pFuncs->access(ut, index, TRUE);
    } else {
        int32_t offsetInChunk = (int32_t)(index - ut->chunkNativeStart);
        if (offsetInChunk <= ut->nativeIndexingLimit) {
            // utf-16 indexing.
            ut->chunkOffset = offsetInChunk;
        } else {
            ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index);
        }
    }

    // The convention is that the index must always be on a code point boundary.
    // Adjust the index position if it is in the middle of a surrogate pair.
    if (ut->chunkOffset >= ut->chunkLength) {
        return;
    }
    UChar unitHere = ut->chunkContents[ut->chunkOffset];
    if (!U16_IS_TRAIL(unitHere)) {
        return;
    }
    if (ut->chunkOffset == 0) {
        // The possible lead surrogate lives in the preceding chunk; ask for it.
        ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE);
    }
    if (ut->chunkOffset > 0) {
        UChar unitBefore = ut->chunkContents[ut->chunkOffset - 1];
        if (U16_IS_LEAD(unitBefore)) {
            ut->chunkOffset--;
        }
    }
}
141
142
143
144
//
// utext_getPreviousNativeIndex   Return the native index of the code point that
//                                precedes the current position.  The iteration
//                                position is moved temporarily on the slower
//                                paths and stepped back afterwards.
//
U_CAPI int64_t U_EXPORT2
utext_getPreviousNativeIndex(UText *ut) {
    //
    //  Fast path: the current position is not at the beginning of a chunk
    //  and the preceding code unit is not a trail surrogate.
    //
    const int32_t prevOffset = ut->chunkOffset - 1;
    if (prevOffset >= 0) {
        UChar prevUnit = ut->chunkContents[prevOffset];
        if (U16_IS_TRAIL(prevUnit) == FALSE) {
            if (prevOffset <= ut->nativeIndexingLimit) {
                return ut->chunkNativeStart + prevOffset;
            }
            // Non-1:1 indexing: step back, let the provider map the offset,
            // then step forward again.
            ut->chunkOffset = prevOffset;
            int64_t mapped = ut->pFuncs->mapOffsetToNative(ut);
            ut->chunkOffset++;
            return mapped;
        }
    }

    // If at the start of text, simply return 0.
    if (ut->chunkOffset == 0 && ut->chunkNativeStart == 0) {
        return 0;
    }

    // Harder, less common cases.  We are at a chunk boundary, or on a surrogate.
    //    Keep it simple, use other functions to handle the edges.
    utext_previous32(ut);
    int64_t previousIndex = UTEXT_GETNATIVEINDEX(ut);
    utext_next32(ut);
    return previousIndex;
}
180
181
182
//
183
//  utext_current32.  Get the UChar32 at the current position.
184
//                    UText iteration position is always on a code point boundary,
185
//                    never on the trail half of a surrogate pair.
186
//
187
U_CAPI UChar32 U_EXPORT2
utext_current32(UText *ut) {
    // When the position is just off the end of the chunk, load the following
    // chunk; failure means we are off the end of the text.
    if (ut->chunkOffset == ut->chunkLength &&
            ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {
        return U_SENTINEL;
    }

    const UChar32 first = ut->chunkContents[ut->chunkOffset];
    if (U16_IS_LEAD(first) == FALSE) {
        // Normal, non-supplementary case.
        return first;
    }

    //
    //  Possible supplementary char: go looking for a trail surrogate.
    //
    UChar32 second = 0;
    if ((ut->chunkOffset + 1) < ut->chunkLength) {
        // The trail surrogate is in the same chunk.
        second = ut->chunkContents[ut->chunkOffset + 1];
    } else {
        //  The trail surrogate is in a different chunk.
        //     The iteration position must not change, so switch forward into the
        //     next chunk, read the candidate trail, then switch back to the
        //     original chunk and restore the original offset.
        //     Edge case: the text may end with an unpaired lead surrogate.  The
        //     forward access then fails, but the original position still has to
        //     be restored.
        const int64_t boundary    = ut->chunkNativeLimit;
        const int32_t savedOffset = ut->chunkOffset;
        if (ut->pFuncs->access(ut, boundary, TRUE)) {
            second = ut->chunkContents[ut->chunkOffset];
        }
        UBool restored = ut->pFuncs->access(ut, boundary, FALSE);  // reverse iteration flag loads preceding chunk
        U_ASSERT(restored==TRUE);
        ut->chunkOffset = savedOffset;
        if (!restored) {
            return U_SENTINEL;
        }
    }

    // An unpaired lead is returned as-is.
    if (!U16_IS_TRAIL(second)) {
        return first;
    }
    UChar32 supplementary = U16_GET_SUPPLEMENTARY(first, second);
    return supplementary;
}
239
240
241
//
// utext_char32At   Move to nativeIndex and return the code point found there.
//                  U_SENTINEL is returned when no code unit could be read at the
//                  requested position.
//
U_CAPI UChar32 U_EXPORT2
utext_char32At(UText *ut, int64_t nativeIndex) {
    UChar32 result = U_SENTINEL;

    // Fast path the common case: index inside the 1:1-indexed part of the
    // current chunk and the code unit there is not a surrogate.
    const UBool inDirectRange =
        nativeIndex >= ut->chunkNativeStart &&
        nativeIndex < ut->chunkNativeStart + ut->nativeIndexingLimit;
    if (inDirectRange) {
        ut->chunkOffset = (int32_t)(nativeIndex - ut->chunkNativeStart);
        result = ut->chunkContents[ut->chunkOffset];
        if (U16_IS_SURROGATE(result) == FALSE) {
            return result;
        }
    }

    // General path.
    utext_setNativeIndex(ut, nativeIndex);
    if (nativeIndex >= ut->chunkNativeStart && ut->chunkOffset < ut->chunkLength) {
        result = ut->chunkContents[ut->chunkOffset];
        if (U16_IS_SURROGATE(result)) {
            // For surrogates, let current32() deal with the complications
            //    of supplementaries that may span chunk boundaries.
            result = utext_current32(ut);
        }
    }
    return result;
}
266
267
268
//
// utext_next32   Return the code point at the current position and advance past it.
//                Returns U_SENTINEL when no further chunk can be accessed.
//
U_CAPI UChar32 U_EXPORT2
utext_next32(UText *ut) {
    if (ut->chunkOffset >= ut->chunkLength &&
            ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {
        return U_SENTINEL;
    }

    const UChar32 first = ut->chunkContents[ut->chunkOffset];
    ut->chunkOffset++;
    if (U16_IS_LEAD(first) == FALSE) {
        // Normal case, not supplementary.
        //   (A trail surrogate seen here is just returned as is, as a surrogate value.
        //    It cannot be part of a pair.)
        return first;
    }

    if (ut->chunkOffset >= ut->chunkLength &&
            ut->pFuncs->access(ut, ut->chunkNativeLimit, TRUE) == FALSE) {
        // first is an unpaired lead surrogate at the end of the text.
        // Return it as it is.
        return first;
    }

    const UChar32 second = ut->chunkContents[ut->chunkOffset];
    if (U16_IS_TRAIL(second)) {
        ut->chunkOffset++;   // move iteration position over the trail surrogate.
        return U16_GET_SUPPLEMENTARY(first, second);
    }

    // first was an unpaired lead surrogate, not at the end of the text.
    // Return it as it is (unpaired).  Iteration position is on the
    // following character, possibly in the next chunk, where the
    // trail surrogate would have been if it had existed.
    return first;
}
306
307
308
//
// utext_previous32   Step the position back by one code point and return that
//                    code point.  Returns U_SENTINEL when no preceding chunk
//                    can be accessed.
//
U_CAPI UChar32 U_EXPORT2
utext_previous32(UText *ut) {
    if (ut->chunkOffset <= 0 &&
            ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) {
        return U_SENTINEL;
    }

    ut->chunkOffset--;
    const UChar32 last = ut->chunkContents[ut->chunkOffset];
    if (U16_IS_TRAIL(last) == FALSE) {
        // Normal case, not supplementary.
        //   (A lead surrogate seen here is just returned as is, as a surrogate value.
        //    It cannot be part of a pair.)
        return last;
    }

    if (ut->chunkOffset <= 0 &&
            ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE) == FALSE) {
        // last is an unpaired trail surrogate at the start of the text.
        // Return it as it is.
        return last;
    }

    const UChar32 before = ut->chunkContents[ut->chunkOffset - 1];
    if (U16_IS_LEAD(before)) {
        ut->chunkOffset--;   // move iteration position over the lead surrogate.
        return U16_GET_SUPPLEMENTARY(before, last);
    }

    // last was an unpaired trail surrogate, not at the start of the text.
    // Return it as it is (unpaired).  Iteration position is at last.
    return last;
}
345
346
347
348
//
// utext_next32From   Position at the given native index, return the code point
//                    there and advance past it.  Returns U_SENTINEL when no
//                    chunk is available at the index.
//
U_CAPI UChar32 U_EXPORT2
utext_next32From(UText *ut, int64_t index) {
    const UBool outsideChunk =
        (index < ut->chunkNativeStart || index >= ut->chunkNativeLimit);

    if (outsideChunk) {
        // Desired position is outside of the current chunk.
        if (!ut->pFuncs->access(ut, index, TRUE)) {
            // no chunk available here
            return U_SENTINEL;
        }
    } else if (index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) {
        // Desired position is in chunk, with direct 1:1 native to UTF16 indexing
        ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
    } else {
        // Desired position is in chunk, with non-UTF16 indexing.
        ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index);
    }

    UChar32 result = ut->chunkContents[ut->chunkOffset];
    ut->chunkOffset++;
    if (U16_IS_SURROGATE(result)) {
        // Surrogates.  Many edge cases.  Start over from the index and use the
        //              functions that already deal with the problems.
        utext_setNativeIndex(ut, index);
        result = utext_next32(ut);
    }
    return result;
}
375
376
377
//
// utext_previous32From   Return the character preceding the specified index.
//                        Leave the iteration position at the start of the
//                        character that was returned.  Returns U_SENTINEL when
//                        no chunk is available for the position.
//
U_CAPI UChar32 U_EXPORT2
utext_previous32From(UText *ut, int64_t index) {
    // Address the chunk containing the position preceding the incoming index.
    // A tricky edge case:
    //   We try to test the requested native index against the chunkNativeStart to determine
    //    whether the character preceding the one at the index is in the current chunk.
    //    BUT, this test can fail with UTF-8 (or any other multibyte encoding), when the
    //    requested index is on something other than the first position of the first char.
    //
    const UBool outsideChunk =
        (index <= ut->chunkNativeStart || index > ut->chunkNativeLimit);

    if (outsideChunk) {
        // Requested native index is outside of the current chunk.
        if (!ut->pFuncs->access(ut, index, FALSE)) {
            // no chunk available here
            return U_SENTINEL;
        }
    } else if (index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) {
        // Direct UTF-16 indexing.
        ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
    } else {
        ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index);
        // Mapped to the very start of the chunk: the preceding character must
        // come from an earlier chunk.
        if (ut->chunkOffset == 0 && !ut->pFuncs->access(ut, index, FALSE)) {
            // no chunk available here
            return U_SENTINEL;
        }
    }

    //
    // Simple case with no surrogates.
    //
    ut->chunkOffset--;
    UChar32 preceding = ut->chunkContents[ut->chunkOffset];

    if (U16_IS_SURROGATE(preceding)) {
        // Possible supplementary.  Many edge cases.
        // Let other functions do the heavy lifting.
        utext_setNativeIndex(ut, index);
        preceding = utext_previous32(ut);
    }
    return preceding;
}
423
424
425
//
// utext_extract   Forward an extract request to the provider's extract function
//                 and return its result unchanged.
//
U_CAPI int32_t U_EXPORT2
utext_extract(UText *ut,
              int64_t start, int64_t limit,
              UChar *dest, int32_t destCapacity,
              UErrorCode *status) {
    int32_t providerResult = ut->pFuncs->extract(ut, start, limit, dest, destCapacity, status);
    return providerResult;
}
432
433
434
435
//
// utext_equals   Two UTexts compare equal when both are non-NULL and carry the
//                UText magic value, share the same provider function table and
//                the same context pointer, and report the same native index.
//
U_CAPI UBool U_EXPORT2
utext_equals(const UText *a, const UText *b) {
    // Null or invalid arguments don't compare equal to anything.
    if (a == NULL || b == NULL) {
        return FALSE;
    }
    if (a->magic != UTEXT_MAGIC || b->magic != UTEXT_MAGIC) {
        return FALSE;
    }

    // Different types of text providers, or
    // different sources (different strings).
    if (a->pFuncs != b->pFuncs || a->context != b->context) {
        return FALSE;
    }

    // Different current position in the string.
    if (utext_getNativeIndex(a) != utext_getNativeIndex(b)) {
        return FALSE;
    }

    return TRUE;
}
460
461
//
// utext_isWritable   TRUE when the UTEXT_PROVIDER_WRITABLE bit is set in the
//                    UText's providerProperties.
//
U_CAPI UBool U_EXPORT2
utext_isWritable(const UText *ut)
{
    const int32_t writableBit = I32_FLAG(UTEXT_PROVIDER_WRITABLE);
    UBool writable = (ut->providerProperties & writableBit) != 0;
    return writable;
}
467
468
469
//
// utext_freeze   Clear the UTEXT_PROVIDER_WRITABLE bit in providerProperties.
//                All other property bits are left as they were.
//
U_CAPI void U_EXPORT2
utext_freeze(UText *ut) {
    const int32_t writableBit = I32_FLAG(UTEXT_PROVIDER_WRITABLE);
    ut->providerProperties &= ~writableBit;
}
474
475
476
//
// utext_hasMetaData   TRUE when the UTEXT_PROVIDER_HAS_META_DATA bit is set in
//                     the UText's providerProperties.
//
U_CAPI UBool U_EXPORT2
utext_hasMetaData(const UText *ut)
{
    const int32_t metaDataBit = I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA);
    UBool hasMetaData = (ut->providerProperties & metaDataBit) != 0;
    return hasMetaData;
}
482
483
484
485
//
// utext_replace   Hand a replace request to the provider, after checking that
//                 no error is pending and that the UText is writable.
//                 Returns 0 without calling the provider if *status already
//                 indicates failure, or if the WRITABLE property bit is clear
//                 (in which case *status is set to U_NO_WRITE_PERMISSION).
//                 Otherwise returns the provider's result.
//
U_CAPI int32_t U_EXPORT2
utext_replace(UText *ut,
              int64_t nativeStart, int64_t nativeLimit,
              const UChar *replacementText, int32_t replacementLength,
              UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return 0;
    }

    const UBool writable =
        (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != 0;
    if (!writable) {
        *status = U_NO_WRITE_PERMISSION;
        return 0;
    }

    return ut->pFuncs->replace(ut, nativeStart, nativeLimit,
                               replacementText, replacementLength, status);
}
501
502
//
// utext_copy   Hand a copy/move request to the provider, after checking that
//              no error is pending and that the UText is writable.
//              Does nothing if *status already indicates failure.  Sets *status
//              to U_NO_WRITE_PERMISSION and returns if the WRITABLE property
//              bit is clear.
//
U_CAPI void U_EXPORT2
utext_copy(UText *ut,
           int64_t nativeStart, int64_t nativeLimit,
           int64_t destIndex,
           UBool move,
           UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return;
    }

    const UBool writable =
        (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != 0;
    if (!writable) {
        *status = U_NO_WRITE_PERMISSION;
        return;
    }

    ut->pFuncs->copy(ut, nativeStart, nativeLimit, destIndex, move, status);
}
518
519
520
521
//
// utext_clone   Clone src through the provider's clone function.
//               Returns dest untouched if *status already indicates failure.
//               If the provider reports failure its result is returned as is.
//               A NULL result without a failure status is turned into
//               U_MEMORY_ALLOCATION_ERROR.  When readOnly is set, the clone is
//               frozen before being returned.
//
U_CAPI UText * U_EXPORT2
utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return dest;
    }

    UText *cloned = src->pFuncs->clone(dest, src, deep, status);
    if (!U_FAILURE(*status)) {
        if (cloned == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
        } else if (readOnly) {
            utext_freeze(cloned);
        }
    }
    return cloned;
}
539
540
541
542
//------------------------------------------------------------------------------
543
//
544
//   UText common functions implementation
545
//
546
//------------------------------------------------------------------------------
547
548
//
549
//  UText.flags bit definitions
550
//
551
enum {
    // Set if ICU has allocated this UText struct on the heap;
    // clear if the caller provided storage for the UText.
    UTEXT_HEAP_ALLOCATED       = 1 << 0,

    // Set if ICU has allocated extra storage as a separate heap block;
    // clear if there is no separate allocation.  Either no extra storage
    // was requested, or it is appended to the end of the main UText storage.
    UTEXT_EXTRA_HEAP_ALLOCATED = 1 << 1,

    // Set if this UText is currently open; clear if it is not open.
    UTEXT_OPEN                 = 1 << 2
};
564
565
566
//
567
//  Extended form of a UText.  The purpose is to aid in computing the total size required
568
//    when a provider asks for a UText to be allocated with extra storage.
569
570
struct ExtendedUText {
571
    UText               ut;
572
    std::max_align_t    extension;
573
};
574
575
// Template value (UTEXT_INITIALIZER) copied into each UText that utext_setup heap-allocates.
static const UText emptyText = UTEXT_INITIALIZER;
576
577
//
// utext_setup   Prepare a UText for use, allocating it and/or its extra storage
//               as needed.
//
//     ut          NULL to have a new UText heap-allocated, or an existing UText
//                 (identified by its magic value) to be reused.  An existing
//                 UText that is still open is closed through its provider's
//                 close function first.
//     extraSpace  Number of bytes of extra storage wanted (available through
//                 ut->pExtra).  Values <= 0 request no extra storage.
//     status      ICU error code, in/out.  Nothing is done if it already
//                 indicates failure.  Set to U_MEMORY_ALLOCATION_ERROR when an
//                 allocation fails and to U_ILLEGAL_ARGUMENT_ERROR when ut does
//                 not carry the UText magic value.
//
//     Returns the UText that was set up, the incoming ut on a pre-existing or
//     argument error, or NULL if the UText itself could not be allocated.
//     On success the UText is flagged open, its provider-visible fields are
//     reset, and any extra storage is zero-filled.
//
U_CAPI UText * U_EXPORT2
utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return ut;
    }

    if (ut == NULL) {
        // We need to heap-allocate storage for the new UText.
        // The size is computed as a size_t so that a large extraSpace cannot
        // wrap around a 32 bit signed intermediate.
        size_t spaceRequired = sizeof(UText);
        if (extraSpace > 0) {
            spaceRequired = sizeof(ExtendedUText) + (size_t)extraSpace - sizeof(std::max_align_t);
        }
        ut = (UText *)uprv_malloc(spaceRequired);
        if (ut == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
            return NULL;
        }
        *ut = emptyText;
        ut->flags |= UTEXT_HEAP_ALLOCATED;
        // Only hook up pExtra when extra storage was really appended to the
        // allocation.  Without this test pExtra would refer to memory past the
        // end of the block when no extra space was requested.
        if (extraSpace > 0) {
            ut->extraSize = extraSpace;
            ut->pExtra    = &((ExtendedUText *)ut)->extension;
        }
    } else {
        // We have been supplied with an already existing UText.
        // Verify that it really appears to be a UText.
        if (ut->magic != UTEXT_MAGIC) {
            *status = U_ILLEGAL_ARGUMENT_ERROR;
            return ut;
        }
        // If the ut is already open and there's a provider supplied close
        //   function, call it.
        if ((ut->flags & UTEXT_OPEN) && ut->pFuncs->close != NULL)  {
            ut->pFuncs->close(ut);
        }
        ut->flags &= ~UTEXT_OPEN;

        // If extra space was requested by our caller, check whether
        //   sufficient already exists, and allocate new if needed.
        if (extraSpace > ut->extraSize) {
            // Need more space.  If there is existing separately allocated space,
            //   delete it first, then allocate new space.
            if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {
                uprv_free(ut->pExtra);
                ut->extraSize = 0;
            }
            ut->pExtra = uprv_malloc(extraSpace);
            if (ut->pExtra == NULL) {
                *status = U_MEMORY_ALLOCATION_ERROR;
                // Keep the bookkeeping consistent with the now missing storage:
                // no extra bytes are available and nothing is left to free.
                ut->extraSize = 0;
                ut->flags &= ~UTEXT_EXTRA_HEAP_ALLOCATED;
            } else {
                ut->extraSize = extraSpace;
                ut->flags |= UTEXT_EXTRA_HEAP_ALLOCATED;
            }
        }
    }

    if (U_SUCCESS(*status)) {
        ut->flags |= UTEXT_OPEN;

        // Initialize all remaining fields of the UText.
        //
        ut->context             = NULL;
        ut->chunkContents       = NULL;
        ut->p                   = NULL;
        ut->q                   = NULL;
        ut->r                   = NULL;
        ut->a                   = 0;
        ut->b                   = 0;
        ut->c                   = 0;
        ut->chunkOffset         = 0;
        ut->chunkLength         = 0;
        ut->chunkNativeStart    = 0;
        ut->chunkNativeLimit    = 0;
        ut->nativeIndexingLimit = 0;
        ut->providerProperties  = 0;
        ut->privA               = 0;
        ut->privB               = 0;
        ut->privC               = 0;
        ut->privP               = NULL;
        if (ut->pExtra != NULL && ut->extraSize > 0) {
            uprv_memset(ut->pExtra, 0, ut->extraSize);
        }
    }
    return ut;
}
662
663
664
//
// utext_close   Close an open UText and release storage owned by the framework.
//               A NULL pointer, a struct without the UText magic value, or a
//               UText that is not open is returned unchanged.
//               Returns NULL if the UText struct itself was heap allocated by
//               the framework (and has now been freed); otherwise returns ut.
//
U_CAPI UText * U_EXPORT2
utext_close(UText *ut) {
    const UBool isOpenUText = (ut != NULL &&
                               ut->magic == UTEXT_MAGIC &&
                               (ut->flags & UTEXT_OPEN) != 0);
    if (!isOpenUText) {
        // The supplied ut is not an open UText.
        // Do nothing.
        return ut;
    }

    // If the provider gave us a close function, call it now.
    // This will clean up anything allocated specifically by the provider.
    if (ut->pFuncs->close != NULL) {
        ut->pFuncs->close(ut);
    }
    ut->flags &= ~UTEXT_OPEN;

    // If we (the framework) allocated subsidiary storage, delete it.
    if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {
        uprv_free(ut->pExtra);
        ut->pExtra    = NULL;
        ut->extraSize = 0;
        ut->flags    &= ~UTEXT_EXTRA_HEAP_ALLOCATED;
    }

    // Zero out function table of the closed UText.  This is a defensive move,
    //   intended to cause applications that inadvertently use a closed
    //   utext to crash with null pointer errors.
    ut->pFuncs = NULL;

    if ((ut->flags & UTEXT_HEAP_ALLOCATED) == 0) {
        // Caller-owned storage: hand the (closed) struct back.
        return ut;
    }

    // This UText was allocated by UText setup.  We need to free it.
    // Clear magic, so we can detect if the user messes up and immediately
    //  tries to reopen another UText using the deleted storage.
    ut->magic = 0;
    uprv_free(ut);
    return NULL;
}
706
707
708
709
710
//
711
// invalidateChunk   Reset a chunk to have no contents, so that the next call
712
//                   to access will cause new data to load.
713
//                   This is needed when copy/move/replace operate directly on the
714
//                   backing text, potentially putting it out of sync with the
715
//                   contents in the chunk.
716
//
717
static void
718
0
invalidateChunk(UText *ut) {
719
0
    ut->chunkLength = 0;
720
0
    ut->chunkNativeLimit = 0;
721
0
    ut->chunkNativeStart = 0;
722
0
    ut->chunkOffset = 0;
723
0
    ut->nativeIndexingLimit = 0;
724
0
}
725
726
//
727
// pinIndex        Do range pinning on a native index parameter.
728
//                 64 bit pinning is done in place.
729
//                 32 bit truncated result is returned as a convenience for
730
//                        use in providers that don't need 64 bits.
731
static int32_t
pinIndex(int64_t &index, int64_t limit) {
    // Clamp in place: negative values go to 0, values beyond limit go to limit.
    if (index < 0) {
        index = 0;
        return 0;
    }
    if (index > limit) {
        index = limit;
    }
    // Truncated copy for callers that only need 32 bits.
    return static_cast<int32_t>(index);
}
740
741
742
U_CDECL_BEGIN
743
744
//
745
// Pointer relocation function,
746
//   a utility used by shallow clone.
747
//   Adjust a pointer that refers to something within one UText (the source)
748
//   to refer to the same relative offset within another UText (the target)
749
//
750
0
static void adjustPointer(UText *dest, const void **destPtr, const UText *src) {
    // Work with (char *) throughout so that byte address arithmetic is possible.
    char *target   = (char *)*destPtr;
    char *srcExtra = (char *)src->pExtra;
    char *srcBase  = (char *)src;

    if (target >= srcExtra && target < srcExtra + src->extraSize) {
        // The pointer referred to something within the src UText's pExtra storage.
        //   Relocate it to the same offset in the target UText's pExtra region.
        *destPtr = ((char *)dest->pExtra) + (target - srcExtra);
        return;
    }

    if (target >= srcBase && target < srcBase + src->sizeOfStruct) {
        // The pointer referred to somewhere within the source UText struct itself.
        //   Move it to the same offset within the target UText.
        *destPtr = ((char *)dest) + (target - srcBase);
    }
    // Anything else points outside of the source UText and is left alone.
}
766
767
768
//
769
//  Clone.  This is a generic copy-the-utext-by-value clone function that can be
770
//          used as-is with some utext types, and as a helper by other clones.
771
//
772
static UText * U_CALLCONV
shallowTextClone(UText * dest, const UText * src, UErrorCode * status) {
    if (U_FAILURE(*status)) {
        return NULL;
    }
    const int32_t extraBytes = src->extraSize;

    //
    // Use the generic text_setup to allocate storage if required.
    //
    dest = utext_setup(dest, extraBytes, status);
    if (U_FAILURE(*status)) {
        return dest;
    }

    //
    //  The flags (how the UText was allocated) and the pointer to the extra
    //   storage were established by utext_setup and belong to the clone.
    //   Remember them, since the struct copy below overwrites both.
    //
    void    *savedExtra = dest->pExtra;
    int32_t  savedFlags = dest->flags;

    //
    //  Copy the whole UText struct by value, limited to the smaller of the
    //  two struct sizes.  Any "Extra" storage is copied also.
    //
    int bytesToCopy = (src->sizeOfStruct > dest->sizeOfStruct) ? dest->sizeOfStruct
                                                               : src->sizeOfStruct;
    uprv_memcpy(dest, src, bytesToCopy);
    dest->pExtra = savedExtra;
    dest->flags  = savedFlags;
    if (extraBytes > 0) {
        uprv_memcpy(dest->pExtra, src->pExtra, extraBytes);
    }

    //
    // Relocate any pointers in the target that refer to the UText itself
    //   to point to the cloned copy rather than the original source.
    //
    adjustPointer(dest, &dest->context, src);
    adjustPointer(dest, &dest->p, src);
    adjustPointer(dest, &dest->q, src);
    adjustPointer(dest, &dest->r, src);
    adjustPointer(dest, (const void **)&dest->chunkContents, src);

    // The newly shallow-cloned UText does _not_ own the underlying storage for the text.
    // (The source for the clone may or may not have owned the text.)
    dest->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);

    return dest;
}
829
830
831
U_CDECL_END
832
833
834
835
//------------------------------------------------------------------------------
836
//
837
//     UText implementation for UTF-8 char * strings (read-only)
838
//     Limitation:  string length must be <= 0x7fffffff in length.
839
//                  (length must fit in an int32_t variable)
840
//
841
//         Use of UText data members:
842
//              context    pointer to UTF-8 string
843
//              utext.b    is the input string length (bytes).
844
//              utext.c    Length scanned so far in string
845
//                           (for optimizing finding length of zero terminated strings.)
846
//              utext.p    pointer to the current buffer
847
//              utext.q    pointer to the other buffer.
848
//
849
//------------------------------------------------------------------------------
850
851
// Chunk size.
852
//     Must be less than 85 (256/3), because of byte mapping from UChar indexes to native indexes.
853
//     Worst case is three native bytes to one UChar.  (Supplementaries are 4 native bytes
854
//     to two UChars.)
855
//     The longest illegal byte sequence treated as a single error (and converted to U+FFFD)
856
//     is a three-byte sequence (truncated four-byte sequence).
857
//
858
// Nominal chunk size used to dimension the UTF8Buf UChar buffer, which adds a few spare positions on top of it.
enum { UTF8_TEXT_CHUNK_SIZE=32 };
859
860
//
861
// UTF8Buf  Two of these structs will be set up in the UText's extra allocated space.
862
//          Each contains the UChar chunk buffer, the to and from native maps, and
863
//          header info.
864
//
865
//     because backwards iteration fills the buffers starting at the end and
866
//     working towards the front, the filled part of the buffers may not begin
867
//     at the start of the available storage for the buffers.
868
//
869
//     Buffer size is one bigger than the specified UTF8_TEXT_CHUNK_SIZE to allow for
870
//     the last character added being a supplementary, and thus requiring a surrogate
871
//     pair.  Doing this is simpler than checking for the edge case.
872
//
873
874
// One chunk buffer for the UTF-8 provider: the converted UTF-16 text plus the
// two index maps between native (byte) indexes and positions in buf.
struct UTF8Buf {
875
    int32_t   bufNativeStart;                        // Native index of first char in UChar buf
876
    int32_t   bufNativeLimit;                        // Native index following last char in buf.
877
    int32_t   bufStartIdx;                           // First filled position in buf.
878
    int32_t   bufLimitIdx;                           // Limit of filled range in buf.
879
    int32_t   bufNILimit;                            // Limit of native indexing part of buf
                                                     //   (copied into ut->nativeIndexingLimit when
                                                     //   this buffer becomes the current chunk).
880
    int32_t   toUCharsMapStart;                      // Native index corresponding to
881
                                                     //   mapToUChars[0].
882
                                                     //   Set to bufNativeStart when filling forwards.
883
                                                     //   Set to computed value when filling backwards.
884
885
    UChar     buf[UTF8_TEXT_CHUNK_SIZE+4];           // The UChar buffer.  Requires one extra position beyond
886
                                                     //   the chunk size, to allow for surrogate at the end.
887
                                                     //   Length must be identical to mapToNative array, below,
888
                                                     //   because of the way indexing works when the array is
889
                                                     //   filled backwards during a reverse iteration.  Thus,
890
                                                     //   the additional extra size.
891
    uint8_t   mapToNative[UTF8_TEXT_CHUNK_SIZE+4];   // map UChar index in buf to
892
                                                     //  native offset from toUCharsMapStart
                                                     //  (which equals bufNativeStart for a
                                                     //  forward-filled buffer).
893
                                                     //  Requires two extra slots,
894
                                                     //    one for a supplementary starting in the last normal position,
895
                                                     //    and one for an entry for the buffer limit position.
896
    uint8_t   mapToUChars[UTF8_TEXT_CHUNK_SIZE*3+6]; // Map native offset from toUCharsMapStart to
897
                                                     //   the corresponding index in buf.  Readers subtract
                                                     //   bufStartIdx to get the offset within the filled part.
898
    int32_t   align;                                 // Not referenced by the code in view; presumably padding
                                                     //   for alignment of the second UTF8Buf -- TODO confirm.
899
};
900
901
U_CDECL_BEGIN
902
903
//
904
//   utf8TextLength
905
//
906
//        Get the length of the string.  If we don't already know it,
907
//              we'll need to scan for the trailing  nul.
908
//
909
static int64_t U_CALLCONV
910
0
utf8TextLength(UText *ut) {
911
0
    if (ut->b < 0) {
912
        // Zero terminated string, and we haven't scanned to the end yet.
913
        // Scan it now.
914
0
        const char *r = (const char *)ut->context + ut->c;
915
0
        while (*r != 0) {
916
0
            r++;
917
0
        }
918
0
        if ((r - (const char *)ut->context) < 0x7fffffff) {
919
0
            ut->b = (int32_t)(r - (const char *)ut->context);
920
0
        } else {
921
            // Actual string was bigger (more than 2 gig) than we
922
            //   can handle.  Clip it to 2 GB.
923
0
            ut->b = 0x7fffffff;
924
0
        }
925
0
        ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
926
0
    }
927
0
    return ut->b;
928
0
}
929
930
931
932
933
934
935
//
//   utf8TextAccess
//
//        UText access function for the UTF-8 provider.
//
//        Positions the UText at native (byte) index `index` for iteration in the
//        direction given by `forward`, converting a new chunk of UTF-8 to UTF-16
//        when neither of the two UTF8Buf buffers (ut->p current, ut->q alternate)
//        already covers the position.
//
//        index    requested native index.  Negative values are treated as 0; values
//                 above 0x7fffffff or beyond the (known) string length are pinned.
//        forward  TRUE to prepare for forward iteration, FALSE for backward.
//
//        Returns TRUE if the chunk now holds text in the requested direction from
//        the (possibly adjusted) index; FALSE if the position is at the end (forward)
//        or start (backward) of the text.  On FALSE the chunk and chunkOffset are
//        still set up so that iteration in the opposite direction works.
//
static UBool U_CALLCONV
utf8TextAccess(UText *ut, int64_t index, UBool forward) {
    //
    //  Apologies to those who are allergic to goto statements.
    //    Consider each goto to a labelled block to be the equivalent of
    //         call the named block as if it were a function();
    //         return;
    //
    const uint8_t *s8=(const uint8_t *)ut->context;
    UTF8Buf *u8b = NULL;
    int32_t  length = ut->b;         // Length of original utf-8 (negative if not yet known)
    int32_t  ix= (int32_t)index;     // Requested index, trimmed to 32 bits.
    int32_t  mapIndex = 0;
    if (index<0) {
        ix=0;
    } else if (index > 0x7fffffff) {
        // Strings with 64 bit lengths not supported by this UTF-8 provider.
        ix = 0x7fffffff;
    }

    // Pin requested index to the string length.
    if (ix>length) {
        if (length>=0) {
            ix=length;
        } else if (ix>=ut->c) {
            // Zero terminated string, and requested index is beyond
            //   the region that has already been scanned.
            //   Scan up to either the end of the string or to the
            //   requested position, whichever comes first.
            while (ut->c<ix && s8[ut->c]!=0) {
                ut->c++;
            }
            //  TODO:  support for null terminated string length > 32 bits.
            if (s8[ut->c] == 0) {
                // We just found the actual length of the string.
                //  Trim the requested index back to that.
                ix     = ut->c;
                ut->b  = ut->c;
                length = ut->c;
                ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
            }
        }
    }

    //
    // Dispatch to the appropriate action for a forward iteration request.
    //
    if (forward) {
        if (ix==ut->chunkNativeLimit) {
            // Check for normal sequential iteration cases first.
            if (ix==length) {
                // Just reached end of string
                // Don't swap buffers, but do set the
                //   current buffer position.
                ut->chunkOffset = ut->chunkLength;
                return FALSE;
            } else {
                // End of current buffer.
                //   check whether other buffer already has what we need.
                UTF8Buf *altB = (UTF8Buf *)ut->q;
                if (ix>=altB->bufNativeStart && ix<altB->bufNativeLimit) {
                    goto swapBuffers;
                }
            }
        }

        // A random access.  Desired index could be in either or neither buf.
        // For optimizing the order of testing, first check for the index
        //    being in the other buffer.  This will be the case for uses that
        //    move back and forth over a fairly limited range
        {
            u8b = (UTF8Buf *)ut->q;   // the alternate buffer
            if (ix>=u8b->bufNativeStart && ix<u8b->bufNativeLimit) {
                // Requested index is in the other buffer.
                goto swapBuffers;
            }
            if (ix == length) {
                // Requested index is end-of-string.
                //   (this is the case of randomly seeking to the end.
                //    The case of iterating off the end is handled earlier.)
                if (ix == ut->chunkNativeLimit) {
                    // Current buffer extends up to the end of the string.
                    //   Leave it as the current buffer.
                    ut->chunkOffset = ut->chunkLength;
                    return FALSE;
                }
                if (ix == u8b->bufNativeLimit) {
                    // Alternate buffer extends to the end of string.
                    //   Swap it in as the current buffer.
                    goto swapBuffersAndFail;
                }

                // Neither existing buffer extends to the end of the string.
                goto makeStubBuffer;
            }

            if (ix<ut->chunkNativeStart || ix>=ut->chunkNativeLimit) {
                // Requested index is in neither buffer.
                goto fillForward;
            }

            // Requested index is in this buffer.
            u8b = (UTF8Buf *)ut->p;   // the current buffer
            mapIndex = ix - u8b->toUCharsMapStart;
            U_ASSERT(mapIndex>=0);
            U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
            ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
            return TRUE;

        }
    }


    //
    // Dispatch to the appropriate action for a
    //   Backwards Direction iteration request.
    //
    if (ix==ut->chunkNativeStart) {
        // Check for normal sequential iteration cases first.
        if (ix==0) {
            // Just reached the start of string
            // Don't swap buffers, but do set the
            //   current buffer position.
            ut->chunkOffset = 0;
            return FALSE;
        } else {
            // Start of current buffer.
            //   check whether other buffer already has what we need.
            UTF8Buf *altB = (UTF8Buf *)ut->q;
            if (ix>altB->bufNativeStart && ix<=altB->bufNativeLimit) {
                goto swapBuffers;
            }
        }
    }

    // A random access.  Desired index could be in either or neither buf.
    // For optimizing the order of testing,
    //    Most likely case:  in the other buffer.
    //    Second most likely: in neither buffer.
    //    Unlikely, but must work:  in the current buffer.
    u8b = (UTF8Buf *)ut->q;   // the alternate buffer
    if (ix>u8b->bufNativeStart && ix<=u8b->bufNativeLimit) {
        // Requested index is in the other buffer.
        goto swapBuffers;
    }
    // Requested index is start-of-string.
    //   (this is the case of randomly seeking to the start.
    //    The case of iterating off the start is handled earlier.)
    if (ix==0) {
        if (u8b->bufNativeStart==0) {
            // Alternate buffer contains the data for the start string.
            // Make it be the current buffer.
            goto swapBuffersAndFail;
        } else {
            // Request for data before the start of string,
            //   neither buffer is usable.
            //   set up a zero-length buffer.
            goto makeStubBuffer;
        }
    }

    if (ix<=ut->chunkNativeStart || ix>ut->chunkNativeLimit) {
        // Requested index is in neither buffer.
        goto fillReverse;
    }

    // Requested index is in this buffer.
    //   Set the utf16 buffer index.
    u8b = (UTF8Buf *)ut->p;
    mapIndex = ix - u8b->toUCharsMapStart;
    // Same bounds checks as the forward and swapBuffers paths.
    U_ASSERT(mapIndex>=0);
    U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
    ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
    if (ut->chunkOffset==0) {
        // This occurs when the first character in the text is
        //   a multi-byte UTF-8 char, and the requested index is to
        //   one of the trailing bytes.  Because there is no preceding
        //   character, this access fails.  We can't pick up on the
        //   situation sooner because the requested index is not zero.
        return FALSE;
    } else {
        return TRUE;
    }



swapBuffers:
    //  The alternate buffer (ut->q) has the string data that was requested.
    //  Swap the primary and alternate buffers, and set the
    //   chunk index into the new primary buffer.
    {
        u8b   = (UTF8Buf *)ut->q;
        ut->q = ut->p;
        ut->p = u8b;
        ut->chunkContents       = &u8b->buf[u8b->bufStartIdx];
        ut->chunkLength         = u8b->bufLimitIdx - u8b->bufStartIdx;
        ut->chunkNativeStart    = u8b->bufNativeStart;
        ut->chunkNativeLimit    = u8b->bufNativeLimit;
        ut->nativeIndexingLimit = u8b->bufNILimit;

        // Index into the (now current) chunk
        // Use the map to set the chunk index.  It's more trouble than it's worth
        //    to check whether native indexing can be used.
        U_ASSERT(ix>=u8b->bufNativeStart);
        U_ASSERT(ix<=u8b->bufNativeLimit);
        mapIndex = ix - u8b->toUCharsMapStart;
        U_ASSERT(mapIndex>=0);
        U_ASSERT(mapIndex<(int32_t)sizeof(u8b->mapToUChars));
        ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;

        return TRUE;
    }


 swapBuffersAndFail:
    // We got a request for either the start or end of the string,
    //  with iteration continuing in the out-of-bounds direction.
    // The alternate buffer already contains the data up to the
    //  start/end.
    // Swap the buffers, then return failure, indicating that we couldn't
    //  make things correct for continuing the iteration in the requested
    //  direction.  The position & buffer are correct should the
    //  user decide to iterate in the opposite direction.
    u8b   = (UTF8Buf *)ut->q;
    ut->q = ut->p;
    ut->p = u8b;
    ut->chunkContents       = &u8b->buf[u8b->bufStartIdx];
    ut->chunkLength         = u8b->bufLimitIdx - u8b->bufStartIdx;
    ut->chunkNativeStart    = u8b->bufNativeStart;
    ut->chunkNativeLimit    = u8b->bufNativeLimit;
    ut->nativeIndexingLimit = u8b->bufNILimit;

    // Index into the (now current) chunk
    //  For this function  (swapBuffersAndFail), the requested index
    //    will always be at either the start or end of the chunk.
    if (ix==u8b->bufNativeLimit) {
        ut->chunkOffset = ut->chunkLength;
    } else  {
        ut->chunkOffset = 0;
        U_ASSERT(ix == u8b->bufNativeStart);
    }
    return FALSE;

makeStubBuffer:
    //   The user has done a seek/access past the start or end
    //   of the string.  Rather than loading data that is likely
    //   to never be used, just set up a zero-length buffer at
    //   the position.
    u8b = (UTF8Buf *)ut->q;
    u8b->bufNativeStart   = ix;
    u8b->bufNativeLimit   = ix;
    u8b->bufStartIdx      = 0;
    u8b->bufLimitIdx      = 0;
    u8b->bufNILimit       = 0;
    u8b->toUCharsMapStart = ix;
    u8b->mapToNative[0]   = 0;
    u8b->mapToUChars[0]   = 0;
    goto swapBuffersAndFail;



fillForward:
    {
        // Move the incoming index to a code point boundary.
        U8_SET_CP_START(s8, 0, ix);

        // Swap the UText buffers.
        //  We want to fill what was previously the alternate buffer,
        //  and make what was the current buffer be the new alternate.
        UTF8Buf *u8b_swap = (UTF8Buf *)ut->q;
        ut->q = ut->p;
        ut->p = u8b_swap;

        int32_t strLen = ut->b;
        UBool   nulTerminated = FALSE;
        if (strLen < 0) {
            // Length not known yet: read up to the terminating NUL instead.
            strLen = 0x7fffffff;
            nulTerminated = TRUE;
        }

        UChar   *buf = u8b_swap->buf;
        uint8_t *mapToNative  = u8b_swap->mapToNative;
        uint8_t *mapToUChars  = u8b_swap->mapToUChars;
        int32_t  destIx       = 0;
        int32_t  srcIx        = ix;
        UBool    seenNonAscii = FALSE;
        UChar32  c = 0;

        // Fill the chunk buffer and mapping arrays.
        while (destIx<UTF8_TEXT_CHUNK_SIZE) {
            c = s8[srcIx];
            if (c>0 && c<0x80) {
                // Special case ASCII range for speed.
                //   zero is excluded to simplify bounds checking.
                buf[destIx] = (UChar)c;
                mapToNative[destIx]    = (uint8_t)(srcIx - ix);
                mapToUChars[srcIx-ix]  = (uint8_t)destIx;
                srcIx++;
                destIx++;
            } else {
                // General case, handle everything.
                if (seenNonAscii == FALSE) {
                    // Native indexing stops being usable at the first non-ASCII char.
                    seenNonAscii = TRUE;
                    u8b_swap->bufNILimit = destIx;
                }

                int32_t  cIx      = srcIx;
                int32_t  dIx      = destIx;
                int32_t  dIxSaved = destIx;
                U8_NEXT_OR_FFFD(s8, srcIx, strLen, c);
                if (c==0 && nulTerminated) {
                    // Hit the terminating NUL; it is not part of the text.
                    srcIx--;
                    break;
                }

                U16_APPEND_UNSAFE(buf, destIx, c);
                // Every UChar produced for this code point maps to its first byte.
                do {
                    mapToNative[dIx++] = (uint8_t)(cIx - ix);
                } while (dIx < destIx);

                // Every byte of this code point maps to its first UChar.
                do {
                    mapToUChars[cIx++ - ix] = (uint8_t)dIxSaved;
                } while (cIx < srcIx);
            }
            if (srcIx>=strLen) {
                break;
            }

        }

        //  store Native <--> Chunk Map entries for the end of the buffer.
        //    There is no actual character here, but the index position is valid.
        mapToNative[destIx]     = (uint8_t)(srcIx - ix);
        mapToUChars[srcIx - ix] = (uint8_t)destIx;

        //  fill in Buffer descriptor
        u8b_swap->bufNativeStart     = ix;
        u8b_swap->bufNativeLimit     = srcIx;
        u8b_swap->bufStartIdx        = 0;
        u8b_swap->bufLimitIdx        = destIx;
        if (seenNonAscii == FALSE) {
            u8b_swap->bufNILimit     = destIx;
        }
        u8b_swap->toUCharsMapStart   = u8b_swap->bufNativeStart;

        // Set UText chunk to refer to this buffer.
        ut->chunkContents       = buf;
        ut->chunkOffset         = 0;
        ut->chunkLength         = u8b_swap->bufLimitIdx;
        ut->chunkNativeStart    = u8b_swap->bufNativeStart;
        ut->chunkNativeLimit    = u8b_swap->bufNativeLimit;
        ut->nativeIndexingLimit = u8b_swap->bufNILimit;

        // For zero terminated strings, keep track of the maximum point
        //   scanned so far.
        if (nulTerminated && srcIx>ut->c) {
            ut->c = srcIx;
            if (c==0) {
                // We scanned to the end.
                //   Remember the actual length.
                ut->b = srcIx;
                ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
            }
        }
        return TRUE;
    }


fillReverse:
    {
        // Move the incoming index to a code point boundary.
        // Can only do this if the incoming index is somewhere in the interior of the string.
        //   If index is at the end, there is no character there to look at.
        if (ix != ut->b) {
            // Note: this function will only move the index back if it is on a trail byte
            //       and there is a preceding lead byte and the sequence from the lead
            //       through this trail could be part of a valid UTF-8 sequence
            //       Otherwise the index remains unchanged.
            U8_SET_CP_START(s8, 0, ix);
        }

        // Swap the UText buffers.
        //  We want to fill what was previously the alternate buffer,
        //  and make what was the current buffer be the new alternate.
        UTF8Buf *u8b_swap = (UTF8Buf *)ut->q;
        ut->q = ut->p;
        ut->p = u8b_swap;

        UChar   *buf = u8b_swap->buf;
        uint8_t *mapToNative = u8b_swap->mapToNative;
        uint8_t *mapToUChars = u8b_swap->mapToUChars;
        // Computed in signed arithmetic: the result is intentionally allowed to be
        // negative, so the subtraction must not be carried out in size_t.
        int32_t  toUCharsMapStart = ix - (int32_t)sizeof(UTF8Buf::mapToUChars) + 1;
        // Note that toUCharsMapStart can be negative. Happens when the remaining
        // text from current position to the beginning is less than the buffer size.
        // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
        int32_t  destIx = UTF8_TEXT_CHUNK_SIZE+2;   // Start in the overflow region
                                                    //   at end of buffer to leave room
                                                    //   for a surrogate pair at the
                                                    //   buffer start.
        int32_t  srcIx  = ix;
        int32_t  bufNILimit = destIx;
        UChar32   c;

        // Map to/from Native Indexes, fill in for the position at the end of
        //   the buffer.
        //
        mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
        mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;

        // Fill the chunk buffer
        // Work backwards, filling from the end of the buffer towards the front.
        //
        while (destIx>2 && (srcIx - toUCharsMapStart > 5) && (srcIx > 0)) {
            srcIx--;
            destIx--;

            // Get last byte of the UTF-8 character
            c = s8[srcIx];
            if (c<0x80) {
                // Special case ASCII range for speed.
                buf[destIx] = (UChar)c;
                U_ASSERT(toUCharsMapStart <= srcIx);
                mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
                mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
            } else {
                // General case, handle everything non-ASCII.

                int32_t  sIx      = srcIx;  // ix of last byte of multi-byte u8 char

                // Get the full character from the UTF8 string.
                //   use code derived from the macros in utf8.h
                //   Leaves srcIx pointing at the first byte of the UTF-8 char.
                //
                c=utf8_prevCharSafeBody(s8, 0, &srcIx, c, -3);
                // leaves srcIx at first byte of the multi-byte char.

                // Store the character in UTF-16 buffer.
                if (c<0x10000) {
                    buf[destIx] = (UChar)c;
                    mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
                } else {
                    buf[destIx]         = U16_TRAIL(c);
                    mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
                    buf[--destIx]       = U16_LEAD(c);
                    mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
                }

                // Fill in the map from native indexes to UChars buf index.
                do {
                    mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
                } while (sIx >= srcIx);
                U_ASSERT(toUCharsMapStart <= (srcIx+1));

                // Set native indexing limit to be the current position.
                //   We are processing a non-ascii, non-native-indexing char now;
                //     the limit will be here if the rest of the chars to be
                //     added to this buffer are ascii.
                bufNILimit = destIx;
            }
        }
        u8b_swap->bufNativeStart     = srcIx;
        u8b_swap->bufNativeLimit     = ix;
        u8b_swap->bufStartIdx        = destIx;
        u8b_swap->bufLimitIdx        = UTF8_TEXT_CHUNK_SIZE+2;
        u8b_swap->bufNILimit         = bufNILimit - u8b_swap->bufStartIdx;
        u8b_swap->toUCharsMapStart   = toUCharsMapStart;

        ut->chunkContents       = &buf[u8b_swap->bufStartIdx];
        ut->chunkLength         = u8b_swap->bufLimitIdx - u8b_swap->bufStartIdx;
        ut->chunkOffset         = ut->chunkLength;
        ut->chunkNativeStart    = u8b_swap->bufNativeStart;
        ut->chunkNativeLimit    = u8b_swap->bufNativeLimit;
        ut->nativeIndexingLimit = u8b_swap->bufNILimit;
        return TRUE;
    }

}
1409
1410
1411
1412
//
1413
//  This is a slightly modified copy of u_strFromUTF8,
1414
//     Inserts a Replacement Char rather than failing on invalid UTF-8
1415
//     Removes unnecessary features.
1416
//
1417
static UChar*
1418
utext_strFromUTF8(UChar *dest,
1419
              int32_t destCapacity,
1420
              int32_t *pDestLength,
1421
              const char* src,
1422
              int32_t srcLength,        // required.  NUL terminated not supported.
1423
              UErrorCode *pErrorCode
1424
              )
1425
0
{
1426
1427
0
    UChar *pDest = dest;
1428
0
    UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL;
1429
0
    UChar32 ch=0;
1430
0
    int32_t index = 0;
1431
0
    int32_t reqLength = 0;
1432
0
    uint8_t* pSrc = (uint8_t*) src;
1433
1434
1435
0
    while((index < srcLength)&&(pDest<pDestLimit)){
1436
0
        ch = pSrc[index++];
1437
0
        if(ch <=0x7f){
1438
0
            *pDest++=(UChar)ch;
1439
0
        }else{
1440
0
            ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -3);
1441
0
            if(U_IS_BMP(ch)){
1442
0
                *(pDest++)=(UChar)ch;
1443
0
            }else{
1444
0
                *(pDest++)=U16_LEAD(ch);
1445
0
                if(pDest<pDestLimit){
1446
0
                    *(pDest++)=U16_TRAIL(ch);
1447
0
                }else{
1448
0
                    reqLength++;
1449
0
                    break;
1450
0
                }
1451
0
            }
1452
0
        }
1453
0
    }
1454
    /* donot fill the dest buffer just count the UChars needed */
1455
0
    while(index < srcLength){
1456
0
        ch = pSrc[index++];
1457
0
        if(ch <= 0x7f){
1458
0
            reqLength++;
1459
0
        }else{
1460
0
            ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -3);
1461
0
            reqLength+=U16_LENGTH(ch);
1462
0
        }
1463
0
    }
1464
1465
0
    reqLength+=(int32_t)(pDest - dest);
1466
1467
0
    if(pDestLength){
1468
0
        *pDestLength = reqLength;
1469
0
    }
1470
1471
    /* Terminate the buffer */
1472
0
    u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
1473
1474
0
    return dest;
1475
0
}
1476
1477
1478
1479
static int32_t U_CALLCONV
1480
utf8TextExtract(UText *ut,
1481
                int64_t start, int64_t limit,
1482
                UChar *dest, int32_t destCapacity,
1483
0
                UErrorCode *pErrorCode) {
1484
0
    if(U_FAILURE(*pErrorCode)) {
1485
0
        return 0;
1486
0
    }
1487
0
    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
1488
0
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1489
0
        return 0;
1490
0
    }
1491
0
    int32_t  length  = ut->b;
1492
0
    int32_t  start32 = pinIndex(start, length);
1493
0
    int32_t  limit32 = pinIndex(limit, length);
1494
1495
0
    if(start32>limit32) {
1496
0
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1497
0
        return 0;
1498
0
    }
1499
1500
1501
    // adjust the incoming indexes to land on code point boundaries if needed.
1502
    //    adjust by no more than three, because that is the largest number of trail bytes
1503
    //    in a well formed UTF8 character.
1504
0
    const uint8_t *buf = (const uint8_t *)ut->context;
1505
0
    int i;
1506
0
    if (start32 < ut->chunkNativeLimit) {
1507
0
        for (i=0; i<3; i++) {
1508
0
            if (U8_IS_SINGLE(buf[start32]) || U8_IS_LEAD(buf[start32]) || start32==0) {
1509
0
                break;
1510
0
            }
1511
0
            start32--;
1512
0
        }
1513
0
    }
1514
1515
0
    if (limit32 < ut->chunkNativeLimit) {
1516
0
        for (i=0; i<3; i++) {
1517
0
            if (U8_IS_SINGLE(buf[limit32]) || U8_IS_LEAD(buf[limit32]) || limit32==0) {
1518
0
                break;
1519
0
            }
1520
0
            limit32--;
1521
0
        }
1522
0
    }
1523
1524
    // Do the actual extract.
1525
0
    int32_t destLength=0;
1526
0
    utext_strFromUTF8(dest, destCapacity, &destLength,
1527
0
                    (const char *)ut->context+start32, limit32-start32,
1528
0
                    pErrorCode);
1529
0
    utf8TextAccess(ut, limit32, TRUE);
1530
0
    return destLength;
1531
0
}
1532
1533
//
1534
// utf8TextMapOffsetToNative
1535
//
1536
// Map a chunk (UTF-16) offset to a native index.
1537
static int64_t U_CALLCONV
1538
0
utf8TextMapOffsetToNative(const UText *ut) {
1539
    //
1540
0
    UTF8Buf *u8b = (UTF8Buf *)ut->p;
1541
0
    U_ASSERT(ut->chunkOffset>ut->nativeIndexingLimit && ut->chunkOffset<=ut->chunkLength);
1542
0
    int32_t nativeOffset = u8b->mapToNative[ut->chunkOffset + u8b->bufStartIdx] + u8b->toUCharsMapStart;
1543
0
    U_ASSERT(nativeOffset >= ut->chunkNativeStart && nativeOffset <= ut->chunkNativeLimit);
1544
0
    return nativeOffset;
1545
0
}
1546
1547
//
1548
// Map a native index to the corresponding chunk offset
1549
//
1550
static int32_t U_CALLCONV
1551
0
utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) {
1552
0
    U_ASSERT(index64 <= 0x7fffffff);
1553
0
    int32_t index = (int32_t)index64;
1554
0
    UTF8Buf *u8b = (UTF8Buf *)ut->p;
1555
0
    U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit);
1556
0
    U_ASSERT(index<=ut->chunkNativeLimit);
1557
0
    int32_t mapIndex = index - u8b->toUCharsMapStart;
1558
0
    U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
1559
0
    int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
1560
0
    U_ASSERT(offset>=0 && offset<=ut->chunkLength);
1561
0
    return offset;
1562
0
}
1563
1564
//
//  utf8TextClone
//
//     UText clone function for the UTF-8 provider.
//
//     dest     UText to clone into, passed through to shallowTextClone().
//     src      UText to clone.
//     deep     if TRUE, the clone gets its own copy of the string, which it owns
//              (UTEXT_PROVIDER_OWNS_TEXT is set and utf8TextClose() frees it).
//     status   in/out ICU error code; set to U_MEMORY_ALLOCATION_ERROR if the
//              copy of the string cannot be allocated.
//
//     Returns the result of shallowTextClone().
//
static UText * U_CALLCONV
utf8TextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
{
    // First do a generic shallow clone.  Does everything needed for the UText struct itself.
    dest = shallowTextClone(dest, src, status);

    // For deep clones, make a copy of the string.
    //  The copied storage is owned by the newly created clone.
    //
    // TODO:  There is an issue with using utext_nativeLength().
    //        That function is non-const in cases where the input was NUL terminated
    //          and the length has not yet been determined.
    //        This function (clone()) is const.
    //        There potentially a thread safety issue lurking here.
    //
    if (deep && U_SUCCESS(*status)) {
        int32_t  len = (int32_t)utext_nativeLength((UText *)src);
        // Size computed in size_t so that len+1 cannot overflow int32_t.
        char *copyStr = (char *)uprv_malloc((size_t)len + 1);
        if (copyStr == NULL) {
            *status = U_MEMORY_ALLOCATION_ERROR;
        } else {
            // Copy exactly len bytes and terminate explicitly.  A source opened
            // with an explicit length need not have a readable byte at
            // context[len], so it must not be read.
            uprv_memcpy(copyStr, src->context, len);
            copyStr[len] = 0;
            dest->context = copyStr;
            dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
        }
    }
    return dest;
}
1592
1593
1594
static void U_CALLCONV
1595
0
utf8TextClose(UText *ut) {
1596
    // Most of the work of close is done by the generic UText framework close.
1597
    // All that needs to be done here is to delete the UTF8 string if the UText
1598
    //  owns it.  This occurs if the UText was created by cloning.
1599
0
    if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
1600
0
        char *s = (char *)ut->context;
1601
0
        uprv_free(s);
1602
0
        ut->context = NULL;
1603
0
    }
1604
0
}
1605
1606
U_CDECL_END
1607
1608
1609
static const struct UTextFuncs utf8Funcs =
1610
{
1611
    sizeof(UTextFuncs),
1612
    0, 0, 0,             // Reserved alignment padding
1613
    utf8TextClone,
1614
    utf8TextLength,
1615
    utf8TextAccess,
1616
    utf8TextExtract,
1617
    NULL,                /* replace*/
1618
    NULL,                /* copy   */
1619
    utf8TextMapOffsetToNative,
1620
    utf8TextMapIndexToUTF16,
1621
    utf8TextClose,
1622
    NULL,                // spare 1
1623
    NULL,                // spare 2
1624
    NULL                 // spare 3
1625
};
1626
1627
1628
// One-byte, NUL-only string substituted when a caller opens a NULL pointer with length 0.
static const char gEmptyString[] = "";
1629
1630
U_CAPI UText * U_EXPORT2
utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status) {
    // Open a UText over UTF-8 text.
    //   s       the text; NULL is accepted only together with length 0.
    //   length  byte length, or -1 for NUL-terminated text of unknown length.
    //   status  set to U_ILLEGAL_ARGUMENT_ERROR for a NULL string with a
    //           non-zero length, a length below -1, or a length above INT32_MAX.
    // Returns NULL on entry failure or bad arguments, otherwise the result of utext_setup.
    if (U_FAILURE(*status)) {
        return NULL;
    }

    // NULL with a zero length stands for the empty string.
    if (s == NULL && length == 0) {
        s = gEmptyString;
    }

    const UBool badArgs = (s == NULL) || (length < -1) || (length > INT32_MAX);
    if (badArgs) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    // Extra storage is sized for two UTF8Buf structures; p and q below
    // point at the first and second of them.
    ut = utext_setup(ut, sizeof(UTF8Buf) * 2, status);
    if (U_FAILURE(*status)) {
        return ut;
    }

    const int32_t length32 = (int32_t)length;
    ut->pFuncs  = &utf8Funcs;
    ut->context = s;
    ut->b       = length32;
    if (length32 < 0) {
        // Length not known yet: start c at zero and flag the length as expensive.
        ut->c = 0;
        ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
    } else {
        ut->c = length32;
    }
    ut->p = ut->pExtra;
    ut->q = (char *)ut->pExtra + sizeof(UTF8Buf);
    return ut;
}
1662
1663
1664
1665
1666
1667
1668
1669
1670
//------------------------------------------------------------------------------
1671
//
1672
//     UText implementation wrapper for Replaceable (read/write)
1673
//
1674
//         Use of UText data members:
1675
//            context    pointer to Replaceable.
1676
//            p          pointer to Replaceable if it is owned by the UText.
1677
//
1678
//------------------------------------------------------------------------------
1679
1680
1681
1682
// minimum chunk size for this implementation: 3
1683
// to allow for possible trimming for code point boundaries
1684
enum { REP_TEXT_CHUNK_SIZE=10 };
1685
1686
struct ReplExtra {
1687
    /*
1688
     * Chunk UChars.
1689
     * +1 to simplify filling with surrogate pair at the end.
1690
     */
1691
    UChar s[REP_TEXT_CHUNK_SIZE+1];
1692
};
1693
1694
1695
U_CDECL_BEGIN
1696
1697
static UText * U_CALLCONV
repTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
    // Clone provider function for Replaceable-backed text.
    //   deep    when true, the clone gets its own copy of the Replaceable.
    //   status  receives U_MEMORY_ALLOCATION_ERROR if the Replaceable could
    //           not be cloned.
    // Returns the UText produced by shallowTextClone.

    // First do a generic shallow clone.  Does everything needed for the UText struct itself.
    dest = shallowTextClone(dest, src, status);

    // For deep clones, make a copy of the Replaceable.
    //  The copied Replaceable storage is owned by the newly created UText clone.
    //  The UTEXT_PROVIDER_OWNS_TEXT flag is the signal to the close() function
    //    to delete it.
    //
    if (deep && U_SUCCESS(*status)) {
        const Replaceable *replSrc = (const Replaceable *)src->context;
        Replaceable *replCopy = replSrc->clone();
        if (replCopy == NULL) {
            // clone() produced nothing.  Do not install a NULL context or
            // claim ownership; the other provider functions dereference
            // the context without checking it.
            *status = U_MEMORY_ALLOCATION_ERROR;
        } else {
            dest->context = replCopy;
            dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);

            // with deep clone, the copy is writable, even when the source is not.
            dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
        }
    }
    return dest;
}
1717
1718
1719
static void U_CALLCONV
1720
0
repTextClose(UText *ut) {
1721
    // Most of the work of close is done by the generic UText framework close.
1722
    // All that needs to be done here is delete the Replaceable if the UText
1723
    //  owns it.  This occurs if the UText was created by cloning.
1724
0
    if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
1725
0
        Replaceable *rep = (Replaceable *)ut->context;
1726
0
        delete rep;
1727
0
        ut->context = NULL;
1728
0
    }
1729
0
}
1730
1731
1732
static int64_t U_CALLCONV
repTextLength(UText *ut) {
    // The native length is whatever the underlying Replaceable reports.
    return ((const Replaceable *)ut->context)->length();
}
1738
1739
1740
static UBool U_CALLCONV
repTextAccess(UText *ut, int64_t index, UBool forward) {
    // Access provider function for Replaceable-backed text: make the chunk
    // buffer cover the text at (forward) or just before (backward) the
    // requested index and set chunkOffset accordingly.
    // Returns FALSE only when the request is at the end (forward) or start
    // (backward) of the text and the existing chunk already reaches it.
    const Replaceable *text = (const Replaceable *)ut->context;
    const int32_t textLength = text->length();   // whole text, not just a chunk

    // Clip the requested index to the limits of the text.
    const int32_t index32 = pinIndex(index, textLength);
    U_ASSERT(index<=INT32_MAX);

    /*
     * Work out the start/limit of the segment to extract around the index.
     * In the forward direction one extra preceding UChar is requested, so
     * that an index given on the trailing half of a surrogate pair still
     * has its whole code point in the buffer.
     */
    if (forward) {
        const UBool alreadyInChunk =
            index32>=ut->chunkNativeStart && index32<ut->chunkNativeLimit;
        if (alreadyInChunk) {
            ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
            return TRUE;
        }
        if (index32>=textLength && ut->chunkNativeLimit==textLength) {
            // End of string requested and the buffer already reaches it.
            // There is no data to return; leave the buffer alone.
            ut->chunkOffset = textLength - (int32_t)ut->chunkNativeStart;
            return FALSE;
        }

        // The -1 leaves room for one UChar before the requested index.
        int64_t newLimit = index + REP_TEXT_CHUNK_SIZE - 1;
        if (newLimit > textLength) {
            newLimit = textLength;
        }
        // Unless the buffer ran off the end, the start is index-1.
        int64_t newStart = newLimit - REP_TEXT_CHUNK_SIZE;
        if (newStart < 0) {
            newStart = 0;
        }
        ut->chunkNativeLimit = newLimit;
        ut->chunkNativeStart = newStart;
    } else {
        // Reverse iteration: the buffer should hold the data before the index.
        const UBool alreadyInChunk =
            index32>ut->chunkNativeStart && index32<=ut->chunkNativeLimit;
        if (alreadyInChunk) {
            ut->chunkOffset = index32 - (int32_t)ut->chunkNativeStart;
            return TRUE;
        }
        if (index32==0 && ut->chunkNativeStart==0) {
            // Start of string requested and the buffer already begins there.
            // No data, but keep the buffer as it is.
            ut->chunkOffset = 0;
            return FALSE;
        }

        // Ask for one extra UChar at the end.  If it turns out to be a lead
        // surrogate it is trimmed below, and the wanted data is still present.
        int64_t newStart = index32 + 1 - REP_TEXT_CHUNK_SIZE;
        if (newStart < 0) {
            newStart = 0;
        }
        int64_t newLimit = index32 + 1;
        if (newLimit > textLength) {
            newLimit = textLength;
        }
        ut->chunkNativeStart = newStart;
        ut->chunkNativeLimit = newLimit;
    }

    // Pull the new chunk out of the Replaceable, through a UnicodeString
    // that writably aliases the chunk buffer.
    ReplExtra *extra = (ReplExtra *)ut->pExtra;
    UnicodeString chunkAlias(extra->s, 0 /*buffer length*/, REP_TEXT_CHUNK_SIZE /*buffer capacity*/);
    text->extractBetween((int32_t)ut->chunkNativeStart, (int32_t)ut->chunkNativeLimit, chunkAlias);

    ut->chunkContents  = extra->s;
    ut->chunkLength    = (int32_t)(ut->chunkNativeLimit - ut->chunkNativeStart);
    ut->chunkOffset    = (int32_t)(index32 - ut->chunkNativeStart);

    // A surrogate pair must not straddle a chunk boundary: drop a lead
    // surrogate sitting at the very end of a chunk that is not the text end.
    if (ut->chunkNativeLimit < textLength &&
        U16_IS_LEAD(extra->s[ut->chunkLength-1])) {
        ut->chunkLength--;
        ut->chunkNativeLimit--;
        if (ut->chunkOffset > ut->chunkLength) {
            ut->chunkOffset = ut->chunkLength;
        }
    }

    // Likewise drop a trail surrogate at the very beginning of a chunk that
    // does not begin at the start of the text.
    if (ut->chunkNativeStart>0 && U16_IS_TRAIL(extra->s[0])) {
        ++(ut->chunkContents);
        ++(ut->chunkNativeStart);
        --(ut->chunkLength);
        --(ut->chunkOffset);
    }

    // Move chunkOffset back to a code point boundary if needed.
    U16_SET_CP_START(ut->chunkContents, 0, ut->chunkOffset);

    // Use fast indexing for get/setNativeIndex()
    ut->nativeIndexingLimit = ut->chunkLength;

    return TRUE;
}
1854
1855
1856
1857
static int32_t U_CALLCONV
repTextExtract(UText *ut,
               int64_t start, int64_t limit,
               UChar *dest, int32_t destCapacity,
               UErrorCode *status) {
    // Extract provider function for Replaceable-backed text.
    //   start, limit   native range to extract; pinned to the text length.
    //   dest, destCapacity  output buffer; dest may be NULL only with capacity 0.
    //   status  U_ILLEGAL_ARGUMENT_ERROR for a bad buffer description,
    //           U_INDEX_OUTOFBOUNDS_ERROR when start > limit.
    // Returns 0 on any of those errors, otherwise the value of
    // u_terminateUChars() for the (boundary-adjusted) range length.
    const Replaceable *rep=(const Replaceable *)ut->context;
    int32_t  length=rep->length();

    if(U_FAILURE(*status)) {
        return 0;
    }
    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        // Stop here, like the other argument errors in this file's extract and
        // replace functions: the buffer description is unusable, so nothing
        // must be written through it.
        return 0;
    }
    if(start>limit) {
        *status=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    int32_t  start32 = pinIndex(start, length);
    int32_t  limit32 = pinIndex(limit, length);

    // adjust start, limit if they point to trail half of surrogates
    if (start32<length && U16_IS_TRAIL(rep->charAt(start32)) &&
        U_IS_SUPPLEMENTARY(rep->char32At(start32))){
            start32--;
    }
    if (limit32<length && U16_IS_TRAIL(rep->charAt(limit32)) &&
        U_IS_SUPPLEMENTARY(rep->char32At(limit32))){
            limit32--;
    }

    // From here on, length is the length of the requested range.
    length=limit32-start32;
    if(length>destCapacity) {
        // Only as much as fits is actually copied out.
        limit32 = start32 + destCapacity;
    }
    UnicodeString buffer(dest, 0, destCapacity); // writable alias
    rep->extractBetween(start32, limit32, buffer);
    // Leave the iteration position at the end of what was extracted.
    repTextAccess(ut, limit32, TRUE);

    return u_terminateUChars(dest, destCapacity, length, status);
}
1899
1900
static int32_t U_CALLCONV
repTextReplace(UText *ut,
               int64_t start, int64_t limit,
               const UChar *src, int32_t length,
               UErrorCode *status) {
    // Replace provider function for Replaceable-backed text.
    //   start, limit  native range to replace; pinned to the text length and
    //                 snapped outward to code point boundaries.
    //   src, length   replacement text; a negative length is passed on as
    //                 "NUL-terminated" to the UnicodeString alias.
    //   status  U_ILLEGAL_ARGUMENT_ERROR for NULL src with non-zero length,
    //           U_INDEX_OUTOFBOUNDS_ERROR when start > limit.
    // Returns the change in text length (0 on error).
    Replaceable *rep=(Replaceable *)ut->context;
    int32_t oldLength;

    if(U_FAILURE(*status)) {
        return 0;
    }
    if(src==NULL && length!=0) {
        *status=U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    oldLength=rep->length(); // will subtract from new length
    if(start>limit ) {
        *status=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    int32_t start32 = pinIndex(start, oldLength);
    int32_t limit32 = pinIndex(limit, oldLength);

    // Snap start & limit to code point boundaries.
    if (start32<oldLength && U16_IS_TRAIL(rep->charAt(start32)) &&
        start32>0 && U16_IS_LEAD(rep->charAt(start32-1)))
    {
            start32--;
    }
    // limit32>0 keeps charAt(limit32-1) from being asked for index -1 when
    // the limit is at the very start of the text (mirrors the start32>0
    // guard above).
    if (limit32>0 && limit32<oldLength && U16_IS_LEAD(rep->charAt(limit32-1)) &&
        U16_IS_TRAIL(rep->charAt(limit32)))
    {
            limit32++;
    }

    // Do the actual replace operation using methods of the Replaceable class
    UnicodeString replStr((UBool)(length<0), src, length); // read-only alias
    rep->handleReplaceBetween(start32, limit32, replStr);
    int32_t newLength = rep->length();
    int32_t lengthDelta = newLength - oldLength;

    // Is the UText chunk buffer OK?
    if (ut->chunkNativeLimit > start32) {
        // this replace operation may have impacted the current chunk.
        // invalidate it, which will force a reload on the next access.
        invalidateChunk(ut);
    }

    // set the iteration position to the end of the newly inserted replacement text.
    int32_t newIndexPos = limit32 + lengthDelta;
    repTextAccess(ut, newIndexPos, TRUE);

    return lengthDelta;
}
1955
1956
1957
static void U_CALLCONV
1958
repTextCopy(UText *ut,
1959
                int64_t start, int64_t limit,
1960
                int64_t destIndex,
1961
                UBool move,
1962
                UErrorCode *status)
1963
0
{
1964
0
    Replaceable *rep=(Replaceable *)ut->context;
1965
0
    int32_t length=rep->length();
1966
1967
0
    if(U_FAILURE(*status)) {
1968
0
        return;
1969
0
    }
1970
0
    if (start>limit || (start<destIndex && destIndex<limit))
1971
0
    {
1972
0
        *status=U_INDEX_OUTOFBOUNDS_ERROR;
1973
0
        return;
1974
0
    }
1975
1976
0
    int32_t start32     = pinIndex(start, length);
1977
0
    int32_t limit32     = pinIndex(limit, length);
1978
0
    int32_t destIndex32 = pinIndex(destIndex, length);
1979
1980
    // TODO:  snap input parameters to code point boundaries.
1981
1982
0
    if(move) {
1983
        // move: copy to destIndex, then replace original with nothing
1984
0
        int32_t segLength=limit32-start32;
1985
0
        rep->copy(start32, limit32, destIndex32);
1986
0
        if(destIndex32<start32) {
1987
0
            start32+=segLength;
1988
0
            limit32+=segLength;
1989
0
        }
1990
0
        rep->handleReplaceBetween(start32, limit32, UnicodeString());
1991
0
    } else {
1992
        // copy
1993
0
        rep->copy(start32, limit32, destIndex32);
1994
0
    }
1995
1996
    // If the change to the text touched the region in the chunk buffer,
1997
    //  invalidate the buffer.
1998
0
    int32_t firstAffectedIndex = destIndex32;
1999
0
    if (move && start32<firstAffectedIndex) {
2000
0
        firstAffectedIndex = start32;
2001
0
    }
2002
0
    if (firstAffectedIndex < ut->chunkNativeLimit) {
2003
        // changes may have affected range covered by the chunk
2004
0
        invalidateChunk(ut);
2005
0
    }
2006
2007
    // Put iteration position at the newly inserted (moved) block,
2008
0
    int32_t  nativeIterIndex = destIndex32 + limit32 - start32;
2009
0
    if (move && destIndex32>start32) {
2010
        // moved a block of text towards the end of the string.
2011
0
        nativeIterIndex = destIndex32;
2012
0
    }
2013
2014
    // Set position, reload chunk if needed.
2015
0
    repTextAccess(ut, nativeIterIndex, TRUE);
2016
0
}
2017
2018
static const struct UTextFuncs repFuncs =
2019
{
2020
    sizeof(UTextFuncs),
2021
    0, 0, 0,           // Reserved alignment padding
2022
    repTextClone,
2023
    repTextLength,
2024
    repTextAccess,
2025
    repTextExtract,
2026
    repTextReplace,
2027
    repTextCopy,
2028
    NULL,              // MapOffsetToNative,
2029
    NULL,              // MapIndexToUTF16,
2030
    repTextClose,
2031
    NULL,              // spare 1
2032
    NULL,              // spare 2
2033
    NULL               // spare 3
2034
};
2035
2036
2037
U_CAPI UText * U_EXPORT2
utext_openReplaceable(UText *ut, Replaceable *rep, UErrorCode *status)
{
    // Open a writable UText over a Replaceable.
    //   rep     the text; must not be NULL (U_ILLEGAL_ARGUMENT_ERROR otherwise).
    // Returns NULL on entry failure or NULL rep, otherwise the result of utext_setup.
    if (U_FAILURE(*status)) {
        return NULL;
    }
    if (rep == NULL) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    // The extra storage holds the chunk buffer.
    ut = utext_setup(ut, sizeof(ReplExtra), status);
    if (U_FAILURE(*status)) {
        return ut;
    }

    // Always writable; metadata support is advertised only when the
    // Replaceable reports having it.
    int32_t properties = I32_FLAG(UTEXT_PROVIDER_WRITABLE);
    if (rep->hasMetaData()) {
        properties |= I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA);
    }
    ut->providerProperties = properties;

    ut->pFuncs  = &repFuncs;
    ut->context = rep;
    return ut;
}
2061
2062
U_CDECL_END
2063
2064
2065
2066
2067
2068
2069
2070
2071
//------------------------------------------------------------------------------
2072
//
2073
//     UText implementation for UnicodeString (read/write)  and
2074
//                    for const UnicodeString (read only)
2075
//             (same implementation, only the flags are different)
2076
//
2077
//         Use of UText data members:
2078
//            context    pointer to UnicodeString
2079
//            p          pointer to UnicodeString IF this UText owns the string
2080
//                       and it must be deleted on close().  NULL otherwise.
2081
//
2082
//------------------------------------------------------------------------------
2083
2084
U_CDECL_BEGIN
2085
2086
2087
static UText * U_CALLCONV
unistrTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status) {
    // Clone provider function for UnicodeString-backed text.
    //   deep    when true, the clone gets its own copy of the UnicodeString.
    //   status  receives U_MEMORY_ALLOCATION_ERROR if the copy cannot be allocated.
    // Returns the UText produced by shallowTextClone.

    // First do a generic shallow clone.  Does everything needed for the UText struct itself.
    dest = shallowTextClone(dest, src, status);

    // For deep clones, make a copy of the UnicodeString.
    //  The copied UnicodeString storage is owned by the newly created UText clone.
    //  The UTEXT_PROVIDER_OWNS_TEXT flag is the signal to the close() function
    //    to delete the UnicodeString.
    //
    if (deep && U_SUCCESS(*status)) {
        const UnicodeString *srcString = (const UnicodeString *)src->context;
        UnicodeString *copyString = new UnicodeString(*srcString);
        if (copyString == NULL) {
            // Allocation failed.  Do not install a NULL context or claim
            // ownership; the other provider functions dereference the
            // context without checking it.
            *status = U_MEMORY_ALLOCATION_ERROR;
        } else {
            dest->context = copyString;
            dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);

            // with deep clone, the copy is writable, even when the source is not.
            dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
        }
    }
    return dest;
}
2107
2108
static void U_CALLCONV
2109
0
unistrTextClose(UText *ut) {
2110
    // Most of the work of close is done by the generic UText framework close.
2111
    // All that needs to be done here is delete the UnicodeString if the UText
2112
    //  owns it.  This occurs if the UText was created by cloning.
2113
0
    if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
2114
0
        UnicodeString *str = (UnicodeString *)ut->context;
2115
0
        delete str;
2116
0
        ut->context = NULL;
2117
0
    }
2118
0
}
2119
2120
2121
static int64_t U_CALLCONV
unistrTextLength(UText *t) {
    // The native length is the length of the underlying UnicodeString.
    const UnicodeString *str = (const UnicodeString *)t->context;
    return str->length();
}
2125
2126
2127
static UBool U_CALLCONV
unistrTextAccess(UText *ut, int64_t index, UBool  forward) {
    // Access provider function for UnicodeString-backed text.
    // No chunk is loaded here; only the position within the chunk is updated.
    const int32_t textLength = ut->chunkLength;
    ut->chunkOffset = pinIndex(index, textLength);

    // Report whether there is text in the requested direction: forward
    // access fails at the end, backward access fails at the start.
    if (forward) {
        return index<textLength;
    }
    return index>0;
}
2136
2137
2138
2139
static int32_t U_CALLCONV
unistrTextExtract(UText *t,
                  int64_t start, int64_t limit,
                  UChar *dest, int32_t destCapacity,
                  UErrorCode *pErrorCode) {
    // Extract provider function for UnicodeString-backed text.
    //   start, limit   native range to extract; moved back to code point
    //                  starts, or clipped to the string length.
    //   dest, destCapacity  output buffer; dest may be NULL only with capacity 0.
    //   pErrorCode  U_ILLEGAL_ARGUMENT_ERROR for a bad buffer description,
    //               U_INDEX_OUTOFBOUNDS_ERROR when start < 0 or start > limit.
    // Returns 0 on any of those errors, otherwise the full length of the
    // requested range (which may exceed destCapacity).
    const UnicodeString *us=(const UnicodeString *)t->context;
    int32_t length=us->length();

    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(destCapacity<0 || (dest==NULL && destCapacity>0)) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        // Stop here, like the index error below: with an unusable buffer
        // description there is nothing meaningful to extract or report.
        return 0;
    }
    if(start<0 || start>limit) {
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }

    int32_t start32 = start<length ? us->getChar32Start((int32_t)start) : length;
    int32_t limit32 = limit<length ? us->getChar32Start((int32_t)limit) : length;

    // From here on, length is the length of the requested range.
    length=limit32-start32;
    if (destCapacity>0 && dest!=NULL) {
        // Copy out only as much as fits.
        int32_t trimmedLength = length;
        if(trimmedLength>destCapacity) {
            trimmedLength=destCapacity;
        }
        us->extract(start32, trimmedLength, dest);
        // Leave the iteration position after the extracted text.
        t->chunkOffset = start32+trimmedLength;
    } else {
        t->chunkOffset = start32;
    }
    u_terminateUChars(dest, destCapacity, length, pErrorCode);
    return length;
}
2175
2176
static int32_t U_CALLCONV
unistrTextReplace(UText *ut,
                  int64_t start, int64_t limit,
                  const UChar *src, int32_t length,
                  UErrorCode *pErrorCode) {
    // Replace provider function for UnicodeString-backed text.
    //   start, limit  native range to replace; pinned to the string length
    //                 and moved back to code point starts.
    //   src, length   replacement text.
    //   pErrorCode  U_ILLEGAL_ARGUMENT_ERROR for NULL src with non-zero length,
    //               U_INDEX_OUTOFBOUNDS_ERROR when start > limit.
    // Returns the change in string length (0 on error).
    UnicodeString *us=(UnicodeString *)ut->context;
    int32_t oldLength;

    if(U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if(src==NULL && length!=0) {
        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
        // Stop here, as repTextReplace does: falling through would still
        // modify the string although an error is being reported.
        return 0;
    }
    if(start>limit) {
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
        return 0;
    }
    oldLength=us->length();
    int32_t start32 = pinIndex(start, oldLength);
    int32_t limit32 = pinIndex(limit, oldLength);
    if (start32 < oldLength) {
        start32 = us->getChar32Start(start32);
    }
    if (limit32 < oldLength) {
        limit32 = us->getChar32Start(limit32);
    }

    // replace
    us->replace(start32, limit32-start32, src, length);
    int32_t newLength = us->length();

    // Update the chunk description; the whole string is the chunk, and its
    // buffer may have changed.
    ut->chunkContents    = us->getBuffer();
    ut->chunkLength      = newLength;
    ut->chunkNativeLimit = newLength;
    ut->nativeIndexingLimit = newLength;

    // Set iteration position to the point just following the newly inserted text.
    int32_t lengthDelta = newLength - oldLength;
    ut->chunkOffset = limit32 + lengthDelta;

    return lengthDelta;
}
2220
2221
static void U_CALLCONV
2222
unistrTextCopy(UText *ut,
2223
               int64_t start, int64_t limit,
2224
               int64_t destIndex,
2225
               UBool move,
2226
0
               UErrorCode *pErrorCode) {
2227
0
    UnicodeString *us=(UnicodeString *)ut->context;
2228
0
    int32_t length=us->length();
2229
2230
0
    if(U_FAILURE(*pErrorCode)) {
2231
0
        return;
2232
0
    }
2233
0
    int32_t start32 = pinIndex(start, length);
2234
0
    int32_t limit32 = pinIndex(limit, length);
2235
0
    int32_t destIndex32 = pinIndex(destIndex, length);
2236
2237
0
    if( start32>limit32 || (start32<destIndex32 && destIndex32<limit32)) {
2238
0
        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
2239
0
        return;
2240
0
    }
2241
2242
0
    if(move) {
2243
        // move: copy to destIndex, then remove original
2244
0
        int32_t segLength=limit32-start32;
2245
0
        us->copy(start32, limit32, destIndex32);
2246
0
        if(destIndex32<start32) {
2247
0
            start32+=segLength;
2248
0
        }
2249
0
        us->remove(start32, segLength);
2250
0
    } else {
2251
        // copy
2252
0
        us->copy(start32, limit32, destIndex32);
2253
0
    }
2254
2255
    // update chunk description, set iteration position.
2256
0
    ut->chunkContents = us->getBuffer();
2257
0
    if (move==FALSE) {
2258
        // copy operation, string length grows
2259
0
        ut->chunkLength += limit32-start32;
2260
0
        ut->chunkNativeLimit = ut->chunkLength;
2261
0
        ut->nativeIndexingLimit = ut->chunkLength;
2262
0
    }
2263
2264
    // Iteration position to end of the newly inserted text.
2265
0
    ut->chunkOffset = destIndex32+limit32-start32;
2266
0
    if (move && destIndex32>start32) {
2267
0
        ut->chunkOffset = destIndex32;
2268
0
    }
2269
2270
0
}
2271
2272
static const struct UTextFuncs unistrFuncs =
2273
{
2274
    sizeof(UTextFuncs),
2275
    0, 0, 0,             // Reserved alignment padding
2276
    unistrTextClone,
2277
    unistrTextLength,
2278
    unistrTextAccess,
2279
    unistrTextExtract,
2280
    unistrTextReplace,
2281
    unistrTextCopy,
2282
    NULL,                // MapOffsetToNative,
2283
    NULL,                // MapIndexToUTF16,
2284
    unistrTextClose,
2285
    NULL,                // spare 1
2286
    NULL,                // spare 2
2287
    NULL                 // spare 3
2288
};
2289
2290
2291
2292
U_CDECL_END
2293
2294
2295
U_CAPI UText * U_EXPORT2
utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
    // Open exactly as for a const UnicodeString, then mark the result
    // writable if the open succeeded.
    ut = utext_openConstUnicodeString(ut, s, status);
    if (U_FAILURE(*status)) {
        return ut;
    }
    ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
    return ut;
}
2303
2304
2305
2306
U_CAPI UText * U_EXPORT2
utext_openConstUnicodeString(UText *ut, const UnicodeString *s, UErrorCode *status) {
    // Open a read-only UText over a UnicodeString.
    //   ut      UText to (re)use; passed through to utext_setup.
    //   s       the string; a NULL or bogus string is rejected with
    //           U_ILLEGAL_ARGUMENT_ERROR.
    // Returns ut unchanged in identity on the rejected-string path, otherwise
    // the result of utext_setup.
    if (U_SUCCESS(*status) && (s == NULL || s->isBogus())) {
        // The UnicodeString is unusable, but we still need to detach the UText
        //   from whatever it was hooked to before, if anything.
        // With no incoming UText there is nothing to detach, and the call is
        //   skipped: its result is not kept here, so a UText it created
        //   (presumably what happens for a NULL ut - confirm against
        //   utext_setup) could never be closed.
        if (ut != NULL) {
            utext_openUChars(ut, NULL, 0, status);
        }
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return ut;
    }
    ut = utext_setup(ut, 0, status);
    //    note:  use the standard (writable) function table for UnicodeString.
    //           The flag settings disable writing, so having the functions in
    //           the table is harmless.
    if (U_SUCCESS(*status)) {
        ut->pFuncs              = &unistrFuncs;
        ut->context             = s;
        ut->providerProperties  = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
        // The whole string is presented as one chunk.
        ut->chunkContents       = s->getBuffer();
        ut->chunkLength         = s->length();
        ut->chunkNativeStart    = 0;
        ut->chunkNativeLimit    = ut->chunkLength;
        ut->nativeIndexingLimit = ut->chunkLength;
    }
    return ut;
}
2331
2332
//------------------------------------------------------------------------------
2333
//
2334
//     UText implementation for const UChar * strings
2335
//
2336
//         Use of UText data members:
2337
//            context    pointer to the const UChar string
2338
//            a          length.  -1 if not yet known.
2339
//
2340
//         TODO:  support 64 bit lengths.
2341
//
2342
//------------------------------------------------------------------------------
2343
2344
U_CDECL_BEGIN
2345
2346
2347
static UText * U_CALLCONV
ucstrTextClone(UText *dest, const UText * src, UBool deep, UErrorCode * status) {
    // Clone provider function for UChar-string text.
    //   deep    when true, the clone gets its own private copy of the string.
    //   status  receives U_MEMORY_ALLOCATION_ERROR if the copy cannot be allocated.
    // Returns the UText produced by shallowTextClone.

    // Start from a generic shallow clone.
    dest = shallowTextClone(dest, src, status);

    // Nothing more to do unless a deep clone was asked for and all is well so far.
    if (!deep || U_FAILURE(*status)) {
        return dest;
    }

    // Deep clone: copy the string.  The clone owns the copy, and the
    // UTEXT_PROVIDER_OWNS_TEXT flag tells close() to free it.
    U_ASSERT(utext_nativeLength(dest) < INT32_MAX);
    const int32_t len = (int32_t)utext_nativeLength(dest);

    const UChar *original = (const UChar *)src->context;
    UChar *duplicate = (UChar *)uprv_malloc((len+1) * sizeof(UChar));
    if (duplicate == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return dest;
    }

    for (int32_t pos = 0; pos < len; ++pos) {
        duplicate[pos] = original[pos];
    }
    // The copy is always NUL terminated, whether or not the original was.
    duplicate[len] = 0;

    dest->context = duplicate;
    dest->providerProperties |= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
    return dest;
}
2378
2379
2380
static void U_CALLCONV
2381
0
ucstrTextClose(UText *ut) {
2382
    // Most of the work of close is done by the generic UText framework close.
2383
    // All that needs to be done here is delete the string if the UText
2384
    //  owns it.  This occurs if the UText was created by cloning.
2385
0
    if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
2386
0
        UChar *s = (UChar *)ut->context;
2387
0
        uprv_free(s);
2388
0
        ut->context = NULL;
2389
0
    }
2390
0
}
2391
2392
2393
2394
static int64_t U_CALLCONV
ucstrTextLength(UText *ut) {
    // Length provider function for UChar-string text.
    // ut->a caches the length; a negative value means "not known yet".
    if (ut->a >= 0) {
        return ut->a;
    }

    // NUL-terminated string whose end has not been found yet.  Scan on from
    // where the chunk currently stops.  The access function is not used for
    // this because the current iteration position must not change.
    const UChar *text = (const UChar *)ut->context;
    while (text[ut->chunkNativeLimit] != 0) {
        ut->chunkNativeLimit++;
    }

    // Record the length and let the chunk cover the whole string.
    ut->a = ut->chunkNativeLimit;
    ut->chunkLength = (int32_t)ut->chunkNativeLimit;
    ut->nativeIndexingLimit = ut->chunkLength;
    ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
    return ut->a;
}
2414
2415
2416
// Access function for UTexts over a UChar string.
//
//   Pins 'index' to the bounds of the string, snaps it back to the start of
//   a code point, and makes it the current iteration position
//   (ut->chunkOffset).  For a NUL-terminated string of still-unknown length
//   (ut->a < 0) the chunk is grown by scanning a little past the requested
//   index, recording the length if the terminator is found.
//
//   Returns true if text is available in the requested direction:
//   forward and index is before the chunk limit, or backward and index > 0.
static UBool U_CALLCONV
ucstrTextAccess(UText *ut, int64_t index, UBool  forward) {
    const UChar *str   = (const UChar *)ut->context;

    // pin the requested index to the bounds of the string,
    //  and set current iteration position.
    if (index<0) {
        index = 0;
    } else if (index < ut->chunkNativeLimit) {
        // The request data is within the chunk as it is known so far.
        // Put index on a code point boundary.
        U16_SET_CP_START(str, 0, index);
    } else if (ut->a >= 0) {
        // We know the length of this string, and the user is requesting something
        // at or beyond the length.  Pin the requested index to the length.
        index = ut->a;
    } else {
        // Null terminated string, length not yet known, and the requested index
        //  is beyond where we have scanned so far.
        //  Scan to 32 UChars beyond the requested index.  The strategy here is
        //  to avoid fully scanning a long string when the caller only wants to
        //  see a few characters at its beginning.
        //
        //  index is non-negative on this path, so comparing against
        //  INT32_MAX - 32 decides whether index + 32 fits in an int32_t
        //  without ever evaluating an addition that could overflow.
        int32_t scanLimit = INT32_MAX;
        if (index <= INT32_MAX - 32) {
            scanLimit = (int32_t)index + 32;
        }

        int32_t chunkLimit = (int32_t)ut->chunkNativeLimit;
        for (; chunkLimit<scanLimit; chunkLimit++) {
            if (str[chunkLimit] == 0) {
                // We found the end of the string.  Remember it, pin the requested index to it,
                //  and bail out of here.
                ut->a = chunkLimit;
                ut->chunkLength = chunkLimit;
                ut->nativeIndexingLimit = chunkLimit;
                if (index >= chunkLimit) {
                    index = chunkLimit;
                } else {
                    U16_SET_CP_START(str, 0, index);
                }

                ut->chunkNativeLimit = chunkLimit;
                ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
                goto breakout;
            }
        }
        // We scanned through the next batch of UChars without finding the end.
        if (chunkLimit == INT32_MAX) {
            // Scanned to the limit of a 32 bit length.
            // Forcibly trim the overlength string back so length fits in int32
            //  TODO:  add support for 64 bit strings.
            ut->a = chunkLimit;
            ut->chunkLength = chunkLimit;
            ut->nativeIndexingLimit = chunkLimit;
            if (index > chunkLimit) {
                // Pin before touching str[index]: a request beyond the
                // scanned range must not be used to index the string.
                index = chunkLimit;
            } else {
                U16_SET_CP_START(str, 0, index);
            }
            ut->chunkNativeLimit = chunkLimit;
            ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
        } else {
            // Here scanLimit == index + 32, so index lies inside the scanned range.
            U16_SET_CP_START(str, 0, index);

            // The endpoint of a chunk must not be left in the middle of a surrogate pair.
            // If the current end is on a lead surrogate, back the end up by one.
            // It doesn't matter if the end char happens to be an unpaired surrogate,
            //    and it's simpler not to worry about it.
            if (U16_IS_LEAD(str[chunkLimit-1])) {
                --chunkLimit;
            }
            // Null-terminated chunk with end still unknown.
            // Update the chunk length to reflect what has been scanned thus far.
            // That the full length is still unknown is (still) flagged by
            //    ut->a being < 0.
            ut->chunkNativeLimit = chunkLimit;
            ut->nativeIndexingLimit = chunkLimit;
            ut->chunkLength = chunkLimit;
        }

    }
breakout:
    U_ASSERT(index<=INT32_MAX);
    ut->chunkOffset = (int32_t)index;

    // Check whether request is at the start or end
    UBool retVal = (forward && index<ut->chunkNativeLimit) || (!forward && index>0);
    return retVal;
}
2502
2503
2504
2505
// Extract function for UTexts over a UChar string.
//
//   Copies the UChars in [start, limit) into dest, as far as destCapacity
//   allows, and returns the number of UChars the extracted text occupies.
//   NUL termination and the overflow/termination status are applied by
//   u_terminateUChars().  If the scan runs into the terminator of a
//   NUL-terminated string whose length was unknown, the length is recorded
//   in the UText.  On return the iteration position follows the extracted text.
static int32_t U_CALLCONV
ucstrTextExtract(UText *ut,
                  int64_t start, int64_t limit,
                  UChar *dest, int32_t destCapacity,
                  UErrorCode *pErrorCode)
{
    if (U_FAILURE(*pErrorCode)) {
        return 0;
    }
    if (destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) {
        *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    // Accessing the start position does two things we need:
    //   it pins 'start' to the string bounds, and
    //   it snaps 'start' back to the beginning of a code point.
    ucstrTextAccess(ut, start, TRUE);
    const UChar *s = ut->chunkContents;
    const int32_t start32 = ut->chunkOffset;

    // A negative length means "NUL-terminated, end not found yet"; in that
    // case the limit can only be pinned to the largest 32 bit index.
    int32_t strLength = (int32_t)ut->a;
    int32_t limit32   = pinIndex(limit, strLength >= 0 ? strLength : INT32_MAX);

    int32_t srcIdx = start32;
    int32_t outLen = 0;
    while (srcIdx < limit32) {
        if (strLength<0 && s[srcIdx]==0) {
            // Ran into the terminator of a NUL-terminated string:
            // the length is now known, so record it in the UText.
            ut->a                   = srcIdx;
            ut->chunkNativeLimit    = srcIdx;
            ut->chunkLength         = srcIdx;
            ut->nativeIndexingLimit = srcIdx;
            strLength               = srcIdx;
            limit32                 = srcIdx;
            break;
        }
        U_ASSERT(outLen>=0); /* to ensure outLen never exceeds INT32_MAX, which must not happen logically */
        if (outLen < destCapacity) {
            // Room left in the destination: copy the code unit.
            dest[outLen] = s[srcIdx];
        } else if (strLength >= 0) {
            // Destination is full and the string length is known, so the
            // remaining count can be computed without scanning further.
            outLen = limit32 - start32;
            srcIdx = limit32;
            break;
        }
        outLen++;
        srcIdx++;
    }

    // If the extraction stopped between the two halves of a surrogate pair,
    //   include the trail surrogate as well.
    if (srcIdx>0 && U16_IS_LEAD(s[srcIdx-1])) {
        const UBool moreText = (srcIdx<strLength || strLength<0);
        if (moreText && U16_IS_TRAIL(s[srcIdx])) {
            if (outLen < destCapacity) {
                // store only if there is space in the output buffer.
                dest[outLen++] = s[srcIdx];
            }
            srcIdx++;
        }
    }

    // Leave the iteration position just after the extracted text.
    if (srcIdx > ut->chunkNativeLimit) {
        ucstrTextAccess(ut, srcIdx, TRUE);
    } else {
        ut->chunkOffset = srcIdx;
    }

    // Add a terminating NUL if space in the buffer permits,
    // and set the error status as required.
    u_terminateUChars(dest, destCapacity, outLen, pErrorCode);
    return outLen;
}
2590
2591
// Function table for UTexts over a UChar string.
// utext_openUChars() installs it as ut->pFuncs.  The Replace, Copy,
// MapOffsetToNative and MapIndexToUTF16 slots are left NULL: this provider
// supplies no implementation for them.
static const struct UTextFuncs ucstrFuncs =
2592
{
2593
    sizeof(UTextFuncs),
2594
    0, 0, 0,           // Reserved alignment padding
2595
    ucstrTextClone,    // Clone
2596
    ucstrTextLength,   // Length
2597
    ucstrTextAccess,   // Access
2598
    ucstrTextExtract,  // Extract
2599
    NULL,              // Replace
2600
    NULL,              // Copy
2601
    NULL,              // MapOffsetToNative,
2602
    NULL,              // MapIndexToUTF16,
2603
    ucstrTextClose,    // Close
2604
    NULL,              // spare 1
2605
    NULL,              // spare 2
2606
    NULL,              // spare 3
2607
};
2608
2609
U_CDECL_END
2610
2611
// Zero-length, NUL-terminated string that utext_openUChars() substitutes
// when it is called with a NULL pointer and a length of 0.
static const UChar gEmptyUString[] = {0};
2612
2613
// Open a UText over a UChar string.
//
//   ut      UText to (re)use, passed through to utext_setup().
//   s       the string.  May be NULL only when length is 0.
//   length  number of UChars in s, or -1 if s is NUL-terminated.
//           Must not exceed INT32_MAX.
//   status  receives U_ILLEGAL_ARGUMENT_ERROR for a bad s/length, or
//           whatever error utext_setup() reports.
//
//   Returns NULL if status was already a failure or the arguments are bad;
//   otherwise the value returned by utext_setup().
U_CAPI UText * U_EXPORT2
utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return NULL;
    }

    // A NULL pointer is acceptable only for an explicitly empty string.
    if (s==NULL && length==0) {
        s = gEmptyUString;
    }
    const UBool lengthOutOfRange = (length < -1 || length > INT32_MAX);
    if (s==NULL || lengthOutOfRange) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    ut = utext_setup(ut, 0, status);
    if (U_FAILURE(*status)) {
        return ut;
    }

    // length == -1 : NUL-terminated string whose length has to be discovered
    // by scanning, so flag the length as expensive and start with an empty chunk.
    const UBool lengthUnknown = (length == -1);
    int32_t properties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
    if (lengthUnknown) {
        properties |= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
    }

    ut->pFuncs              = &ucstrFuncs;
    ut->context             = s;
    ut->providerProperties  = properties;
    ut->a                   = length;

    // The whole string (as far as its extent is known) is exposed as one chunk.
    ut->chunkContents       = s;
    ut->chunkNativeStart    = 0;
    ut->chunkNativeLimit    = lengthUnknown ? 0 : length;
    ut->chunkLength         = (int32_t)ut->chunkNativeLimit;
    ut->chunkOffset         = 0;
    ut->nativeIndexingLimit = ut->chunkLength;
    return ut;
}
2643
2644
2645
//------------------------------------------------------------------------------
2646
//
2647
//     UText implementation for text from ICU CharacterIterators
2648
//
2649
//         Use of UText data members:
2650
//            context    pointer to the CharacterIterator
2651
//            a          length of the full text.
2652
//            p          pointer to  buffer 1
2653
//            b          start index of local buffer 1 contents
2654
//            q          pointer to buffer 2
2655
//            c          start index of local buffer 2 contents
2656
//            r          pointer to the character iterator if the UText owns it.
2657
//                       Null otherwise.
2658
//
2659
//------------------------------------------------------------------------------
2660
0
// Capacity, in UChars, of each of the two chunk buffers that a
// CharacterIterator-backed UText keeps in its extra storage.
#define CIBufSize 16
2661
2662
U_CDECL_BEGIN
2663
static void U_CALLCONV
2664
0
charIterTextClose(UText *ut) {
2665
    // Most of the work of close is done by the generic UText framework close.
2666
    // All that needs to be done here is delete the CharacterIterator if the UText
2667
    //  owns it.  This occurs if the UText was created by cloning.
2668
0
    CharacterIterator *ci = (CharacterIterator *)ut->r;
2669
0
    delete ci;
2670
0
    ut->r = NULL;
2671
0
}
2672
2673
static int64_t U_CALLCONV
charIterTextLength(UText *ut) {
    // The text length was stored in ut->a when the UText was opened;
    // it is reported through a 32 bit value.
    const int32_t textLength = (int32_t)ut->a;
    return textLength;
}
2677
2678
// Access function for UTexts over a CharacterIterator.
//
//   Text is served from two local buffers of CIBufSize UChars each
//   (ut->p and ut->q), always aligned on multiples of CIBufSize.
//   ut->b and ut->c record the native start index of the text held in
//   p and q respectively, so a buffer that already holds the wanted
//   text can be reused without going back to the iterator.
//
//   'index' is pinned to [0, length] and becomes the iteration position.
//   Returns true if text is available in the requested direction.
static UBool U_CALLCONV
charIterTextAccess(UText *ut, int64_t index, UBool  forward) {
    CharacterIterator *ci   = (CharacterIterator *)ut->context;

    // Pin the index while it is still 64 bits wide.  Narrowing first would
    // turn an out-of-range request (e.g. 2^32 + 5) into a bogus in-range one.
    int64_t pinnedIndex = index;
    if (pinnedIndex<0) {
        pinnedIndex = 0;
    } else if (pinnedIndex>=ut->a) {
        pinnedIndex = ut->a;
    }
    int32_t clippedIndex = (int32_t)pinnedIndex;

    int32_t neededIndex = clippedIndex;
    if (!forward && neededIndex>0) {
        // reverse iteration, want the position just before what was asked for.
        neededIndex--;
    } else if (forward && neededIndex==ut->a && neededIndex>0) {
        // Forward iteration, don't ask for something past the end of the text.
        neededIndex--;
    }

    // Find the native index of the start of the buffer containing what we want.
    neededIndex -= neededIndex % CIBufSize;

    UChar *buf = NULL;
    UBool  needChunkSetup = TRUE;
    int    i;
    if (ut->chunkNativeStart == neededIndex) {
        // The buffer we want is already the current chunk.
        needChunkSetup = FALSE;
    } else if (ut->b == neededIndex) {
        // The first buffer (buffer p) has what we need.
        buf = (UChar *)ut->p;
    } else if (ut->c == neededIndex) {
        // The second buffer (buffer q) has what we need.
        buf = (UChar *)ut->q;
    } else {
        // Neither buffer already has what we need.
        // Load new data from the character iterator.
        // Use the buf that is not the current buffer.
        buf = (UChar *)ut->p;
        if (ut->p == ut->chunkContents) {
            buf = (UChar *)ut->q;
        }
        ci->setIndex(neededIndex);
        for (i=0; i<CIBufSize; i++) {
            buf[i] = ci->nextPostInc();
            if (i+neededIndex > ut->a) {
                break;
            }
        }
        // Remember which stretch of text the refilled buffer now holds,
        // so that the lookups above can find it again later.
        if (buf == (UChar *)ut->p) {
            ut->b = neededIndex;
        } else {
            ut->c = neededIndex;
        }
    }

    // We have a buffer with the data we need.
    // Set it up as the current chunk, if it wasn't already.
    if (needChunkSetup) {
        ut->chunkContents = buf;
        ut->chunkLength   = CIBufSize;
        ut->chunkNativeStart = neededIndex;
        ut->chunkNativeLimit = neededIndex + CIBufSize;
        if (ut->chunkNativeLimit > ut->a) {
            // Last, partially filled buffer: trim the chunk to the text length.
            ut->chunkNativeLimit = ut->a;
            ut->chunkLength  = (int32_t)(ut->chunkNativeLimit)-(int32_t)(ut->chunkNativeStart);
        }
        ut->nativeIndexingLimit = ut->chunkLength;
    }
    ut->chunkOffset = clippedIndex - (int32_t)ut->chunkNativeStart;
    // Checked after the assignment so that it validates the new offset
    // rather than the one left over from the previous access.
    U_ASSERT(ut->chunkOffset>=0 && ut->chunkOffset<=CIBufSize);
    UBool success = (forward? ut->chunkOffset<ut->chunkLength : ut->chunkOffset>0);
    return success;
}
2747
2748
// Clone function for UTexts over a CharacterIterator.
//
//   Deep clones are not supported (U_UNSUPPORTED_ERROR).  A shallow clone
//   gets its own clone of the source CharacterIterator, which the new UText
//   owns (recorded in dest->r), positioned at the source's native index.
//
//   Returns the new UText, or NULL / the result of the failed open on error,
//   with *status set.
static UText * U_CALLCONV
charIterTextClone(UText *dest, const UText *src, UBool deep, UErrorCode * status) {
    if (U_FAILURE(*status)) {
        return NULL;
    }

    if (deep) {
        // There is no CharacterIterator API for cloning the underlying text storage.
        *status = U_UNSUPPORTED_ERROR;
        return NULL;
    }

    CharacterIterator *srcCI = (CharacterIterator *)src->context;
    CharacterIterator *clonedCI = srcCI->clone();
    if (clonedCI == NULL) {
        // clone() could not produce an iterator; do not dereference it.
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }

    dest = utext_openCharacterIterator(dest, clonedCI, status);
    if (U_FAILURE(*status)) {
        // The open failed, so no UText took ownership of the clone;
        // release it here, otherwise it would leak.
        delete clonedCI;
        return dest;
    }

    // cast off const on getNativeIndex.
    //   For CharacterIterator based UTexts, this is safe, the operation is const.
    int64_t  ix = utext_getNativeIndex((UText *)src);
    utext_setNativeIndex(dest, ix);
    dest->r = clonedCI;    // flags that this UText owns the CharacterIterator
    return dest;
}
2773
2774
// Extract function for UTexts over a CharacterIterator.
//
//   Copies the text in [start, limit) (both pinned to the text length) into
//   dest, one code point at a time.  Code points that do not fit are counted
//   but not stored, and U_BUFFER_OVERFLOW_ERROR is set.  Returns the number
//   of UChars accumulated this way; termination is applied by
//   u_terminateUChars().  The iteration position is left after the last
//   code point that was actually stored.
static int32_t U_CALLCONV
charIterTextExtract(UText *ut,
                  int64_t start, int64_t limit,
                  UChar *dest, int32_t destCapacity,
                  UErrorCode *status)
{
    if (U_FAILURE(*status)) {
        return 0;
    }
    if (destCapacity<0 || (dest==NULL && destCapacity>0) || start>limit) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const int32_t textLength = (int32_t)ut->a;
    const int32_t start32    = pinIndex(start, textLength);
    const int32_t limit32    = pinIndex(limit, textLength);

    CharacterIterator *iter = (CharacterIterator *)ut->context;
    iter->setIndex32(start32);   // Moves ix to lead of surrogate pair, if needed.

    int32_t srcPos    = iter->getIndex();
    int32_t resumePos = srcPos;     // source index just past the last stored code point
    int32_t outLen    = 0;
    for (; srcPos < limit32; ) {
        const UChar32 cp    = iter->next32PostInc();
        const int32_t units = U16_LENGTH(cp);
        U_ASSERT(outLen+units>0); /* to ensure outLen+units never exceeds MAX_INT32, which must not happen logically */
        if (outLen+units > destCapacity) {
            // No room: keep counting so the required length is reported.
            outLen += units;
            *status = U_BUFFER_OVERFLOW_ERROR;
        } else {
            U16_APPEND_UNSAFE(dest, outLen, cp);
            resumePos = srcPos + units;
        }
        srcPos += units;
    }

    // Position the UText just after the text that was copied.
    charIterTextAccess(ut, resumePos, TRUE);

    u_terminateUChars(dest, destCapacity, outLen, status);
    return outLen;
}
2817
2818
// Function table for UTexts over a CharacterIterator.
// utext_openCharacterIterator() installs it as ut->pFuncs.  The Replace,
// Copy, MapOffsetToNative and MapIndexToUTF16 slots are left NULL: this
// provider supplies no implementation for them.
static const struct UTextFuncs charIterFuncs =
2819
{
2820
    sizeof(UTextFuncs),
2821
    0, 0, 0,             // Reserved alignment padding
2822
    charIterTextClone,   // Clone
2823
    charIterTextLength,  // Length
2824
    charIterTextAccess,  // Access
2825
    charIterTextExtract, // Extract
2826
    NULL,                // Replace
2827
    NULL,                // Copy
2828
    NULL,                // MapOffsetToNative,
2829
    NULL,                // MapIndexToUTF16,
2830
    charIterTextClose,   // Close
2831
    NULL,                // spare 1
2832
    NULL,                // spare 2
2833
    NULL                 // spare 3
2834
};
2835
U_CDECL_END
2836
2837
2838
// Open a UText over a CharacterIterator.
//
//   ut      UText to (re)use, passed through to utext_setup().
//   ci      the iterator supplying the text.  Must not be NULL and must
//           start indexing from zero.
//   status  receives U_ILLEGAL_ARGUMENT_ERROR for a NULL iterator,
//           U_UNSUPPORTED_ERROR for an iterator with a non-zero start
//           index, or whatever error utext_setup() reports.
//
//   Returns NULL if status was already a failure or the iterator is
//   rejected; otherwise the value returned by utext_setup().
U_CAPI UText * U_EXPORT2
utext_openCharacterIterator(UText *ut, CharacterIterator *ci, UErrorCode *status) {
    if (U_FAILURE(*status)) {
        return NULL;
    }

    if (ci == NULL) {
        // Reject a missing iterator up front instead of dereferencing it below.
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return NULL;
    }

    if (ci->startIndex() > 0) {
        // No support for CharacterIterators that do not start indexing from zero.
        *status = U_UNSUPPORTED_ERROR;
        return NULL;
    }

    // Extra space in UText for 2 buffers of CIBufSize UChars each.
    int32_t  extraSpace = (int32_t)(2 * CIBufSize * sizeof(UChar));
    ut = utext_setup(ut, extraSpace, status);
    if (U_SUCCESS(*status)) {
        ut->pFuncs                = &charIterFuncs;
        ut->context              = ci;
        ut->providerProperties   = 0;
        ut->a                    = ci->endIndex();        // Length of text
        ut->p                    = ut->pExtra;            // First buffer
        ut->b                    = -1;                    // Native index of first buffer contents
        ut->q                    = (UChar*)ut->pExtra+CIBufSize;  // Second buffer
        ut->c                    = -1;                    // Native index of second buffer contents

        // Initialize current chunk contents to be empty.
        //   First access will fault something in.
        //   Note:  The initial nativeStart and chunkOffset must sum to zero
        //          so that getNativeIndex() will correctly compute to zero
        //          if no call to Access() has ever been made.  They can't be both
        //          zero without Access() thinking that the chunk is valid.
        ut->chunkContents        = (UChar *)ut->p;
        ut->chunkNativeStart     = -1;
        ut->chunkOffset          = 1;
        ut->chunkNativeLimit     = 0;
        ut->chunkLength          = 0;
        ut->nativeIndexingLimit  = ut->chunkOffset;  // enables native indexing
    }
    return ut;
}