Coverage Report

Created: 2021-08-22 09:07

/src/skia/third_party/externals/icu/source/common/rbbi_cache.cpp
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// file: rbbi_cache.cpp
5
6
#include "unicode/utypes.h"
7
8
#if !UCONFIG_NO_BREAK_ITERATION
9
10
#include "unicode/ubrk.h"
11
#include "unicode/rbbi.h"
12
13
#include "rbbi_cache.h"
14
15
#include "brkeng.h"
16
#include "cmemory.h"
17
#include "rbbidata.h"
18
#include "rbbirb.h"
19
#include "uassert.h"
20
#include "uvectr32.h"
21
22
U_NAMESPACE_BEGIN
23
24
/*
25
 * DictionaryCache implementation
26
 */
27
28
RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
29
        fBI(bi), fBreaks(status), fPositionInCache(-1),
30
47.1k
        fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
31
47.1k
}
32
33
47.1k
RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
34
47.1k
}
35
36
487k
void RuleBasedBreakIterator::DictionaryCache::reset() {
37
487k
    fPositionInCache = -1;
38
487k
    fStart = 0;
39
487k
    fLimit = 0;
40
487k
    fFirstRuleStatusIndex = 0;
41
487k
    fOtherRuleStatusIndex = 0;
42
487k
    fBreaks.removeAllElements();
43
487k
}
44
45
1.06M
UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
46
1.06M
    if (fromPos >= fLimit || fromPos < fStart) {
47
1.03M
        fPositionInCache = -1;
48
1.03M
        return FALSE;
49
1.03M
    }
50
51
    // Sequential iteration, move from previous boundary to the following
52
53
29.9k
    int32_t r = 0;
54
29.9k
    if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
55
29.9k
        ++fPositionInCache;
56
29.9k
        if (fPositionInCache >= fBreaks.size()) {
57
0
            fPositionInCache = -1;
58
0
            return FALSE;
59
0
        }
60
29.9k
        r = fBreaks.elementAti(fPositionInCache);
61
29.9k
        U_ASSERT(r > fromPos);
62
29.9k
        *result = r;
63
29.9k
        *statusIndex = fOtherRuleStatusIndex;
64
29.9k
        return TRUE;
65
29.9k
    }
66
67
    // Random indexing. Linear search for the boundary following the given position.
68
69
0
    for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
70
0
        r= fBreaks.elementAti(fPositionInCache);
71
0
        if (r > fromPos) {
72
0
            *result = r;
73
0
            *statusIndex = fOtherRuleStatusIndex;
74
0
            return TRUE;
75
0
        }
76
0
    }
77
0
    UPRV_UNREACHABLE;
78
0
}
79
80
81
0
UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
82
0
    if (fromPos <= fStart || fromPos > fLimit) {
83
0
        fPositionInCache = -1;
84
0
        return FALSE;
85
0
    }
86
87
0
    if (fromPos == fLimit) {
88
0
        fPositionInCache = fBreaks.size() - 1;
89
0
        if (fPositionInCache >= 0) {
90
0
            U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
91
0
        }
92
0
    }
93
94
0
    int32_t r;
95
0
    if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
96
0
        --fPositionInCache;
97
0
        r = fBreaks.elementAti(fPositionInCache);
98
0
        U_ASSERT(r < fromPos);
99
0
        *result = r;
100
0
        *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
101
0
        return TRUE;
102
0
    }
103
104
0
    if (fPositionInCache == 0) {
105
0
        fPositionInCache = -1;
106
0
        return FALSE;
107
0
    }
108
109
0
    for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
110
0
        r = fBreaks.elementAti(fPositionInCache);
111
0
        if (r < fromPos) {
112
0
            *result = r;
113
0
            *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
114
0
            return TRUE;
115
0
        }
116
0
    }
117
0
    UPRV_UNREACHABLE;
118
0
}
119
120
void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
121
54.1k
                                       int32_t firstRuleStatus, int32_t otherRuleStatus) {
122
54.1k
    if ((endPos - startPos) <= 1) {
123
180
        return;
124
180
    }
125
126
54.0k
    reset();
127
54.0k
    fFirstRuleStatusIndex = firstRuleStatus;
128
54.0k
    fOtherRuleStatusIndex = otherRuleStatus;
129
130
54.0k
    int32_t rangeStart = startPos;
131
54.0k
    int32_t rangeEnd = endPos;
132
133
54.0k
    uint16_t    category;
134
54.0k
    int32_t     current;
135
54.0k
    UErrorCode  status = U_ZERO_ERROR;
136
54.0k
    int32_t     foundBreakCount = 0;
137
54.0k
    UText      *text = &fBI->fText;
138
139
    // Loop through the text, looking for ranges of dictionary characters.
140
    // For each span, find the appropriate break engine, and ask it to find
141
    // any breaks within the span.
142
143
54.0k
    utext_setNativeIndex(text, rangeStart);
144
54.0k
    UChar32     c = utext_current32(text);
145
54.0k
    category = ucptrie_get(fBI->fData->fTrie, c);
146
54.0k
    uint32_t dictStart = fBI->fData->fForwardTable->fDictCategoriesStart;
147
148
112k
    while(U_SUCCESS(status)) {
149
454k
        while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd
150
400k
                && (category < dictStart)) {
151
342k
            utext_next32(text);           // TODO: cleaner loop structure.
152
342k
            c = utext_current32(text);
153
342k
            category = ucptrie_get(fBI->fData->fTrie, c);
154
342k
        }
155
112k
        if (current >= rangeEnd) {
156
54.0k
            break;
157
54.0k
        }
158
159
        // We now have a dictionary character. Get the appropriate language object
160
        // to deal with it.
161
58.2k
        const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
162
163
        // Ask the language object if there are any breaks. It will add them to the cache and
164
        // leave the text pointer on the other side of its range, ready to search for the next one.
165
58.2k
        if (lbe != NULL) {
166
58.2k
            foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
167
58.2k
        }
168
169
        // Reload the loop variables for the next go-round
170
58.2k
        c = utext_current32(text);
171
58.2k
        category = ucptrie_get(fBI->fData->fTrie, c);
172
58.2k
    }
173
174
    // If we found breaks, ensure that the first and last entries are
175
    // the original starting and ending position. And initialize the
176
    // cache iteration position to the first entry.
177
178
    // printf("foundBreakCount = %d\n", foundBreakCount);
179
54.0k
    if (foundBreakCount > 0) {
180
3.52k
        U_ASSERT(foundBreakCount == fBreaks.size());
181
3.52k
        if (startPos < fBreaks.elementAti(0)) {
182
            // The dictionary did not place a boundary at the start of the segment of text.
183
            // Add one now. This should not commonly happen, but it would be easy for interactions
184
            // of the rules for dictionary segments and the break engine implementations to
185
            // inadvertently cause it. Cover it here, just in case.
186
3.52k
            fBreaks.insertElementAt(startPos, 0, status);
187
3.52k
        }
188
3.52k
        if (endPos > fBreaks.peeki()) {
189
3.52k
            fBreaks.push(endPos, status);
190
3.52k
        }
191
3.52k
        fPositionInCache = 0;
192
        // Note: Dictionary matching may extend beyond the original limit.
193
3.52k
        fStart = fBreaks.elementAti(0);
194
3.52k
        fLimit = fBreaks.peeki();
195
50.4k
    } else {
196
        // there were no language-based breaks, even though the segment contained
197
        // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
198
        // for this range will fail, and the calling code will fall back to the rule based boundaries.
199
50.4k
    }
200
54.0k
}
201
202
203
/*
204
 *   BreakCache implemetation
205
 */
206
207
RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
208
47.1k
        fBI(bi), fSideBuffer(status) {
209
47.1k
    reset();
210
47.1k
}
211
212
213
47.1k
RuleBasedBreakIterator::BreakCache::~BreakCache() {
214
47.1k
}
215
216
217
480k
void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
218
480k
    fStartBufIdx = 0;
219
480k
    fEndBufIdx = 0;
220
480k
    fTextIdx = pos;
221
480k
    fBufIdx = 0;
222
480k
    fBoundaries[0] = pos;
223
480k
    fStatuses[0] = (uint16_t)ruleStatus;
224
480k
}
225
226
227
433k
int32_t  RuleBasedBreakIterator::BreakCache::current() {
228
433k
    fBI->fPosition = fTextIdx;
229
433k
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
230
433k
    fBI->fDone = FALSE;
231
433k
    return fTextIdx;
232
433k
}
233
234
235
0
void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
236
0
    if (U_FAILURE(status)) {
237
0
        return;
238
0
    }
239
0
    if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
240
        // startPos is in the cache. Do a next() from that position.
241
        // TODO: an awkward set of interactions with bi->fDone
242
        //       seek() does not clear it; it can't because of interactions with populateNear().
243
        //       next() does not clear it in the fast-path case, where everything matters. Maybe it should.
244
        //       So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
245
0
        fBI->fDone = false;
246
0
        next();
247
0
    }
248
0
    return;
249
0
}
250
251
252
0
void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
253
0
    if (U_FAILURE(status)) {
254
0
        return;
255
0
    }
256
0
    if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
257
0
        if (startPos == fTextIdx) {
258
0
            previous(status);
259
0
        } else {
260
            // seek() leaves the BreakCache positioned at the preceding boundary
261
            //        if the requested position is between two boundaries.
262
            // current() pushes the BreakCache position out to the BreakIterator itself.
263
0
            U_ASSERT(startPos > fTextIdx);
264
0
            current();
265
0
        }
266
0
    }
267
0
    return;
268
0
}
269
270
271
/*
272
 * Out-of-line code for BreakCache::next().
273
 * Cache does not already contain the boundary
274
 */
275
1.01M
void RuleBasedBreakIterator::BreakCache::nextOL() {
276
1.01M
    fBI->fDone = !populateFollowing();
277
1.01M
    fBI->fPosition = fTextIdx;
278
1.01M
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
279
1.01M
    return;
280
1.01M
}
281
282
283
0
void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
284
0
    if (U_FAILURE(status)) {
285
0
        return;
286
0
    }
287
0
    int32_t initialBufIdx = fBufIdx;
288
0
    if (fBufIdx == fStartBufIdx) {
289
        // At start of cache. Prepend to it.
290
0
        populatePreceding(status);
291
0
    } else {
292
        // Cache already holds the next boundary
293
0
        fBufIdx = modChunkSize(fBufIdx - 1);
294
0
        fTextIdx = fBoundaries[fBufIdx];
295
0
    }
296
0
    fBI->fDone = (fBufIdx == initialBufIdx);
297
0
    fBI->fPosition = fTextIdx;
298
0
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
299
0
    return;
300
0
}
301
302
303
433k
UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
304
433k
    if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) {
305
0
        return FALSE;
306
0
    }
307
433k
    if (pos == fBoundaries[fStartBufIdx]) {
308
        // Common case: seek(0), from BreakIterator::first()
309
433k
        fBufIdx = fStartBufIdx;
310
433k
        fTextIdx = fBoundaries[fBufIdx];
311
433k
        return TRUE;
312
433k
    }
313
0
    if (pos == fBoundaries[fEndBufIdx]) {
314
0
        fBufIdx = fEndBufIdx;
315
0
        fTextIdx = fBoundaries[fBufIdx];
316
0
        return TRUE;
317
0
    }
318
319
0
    int32_t min = fStartBufIdx;
320
0
    int32_t max = fEndBufIdx;
321
0
    while (min != max) {
322
0
        int32_t probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2;
323
0
        probe = modChunkSize(probe);
324
0
        if (fBoundaries[probe] > pos) {
325
0
            max = probe;
326
0
        } else {
327
0
            min = modChunkSize(probe + 1);
328
0
        }
329
0
    }
330
0
    U_ASSERT(fBoundaries[max] > pos);
331
0
    fBufIdx = modChunkSize(max - 1);
332
0
    fTextIdx = fBoundaries[fBufIdx];
333
0
    U_ASSERT(fTextIdx <= pos);
334
0
    return TRUE;
335
0
}
336
337
338
0
UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
339
0
    if (U_FAILURE(status)) {
340
0
        return FALSE;
341
0
    }
342
0
    U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);
343
344
    // Find a boundary somewhere in the vicinity of the requested position.
345
    // Depending on the safe rules and the text data, it could be either before, at, or after
346
    // the requested position.
347
348
349
    // If the requested position is not near already cached positions, clear the existing cache,
350
    // find a near-by boundary and begin new cache contents there.
351
352
0
    if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
353
0
        int32_t aBoundary = 0;
354
0
        int32_t ruleStatusIndex = 0;
355
0
        if (position > 20) {
356
0
            int32_t backupPos = fBI->handleSafePrevious(position);
357
358
0
            if (backupPos > 0) {
359
                // Advance to the boundary following the backup position.
360
                // There is a complication: the safe reverse rules identify pairs of code points
361
                // that are safe. If advancing from the safe point moves forwards by less than
362
                // two code points, we need to advance one more time to ensure that the boundary
363
                // is good, including a correct rules status value.
364
                //
365
0
                fBI->fPosition = backupPos;
366
0
                aBoundary = fBI->handleNext();
367
0
                if (aBoundary <= backupPos + 4) {
368
                    // +4 is a quick test for possibly having advanced only one codepoint.
369
                    // Four being the length of the longest potential code point, a supplementary in UTF-8
370
0
                    utext_setNativeIndex(&fBI->fText, aBoundary);
371
0
                    if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
372
                        // The initial handleNext() only advanced by a single code point. Go again.
373
0
                        aBoundary = fBI->handleNext();   // Safe rules identify safe pairs.
374
0
                    }
375
0
                }
376
0
                ruleStatusIndex = fBI->fRuleStatusIndex;
377
0
            }
378
0
        }
379
0
        reset(aBoundary, ruleStatusIndex);        // Reset cache to hold aBoundary as a single starting point.
380
0
    }
381
382
    // Fill in boundaries between existing cache content and the new requested position.
383
384
0
    if (fBoundaries[fEndBufIdx] < position) {
385
        // The last position in the cache precedes the requested position.
386
        // Add following position(s) to the cache.
387
0
        while (fBoundaries[fEndBufIdx] < position) {
388
0
            if (!populateFollowing()) {
389
0
                UPRV_UNREACHABLE;
390
0
            }
391
0
        }
392
0
        fBufIdx = fEndBufIdx;                      // Set iterator position to the end of the buffer.
393
0
        fTextIdx = fBoundaries[fBufIdx];           // Required because populateFollowing may add extra boundaries.
394
0
        while (fTextIdx > position) {              // Move backwards to a position at or preceding the requested pos.
395
0
            previous(status);
396
0
        }
397
0
        return true;
398
0
    }
399
400
0
    if (fBoundaries[fStartBufIdx] > position) {
401
        // The first position in the cache is beyond the requested position.
402
        // back up more until we get a boundary <= the requested position.
403
0
        while (fBoundaries[fStartBufIdx] > position) {
404
0
            populatePreceding(status);
405
0
        }
406
0
        fBufIdx = fStartBufIdx;                    // Set iterator position to the start of the buffer.
407
0
        fTextIdx = fBoundaries[fBufIdx];           // Required because populatePreceding may add extra boundaries.
408
0
        while (fTextIdx < position) {              // Move forwards to a position at or following the requested pos.
409
0
            next();
410
0
        }
411
0
        if (fTextIdx > position) {
412
            // If position is not itself a boundary, the next() loop above will overshoot.
413
            // Back up one, leaving cache position at the boundary preceding the requested position.
414
0
            previous(status);
415
0
        }
416
0
        return true;
417
0
    }
418
419
0
    U_ASSERT(fTextIdx == position);
420
0
    return true;
421
0
}
422
423
424
425
1.01M
UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
426
1.01M
    int32_t fromPosition = fBoundaries[fEndBufIdx];
427
1.01M
    int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
428
1.01M
    int32_t pos = 0;
429
1.01M
    int32_t ruleStatusIdx = 0;
430
431
1.01M
    if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
432
26.4k
        addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
433
26.4k
        return TRUE;
434
26.4k
    }
435
436
986k
    fBI->fPosition = fromPosition;
437
986k
    pos = fBI->handleNext();
438
986k
    if (pos == UBRK_DONE) {
439
426k
        return FALSE;
440
426k
    }
441
442
560k
    ruleStatusIdx = fBI->fRuleStatusIndex;
443
560k
    if (fBI->fDictionaryCharCount > 0) {
444
        // The text segment obtained from the rules includes dictionary characters.
445
        // Subdivide it, with subdivided results going into the dictionary cache.
446
54.1k
        fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
447
54.1k
        if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
448
3.52k
            addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
449
3.52k
            return TRUE;
450
            // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
451
            //       But be careful with interactions with populateNear().
452
3.52k
        }
453
557k
    }
454
455
    // Rule based segment did not include dictionary characters.
456
    // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
457
    //    meaning that we didn't take the return, above.
458
    // Add its end point to the cache.
459
557k
    addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
460
461
    // Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
462
    //    (subsequent calls to BreakIterator::next() will take the fast path, getting cached results.
463
    //
464
1.39M
    for (int count=0; count<6; ++count) {
465
1.28M
        pos = fBI->handleNext();
466
1.28M
        if (pos == UBRK_DONE || fBI->fDictionaryCharCount > 0) {
467
448k
            break;
468
448k
        }
469
836k
        addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition);
470
836k
    }
471
472
557k
    return TRUE;
473
557k
}
474
475
476
0
UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
477
0
    if (U_FAILURE(status)) {
478
0
        return FALSE;
479
0
    }
480
481
0
    int32_t fromPosition = fBoundaries[fStartBufIdx];
482
0
    if (fromPosition == 0) {
483
0
        return FALSE;
484
0
    }
485
486
0
    int32_t position = 0;
487
0
    int32_t positionStatusIdx = 0;
488
489
0
    if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
490
0
        addPreceding(position, positionStatusIdx, UpdateCachePosition);
491
0
        return TRUE;
492
0
    }
493
494
0
    int32_t backupPosition = fromPosition;
495
496
    // Find a boundary somewhere preceding the first already-cached boundary
497
0
    do {
498
0
        backupPosition = backupPosition - 30;
499
0
        if (backupPosition <= 0) {
500
0
            backupPosition = 0;
501
0
        } else {
502
0
            backupPosition = fBI->handleSafePrevious(backupPosition);
503
0
        }
504
0
        if (backupPosition == UBRK_DONE || backupPosition == 0) {
505
0
            position = 0;
506
0
            positionStatusIdx = 0;
507
0
        } else {
508
            // Advance to the boundary following the backup position.
509
            // There is a complication: the safe reverse rules identify pairs of code points
510
            // that are safe. If advancing from the safe point moves forwards by less than
511
            // two code points, we need to advance one more time to ensure that the boundary
512
            // is good, including a correct rules status value.
513
            //
514
0
            fBI->fPosition = backupPosition;
515
0
            position = fBI->handleNext();
516
0
            if (position <= backupPosition + 4) {
517
                // +4 is a quick test for possibly having advanced only one codepoint.
518
                // Four being the length of the longest potential code point, a supplementary in UTF-8
519
0
                utext_setNativeIndex(&fBI->fText, position);
520
0
                if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
521
                    // The initial handleNext() only advanced by a single code point. Go again.
522
0
                    position = fBI->handleNext();   // Safe rules identify safe pairs.
523
0
                }
524
0
            }
525
0
            positionStatusIdx = fBI->fRuleStatusIndex;
526
0
        }
527
0
    } while (position >= fromPosition);
528
529
    // Find boundaries between the one we just located and the first already-cached boundary
530
    // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer..
531
532
0
    fSideBuffer.removeAllElements();
533
0
    fSideBuffer.addElement(position, status);
534
0
    fSideBuffer.addElement(positionStatusIdx, status);
535
536
0
    do {
537
0
        int32_t prevPosition = fBI->fPosition = position;
538
0
        int32_t prevStatusIdx = positionStatusIdx;
539
0
        position = fBI->handleNext();
540
0
        positionStatusIdx = fBI->fRuleStatusIndex;
541
0
        if (position == UBRK_DONE) {
542
0
            break;
543
0
        }
544
545
0
        UBool segmentHandledByDictionary = FALSE;
546
0
        if (fBI->fDictionaryCharCount != 0) {
547
            // Segment from the rules includes dictionary characters.
548
            // Subdivide it, with subdivided results going into the dictionary cache.
549
0
            int32_t dictSegEndPosition = position;
550
0
            fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
551
0
            while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
552
0
                segmentHandledByDictionary = true;
553
0
                U_ASSERT(position > prevPosition);
554
0
                if (position >= fromPosition) {
555
0
                    break;
556
0
                }
557
0
                U_ASSERT(position <= dictSegEndPosition);
558
0
                fSideBuffer.addElement(position, status);
559
0
                fSideBuffer.addElement(positionStatusIdx, status);
560
0
                prevPosition = position;
561
0
            }
562
0
            U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
563
0
        }
564
565
0
        if (!segmentHandledByDictionary && position < fromPosition) {
566
0
            fSideBuffer.addElement(position, status);
567
0
            fSideBuffer.addElement(positionStatusIdx, status);
568
0
        }
569
0
    } while (position < fromPosition);
570
571
    // Move boundaries from the side buffer to the main circular buffer.
572
0
    UBool success = FALSE;
573
0
    if (!fSideBuffer.isEmpty()) {
574
0
        positionStatusIdx = fSideBuffer.popi();
575
0
        position = fSideBuffer.popi();
576
0
        addPreceding(position, positionStatusIdx, UpdateCachePosition);
577
0
        success = TRUE;
578
0
    }
579
580
0
    while (!fSideBuffer.isEmpty()) {
581
0
        positionStatusIdx = fSideBuffer.popi();
582
0
        position = fSideBuffer.popi();
583
0
        if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
584
            // No space in circular buffer to hold a new preceding result while
585
            // also retaining the current cache (iteration) position.
586
            // Bailing out is safe; the cache will refill again if needed.
587
0
            break;
588
0
        }
589
0
    }
590
591
0
    return success;
592
0
}
593
594
595
1.42M
void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
596
1.42M
    U_ASSERT(position > fBoundaries[fEndBufIdx]);
597
1.42M
    U_ASSERT(ruleStatusIdx <= UINT16_MAX);
598
1.42M
    int32_t nextIdx = modChunkSize(fEndBufIdx + 1);
599
1.42M
    if (nextIdx == fStartBufIdx) {
600
56.6k
        fStartBufIdx = modChunkSize(fStartBufIdx + 6);    // TODO: experiment. Probably revert to 1.
601
56.6k
    }
602
1.42M
    fBoundaries[nextIdx] = position;
603
1.42M
    fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
604
1.42M
    fEndBufIdx = nextIdx;
605
1.42M
    if (update == UpdateCachePosition) {
606
        // Set current position to the newly added boundary.
607
587k
        fBufIdx = nextIdx;
608
587k
        fTextIdx = position;
609
836k
    } else {
610
        // Retaining the original cache position.
611
        // Check if the added boundary wraps around the buffer, and would over-write the original position.
612
        // It's the responsibility of callers of this function to not add too many.
613
836k
        U_ASSERT(nextIdx != fBufIdx);
614
836k
    }
615
1.42M
}
616
617
0
bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
618
0
    U_ASSERT(position < fBoundaries[fStartBufIdx]);
619
0
    U_ASSERT(ruleStatusIdx <= UINT16_MAX);
620
0
    int32_t nextIdx = modChunkSize(fStartBufIdx - 1);
621
0
    if (nextIdx == fEndBufIdx) {
622
0
        if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
623
            // Failure. The insertion of the new boundary would claim the buffer position that is the
624
            // current iteration position. And we also want to retain the current iteration position.
625
            // (The buffer is already completely full of entries that precede the iteration position.)
626
0
            return false;
627
0
        }
628
0
        fEndBufIdx = modChunkSize(fEndBufIdx - 1);
629
0
    }
630
0
    fBoundaries[nextIdx] = position;
631
0
    fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
632
0
    fStartBufIdx = nextIdx;
633
0
    if (update == UpdateCachePosition) {
634
0
        fBufIdx = nextIdx;
635
0
        fTextIdx = position;
636
0
    }
637
0
    return true;
638
0
}
639
640
641
0
void RuleBasedBreakIterator::BreakCache::dumpCache() {
642
#ifdef RBBI_DEBUG
643
    RBBIDebugPrintf("fTextIdx:%d   fBufIdx:%d\n", fTextIdx, fBufIdx);
644
    for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+1)) {
645
        RBBIDebugPrintf("%d  %d\n", i, fBoundaries[i]);
646
        if (i == fEndBufIdx) {
647
            break;
648
        }
649
    }
650
#endif
651
0
}
652
653
U_NAMESPACE_END
654
655
#endif // #if !UCONFIG_NO_BREAK_ITERATION