Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/common/rbbi_cache.cpp
Line
Count
Source (jump to first uncovered line)
1
// Copyright (C) 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// file: rbbi_cache.cpp
5
6
#include "unicode/utypes.h"
7
8
#if !UCONFIG_NO_BREAK_ITERATION
9
10
#include "unicode/ubrk.h"
11
#include "unicode/rbbi.h"
12
13
#include "rbbi_cache.h"
14
15
#include "brkeng.h"
16
#include "cmemory.h"
17
#include "rbbidata.h"
18
#include "rbbirb.h"
19
#include "uassert.h"
20
#include "uvectr32.h"
21
22
U_NAMESPACE_BEGIN
23
24
/*
25
 * DictionaryCache implementation
26
 */
27
28
RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
29
        fBI(bi), fBreaks(status), fPositionInCache(-1),
30
0
        fStart(0), fLimit(0), fFirstRuleStatusIndex(0), fOtherRuleStatusIndex(0) {
31
0
}
32
33
0
RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
34
0
}
35
36
0
void RuleBasedBreakIterator::DictionaryCache::reset() {
37
0
    fPositionInCache = -1;
38
0
    fStart = 0;
39
0
    fLimit = 0;
40
0
    fFirstRuleStatusIndex = 0;
41
0
    fOtherRuleStatusIndex = 0;
42
0
    fBreaks.removeAllElements();
43
0
}
44
45
0
UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
46
0
    if (fromPos >= fLimit || fromPos < fStart) {
47
0
        fPositionInCache = -1;
48
0
        return FALSE;
49
0
    }
50
0
51
0
    // Sequential iteration, move from previous boundary to the following
52
0
53
0
    int32_t r = 0;
54
0
    if (fPositionInCache >= 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
55
0
        ++fPositionInCache;
56
0
        if (fPositionInCache >= fBreaks.size()) {
57
0
            fPositionInCache = -1;
58
0
            return FALSE;
59
0
        }
60
0
        r = fBreaks.elementAti(fPositionInCache);
61
0
        U_ASSERT(r > fromPos);
62
0
        *result = r;
63
0
        *statusIndex = fOtherRuleStatusIndex;
64
0
        return TRUE;
65
0
    }
66
0
67
0
    // Random indexing. Linear search for the boundary following the given position.
68
0
69
0
    for (fPositionInCache = 0; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
70
0
        r= fBreaks.elementAti(fPositionInCache);
71
0
        if (r > fromPos) {
72
0
            *result = r;
73
0
            *statusIndex = fOtherRuleStatusIndex;
74
0
            return TRUE;
75
0
        }
76
0
    }
77
0
    U_ASSERT(FALSE);
78
0
    fPositionInCache = -1;
79
0
    return FALSE;
80
0
}
81
82
83
0
UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t *result, int32_t *statusIndex) {
84
0
    if (fromPos <= fStart || fromPos > fLimit) {
85
0
        fPositionInCache = -1;
86
0
        return FALSE;
87
0
    }
88
0
89
0
    if (fromPos == fLimit) {
90
0
        fPositionInCache = fBreaks.size() - 1;
91
0
        if (fPositionInCache >= 0) {
92
0
            U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
93
0
        }
94
0
    }
95
0
96
0
    int32_t r;
97
0
    if (fPositionInCache > 0 && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
98
0
        --fPositionInCache;
99
0
        r = fBreaks.elementAti(fPositionInCache);
100
0
        U_ASSERT(r < fromPos);
101
0
        *result = r;
102
0
        *statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
103
0
        return TRUE;
104
0
    }
105
0
106
0
    if (fPositionInCache == 0) {
107
0
        fPositionInCache = -1;
108
0
        return FALSE;
109
0
    }
110
0
111
0
    for (fPositionInCache = fBreaks.size()-1; fPositionInCache >= 0; --fPositionInCache) {
112
0
        r = fBreaks.elementAti(fPositionInCache);
113
0
        if (r < fromPos) {
114
0
            *result = r;
115
0
            *statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
116
0
            return TRUE;
117
0
        }
118
0
    }
119
0
    U_ASSERT(FALSE);
120
0
    fPositionInCache = -1;
121
0
    return FALSE;
122
0
}
123
124
void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
125
0
                                       int32_t firstRuleStatus, int32_t otherRuleStatus) {
126
0
    if ((endPos - startPos) <= 1) {
127
0
        return;
128
0
    }
129
0
130
0
    reset();
131
0
    fFirstRuleStatusIndex = firstRuleStatus;
132
0
    fOtherRuleStatusIndex = otherRuleStatus;
133
0
134
0
    int32_t rangeStart = startPos;
135
0
    int32_t rangeEnd = endPos;
136
0
137
0
    uint16_t    category;
138
0
    int32_t     current;
139
0
    UErrorCode  status = U_ZERO_ERROR;
140
0
    int32_t     foundBreakCount = 0;
141
0
    UText      *text = &fBI->fText;
142
0
143
0
    // Loop through the text, looking for ranges of dictionary characters.
144
0
    // For each span, find the appropriate break engine, and ask it to find
145
0
    // any breaks within the span.
146
0
147
0
    utext_setNativeIndex(text, rangeStart);
148
0
    UChar32     c = utext_current32(text);
149
0
    category = UTRIE2_GET16(fBI->fData->fTrie, c);
150
0
151
0
    while(U_SUCCESS(status)) {
152
0
        while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & 0x4000) == 0) {
153
0
            utext_next32(text);           // TODO: cleaner loop structure.
154
0
            c = utext_current32(text);
155
0
            category = UTRIE2_GET16(fBI->fData->fTrie, c);
156
0
        }
157
0
        if (current >= rangeEnd) {
158
0
            break;
159
0
        }
160
0
161
0
        // We now have a dictionary character. Get the appropriate language object
162
0
        // to deal with it.
163
0
        const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
164
0
165
0
        // Ask the language object if there are any breaks. It will add them to the cache and
166
0
        // leave the text pointer on the other side of its range, ready to search for the next one.
167
0
        if (lbe != NULL) {
168
0
            foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
169
0
        }
170
0
171
0
        // Reload the loop variables for the next go-round
172
0
        c = utext_current32(text);
173
0
        category = UTRIE2_GET16(fBI->fData->fTrie, c);
174
0
    }
175
0
176
0
    // If we found breaks, ensure that the first and last entries are
177
0
    // the original starting and ending position. And initialize the
178
0
    // cache iteration position to the first entry.
179
0
180
0
    // printf("foundBreakCount = %d\n", foundBreakCount);
181
0
    if (foundBreakCount > 0) {
182
0
        U_ASSERT(foundBreakCount == fBreaks.size());
183
0
        if (startPos < fBreaks.elementAti(0)) {
184
0
            // The dictionary did not place a boundary at the start of the segment of text.
185
0
            // Add one now. This should not commonly happen, but it would be easy for interactions
186
0
            // of the rules for dictionary segments and the break engine implementations to
187
0
            // inadvertently cause it. Cover it here, just in case.
188
0
            fBreaks.insertElementAt(startPos, 0, status);
189
0
        }
190
0
        if (endPos > fBreaks.peeki()) {
191
0
            fBreaks.push(endPos, status);
192
0
        }
193
0
        fPositionInCache = 0;
194
0
        // Note: Dictionary matching may extend beyond the original limit.
195
0
        fStart = fBreaks.elementAti(0);
196
0
        fLimit = fBreaks.peeki();
197
0
    } else {
198
0
        // there were no language-based breaks, even though the segment contained
199
0
        // dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
200
0
        // for this range will fail, and the calling code will fall back to the rule based boundaries.
201
0
    }
202
0
}
203
204
205
/*
206
 *   BreakCache implemetation
207
 */
208
209
RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
210
0
        fBI(bi), fSideBuffer(status) {
211
0
    reset();
212
0
}
213
214
215
0
RuleBasedBreakIterator::BreakCache::~BreakCache() {
216
0
}
217
218
219
0
void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
220
0
    fStartBufIdx = 0;
221
0
    fEndBufIdx = 0;
222
0
    fTextIdx = pos;
223
0
    fBufIdx = 0;
224
0
    fBoundaries[0] = pos;
225
0
    fStatuses[0] = (uint16_t)ruleStatus;
226
0
}
227
228
229
0
int32_t  RuleBasedBreakIterator::BreakCache::current() {
230
0
    fBI->fPosition = fTextIdx;
231
0
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
232
0
    fBI->fDone = FALSE;
233
0
    return fTextIdx;
234
0
}
235
236
237
0
void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
238
0
    if (U_FAILURE(status)) {
239
0
        return;
240
0
    }
241
0
    if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
242
0
        // startPos is in the cache. Do a next() from that position.
243
0
        // TODO: an awkward set of interactions with bi->fDone
244
0
        //       seek() does not clear it; it can't because of interactions with populateNear().
245
0
        //       next() does not clear it in the fast-path case, where everything matters. Maybe it should.
246
0
        //       So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
247
0
        fBI->fDone = false;
248
0
        next();
249
0
    }
250
0
    return;
251
0
}
252
253
254
0
void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
255
0
    if (U_FAILURE(status)) {
256
0
        return;
257
0
    }
258
0
    if (startPos == fTextIdx || seek(startPos) || populateNear(startPos, status)) {
259
0
        if (startPos == fTextIdx) {
260
0
            previous(status);
261
0
        } else {
262
0
            // seek() leaves the BreakCache positioned at the preceding boundary
263
0
            //        if the requested position is between two bounaries.
264
0
            // current() pushes the BreakCache position out to the BreakIterator itself.
265
0
            U_ASSERT(startPos > fTextIdx);
266
0
            current();
267
0
        }
268
0
    }
269
0
    return;
270
0
}
271
272
273
/*
274
 * Out-of-line code for BreakCache::next().
275
 * Cache does not already contain the boundary
276
 */
277
0
void RuleBasedBreakIterator::BreakCache::nextOL() {
278
0
    fBI->fDone = !populateFollowing();
279
0
    fBI->fPosition = fTextIdx;
280
0
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
281
0
    return;
282
0
}
283
284
285
0
void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
286
0
    if (U_FAILURE(status)) {
287
0
        return;
288
0
    }
289
0
    int32_t initialBufIdx = fBufIdx;
290
0
    if (fBufIdx == fStartBufIdx) {
291
0
        // At start of cache. Prepend to it.
292
0
        populatePreceding(status);
293
0
    } else {
294
0
        // Cache already holds the next boundary
295
0
        fBufIdx = modChunkSize(fBufIdx - 1);
296
0
        fTextIdx = fBoundaries[fBufIdx];
297
0
    }
298
0
    fBI->fDone = (fBufIdx == initialBufIdx);
299
0
    fBI->fPosition = fTextIdx;
300
0
    fBI->fRuleStatusIndex = fStatuses[fBufIdx];
301
0
    return;
302
0
}
303
304
305
0
UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
306
0
    if (pos < fBoundaries[fStartBufIdx] || pos > fBoundaries[fEndBufIdx]) {
307
0
        return FALSE;
308
0
    }
309
0
    if (pos == fBoundaries[fStartBufIdx]) {
310
0
        // Common case: seek(0), from BreakIterator::first()
311
0
        fBufIdx = fStartBufIdx;
312
0
        fTextIdx = fBoundaries[fBufIdx];
313
0
        return TRUE;
314
0
    }
315
0
    if (pos == fBoundaries[fEndBufIdx]) {
316
0
        fBufIdx = fEndBufIdx;
317
0
        fTextIdx = fBoundaries[fBufIdx];
318
0
        return TRUE;
319
0
    }
320
0
321
0
    int32_t min = fStartBufIdx;
322
0
    int32_t max = fEndBufIdx;
323
0
    while (min != max) {
324
0
        int32_t probe = (min + max + (min>max ? CACHE_SIZE : 0)) / 2;
325
0
        probe = modChunkSize(probe);
326
0
        if (fBoundaries[probe] > pos) {
327
0
            max = probe;
328
0
        } else {
329
0
            min = modChunkSize(probe + 1);
330
0
        }
331
0
    }
332
0
    U_ASSERT(fBoundaries[max] > pos);
333
0
    fBufIdx = modChunkSize(max - 1);
334
0
    fTextIdx = fBoundaries[fBufIdx];
335
0
    U_ASSERT(fTextIdx <= pos);
336
0
    return TRUE;
337
0
}
338
339
340
0
UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
341
0
    if (U_FAILURE(status)) {
342
0
        return FALSE;
343
0
    }
344
0
    U_ASSERT(position < fBoundaries[fStartBufIdx] || position > fBoundaries[fEndBufIdx]);
345
0
346
0
    // Find a boundary somewhere in the vicinity of the requested position.
347
0
    // Depending on the safe rules and the text data, it could be either before, at, or after
348
0
    // the requested position.
349
0
350
0
351
0
    // If the requested position is not near already cached positions, clear the existing cache,
352
0
    // find a near-by boundary and begin new cache contents there.
353
0
354
0
    if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) {
355
0
        int32_t aBoundary = 0;
356
0
        int32_t ruleStatusIndex = 0;
357
0
        if (position > 20) {
358
0
            int32_t backupPos = fBI->handleSafePrevious(position);
359
0
360
0
            if (backupPos > 0) {
361
0
                // Advance to the boundary following the backup position.
362
0
                // There is a complication: the safe reverse rules identify pairs of code points
363
0
                // that are safe. If advancing from the safe point moves forwards by less than
364
0
                // two code points, we need to advance one more time to ensure that the boundary
365
0
                // is good, including a correct rules status value.
366
0
                //
367
0
                fBI->fPosition = backupPos;
368
0
                aBoundary = fBI->handleNext();
369
0
                if (aBoundary <= backupPos + 4) {
370
0
                    // +4 is a quick test for possibly having advanced only one codepoint.
371
0
                    // Four being the length of the longest potential code point, a supplementary in UTF-8
372
0
                    utext_setNativeIndex(&fBI->fText, aBoundary);
373
0
                    if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
374
0
                        // The initial handleNext() only advanced by a single code point. Go again.
375
0
                        aBoundary = fBI->handleNext();   // Safe rules identify safe pairs.
376
0
                    }
377
0
                }
378
0
                ruleStatusIndex = fBI->fRuleStatusIndex;
379
0
            }
380
0
        }
381
0
        reset(aBoundary, ruleStatusIndex);        // Reset cache to hold aBoundary as a single starting point.
382
0
    }
383
0
384
0
    // Fill in boundaries between existing cache content and the new requested position.
385
0
386
0
    if (fBoundaries[fEndBufIdx] < position) {
387
0
        // The last position in the cache precedes the requested position.
388
0
        // Add following position(s) to the cache.
389
0
        while (fBoundaries[fEndBufIdx] < position) {
390
0
            if (!populateFollowing()) {
391
0
                U_ASSERT(false);
392
0
                return false;
393
0
            }
394
0
        }
395
0
        fBufIdx = fEndBufIdx;                      // Set iterator position to the end of the buffer.
396
0
        fTextIdx = fBoundaries[fBufIdx];           // Required because populateFollowing may add extra boundaries.
397
0
        while (fTextIdx > position) {              // Move backwards to a position at or preceding the requested pos.
398
0
            previous(status);
399
0
        }
400
0
        return true;
401
0
    }
402
0
403
0
    if (fBoundaries[fStartBufIdx] > position) {
404
0
        // The first position in the cache is beyond the requested position.
405
0
        // back up more until we get a boundary <= the requested position.
406
0
        while (fBoundaries[fStartBufIdx] > position) {
407
0
            populatePreceding(status);
408
0
        }
409
0
        fBufIdx = fStartBufIdx;                    // Set iterator position to the start of the buffer.
410
0
        fTextIdx = fBoundaries[fBufIdx];           // Required because populatePreceding may add extra boundaries.
411
0
        while (fTextIdx < position) {              // Move forwards to a position at or following the requested pos.
412
0
            next();
413
0
        }
414
0
        if (fTextIdx > position) {
415
0
            // If position is not itself a boundary, the next() loop above will overshoot.
416
0
            // Back up one, leaving cache position at the boundary preceding the requested position.
417
0
            previous(status);
418
0
        }
419
0
        return true;
420
0
    }
421
0
422
0
    U_ASSERT(fTextIdx == position);
423
0
    return true;
424
0
}
425
426
427
428
0
UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
429
0
    int32_t fromPosition = fBoundaries[fEndBufIdx];
430
0
    int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
431
0
    int32_t pos = 0;
432
0
    int32_t ruleStatusIdx = 0;
433
0
434
0
    if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
435
0
        addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
436
0
        return TRUE;
437
0
    }
438
0
439
0
    fBI->fPosition = fromPosition;
440
0
    pos = fBI->handleNext();
441
0
    if (pos == UBRK_DONE) {
442
0
        return FALSE;
443
0
    }
444
0
445
0
    ruleStatusIdx = fBI->fRuleStatusIndex;
446
0
    if (fBI->fDictionaryCharCount > 0) {
447
0
        // The text segment obtained from the rules includes dictionary characters.
448
0
        // Subdivide it, with subdivided results going into the dictionary cache.
449
0
        fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
450
0
        if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
451
0
            addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
452
0
            return TRUE;
453
0
            // TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
454
0
            //       But be careful with interactions with populateNear().
455
0
        }
456
0
    }
457
0
458
0
    // Rule based segment did not include dictionary characters.
459
0
    // Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
460
0
    //    meaning that we didn't take the return, above.
461
0
    // Add its end point to the cache.
462
0
    addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
463
0
464
0
    // Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
465
0
    //    (subsequent calls to BreakIterator::next() will take the fast path, getting cached results.
466
0
    //
467
0
    for (int count=0; count<6; ++count) {
468
0
        pos = fBI->handleNext();
469
0
        if (pos == UBRK_DONE || fBI->fDictionaryCharCount > 0) {
470
0
            break;
471
0
        }
472
0
        addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition);
473
0
    }
474
0
475
0
    return TRUE;
476
0
}
477
478
479
0
UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
480
0
    if (U_FAILURE(status)) {
481
0
        return FALSE;
482
0
    }
483
0
484
0
    int32_t fromPosition = fBoundaries[fStartBufIdx];
485
0
    if (fromPosition == 0) {
486
0
        return FALSE;
487
0
    }
488
0
489
0
    int32_t position = 0;
490
0
    int32_t positionStatusIdx = 0;
491
0
492
0
    if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
493
0
        addPreceding(position, positionStatusIdx, UpdateCachePosition);
494
0
        return TRUE;
495
0
    }
496
0
497
0
    int32_t backupPosition = fromPosition;
498
0
499
0
    // Find a boundary somewhere preceding the first already-cached boundary
500
0
    do {
501
0
        backupPosition = backupPosition - 30;
502
0
        if (backupPosition <= 0) {
503
0
            backupPosition = 0;
504
0
        } else {
505
0
            backupPosition = fBI->handleSafePrevious(backupPosition);
506
0
        }
507
0
        if (backupPosition == UBRK_DONE || backupPosition == 0) {
508
0
            position = 0;
509
0
            positionStatusIdx = 0;
510
0
        } else {
511
0
            // Advance to the boundary following the backup position.
512
0
            // There is a complication: the safe reverse rules identify pairs of code points
513
0
            // that are safe. If advancing from the safe point moves forwards by less than
514
0
            // two code points, we need to advance one more time to ensure that the boundary
515
0
            // is good, including a correct rules status value.
516
0
            //
517
0
            fBI->fPosition = backupPosition;
518
0
            position = fBI->handleNext();
519
0
            if (position <= backupPosition + 4) {
520
0
                // +4 is a quick test for possibly having advanced only one codepoint.
521
0
                // Four being the length of the longest potential code point, a supplementary in UTF-8
522
0
                utext_setNativeIndex(&fBI->fText, position);
523
0
                if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
524
0
                    // The initial handleNext() only advanced by a single code point. Go again.
525
0
                    position = fBI->handleNext();   // Safe rules identify safe pairs.
526
0
                }
527
0
            };
528
0
            positionStatusIdx = fBI->fRuleStatusIndex;
529
0
        }
530
0
    } while (position >= fromPosition);
531
0
532
0
    // Find boundaries between the one we just located and the first already-cached boundary
533
0
    // Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer..
534
0
535
0
    fSideBuffer.removeAllElements();
536
0
    fSideBuffer.addElement(position, status);
537
0
    fSideBuffer.addElement(positionStatusIdx, status);
538
0
539
0
    do {
540
0
        int32_t prevPosition = fBI->fPosition = position;
541
0
        int32_t prevStatusIdx = positionStatusIdx;
542
0
        position = fBI->handleNext();
543
0
        positionStatusIdx = fBI->fRuleStatusIndex;
544
0
        if (position == UBRK_DONE) {
545
0
            break;
546
0
        }
547
0
548
0
        UBool segmentHandledByDictionary = FALSE;
549
0
        if (fBI->fDictionaryCharCount != 0) {
550
0
            // Segment from the rules includes dictionary characters.
551
0
            // Subdivide it, with subdivided results going into the dictionary cache.
552
0
            int32_t dictSegEndPosition = position;
553
0
            fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
554
0
            while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
555
0
                segmentHandledByDictionary = true;
556
0
                U_ASSERT(position > prevPosition);
557
0
                if (position >= fromPosition) {
558
0
                    break;
559
0
                }
560
0
                U_ASSERT(position <= dictSegEndPosition);
561
0
                fSideBuffer.addElement(position, status);
562
0
                fSideBuffer.addElement(positionStatusIdx, status);
563
0
                prevPosition = position;
564
0
            }
565
0
            U_ASSERT(position==dictSegEndPosition || position>=fromPosition);
566
0
        }
567
0
568
0
        if (!segmentHandledByDictionary && position < fromPosition) {
569
0
            fSideBuffer.addElement(position, status);
570
0
            fSideBuffer.addElement(positionStatusIdx, status);
571
0
        }
572
0
    } while (position < fromPosition);
573
0
574
0
    // Move boundaries from the side buffer to the main circular buffer.
575
0
    UBool success = FALSE;
576
0
    if (!fSideBuffer.isEmpty()) {
577
0
        positionStatusIdx = fSideBuffer.popi();
578
0
        position = fSideBuffer.popi();
579
0
        addPreceding(position, positionStatusIdx, UpdateCachePosition);
580
0
        success = TRUE;
581
0
    }
582
0
583
0
    while (!fSideBuffer.isEmpty()) {
584
0
        positionStatusIdx = fSideBuffer.popi();
585
0
        position = fSideBuffer.popi();
586
0
        if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
587
0
            // No space in circular buffer to hold a new preceding result while
588
0
            // also retaining the current cache (iteration) position.
589
0
            // Bailing out is safe; the cache will refill again if needed.
590
0
            break;
591
0
        }
592
0
    }
593
0
594
0
    return success;
595
0
}
596
597
598
0
void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
599
0
    U_ASSERT(position > fBoundaries[fEndBufIdx]);
600
0
    U_ASSERT(ruleStatusIdx <= UINT16_MAX);
601
0
    int32_t nextIdx = modChunkSize(fEndBufIdx + 1);
602
0
    if (nextIdx == fStartBufIdx) {
603
0
        fStartBufIdx = modChunkSize(fStartBufIdx + 6);    // TODO: experiment. Probably revert to 1.
604
0
    }
605
0
    fBoundaries[nextIdx] = position;
606
0
    fStatuses[nextIdx] = ruleStatusIdx;
607
0
    fEndBufIdx = nextIdx;
608
0
    if (update == UpdateCachePosition) {
609
0
        // Set current position to the newly added boundary.
610
0
        fBufIdx = nextIdx;
611
0
        fTextIdx = position;
612
0
    } else {
613
0
        // Retaining the original cache position.
614
0
        // Check if the added boundary wraps around the buffer, and would over-write the original position.
615
0
        // It's the responsibility of callers of this function to not add too many.
616
0
        U_ASSERT(nextIdx != fBufIdx);
617
0
    }
618
0
}
619
620
0
bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
621
0
    U_ASSERT(position < fBoundaries[fStartBufIdx]);
622
0
    U_ASSERT(ruleStatusIdx <= UINT16_MAX);
623
0
    int32_t nextIdx = modChunkSize(fStartBufIdx - 1);
624
0
    if (nextIdx == fEndBufIdx) {
625
0
        if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
626
0
            // Failure. The insertion of the new boundary would claim the buffer position that is the
627
0
            // current iteration position. And we also want to retain the current iteration position.
628
0
            // (The buffer is already completely full of entries that precede the iteration position.)
629
0
            return false;
630
0
        }
631
0
        fEndBufIdx = modChunkSize(fEndBufIdx - 1);
632
0
    }
633
0
    fBoundaries[nextIdx] = position;
634
0
    fStatuses[nextIdx] = ruleStatusIdx;
635
0
    fStartBufIdx = nextIdx;
636
0
    if (update == UpdateCachePosition) {
637
0
        fBufIdx = nextIdx;
638
0
        fTextIdx = position;
639
0
    }
640
0
    return true;
641
0
}
642
643
644
0
void RuleBasedBreakIterator::BreakCache::dumpCache() {
645
#ifdef RBBI_DEBUG
646
    RBBIDebugPrintf("fTextIdx:%d   fBufIdx:%d\n", fTextIdx, fBufIdx);
647
    for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+1)) {
648
        RBBIDebugPrintf("%d  %d\n", i, fBoundaries[i]);
649
        if (i == fEndBufIdx) {
650
            break;
651
        }
652
    }
653
#endif
654
}
655
656
U_NAMESPACE_END
657
658
#endif // #if !UCONFIG_NO_BREAK_ITERATION