Coverage Report

Created: 2021-08-22 09:07

/src/skia/third_party/externals/icu/source/common/rbbi_cache.h
Line
Count
Source
1
// Copyright (C) 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// file: rbbi_cache.h
5
//
6
#ifndef RBBI_CACHE_H
7
#define RBBI_CACHE_H
8
9
#include "unicode/utypes.h"
10
11
#if !UCONFIG_NO_BREAK_ITERATION
12
13
#include "unicode/rbbi.h"
14
#include "unicode/uobject.h"
15
16
#include "uvectr32.h"
17
18
U_NAMESPACE_BEGIN
19
20
/* DictionaryCache  stores the boundaries obtained from a run of dictionary characters.
21
 *                 Dictionary boundaries are moved first to this cache, then from here
22
 *                 to the main BreakCache, where they may inter-leave with non-dictionary
23
 *                 boundaries. The public BreakIterator API always fetches directly
24
 *                 from the main BreakCache, not from here.
25
 *
26
 *                 In common situations, the number of boundaries in a single dictionary run
27
 *                 should be quite small, it will be terminated by punctuation, spaces,
28
 *                 or any other non-dictionary characters. The main BreakCache may end
29
 *                 up with boundaries from multiple dictionary based runs.
30
 *
31
 *                 The boundaries are stored in a simple ArrayList (vector), with the
32
 *                 assumption that they will be accessed sequentially.
33
 */                 
34
class RuleBasedBreakIterator::DictionaryCache: public UMemory {
35
  public:
36
     DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status);
37
     ~DictionaryCache();
38
39
     void reset();
40
41
     UBool following(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
42
     UBool preceding(int32_t fromPos, int32_t *pos, int32_t *statusIndex);
43
44
    /**
45
     * Populate the cache with the dictionary based boundaries within a region of text.
46
     * @param startPos  The start position of a range of text
47
     * @param endPos    The end position of a range of text
48
     * @param firstRuleStatus The rule status index that applies to the break at startPos
49
     * @param otherRuleStatus The rule status index that applies to boundaries other than startPos
50
     * @internal
51
     */
52
    void populateDictionary(int32_t startPos, int32_t endPos,
53
                         int32_t firstRuleStatus, int32_t otherRuleStatus);
54
55
56
57
    RuleBasedBreakIterator *fBI;
58
    
59
    UVector32           fBreaks;                // A vector containing the boundaries.
60
    int32_t             fPositionInCache;       // Index in fBreaks of last boundary returned by following()
61
                                                //    or preceding(). Optimizes sequential access.
62
    int32_t             fStart;                 // Text position of first boundary in cache.
63
    int32_t             fLimit;                 // Last boundary in cache. Which is the limit of the
64
                                                //    text segment being handled by the dictionary.
65
    int32_t             fFirstRuleStatusIndex;  // Rule status info for first boundary.
66
    int32_t             fOtherRuleStatusIndex;  // Rule status info for 2nd through last boundaries.
67
};
68
69
70
/*
71
 * class BreakCache
72
 *
73
 * Cache of break boundary positions and rule status values.
74
 * Break iterator API functions, next(), previous(), etc., will use cached results
75
 * when possible, and otherwise cache new results as they are obtained.
76
 *
77
 * Uniformly caches both dictionary and rule based (non-dictionary) boundaries.
78
 *
79
 * The cache is implemented as a single circular buffer.
80
 */
81
82
/*
83
 * size of the circular cache buffer.
84
 */
85
86
class RuleBasedBreakIterator::BreakCache: public UMemory {
87
  public:
88
                BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status);
89
    virtual     ~BreakCache();
90
    void        reset(int32_t pos = 0, int32_t ruleStatus = 0);
91
1.84M
    void        next() {    if (fBufIdx == fEndBufIdx) {
92
1.01M
                                nextOL();
93
836k
                            } else {
94
836k
                                fBufIdx = modChunkSize(fBufIdx + 1);
95
836k
                                fTextIdx = fBI->fPosition = fBoundaries[fBufIdx];
96
836k
                                fBI->fRuleStatusIndex = fStatuses[fBufIdx];
97
836k
                            }
98
1.84M
                }
99
100
101
    void        nextOL();
102
    void        previous(UErrorCode &status);
103
104
    // Move the iteration state to the position following the startPosition.
105
    // Input position must be pinned to the input length.
106
    void        following(int32_t startPosition, UErrorCode &status);
107
108
    void        preceding(int32_t startPosition, UErrorCode &status);
109
110
    /*
111
     * Update the state of the public BreakIterator (fBI) to reflect the
112
     * current state of the break iterator cache (this).
113
     */
114
    int32_t     current();
115
116
    /**
117
     * Add boundaries to the cache near the specified position.
118
     * The given position need not be a boundary itself.
119
     * The input position must be within the range of the text, and
120
     * on a code point boundary.
121
     * If the requested position is a break boundary, leave the iteration
122
     * position on it.
123
     * If the requested position is not a boundary, leave the iteration
124
     * position on the preceding boundary and include both the
125
     * preceding and following boundaries in the cache.
126
     * Additional boundaries, either preceding or following, may be added
127
     * to the cache as a side effect.
128
     *
129
     * Return false if the operation failed.
130
     */
131
    UBool populateNear(int32_t position, UErrorCode &status);
132
133
    /**
134
     *  Add boundary(s) to the cache following the current last boundary.
135
     *  Return false if at the end of the text, and no more boundaries can be added.
136
     *  Leave iteration position at the first newly added boundary, or unchanged if no boundary was added.
137
     */
138
    UBool populateFollowing();
139
140
    /**
141
     *  Add one or more boundaries to the cache preceding the first currently cached boundary.
142
     *  Leave the iteration position on the first added boundary.
143
     *  Return false if no boundaries could be added (if at the start of the text.)
144
     */
145
    UBool populatePreceding(UErrorCode &status);
146
147
    enum UpdatePositionValues {
148
        RetainCachePosition = 0,
149
        UpdateCachePosition = 1
150
    };
151
152
    /*
153
     * Add the boundary following the current position.
154
     * The current position can be left as it was, or changed to the newly added boundary,
155
     * as specified by the update parameter.
156
     */
157
    void addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
158
159
160
    /*
161
     * Add the boundary preceding the current position.
162
     * The current position can be left as it was, or changed to the newly added boundary,
163
     * as specified by the update parameter.
164
     */
165
    bool addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update);
166
167
    /**
168
     *  Set the cache position to the specified position, or, if the position
169
     *  falls between to cached boundaries, to the preceding boundary.
170
     *  Fails if the requested position is outside of the range of boundaries currently held by the cache.
171
     *  The startPosition must be on a code point boundary.
172
     *
173
     *  Return true if successful, false if the specified position is after
174
     *  the last cached boundary or before the first.
175
     */
176
    UBool                   seek(int32_t startPosition);
177
178
    void dumpCache();
179
180
  private:
181
2.31M
    static inline int32_t   modChunkSize(int index) { return index & (CACHE_SIZE - 1); }
182
183
    static constexpr int32_t CACHE_SIZE = 128;
184
    static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two.");
185
186
    RuleBasedBreakIterator *fBI;
187
    int32_t                 fStartBufIdx;
188
    int32_t                 fEndBufIdx;    // inclusive
189
190
    int32_t                 fTextIdx;
191
    int32_t                 fBufIdx;
192
193
    int32_t                 fBoundaries[CACHE_SIZE];
194
    uint16_t                fStatuses[CACHE_SIZE];
195
196
    UVector32               fSideBuffer;
197
};
198
199
U_NAMESPACE_END
200
201
#endif // #if !UCONFIG_NO_BREAK_ITERATION
202
203
#endif // RBBI_CACHE_H