/src/skia/third_party/externals/icu/source/common/rbbi_cache.h
Line | Count | Source |
1 | | // Copyright (C) 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | // file: rbbi_cache.h |
5 | | // |
6 | | #ifndef RBBI_CACHE_H |
7 | | #define RBBI_CACHE_H |
8 | | |
9 | | #include "unicode/utypes.h" |
10 | | |
11 | | #if !UCONFIG_NO_BREAK_ITERATION |
12 | | |
13 | | #include "unicode/rbbi.h" |
14 | | #include "unicode/uobject.h" |
15 | | |
16 | | #include "uvectr32.h" |
17 | | |
18 | | U_NAMESPACE_BEGIN |
19 | | |
20 | | /* DictionaryCache stores the boundaries obtained from a run of dictionary characters. |
21 | | * Dictionary boundaries are moved first to this cache, then from here |
22 | | * to the main BreakCache, where they may inter-leave with non-dictionary |
23 | | * boundaries. The public BreakIterator API always fetches directly |
24 | | * from the main BreakCache, not from here. |
25 | | * |
26 | | * In common situations, the number of boundaries in a single dictionary run |
27 | | * should be quite small, it will be terminated by punctuation, spaces, |
28 | | * or any other non-dictionary characters. The main BreakCache may end |
29 | | * up with boundaries from multiple dictionary based runs. |
30 | | * |
31 | | * The boundaries are stored in a simple ArrayList (vector), with the |
32 | | * assumption that they will be accessed sequentially. |
33 | | */ |
34 | | class RuleBasedBreakIterator::DictionaryCache: public UMemory { |
35 | | public: |
36 | | DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status); |
37 | | ~DictionaryCache(); |
38 | | |
39 | | void reset(); |
40 | | |
41 | | UBool following(int32_t fromPos, int32_t *pos, int32_t *statusIndex); |
42 | | UBool preceding(int32_t fromPos, int32_t *pos, int32_t *statusIndex); |
43 | | |
44 | | /** |
45 | | * Populate the cache with the dictionary based boundaries within a region of text. |
46 | | * @param startPos The start position of a range of text |
47 | | * @param endPos The end position of a range of text |
48 | | * @param firstRuleStatus The rule status index that applies to the break at startPos |
49 | | * @param otherRuleStatus The rule status index that applies to boundaries other than startPos |
50 | | * @internal |
51 | | */ |
52 | | void populateDictionary(int32_t startPos, int32_t endPos, |
53 | | int32_t firstRuleStatus, int32_t otherRuleStatus); |
54 | | |
55 | | |
56 | | |
57 | | RuleBasedBreakIterator *fBI; |
58 | | |
59 | | UVector32 fBreaks; // A vector containing the boundaries. |
60 | | int32_t fPositionInCache; // Index in fBreaks of last boundary returned by following() |
61 | | // or preceding(). Optimizes sequential access. |
62 | | int32_t fStart; // Text position of first boundary in cache. |
63 | | int32_t fLimit; // Last boundary in cache. Which is the limit of the |
64 | | // text segment being handled by the dictionary. |
65 | | int32_t fFirstRuleStatusIndex; // Rule status info for first boundary. |
66 | | int32_t fOtherRuleStatusIndex; // Rule status info for 2nd through last boundaries. |
67 | | }; |
68 | | |
69 | | |
70 | | /* |
71 | | * class BreakCache |
72 | | * |
73 | | * Cache of break boundary positions and rule status values. |
74 | | * Break iterator API functions, next(), previous(), etc., will use cached results |
75 | | * when possible, and otherwise cache new results as they are obtained. |
76 | | * |
77 | | * Uniformly caches both dictionary and rule based (non-dictionary) boundaries. |
78 | | * |
79 | | * The cache is implemented as a single circular buffer. |
80 | | */ |
81 | | |
82 | | /* |
83 | | * size of the circular cache buffer. |
84 | | */ |
85 | | |
86 | | class RuleBasedBreakIterator::BreakCache: public UMemory { |
87 | | public: |
88 | | BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status); |
89 | | virtual ~BreakCache(); |
90 | | void reset(int32_t pos = 0, int32_t ruleStatus = 0); |
91 | 1.84M | void next() { if (fBufIdx == fEndBufIdx) { |
92 | 1.01M | nextOL(); |
93 | 836k | } else { |
94 | 836k | fBufIdx = modChunkSize(fBufIdx + 1); |
95 | 836k | fTextIdx = fBI->fPosition = fBoundaries[fBufIdx]; |
96 | 836k | fBI->fRuleStatusIndex = fStatuses[fBufIdx]; |
97 | 836k | } |
98 | 1.84M | } |
99 | | |
100 | | |
101 | | void nextOL(); |
102 | | void previous(UErrorCode &status); |
103 | | |
104 | | // Move the iteration state to the position following the startPosition. |
105 | | // Input position must be pinned to the input length. |
106 | | void following(int32_t startPosition, UErrorCode &status); |
107 | | |
108 | | void preceding(int32_t startPosition, UErrorCode &status); |
109 | | |
110 | | /* |
111 | | * Update the state of the public BreakIterator (fBI) to reflect the |
112 | | * current state of the break iterator cache (this). |
113 | | */ |
114 | | int32_t current(); |
115 | | |
116 | | /** |
117 | | * Add boundaries to the cache near the specified position. |
118 | | * The given position need not be a boundary itself. |
119 | | * The input position must be within the range of the text, and |
120 | | * on a code point boundary. |
121 | | * If the requested position is a break boundary, leave the iteration |
122 | | * position on it. |
123 | | * If the requested position is not a boundary, leave the iteration |
124 | | * position on the preceding boundary and include both the |
125 | | * preceding and following boundaries in the cache. |
126 | | * Additional boundaries, either preceding or following, may be added |
127 | | * to the cache as a side effect. |
128 | | * |
129 | | * Return false if the operation failed. |
130 | | */ |
131 | | UBool populateNear(int32_t position, UErrorCode &status); |
132 | | |
133 | | /** |
134 | | * Add boundary(s) to the cache following the current last boundary. |
135 | | * Return false if at the end of the text, and no more boundaries can be added. |
136 | | * Leave iteration position at the first newly added boundary, or unchanged if no boundary was added. |
137 | | */ |
138 | | UBool populateFollowing(); |
139 | | |
140 | | /** |
141 | | * Add one or more boundaries to the cache preceding the first currently cached boundary. |
142 | | * Leave the iteration position on the first added boundary. |
143 | | * Return false if no boundaries could be added (if at the start of the text.) |
144 | | */ |
145 | | UBool populatePreceding(UErrorCode &status); |
146 | | |
147 | | enum UpdatePositionValues { |
148 | | RetainCachePosition = 0, |
149 | | UpdateCachePosition = 1 |
150 | | }; |
151 | | |
152 | | /* |
153 | | * Add the boundary following the current position. |
154 | | * The current position can be left as it was, or changed to the newly added boundary, |
155 | | * as specified by the update parameter. |
156 | | */ |
157 | | void addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update); |
158 | | |
159 | | |
160 | | /* |
161 | | * Add the boundary preceding the current position. |
162 | | * The current position can be left as it was, or changed to the newly added boundary, |
163 | | * as specified by the update parameter. |
164 | | */ |
165 | | bool addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update); |
166 | | |
167 | | /** |
168 | | * Set the cache position to the specified position, or, if the position |
169 | | * falls between to cached boundaries, to the preceding boundary. |
170 | | * Fails if the requested position is outside of the range of boundaries currently held by the cache. |
171 | | * The startPosition must be on a code point boundary. |
172 | | * |
173 | | * Return true if successful, false if the specified position is after |
174 | | * the last cached boundary or before the first. |
175 | | */ |
176 | | UBool seek(int32_t startPosition); |
177 | | |
178 | | void dumpCache(); |
179 | | |
180 | | private: |
181 | 2.31M | static inline int32_t modChunkSize(int index) { return index & (CACHE_SIZE - 1); } |
182 | | |
183 | | static constexpr int32_t CACHE_SIZE = 128; |
184 | | static_assert((CACHE_SIZE & (CACHE_SIZE-1)) == 0, "CACHE_SIZE must be power of two."); |
185 | | |
186 | | RuleBasedBreakIterator *fBI; |
187 | | int32_t fStartBufIdx; |
188 | | int32_t fEndBufIdx; // inclusive |
189 | | |
190 | | int32_t fTextIdx; |
191 | | int32_t fBufIdx; |
192 | | |
193 | | int32_t fBoundaries[CACHE_SIZE]; |
194 | | uint16_t fStatuses[CACHE_SIZE]; |
195 | | |
196 | | UVector32 fSideBuffer; |
197 | | }; |
198 | | |
199 | | U_NAMESPACE_END |
200 | | |
201 | | #endif // #if !UCONFIG_NO_BREAK_ITERATION |
202 | | |
203 | | #endif // RBBI_CACHE_H |