/src/icu/source/common/unicode/edits.h
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | // edits.h |
5 | | // created: 2016dec30 Markus W. Scherer |
6 | | |
7 | | #ifndef __EDITS_H__ |
8 | | #define __EDITS_H__ |
9 | | |
10 | | #include "unicode/utypes.h" |
11 | | #include "unicode/uobject.h" |
12 | | |
13 | | /** |
14 | | * \file |
15 | | * \brief C++ API: C++ class Edits for low-level string transformations on styled text. |
16 | | */ |
17 | | |
18 | | U_NAMESPACE_BEGIN |
19 | | |
20 | | #ifndef U_HIDE_DRAFT_API |
21 | | |
22 | | /** |
23 | | * Records lengths of string edits but not replacement text. |
24 | | * Supports replacements, insertions, deletions in linear progression. |
25 | | * Does not support moving/reordering of text. |
26 | | * |
27 | | * An Edits object tracks a separate UErrorCode, but ICU string transformation functions |
28 | | * (e.g., case mapping functions) merge any such errors into their API's UErrorCode. |
29 | | * |
| | * Edits are stored as 16-bit units, initially in a fixed-size buffer built into the object. |
| | * Objects are not copyable: the copy constructor and copy assignment operator are deleted. |
| | * |
30 | | * @draft ICU 59 |
31 | | */ |
32 | | class U_COMMON_API Edits U_FINAL : public UMemory { |
33 | | public: |
34 | | /** |
35 | | * Constructs an empty object. |
| | * It starts with no recorded units, a length delta of 0, no error (U_ZERO_ERROR), |
| | * and the built-in buffer as its storage. |
36 | | * @draft ICU 59 |
37 | | */ |
38 | | Edits() : |
39 | | array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), |
40 | 0 | errorCode(U_ZERO_ERROR) {} |
41 | | /** |
42 | | * Destructor. |
43 | | * @draft ICU 59 |
44 | | */ |
45 | | ~Edits(); |
46 | | |
47 | | /** |
48 | | * Resets the data but may not release memory. |
49 | | * @draft ICU 59 |
50 | | */ |
51 | | void reset(); |
52 | | |
53 | | /** |
54 | | * Adds a record for an unchanged segment of text. |
55 | | * Normally called from inside ICU string transformation functions, not user code. |
| | * @param unchangedLength length of the unchanged segment |
56 | | * @draft ICU 59 |
57 | | */ |
58 | | void addUnchanged(int32_t unchangedLength); |
59 | | /** |
60 | | * Adds a record for a text replacement/insertion/deletion. |
61 | | * Normally called from inside ICU string transformation functions, not user code. |
| | * @param oldLength length of the replaced segment in the old text |
| | * @param newLength length of the replacement in the new text |
62 | | * @draft ICU 59 |
63 | | */ |
64 | | void addReplace(int32_t oldLength, int32_t newLength); |
65 | | /** |
66 | | * Sets the UErrorCode if an error occurred while recording edits. |
67 | | * Preserves older error codes in the outErrorCode. |
68 | | * Normally called from inside ICU string transformation functions, not user code. |
| | * @param outErrorCode in/out error code; receives the error tracked by this object, |
| | * unless it already holds an older error code |
69 | | * @return TRUE if U_FAILURE(outErrorCode) |
70 | | * @draft ICU 59 |
71 | | */ |
72 | | UBool copyErrorTo(UErrorCode &outErrorCode); |
73 | | |
74 | | /** |
75 | | * How much longer is the new text compared with the old text? |
| | * Returns the stored delta, which is 0 for a newly constructed object. |
76 | | * @return new length minus old length |
77 | | * @draft ICU 59 |
78 | | */ |
79 | 0 | int32_t lengthDelta() const { return delta; } |
80 | | /** |
81 | | * @return TRUE if there are any change edits |
82 | | * @draft ICU 59 |
83 | | */ |
84 | | UBool hasChanges() const; |
85 | | |
86 | | /** |
87 | | * Access to the list of edits. |
| | * Instances are created by the Edits get...Iterator() functions; |
| | * the constructor that takes the edits data is private. Iterators are copyable. |
| | * NOTE(review): an Iterator is constructed from a pointer to the Edits' unit array, |
| | * so it presumably must not be used after that Edits object is changed or destroyed - confirm. |
| | * @see getCoarseChangesIterator |
88 | | * @see getCoarseIterator |
| | * @see getFineChangesIterator |
89 | | * @see getFineIterator |
90 | | * @draft ICU 59 |
91 | | */ |
92 | | struct U_COMMON_API Iterator U_FINAL : public UMemory { |
93 | | /** |
94 | | * Copy constructor. |
95 | | * @draft ICU 59 |
96 | | */ |
97 | | Iterator(const Iterator &other) = default; |
98 | | /** |
99 | | * Assignment operator. |
100 | | * @draft ICU 59 |
101 | | */ |
102 | | Iterator &operator=(const Iterator &other) = default; |
103 | | |
104 | | /** |
105 | | * Advances to the next edit. |
| | * Forwards to the private next() with the onlyChanges_ setting of this iterator. |
| | * @param errorCode ICU error code, passed through to the private next(). |
| | * NOTE(review): presumably standard ICU in/out semantics |
| | * (no-op if already a failure on input) - confirm against the implementation. |
106 | | * @return TRUE if there is another edit |
107 | | * @draft ICU 59 |
108 | | */ |
109 | 0 | UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); } |
110 | | |
111 | | /** |
112 | | * Finds the edit that contains the source index. |
113 | | * The source index may be found in a non-change |
114 | | * even if normal iteration would skip non-changes. |
115 | | * Normal iteration can continue from a found edit. |
116 | | * |
117 | | * The iterator state before this search logically does not matter. |
118 | | * (It may affect the performance of the search.) |
119 | | * |
120 | | * The iterator state after this search is undefined |
121 | | * if the source index is out of bounds for the source string. |
122 | | * |
123 | | * @param i source index |
| | * @param errorCode ICU error code. |
| | * NOTE(review): presumably standard ICU in/out semantics - confirm against the implementation. |
124 | | * @return TRUE if the edit for the source index was found |
125 | | * @draft ICU 59 |
126 | | */ |
127 | | UBool findSourceIndex(int32_t i, UErrorCode &errorCode); |
128 | | |
129 | | /** |
130 | | * @return TRUE if this edit replaces oldLength() units with newLength() different ones. |
131 | | * FALSE if oldLength units remain unchanged. |
132 | | * @draft ICU 59 |
133 | | */ |
134 | 0 | UBool hasChange() const { return changed; } |
135 | | /** |
136 | | * @return the number of units in the original string which are replaced or remain unchanged. |
137 | | * @draft ICU 59 |
138 | | */ |
139 | 0 | int32_t oldLength() const { return oldLength_; } |
140 | | /** |
141 | | * @return the number of units in the modified string, if hasChange() is TRUE. |
142 | | * Same as oldLength if hasChange() is FALSE. |
143 | | * @draft ICU 59 |
144 | | */ |
145 | 0 | int32_t newLength() const { return newLength_; } |
146 | | |
147 | | /** |
148 | | * @return the current index into the source string |
149 | | * @draft ICU 59 |
150 | | */ |
151 | 0 | int32_t sourceIndex() const { return srcIndex; } |
152 | | /** |
153 | | * @return the current index into the replacement-characters-only string, |
154 | | * not counting unchanged spans |
155 | | * @draft ICU 59 |
156 | | */ |
157 | 0 | int32_t replacementIndex() const { return replIndex; } |
158 | | /** |
159 | | * @return the current index into the full destination string |
160 | | * @draft ICU 59 |
161 | | */ |
162 | 0 | int32_t destinationIndex() const { return destIndex; } |
163 | | |
164 | | private: |
165 | | friend class Edits; /* lets the Edits get...Iterator() functions call the private constructor */ |
166 | | |
167 | | Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs); /* NOTE(review): oc and crs presumably initialize onlyChanges_ and coarse - defined outside this header, confirm */ |
168 | | |
169 | | int32_t readLength(int32_t head); |
170 | | void updateIndexes(); |
171 | | UBool noNext(); |
172 | | UBool next(UBool onlyChanges, UErrorCode &errorCode); /* called by the public next(errorCode) with onlyChanges_ */ |
173 | | |
174 | | const uint16_t *array; |
175 | | int32_t index, length; |
176 | | int32_t remaining; |
177 | | UBool onlyChanges_, coarse; |
178 | | |
179 | | UBool changed; /* value returned by hasChange() */ |
180 | | int32_t oldLength_, newLength_; /* values returned by oldLength() and newLength() */ |
181 | | int32_t srcIndex, replIndex, destIndex; /* values returned by sourceIndex(), replacementIndex() and destinationIndex() */ |
182 | | }; |
183 | | |
184 | | /** |
185 | | * Returns an Iterator for coarse-grained changes for simple string updates. |
186 | | * Skips non-changes. |
187 | | * @return an Iterator that merges adjacent changes. |
188 | | * @draft ICU 59 |
189 | | */ |
190 | 0 | Iterator getCoarseChangesIterator() const { |
191 | 0 | return Iterator(array, length, TRUE, TRUE); |
192 | 0 | } |
193 | | |
194 | | /** |
195 | | * Returns an Iterator for coarse-grained changes and non-changes for simple string updates. |
196 | | * @return an Iterator that merges adjacent changes. |
197 | | * @draft ICU 59 |
198 | | */ |
199 | 0 | Iterator getCoarseIterator() const { |
200 | 0 | return Iterator(array, length, FALSE, TRUE); |
201 | 0 | } |
202 | | |
203 | | /** |
204 | | * Returns an Iterator for fine-grained changes for modifying styled text. |
205 | | * Skips non-changes. |
206 | | * @return an Iterator that separates adjacent changes. |
207 | | * @draft ICU 59 |
208 | | */ |
209 | 0 | Iterator getFineChangesIterator() const { |
210 | 0 | return Iterator(array, length, TRUE, FALSE); |
211 | 0 | } |
212 | | |
213 | | /** |
214 | | * Returns an Iterator for fine-grained changes and non-changes for modifying styled text. |
215 | | * @return an Iterator that separates adjacent changes. |
216 | | * @draft ICU 59 |
217 | | */ |
218 | 0 | Iterator getFineIterator() const { |
219 | 0 | return Iterator(array, length, FALSE, FALSE); |
220 | 0 | } |
221 | | |
222 | | private: |
223 | | Edits(const Edits &) = delete; /* not copyable */ |
224 | | Edits &operator=(const Edits &) = delete; /* not assignable */ |
225 | | |
226 | 0 | void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; } /* no bounds check: requires length > 0; last is truncated to 16 bits */ |
227 | 0 | int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; } /* yields 0xffff when no units are stored */ |
228 | | |
229 | | void append(int32_t r); |
230 | | UBool growArray(); |
231 | | |
232 | | static const int32_t STACK_CAPACITY = 100; /* number of 16-bit units in stackArray */ |
233 | | uint16_t *array; /* set to stackArray by the constructor */ |
234 | | int32_t capacity; /* starts at STACK_CAPACITY */ |
235 | | int32_t length; /* starts at 0; array[length - 1] is treated as the last unit */ |
236 | | int32_t delta; /* value returned by lengthDelta() */ |
237 | | UErrorCode errorCode; /* starts as U_ZERO_ERROR */ |
238 | | uint16_t stackArray[STACK_CAPACITY]; /* built-in initial storage */ |
239 | | }; |
240 | | |
241 | | #endif // U_HIDE_DRAFT_API |
242 | | |
243 | | U_NAMESPACE_END |
244 | | |
245 | | #endif // __EDITS_H__ |