/src/icu/source/i18n/formatted_string_builder.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2017 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | #include "unicode/utypes.h" |
5 | | |
6 | | #if !UCONFIG_NO_FORMATTING |
7 | | |
8 | | #include "formatted_string_builder.h" |
9 | | #include "unicode/ustring.h" |
10 | | #include "unicode/utf16.h" |
11 | | #include "unicode/unum.h" // for UNumberFormatFields literals |
12 | | |
13 | | namespace { |
14 | | |
15 | | // A version of uprv_memcpy that checks for length 0. |
16 | | // By default, uprv_memcpy requires a length of at least 1. |
17 | 0 | inline void uprv_memcpy2(void* dest, const void* src, size_t len) { |
18 | 0 | if (len > 0) { |
19 | 0 | uprv_memcpy(dest, src, len); |
20 | 0 | } |
21 | 0 | } |
22 | | |
23 | | // A version of uprv_memmove that checks for length 0. |
24 | | // By default, uprv_memmove requires a length of at least 1. |
25 | 0 | inline void uprv_memmove2(void* dest, const void* src, size_t len) { |
26 | 0 | if (len > 0) { |
27 | 0 | uprv_memmove(dest, src, len); |
28 | 0 | } |
29 | 0 | } |
30 | | |
31 | | } // namespace |
32 | | |
33 | | |
34 | | U_NAMESPACE_BEGIN |
35 | | |
36 | 0 | FormattedStringBuilder::FormattedStringBuilder() { |
37 | | #if U_DEBUG |
38 | | // Initializing the memory to non-zero helps catch some bugs that involve |
39 | | // reading from an improperly terminated string. |
40 | | for (int32_t i=0; i<getCapacity(); i++) { |
41 | | getCharPtr()[i] = 1; |
42 | | } |
43 | | #endif |
44 | 0 | } |
45 | | |
46 | 0 | FormattedStringBuilder::~FormattedStringBuilder() { |
47 | 0 | if (fUsingHeap) { |
48 | 0 | uprv_free(fChars.heap.ptr); |
49 | 0 | uprv_free(fFields.heap.ptr); |
50 | 0 | } |
51 | 0 | } |
52 | | |
53 | 0 | FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) { |
54 | 0 | *this = other; |
55 | 0 | } |
56 | | |
57 | 0 | FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) { |
58 | | // Check for self-assignment |
59 | 0 | if (this == &other) { |
60 | 0 | return *this; |
61 | 0 | } |
62 | | |
63 | | // Continue with deallocation and copying |
64 | 0 | if (fUsingHeap) { |
65 | 0 | uprv_free(fChars.heap.ptr); |
66 | 0 | uprv_free(fFields.heap.ptr); |
67 | 0 | fUsingHeap = false; |
68 | 0 | } |
69 | |
|
70 | 0 | int32_t capacity = other.getCapacity(); |
71 | 0 | if (capacity > DEFAULT_CAPACITY) { |
72 | | // FIXME: uprv_malloc |
73 | | // C++ note: malloc appears in two places: here and in prepareForInsertHelper. |
74 | 0 | auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity)); |
75 | 0 | auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity)); |
76 | 0 | if (newChars == nullptr || newFields == nullptr) { |
77 | | // UErrorCode is not available; fail silently. |
78 | 0 | uprv_free(newChars); |
79 | 0 | uprv_free(newFields); |
80 | 0 | *this = FormattedStringBuilder(); // can't fail |
81 | 0 | return *this; |
82 | 0 | } |
83 | | |
84 | 0 | fUsingHeap = true; |
85 | 0 | fChars.heap.capacity = capacity; |
86 | 0 | fChars.heap.ptr = newChars; |
87 | 0 | fFields.heap.capacity = capacity; |
88 | 0 | fFields.heap.ptr = newFields; |
89 | 0 | } |
90 | | |
91 | 0 | uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity); |
92 | 0 | uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity); |
93 | |
|
94 | 0 | fZero = other.fZero; |
95 | 0 | fLength = other.fLength; |
96 | 0 | return *this; |
97 | 0 | } |
98 | | |
99 | 0 | int32_t FormattedStringBuilder::length() const { |
100 | 0 | return fLength; |
101 | 0 | } |
102 | | |
103 | 0 | int32_t FormattedStringBuilder::codePointCount() const { |
104 | 0 | return u_countChar32(getCharPtr() + fZero, fLength); |
105 | 0 | } |
106 | | |
107 | 0 | UChar32 FormattedStringBuilder::getFirstCodePoint() const { |
108 | 0 | if (fLength == 0) { |
109 | 0 | return -1; |
110 | 0 | } |
111 | 0 | UChar32 cp; |
112 | 0 | U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp); |
113 | 0 | return cp; |
114 | 0 | } |
115 | | |
116 | 0 | UChar32 FormattedStringBuilder::getLastCodePoint() const { |
117 | 0 | if (fLength == 0) { |
118 | 0 | return -1; |
119 | 0 | } |
120 | 0 | int32_t offset = fLength; |
121 | 0 | U16_BACK_1(getCharPtr() + fZero, 0, offset); |
122 | 0 | UChar32 cp; |
123 | 0 | U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp); |
124 | 0 | return cp; |
125 | 0 | } |
126 | | |
127 | 0 | UChar32 FormattedStringBuilder::codePointAt(int32_t index) const { |
128 | 0 | UChar32 cp; |
129 | 0 | U16_GET(getCharPtr() + fZero, 0, index, fLength, cp); |
130 | 0 | return cp; |
131 | 0 | } |
132 | | |
133 | 0 | UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const { |
134 | 0 | int32_t offset = index; |
135 | 0 | U16_BACK_1(getCharPtr() + fZero, 0, offset); |
136 | 0 | UChar32 cp; |
137 | 0 | U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp); |
138 | 0 | return cp; |
139 | 0 | } |
140 | | |
141 | 0 | FormattedStringBuilder &FormattedStringBuilder::clear() { |
142 | | // TODO: Reset the heap here? |
143 | 0 | fZero = getCapacity() / 2; |
144 | 0 | fLength = 0; |
145 | 0 | return *this; |
146 | 0 | } |
147 | | |
148 | | int32_t |
149 | 0 | FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) { |
150 | 0 | int32_t count = U16_LENGTH(codePoint); |
151 | 0 | int32_t position = prepareForInsert(index, count, status); |
152 | 0 | if (U_FAILURE(status)) { |
153 | 0 | return count; |
154 | 0 | } |
155 | 0 | if (count == 1) { |
156 | 0 | getCharPtr()[position] = (char16_t) codePoint; |
157 | 0 | getFieldPtr()[position] = field; |
158 | 0 | } else { |
159 | 0 | getCharPtr()[position] = U16_LEAD(codePoint); |
160 | 0 | getCharPtr()[position + 1] = U16_TRAIL(codePoint); |
161 | 0 | getFieldPtr()[position] = getFieldPtr()[position + 1] = field; |
162 | 0 | } |
163 | 0 | return count; |
164 | 0 | } |
165 | | |
166 | | int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field, |
167 | 0 | UErrorCode &status) { |
168 | 0 | if (unistr.length() == 0) { |
169 | | // Nothing to insert. |
170 | 0 | return 0; |
171 | 0 | } else if (unistr.length() == 1) { |
172 | | // Fast path: insert using insertCodePoint. |
173 | 0 | return insertCodePoint(index, unistr.charAt(0), field, status); |
174 | 0 | } else { |
175 | 0 | return insert(index, unistr, 0, unistr.length(), field, status); |
176 | 0 | } |
177 | 0 | } |
178 | | |
179 | | int32_t |
180 | | FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end, |
181 | 0 | Field field, UErrorCode &status) { |
182 | 0 | int32_t count = end - start; |
183 | 0 | int32_t position = prepareForInsert(index, count, status); |
184 | 0 | if (U_FAILURE(status)) { |
185 | 0 | return count; |
186 | 0 | } |
187 | 0 | for (int32_t i = 0; i < count; i++) { |
188 | 0 | getCharPtr()[position + i] = unistr.charAt(start + i); |
189 | 0 | getFieldPtr()[position + i] = field; |
190 | 0 | } |
191 | 0 | return count; |
192 | 0 | } |
193 | | |
194 | | int32_t |
195 | | FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr, |
196 | 0 | int32_t startOther, int32_t endOther, Field field, UErrorCode& status) { |
197 | 0 | int32_t thisLength = endThis - startThis; |
198 | 0 | int32_t otherLength = endOther - startOther; |
199 | 0 | int32_t count = otherLength - thisLength; |
200 | 0 | int32_t position; |
201 | 0 | if (count > 0) { |
202 | | // Overall, chars need to be added. |
203 | 0 | position = prepareForInsert(startThis, count, status); |
204 | 0 | } else { |
205 | | // Overall, chars need to be removed or kept the same. |
206 | 0 | position = remove(startThis, -count); |
207 | 0 | } |
208 | 0 | if (U_FAILURE(status)) { |
209 | 0 | return count; |
210 | 0 | } |
211 | 0 | for (int32_t i = 0; i < otherLength; i++) { |
212 | 0 | getCharPtr()[position + i] = unistr.charAt(startOther + i); |
213 | 0 | getFieldPtr()[position + i] = field; |
214 | 0 | } |
215 | 0 | return count; |
216 | 0 | } |
217 | | |
218 | 0 | int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) { |
219 | 0 | return insert(fLength, other, status); |
220 | 0 | } |
221 | | |
222 | | int32_t |
223 | 0 | FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) { |
224 | 0 | if (this == &other) { |
225 | 0 | status = U_ILLEGAL_ARGUMENT_ERROR; |
226 | 0 | return 0; |
227 | 0 | } |
228 | 0 | int32_t count = other.fLength; |
229 | 0 | if (count == 0) { |
230 | | // Nothing to insert. |
231 | 0 | return 0; |
232 | 0 | } |
233 | 0 | int32_t position = prepareForInsert(index, count, status); |
234 | 0 | if (U_FAILURE(status)) { |
235 | 0 | return count; |
236 | 0 | } |
237 | 0 | for (int32_t i = 0; i < count; i++) { |
238 | 0 | getCharPtr()[position + i] = other.charAt(i); |
239 | 0 | getFieldPtr()[position + i] = other.fieldAt(i); |
240 | 0 | } |
241 | 0 | return count; |
242 | 0 | } |
243 | | |
244 | 0 | void FormattedStringBuilder::writeTerminator(UErrorCode& status) { |
245 | 0 | int32_t position = prepareForInsert(fLength, 1, status); |
246 | 0 | if (U_FAILURE(status)) { |
247 | 0 | return; |
248 | 0 | } |
249 | 0 | getCharPtr()[position] = 0; |
250 | 0 | getFieldPtr()[position] = kUndefinedField; |
251 | 0 | fLength--; |
252 | 0 | } |
253 | | |
254 | 0 | int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) { |
255 | 0 | U_ASSERT(index >= 0); |
256 | 0 | U_ASSERT(index <= fLength); |
257 | 0 | U_ASSERT(count >= 0); |
258 | 0 | if (index == 0 && fZero - count >= 0) { |
259 | | // Append to start |
260 | 0 | fZero -= count; |
261 | 0 | fLength += count; |
262 | 0 | return fZero; |
263 | 0 | } else if (index == fLength && fZero + fLength + count < getCapacity()) { |
264 | | // Append to end |
265 | 0 | fLength += count; |
266 | 0 | return fZero + fLength - count; |
267 | 0 | } else { |
268 | | // Move chars around and/or allocate more space |
269 | 0 | return prepareForInsertHelper(index, count, status); |
270 | 0 | } |
271 | 0 | } |
272 | | |
273 | 0 | int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) { |
274 | 0 | int32_t oldCapacity = getCapacity(); |
275 | 0 | int32_t oldZero = fZero; |
276 | 0 | char16_t *oldChars = getCharPtr(); |
277 | 0 | Field *oldFields = getFieldPtr(); |
278 | 0 | if (fLength + count > oldCapacity) { |
279 | 0 | if ((fLength + count) > INT32_MAX / 2) { |
280 | | // If we continue, then newCapacity will overflow int32_t in the next line. |
281 | 0 | status = U_INPUT_TOO_LONG_ERROR; |
282 | 0 | return -1; |
283 | 0 | } |
284 | 0 | int32_t newCapacity = (fLength + count) * 2; |
285 | 0 | int32_t newZero = newCapacity / 2 - (fLength + count) / 2; |
286 | | |
287 | | // C++ note: malloc appears in two places: here and in the assignment operator. |
288 | 0 | auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity)); |
289 | 0 | auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity)); |
290 | 0 | if (newChars == nullptr || newFields == nullptr) { |
291 | 0 | uprv_free(newChars); |
292 | 0 | uprv_free(newFields); |
293 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
294 | 0 | return -1; |
295 | 0 | } |
296 | | |
297 | | // First copy the prefix and then the suffix, leaving room for the new chars that the |
298 | | // caller wants to insert. |
299 | | // C++ note: memcpy is OK because the src and dest do not overlap. |
300 | 0 | uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index); |
301 | 0 | uprv_memcpy2(newChars + newZero + index + count, |
302 | 0 | oldChars + oldZero + index, |
303 | 0 | sizeof(char16_t) * (fLength - index)); |
304 | 0 | uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index); |
305 | 0 | uprv_memcpy2(newFields + newZero + index + count, |
306 | 0 | oldFields + oldZero + index, |
307 | 0 | sizeof(Field) * (fLength - index)); |
308 | |
|
309 | 0 | if (fUsingHeap) { |
310 | 0 | uprv_free(oldChars); |
311 | 0 | uprv_free(oldFields); |
312 | 0 | } |
313 | 0 | fUsingHeap = true; |
314 | 0 | fChars.heap.ptr = newChars; |
315 | 0 | fChars.heap.capacity = newCapacity; |
316 | 0 | fFields.heap.ptr = newFields; |
317 | 0 | fFields.heap.capacity = newCapacity; |
318 | 0 | fZero = newZero; |
319 | 0 | fLength += count; |
320 | 0 | } else { |
321 | 0 | int32_t newZero = oldCapacity / 2 - (fLength + count) / 2; |
322 | | |
323 | | // C++ note: memmove is required because src and dest may overlap. |
324 | | // First copy the entire string to the location of the prefix, and then move the suffix |
325 | | // to make room for the new chars that the caller wants to insert. |
326 | 0 | uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength); |
327 | 0 | uprv_memmove2(oldChars + newZero + index + count, |
328 | 0 | oldChars + newZero + index, |
329 | 0 | sizeof(char16_t) * (fLength - index)); |
330 | 0 | uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength); |
331 | 0 | uprv_memmove2(oldFields + newZero + index + count, |
332 | 0 | oldFields + newZero + index, |
333 | 0 | sizeof(Field) * (fLength - index)); |
334 | |
|
335 | 0 | fZero = newZero; |
336 | 0 | fLength += count; |
337 | 0 | } |
338 | 0 | U_ASSERT((fZero + index) >= 0); |
339 | 0 | return fZero + index; |
340 | 0 | } |
341 | | |
342 | 0 | int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) { |
343 | | // TODO: Reset the heap here? (If the string after removal can fit on stack?) |
344 | 0 | int32_t position = index + fZero; |
345 | 0 | U_ASSERT(position >= 0); |
346 | 0 | uprv_memmove2(getCharPtr() + position, |
347 | 0 | getCharPtr() + position + count, |
348 | 0 | sizeof(char16_t) * (fLength - index - count)); |
349 | 0 | uprv_memmove2(getFieldPtr() + position, |
350 | 0 | getFieldPtr() + position + count, |
351 | 0 | sizeof(Field) * (fLength - index - count)); |
352 | 0 | fLength -= count; |
353 | 0 | return position; |
354 | 0 | } |
355 | | |
356 | 0 | UnicodeString FormattedStringBuilder::toUnicodeString() const { |
357 | 0 | return UnicodeString(getCharPtr() + fZero, fLength); |
358 | 0 | } |
359 | | |
360 | 0 | const UnicodeString FormattedStringBuilder::toTempUnicodeString() const { |
361 | | // Readonly-alias constructor: |
362 | 0 | return UnicodeString(FALSE, getCharPtr() + fZero, fLength); |
363 | 0 | } |
364 | | |
365 | 0 | UnicodeString FormattedStringBuilder::toDebugString() const { |
366 | 0 | UnicodeString sb; |
367 | 0 | sb.append(u"<FormattedStringBuilder [", -1); |
368 | 0 | sb.append(toUnicodeString()); |
369 | 0 | sb.append(u"] [", -1); |
370 | 0 | for (int i = 0; i < fLength; i++) { |
371 | 0 | if (fieldAt(i) == kUndefinedField) { |
372 | 0 | sb.append(u'n'); |
373 | 0 | } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) { |
374 | 0 | char16_t c; |
375 | 0 | switch (fieldAt(i).getField()) { |
376 | 0 | case UNUM_SIGN_FIELD: |
377 | 0 | c = u'-'; |
378 | 0 | break; |
379 | 0 | case UNUM_INTEGER_FIELD: |
380 | 0 | c = u'i'; |
381 | 0 | break; |
382 | 0 | case UNUM_FRACTION_FIELD: |
383 | 0 | c = u'f'; |
384 | 0 | break; |
385 | 0 | case UNUM_EXPONENT_FIELD: |
386 | 0 | c = u'e'; |
387 | 0 | break; |
388 | 0 | case UNUM_EXPONENT_SIGN_FIELD: |
389 | 0 | c = u'+'; |
390 | 0 | break; |
391 | 0 | case UNUM_EXPONENT_SYMBOL_FIELD: |
392 | 0 | c = u'E'; |
393 | 0 | break; |
394 | 0 | case UNUM_DECIMAL_SEPARATOR_FIELD: |
395 | 0 | c = u'.'; |
396 | 0 | break; |
397 | 0 | case UNUM_GROUPING_SEPARATOR_FIELD: |
398 | 0 | c = u','; |
399 | 0 | break; |
400 | 0 | case UNUM_PERCENT_FIELD: |
401 | 0 | c = u'%'; |
402 | 0 | break; |
403 | 0 | case UNUM_PERMILL_FIELD: |
404 | 0 | c = u'‰'; |
405 | 0 | break; |
406 | 0 | case UNUM_CURRENCY_FIELD: |
407 | 0 | c = u'$'; |
408 | 0 | break; |
409 | 0 | default: |
410 | 0 | c = u'0' + fieldAt(i).getField(); |
411 | 0 | break; |
412 | 0 | } |
413 | 0 | sb.append(c); |
414 | 0 | } else { |
415 | 0 | sb.append(u'0' + fieldAt(i).getCategory()); |
416 | 0 | } |
417 | 0 | } |
418 | 0 | sb.append(u"]>", -1); |
419 | 0 | return sb; |
420 | 0 | } |
421 | | |
422 | 0 | const char16_t *FormattedStringBuilder::chars() const { |
423 | 0 | return getCharPtr() + fZero; |
424 | 0 | } |
425 | | |
426 | 0 | bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const { |
427 | 0 | if (fLength != other.fLength) { |
428 | 0 | return false; |
429 | 0 | } |
430 | 0 | for (int32_t i = 0; i < fLength; i++) { |
431 | 0 | if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) { |
432 | 0 | return false; |
433 | 0 | } |
434 | 0 | } |
435 | 0 | return true; |
436 | 0 | } |
437 | | |
438 | 0 | bool FormattedStringBuilder::containsField(Field field) const { |
439 | 0 | for (int32_t i = 0; i < fLength; i++) { |
440 | 0 | if (field == fieldAt(i)) { |
441 | 0 | return true; |
442 | 0 | } |
443 | 0 | } |
444 | 0 | return false; |
445 | 0 | } |
446 | | |
447 | | U_NAMESPACE_END |
448 | | |
449 | | #endif /* #if !UCONFIG_NO_FORMATTING */ |