Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/i18n/formatted_string_builder.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2017 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include "unicode/utypes.h"
5
6
#if !UCONFIG_NO_FORMATTING
7
8
#include "formatted_string_builder.h"
9
#include "unicode/ustring.h"
10
#include "unicode/utf16.h"
11
#include "unicode/unum.h" // for UNumberFormatFields literals
12
13
namespace {
14
15
// A version of uprv_memcpy that checks for length 0.
16
// By default, uprv_memcpy requires a length of at least 1.
17
0
inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
18
0
    if (len > 0) {
19
0
        uprv_memcpy(dest, src, len);
20
0
    }
21
0
}
22
23
// A version of uprv_memmove that checks for length 0.
24
// By default, uprv_memmove requires a length of at least 1.
25
0
inline void uprv_memmove2(void* dest, const void* src, size_t len) {
26
0
    if (len > 0) {
27
0
        uprv_memmove(dest, src, len);
28
0
    }
29
0
}
30
31
} // namespace
32
33
34
U_NAMESPACE_BEGIN
35
36
0
// Default constructor. Does nothing in release builds; in U_DEBUG builds it
// fills the whole initial buffer with the code unit 1.
FormattedStringBuilder::FormattedStringBuilder() {
#if U_DEBUG
    // A non-zero fill helps catch bugs that read from an improperly
    // terminated string.
    char16_t *buffer = getCharPtr();
    const int32_t capacity = getCapacity();
    for (int32_t i = 0; i < capacity; ++i) {
        buffer[i] = 1;
    }
#endif
}
45
46
0
// Destructor: releases the heap-allocated char and field buffers, if any.
FormattedStringBuilder::~FormattedStringBuilder() {
    if (!fUsingHeap) {
        // Nothing was allocated with uprv_malloc, so there is nothing to free.
        return;
    }
    uprv_free(fChars.heap.ptr);
    uprv_free(fFields.heap.ptr);
}
52
53
0
// Copy constructor: delegates all the work to the copy-assignment operator.
FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
    operator=(other);
}
56
57
0
// Copy assignment. Releases any heap buffers this builder owns, allocates new
// heap buffers when the source's capacity exceeds DEFAULT_CAPACITY, and then
// copies the source's full char and field buffers together with fZero/fLength.
// No UErrorCode is available here, so an allocation failure is handled
// silently by resetting this builder to a default-constructed state.
FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
    // Self-assignment is a no-op.
    if (this == &other) {
        return *this;
    }

    // Drop whatever heap storage we currently own before taking on new content.
    if (fUsingHeap) {
        uprv_free(fChars.heap.ptr);
        uprv_free(fFields.heap.ptr);
        fUsingHeap = false;
    }

    const int32_t sourceCapacity = other.getCapacity();
    if (sourceCapacity > DEFAULT_CAPACITY) {
        // FIXME: uprv_malloc
        // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
        char16_t *heapChars = static_cast<char16_t *>(uprv_malloc(sizeof(char16_t) * sourceCapacity));
        Field *heapFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * sourceCapacity));
        if (heapChars == nullptr || heapFields == nullptr) {
            // UErrorCode is not available; fail silently.
            uprv_free(heapChars);
            uprv_free(heapFields);
            *this = FormattedStringBuilder();  // can't fail
            return *this;
        }

        fUsingHeap = true;
        fChars.heap.ptr = heapChars;
        fChars.heap.capacity = sourceCapacity;
        fFields.heap.ptr = heapFields;
        fFields.heap.capacity = sourceCapacity;
    }

    // Copy the entire buffers (not just the used region), then the bookkeeping.
    uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * sourceCapacity);
    uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * sourceCapacity);

    fLength = other.fLength;
    fZero = other.fZero;
    return *this;
}
98
99
0
int32_t FormattedStringBuilder::length() const {
100
0
    return fLength;
101
0
}
102
103
0
int32_t FormattedStringBuilder::codePointCount() const {
104
0
    return u_countChar32(getCharPtr() + fZero, fLength);
105
0
}
106
107
0
// Returns the code point read by U16_GET at offset 0 of the content,
// or -1 when the builder is empty.
UChar32 FormattedStringBuilder::getFirstCodePoint() const {
    if (fLength == 0) {
        return -1;
    }
    const char16_t *text = getCharPtr() + fZero;
    UChar32 first;
    U16_GET(text, 0, 0, fLength, first);
    return first;
}
115
116
0
// Returns the code point that ends the content, or -1 when the builder is empty.
UChar32 FormattedStringBuilder::getLastCodePoint() const {
    if (fLength == 0) {
        return -1;
    }
    const char16_t *text = getCharPtr() + fZero;
    // Step back from the end with U16_BACK_1, then read at the resulting offset.
    int32_t lastStart = fLength;
    U16_BACK_1(text, 0, lastStart);
    UChar32 last;
    U16_GET(text, 0, lastStart, fLength, last);
    return last;
}
126
127
0
// Returns the code point read by U16_GET at `index` (relative to fZero).
// No bounds check is performed on `index` here.
UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
    const char16_t *text = getCharPtr() + fZero;
    UChar32 result;
    U16_GET(text, 0, index, fLength, result);
    return result;
}
132
133
0
// Returns the code point that precedes `index` (relative to fZero).
// No bounds check is performed on `index` here.
UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
    const char16_t *text = getCharPtr() + fZero;
    // Move a copy of the index back with U16_BACK_1, then read from there.
    int32_t previousStart = index;
    U16_BACK_1(text, 0, previousStart);
    UChar32 result;
    U16_GET(text, 0, previousStart, fLength, result);
    return result;
}
140
141
0
// Empties the builder: sets fZero to half of the current capacity and
// fLength to 0. Buffers are left untouched. Returns *this.
FormattedStringBuilder &FormattedStringBuilder::clear() {
    // TODO: Reset the heap here?
    const int32_t middle = getCapacity() / 2;
    fZero = middle;
    fLength = 0;
    return *this;
}
147
148
int32_t
149
0
FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
150
0
    int32_t count = U16_LENGTH(codePoint);
151
0
    int32_t position = prepareForInsert(index, count, status);
152
0
    if (U_FAILURE(status)) {
153
0
        return count;
154
0
    }
155
0
    if (count == 1) {
156
0
        getCharPtr()[position] = (char16_t) codePoint;
157
0
        getFieldPtr()[position] = field;
158
0
    } else {
159
0
        getCharPtr()[position] = U16_LEAD(codePoint);
160
0
        getCharPtr()[position + 1] = U16_TRAIL(codePoint);
161
0
        getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
162
0
    }
163
0
    return count;
164
0
}
165
166
int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
167
0
                                    UErrorCode &status) {
168
0
    if (unistr.length() == 0) {
169
        // Nothing to insert.
170
0
        return 0;
171
0
    } else if (unistr.length() == 1) {
172
        // Fast path: insert using insertCodePoint.
173
0
        return insertCodePoint(index, unistr.charAt(0), field, status);
174
0
    } else {
175
0
        return insert(index, unistr, 0, unistr.length(), field, status);
176
0
    }
177
0
}
178
179
int32_t
180
FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
181
0
                            Field field, UErrorCode &status) {
182
0
    int32_t count = end - start;
183
0
    int32_t position = prepareForInsert(index, count, status);
184
0
    if (U_FAILURE(status)) {
185
0
        return count;
186
0
    }
187
0
    for (int32_t i = 0; i < count; i++) {
188
0
        getCharPtr()[position + i] = unistr.charAt(start + i);
189
0
        getFieldPtr()[position + i] = field;
190
0
    }
191
0
    return count;
192
0
}
193
194
int32_t
195
FormattedStringBuilder::splice(int32_t startThis, int32_t endThis,  const UnicodeString &unistr,
196
0
                            int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
197
0
    int32_t thisLength = endThis - startThis;
198
0
    int32_t otherLength = endOther - startOther;
199
0
    int32_t count = otherLength - thisLength;
200
0
    int32_t position;
201
0
    if (count > 0) {
202
        // Overall, chars need to be added.
203
0
        position = prepareForInsert(startThis, count, status);
204
0
    } else {
205
        // Overall, chars need to be removed or kept the same.
206
0
        position = remove(startThis, -count);
207
0
    }
208
0
    if (U_FAILURE(status)) {
209
0
        return count;
210
0
    }
211
0
    for (int32_t i = 0; i < otherLength; i++) {
212
0
        getCharPtr()[position + i] = unistr.charAt(startOther + i);
213
0
        getFieldPtr()[position + i] = field;
214
0
    }
215
0
    return count;
216
0
}
217
218
0
int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
219
0
    return insert(fLength, other, status);
220
0
}
221
222
int32_t
223
0
FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
224
0
    if (this == &other) {
225
0
        status = U_ILLEGAL_ARGUMENT_ERROR;
226
0
        return 0;
227
0
    }
228
0
    int32_t count = other.fLength;
229
0
    if (count == 0) {
230
        // Nothing to insert.
231
0
        return 0;
232
0
    }
233
0
    int32_t position = prepareForInsert(index, count, status);
234
0
    if (U_FAILURE(status)) {
235
0
        return count;
236
0
    }
237
0
    for (int32_t i = 0; i < count; i++) {
238
0
        getCharPtr()[position + i] = other.charAt(i);
239
0
        getFieldPtr()[position + i] = other.fieldAt(i);
240
0
    }
241
0
    return count;
242
0
}
243
244
0
// Writes a 0 code unit (with kUndefinedField) just past the end of the
// content without counting it in fLength. Does nothing further if
// prepareForInsert reports a failure through `status`.
void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
    const int32_t terminatorPos = prepareForInsert(fLength, 1, status);
    if (U_FAILURE(status)) {
        return;
    }
    getCharPtr()[terminatorPos] = 0;
    getFieldPtr()[terminatorPos] = kUndefinedField;
    // prepareForInsert grew fLength by one; undo that so the terminator is
    // not part of the logical content.
    --fLength;
}
253
254
0
int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
255
0
    U_ASSERT(index >= 0);
256
0
    U_ASSERT(index <= fLength);
257
0
    U_ASSERT(count >= 0);
258
0
    if (index == 0 && fZero - count >= 0) {
259
        // Append to start
260
0
        fZero -= count;
261
0
        fLength += count;
262
0
        return fZero;
263
0
    } else if (index == fLength && fZero + fLength + count < getCapacity()) {
264
        // Append to end
265
0
        fLength += count;
266
0
        return fZero + fLength - count;
267
0
    } else {
268
        // Move chars around and/or allocate more space
269
0
        return prepareForInsertHelper(index, count, status);
270
0
    }
271
0
}
272
273
0
int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
274
0
    int32_t oldCapacity = getCapacity();
275
0
    int32_t oldZero = fZero;
276
0
    char16_t *oldChars = getCharPtr();
277
0
    Field *oldFields = getFieldPtr();
278
0
    if (fLength + count > oldCapacity) {
279
0
        if ((fLength + count) > INT32_MAX / 2) {
280
            // If we continue, then newCapacity will overflow int32_t in the next line.
281
0
            status = U_INPUT_TOO_LONG_ERROR;
282
0
            return -1;
283
0
        }
284
0
        int32_t newCapacity = (fLength + count) * 2;
285
0
        int32_t newZero = newCapacity / 2 - (fLength + count) / 2;
286
287
        // C++ note: malloc appears in two places: here and in the assignment operator.
288
0
        auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * newCapacity));
289
0
        auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * newCapacity));
290
0
        if (newChars == nullptr || newFields == nullptr) {
291
0
            uprv_free(newChars);
292
0
            uprv_free(newFields);
293
0
            status = U_MEMORY_ALLOCATION_ERROR;
294
0
            return -1;
295
0
        }
296
297
        // First copy the prefix and then the suffix, leaving room for the new chars that the
298
        // caller wants to insert.
299
        // C++ note: memcpy is OK because the src and dest do not overlap.
300
0
        uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
301
0
        uprv_memcpy2(newChars + newZero + index + count,
302
0
                oldChars + oldZero + index,
303
0
                sizeof(char16_t) * (fLength - index));
304
0
        uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
305
0
        uprv_memcpy2(newFields + newZero + index + count,
306
0
                oldFields + oldZero + index,
307
0
                sizeof(Field) * (fLength - index));
308
309
0
        if (fUsingHeap) {
310
0
            uprv_free(oldChars);
311
0
            uprv_free(oldFields);
312
0
        }
313
0
        fUsingHeap = true;
314
0
        fChars.heap.ptr = newChars;
315
0
        fChars.heap.capacity = newCapacity;
316
0
        fFields.heap.ptr = newFields;
317
0
        fFields.heap.capacity = newCapacity;
318
0
        fZero = newZero;
319
0
        fLength += count;
320
0
    } else {
321
0
        int32_t newZero = oldCapacity / 2 - (fLength + count) / 2;
322
323
        // C++ note: memmove is required because src and dest may overlap.
324
        // First copy the entire string to the location of the prefix, and then move the suffix
325
        // to make room for the new chars that the caller wants to insert.
326
0
        uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
327
0
        uprv_memmove2(oldChars + newZero + index + count,
328
0
                oldChars + newZero + index,
329
0
                sizeof(char16_t) * (fLength - index));
330
0
        uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
331
0
        uprv_memmove2(oldFields + newZero + index + count,
332
0
                oldFields + newZero + index,
333
0
                sizeof(Field) * (fLength - index));
334
335
0
        fZero = newZero;
336
0
        fLength += count;
337
0
    }
338
0
    U_ASSERT((fZero + index) >= 0);
339
0
    return fZero + index;
340
0
}
341
342
0
int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
343
    // TODO: Reset the heap here?  (If the string after removal can fit on stack?)
344
0
    int32_t position = index + fZero;
345
0
    U_ASSERT(position >= 0);
346
0
    uprv_memmove2(getCharPtr() + position,
347
0
            getCharPtr() + position + count,
348
0
            sizeof(char16_t) * (fLength - index - count));
349
0
    uprv_memmove2(getFieldPtr() + position,
350
0
            getFieldPtr() + position + count,
351
0
            sizeof(Field) * (fLength - index - count));
352
0
    fLength -= count;
353
0
    return position;
354
0
}
355
356
0
// Returns a UnicodeString constructed from the fLength code units that start
// at fZero.
UnicodeString FormattedStringBuilder::toUnicodeString() const {
    const char16_t *text = getCharPtr() + fZero;
    return UnicodeString(text, fLength);
}
359
360
0
// Returns a UnicodeString built with the readonly-alias constructor over the
// fLength code units that start at fZero.
// NOTE(review): as an alias, the result presumably must not be used after this
// builder is modified or destroyed — confirm against the UnicodeString docs.
const UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
    const char16_t *text = getCharPtr() + fZero;
    // Readonly-alias constructor:
    return UnicodeString(FALSE, text, fLength);
}
364
365
0
// Builds a debug rendering of the form
//   <FormattedStringBuilder [chars] [fields]>
// where the second bracket holds one symbol per code unit: 'n' for
// kUndefinedField, a mnemonic character for known number fields, '0' + field
// value for other number fields, and '0' + category for every other category.
UnicodeString FormattedStringBuilder::toDebugString() const {
    UnicodeString result;
    result.append(u"<FormattedStringBuilder [", -1);
    result.append(toUnicodeString());
    result.append(u"] [", -1);
    for (int i = 0; i < fLength; i++) {
        Field current = fieldAt(i);
        if (current == kUndefinedField) {
            result.append(u'n');
            continue;
        }
        if (current.getCategory() != UFIELD_CATEGORY_NUMBER) {
            // Non-number categories are shown as a digit derived from the category.
            result.append(u'0' + current.getCategory());
            continue;
        }
        char16_t symbol;
        switch (current.getField()) {
            case UNUM_SIGN_FIELD:
                symbol = u'-';
                break;
            case UNUM_INTEGER_FIELD:
                symbol = u'i';
                break;
            case UNUM_FRACTION_FIELD:
                symbol = u'f';
                break;
            case UNUM_EXPONENT_FIELD:
                symbol = u'e';
                break;
            case UNUM_EXPONENT_SIGN_FIELD:
                symbol = u'+';
                break;
            case UNUM_EXPONENT_SYMBOL_FIELD:
                symbol = u'E';
                break;
            case UNUM_DECIMAL_SEPARATOR_FIELD:
                symbol = u'.';
                break;
            case UNUM_GROUPING_SEPARATOR_FIELD:
                symbol = u',';
                break;
            case UNUM_PERCENT_FIELD:
                symbol = u'%';
                break;
            case UNUM_PERMILL_FIELD:
                symbol = u'‰';
                break;
            case UNUM_CURRENCY_FIELD:
                symbol = u'$';
                break;
            default:
                // Number fields without a mnemonic: digit derived from the field value.
                symbol = u'0' + current.getField();
                break;
        }
        result.append(symbol);
    }
    result.append(u"]>", -1);
    return result;
}
421
422
0
const char16_t *FormattedStringBuilder::chars() const {
423
0
    return getCharPtr() + fZero;
424
0
}
425
426
0
bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
427
0
    if (fLength != other.fLength) {
428
0
        return false;
429
0
    }
430
0
    for (int32_t i = 0; i < fLength; i++) {
431
0
        if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
432
0
            return false;
433
0
        }
434
0
    }
435
0
    return true;
436
0
}
437
438
0
// Returns true if any position in [0, fLength) is tagged with `field`.
bool FormattedStringBuilder::containsField(Field field) const {
    int32_t pos = 0;
    while (pos < fLength) {
        if (field == fieldAt(pos)) {
            return true;
        }
        ++pos;
    }
    return false;
}
446
447
U_NAMESPACE_END
448
449
#endif /* #if !UCONFIG_NO_FORMATTING */