Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/i18n/number_modifiers.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2017 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include "unicode/utypes.h"
5
6
#if !UCONFIG_NO_FORMATTING
7
8
#include "umutex.h"
9
#include "ucln_cmn.h"
10
#include "ucln_in.h"
11
#include "number_modifiers.h"
12
13
using namespace icu;
14
using namespace icu::number;
15
using namespace icu::number::impl;
16
17
namespace {
18
19
// TODO: This is copied from simpleformatter.cpp
20
const int32_t ARG_NUM_LIMIT = 0x100;
21
22
// These are the default currency spacing UnicodeSets in CLDR.
23
// Pre-compute them for performance.
24
// The Java unit test testCurrencySpacingPatternStability() will start failing if these change in CLDR.
25
icu::UInitOnce gDefaultCurrencySpacingInitOnce = U_INITONCE_INITIALIZER;
26
27
UnicodeSet *UNISET_DIGIT = nullptr;
28
UnicodeSet *UNISET_NOTSZ = nullptr;
29
30
0
UBool U_CALLCONV cleanupDefaultCurrencySpacing() {
31
0
    delete UNISET_DIGIT;
32
0
    UNISET_DIGIT = nullptr;
33
0
    delete UNISET_NOTSZ;
34
0
    UNISET_NOTSZ = nullptr;
35
0
    gDefaultCurrencySpacingInitOnce.reset();
36
0
    return TRUE;
37
0
}
38
39
0
void U_CALLCONV initDefaultCurrencySpacing(UErrorCode &status) {
40
0
    ucln_i18n_registerCleanup(UCLN_I18N_CURRENCY_SPACING, cleanupDefaultCurrencySpacing);
41
0
    UNISET_DIGIT = new UnicodeSet(UnicodeString(u"[:digit:]"), status);
42
0
    UNISET_NOTSZ = new UnicodeSet(UnicodeString(u"[[:^S:]&[:^Z:]]"), status);
43
0
    if (UNISET_DIGIT == nullptr || UNISET_NOTSZ == nullptr) {
44
0
        status = U_MEMORY_ALLOCATION_ERROR;
45
0
        return;
46
0
    }
47
0
    UNISET_DIGIT->freeze();
48
0
    UNISET_NOTSZ->freeze();
49
0
}
50
51
}  // namespace
52
53
54
0
// Destructor defined out-of-line in this translation unit; uses the compiler-generated behavior.
Modifier::~Modifier() = default;
55
56
// Creates an "empty" parameter set: no owning ModifierStore.
// signum and plural are value-initialized (zero) so that copying a
// default-constructed Parameters never reads indeterminate values.
Modifier::Parameters::Parameters()
        : obj(nullptr), signum(), plural() {}
58
59
Modifier::Parameters::Parameters(
60
    const ModifierStore* _obj, Signum _signum, StandardPlural::Form _plural)
61
0
        : obj(_obj), signum(_signum), plural(_plural) {}
62
63
0
// Destructor defined out-of-line in this translation unit; uses the compiler-generated behavior.
ModifierStore::~ModifierStore() = default;
64
65
0
// Deletes every Modifier pointer stored in `mods`.
AdoptingModifierStore::~AdoptingModifierStore() {
    for (const Modifier *adopted : mods) {
        delete adopted;
    }
}
70
71
72
// Inserts fSuffix at rightIndex and fPrefix at leftIndex, both tagged with fField.
// Returns the sum of the two insert() results.
int32_t ConstantAffixModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
                                     UErrorCode &status) const {
    // The suffix goes in first: inserting the prefix would shift rightIndex.
    const int32_t suffixAdded = output.insert(rightIndex, fSuffix, fField, status);
    const int32_t prefixAdded = output.insert(leftIndex, fPrefix, fField, status);
    return suffixAdded + prefixAdded;
}
79
80
0
// Returns the length of the prefix string, as reported by fPrefix.length().
int32_t ConstantAffixModifier::getPrefixLength() const {
    const int32_t prefixUnits = fPrefix.length();
    return prefixUnits;
}
83
84
0
// Returns the combined code point count of the prefix and the suffix.
int32_t ConstantAffixModifier::getCodePointCount() const {
    const int32_t prefixCodePoints = fPrefix.countChar32();
    const int32_t suffixCodePoints = fSuffix.countChar32();
    return prefixCodePoints + suffixCodePoints;
}
87
88
0
bool ConstantAffixModifier::isStrong() const {
89
0
    return fStrong;
90
0
}
91
92
0
// Not implemented for this modifier type: reaching this method is treated as
// a programming error (UPRV_UNREACHABLE).
bool ConstantAffixModifier::containsField(Field field) const {
    static_cast<void>(field);  // parameter intentionally unused
    // This method is not currently used.
    UPRV_UNREACHABLE;
}
97
98
0
// Not implemented for this modifier type: reaching this method is treated as
// a programming error (UPRV_UNREACHABLE). `output` is never written.
void ConstantAffixModifier::getParameters(Parameters& output) const {
    static_cast<void>(output);  // parameter intentionally unused
    // This method is not currently used.
    UPRV_UNREACHABLE;
}
103
104
0
bool ConstantAffixModifier::semanticallyEquivalent(const Modifier& other) const {
105
0
    auto* _other = dynamic_cast<const ConstantAffixModifier*>(&other);
106
0
    if (_other == nullptr) {
107
0
        return false;
108
0
    }
109
0
    return fPrefix == _other->fPrefix
110
0
        && fSuffix == _other->fSuffix
111
0
        && fField == _other->fField
112
0
        && fStrong == _other->fStrong;
113
0
}
114
115
116
// Convenience constructor: delegates to the four-argument constructor with
// default-constructed Modifier::Parameters.
SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong)
        : SimpleModifier(simpleFormatter, field, strong, Modifier::Parameters()) {}
118
119
// Copies the compiled pattern out of `simpleFormatter` and pre-computes where
// the prefix and suffix text live inside it:
//   - fPrefixLength: length of the text before the argument (0 if none);
//   - fSuffixOffset: index of the suffix length marker, or -1 when the
//     pattern has no argument at all;
//   - fSuffixLength: length of the text after the argument (0 if none).
// The pattern is expected to contain zero or one argument (asserted).
SimpleModifier::SimpleModifier(const SimpleFormatter &simpleFormatter, Field field, bool strong,
                               const Modifier::Parameters parameters)
        : fCompiledPattern(simpleFormatter.compiledPattern), fField(field), fStrong(strong),
          fParameters(parameters) {
    const int32_t patternLength = fCompiledPattern.length();
    const int32_t argCount = SimpleFormatter::getArgumentLimit(
            fCompiledPattern.getBuffer(), patternLength);
    // Code unit 1 is either an argument number or a text-length marker.
    const int32_t firstUnit = fCompiledPattern.charAt(1);

    if (argCount == 0) {
        // No arguments in compiled pattern: the whole pattern is literal text.
        fPrefixLength = firstUnit - ARG_NUM_LIMIT;
        U_ASSERT(2 + fPrefixLength == patternLength);
        // Set suffixOffset = -1 to indicate no arguments in compiled pattern.
        fSuffixOffset = -1;
        fSuffixLength = 0;
        return;
    }

    U_ASSERT(argCount == 1);
    // A zero here is the argument number itself, i.e. there is no prefix text.
    const bool hasPrefix = (firstUnit != 0);
    fPrefixLength = hasPrefix ? firstUnit - ARG_NUM_LIMIT : 0;
    fSuffixOffset = hasPrefix ? 3 + fPrefixLength : 2;

    // Anything left after the argument is the suffix segment.
    const bool hasSuffix = (3 + fPrefixLength < patternLength);
    fSuffixLength = hasSuffix ? fCompiledPattern.charAt(fSuffixOffset) - ARG_NUM_LIMIT : 0;
}
152
153
// Creates an empty modifier: undefined field, not strong, no prefix and no suffix.
// fSuffixOffset is set to -1 (the "no argument" sentinel used by the other
// constructor) so that formatAsPrefixSuffix(), which reads it, never sees an
// indeterminate value.
SimpleModifier::SimpleModifier()
        : fField(kUndefinedField), fStrong(false), fPrefixLength(0), fSuffixOffset(-1),
          fSuffixLength(0) {
}
156
157
// Applies this modifier around [leftIndex, rightIndex) by forwarding to
// formatAsPrefixSuffix() and returning its result.
int32_t SimpleModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
                              UErrorCode &status) const {
    const int32_t added = formatAsPrefixSuffix(output, leftIndex, rightIndex, status);
    return added;
}
161
162
0
// Returns the prefix length computed at construction time.
int32_t SimpleModifier::getPrefixLength() const {
    const int32_t prefixUnits = fPrefixLength;
    return prefixUnits;
}
165
166
0
// Counts the code points in the prefix and suffix text of the compiled pattern.
// A segment with non-positive length contributes nothing.
int32_t SimpleModifier::getCodePointCount() const {
    // Prefix text starts at index 2 of the compiled pattern.
    const int32_t prefixCodePoints =
            (fPrefixLength > 0) ? fCompiledPattern.countChar32(2, fPrefixLength) : 0;
    // Suffix text starts right after its length marker at fSuffixOffset.
    const int32_t suffixCodePoints =
            (fSuffixLength > 0) ? fCompiledPattern.countChar32(1 + fSuffixOffset, fSuffixLength) : 0;
    return prefixCodePoints + suffixCodePoints;
}
176
177
0
bool SimpleModifier::isStrong() const {
178
0
    return fStrong;
179
0
}
180
181
0
// Not implemented for this modifier type: reaching this method is treated as
// a programming error (UPRV_UNREACHABLE).
bool SimpleModifier::containsField(Field field) const {
    static_cast<void>(field);  // parameter intentionally unused
    // This method is not currently used.
    UPRV_UNREACHABLE;
}
186
187
0
// Copies the Parameters this modifier was constructed with into `output`.
void SimpleModifier::getParameters(Parameters& output) const {
    output = fParameters;
}
190
191
0
bool SimpleModifier::semanticallyEquivalent(const Modifier& other) const {
192
0
    auto* _other = dynamic_cast<const SimpleModifier*>(&other);
193
0
    if (_other == nullptr) {
194
0
        return false;
195
0
    }
196
0
    if (fParameters.obj != nullptr) {
197
0
        return fParameters.obj == _other->fParameters.obj;
198
0
    }
199
0
    return fCompiledPattern == _other->fCompiledPattern
200
0
        && fField == _other->fField
201
0
        && fStrong == _other->fStrong;
202
0
}
203
204
205
// Writes this modifier's prefix/suffix text around [startIndex, endIndex) in `result`.
// If the pattern has no argument (fSuffixOffset == -1) and has text, the whole
// segment is replaced by that text and the splice() result is returned.
// Otherwise the prefix is inserted at startIndex and the suffix after the
// (shifted) endIndex, and fPrefixLength + fSuffixLength is returned.
int32_t
SimpleModifier::formatAsPrefixSuffix(FormattedStringBuilder &result, int32_t startIndex, int32_t endIndex,
                                     UErrorCode &status) const {
    const bool replacesWholeSegment =
            (fSuffixOffset == -1) && (fPrefixLength + fSuffixLength > 0);
    if (replacesWholeSegment) {
        // There is no argument for the inner number; overwrite the entire segment with our string.
        return result.splice(startIndex, endIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
    }

    if (fPrefixLength > 0) {
        result.insert(startIndex, fCompiledPattern, 2, 2 + fPrefixLength, fField, status);
    }
    if (fSuffixLength > 0) {
        // Suffix text begins one unit past its length marker.
        const int32_t suffixTextStart = 1 + fSuffixOffset;
        // endIndex is offset by fPrefixLength to account for the prefix insertion above.
        result.insert(
                endIndex + fPrefixLength,
                fCompiledPattern,
                suffixTextStart,
                suffixTextStart + fSuffixLength,
                fField,
                status);
    }
    return fPrefixLength + fSuffixLength;
}
227
228
229
// Inserts the literal text of a two-argument compiled pattern (prefix, infix,
// suffix) into `result` at `index`, one segment after another, tagged with `field`.
//
// On success, *outPrefixLength and *outSuffixLength receive the prefix and
// suffix text lengths, and the total number of code units of text inserted
// is returned. If the pattern does not have exactly two arguments, `status`
// is set to U_INTERNAL_PROGRAM_ERROR, 0 is returned and the out-parameters
// are left untouched.
int32_t
SimpleModifier::formatTwoArgPattern(const SimpleFormatter& compiled, FormattedStringBuilder& result,
                                    int32_t index, int32_t* outPrefixLength, int32_t* outSuffixLength,
                                    Field field, UErrorCode& status) {
    const UnicodeString& compiledPattern = compiled.compiledPattern;
    const int32_t argCount = SimpleFormatter::getArgumentLimit(
            compiledPattern.getBuffer(), compiledPattern.length());
    if (argCount != 2) {
        status = U_INTERNAL_PROGRAM_ERROR;
        return 0;
    }

    int32_t cursor = 1;    // read position inside compiledPattern
    int32_t inserted = 0;  // chars added to result so far

    // Prefix: a unit >= ARG_NUM_LIMIT is a text-length marker; otherwise it is
    // the first argument number and there is no prefix text.
    int32_t prefixLength = 0;
    const int32_t prefixUnit = compiledPattern.charAt(cursor);
    cursor++;
    if (prefixUnit >= ARG_NUM_LIMIT) {
        prefixLength = prefixUnit - ARG_NUM_LIMIT;
        result.insert(index + inserted, compiledPattern, cursor, cursor + prefixLength, field, status);
        inserted += prefixLength;
        cursor += prefixLength + 1;  // skip the text and the argument number after it
    }

    // Infix: same encoding, located between the two arguments.
    int32_t infixLength = 0;
    const int32_t infixUnit = compiledPattern.charAt(cursor);
    cursor++;
    if (infixUnit >= ARG_NUM_LIMIT) {
        infixLength = infixUnit - ARG_NUM_LIMIT;
        result.insert(index + inserted, compiledPattern, cursor, cursor + infixLength, field, status);
        inserted += infixLength;
        cursor += infixLength + 1;  // skip the text and the argument number after it
    }

    // Suffix: present only if pattern data remains after the second argument.
    int32_t suffixLength = 0;
    if (cursor != compiledPattern.length()) {
        suffixLength = compiledPattern.charAt(cursor) - ARG_NUM_LIMIT;
        cursor++;
        result.insert(index + inserted, compiledPattern, cursor, cursor + suffixLength, field, status);
        inserted += suffixLength;
    }

    *outPrefixLength = prefixLength;
    *outSuffixLength = suffixLength;

    return inserted;
}
285
286
287
// Inserts fPrefix at leftIndex and fSuffix after the segment. When fOverwrite
// is set, the segment between them is first replaced with an empty string.
// Returns the accumulated results of the insert()/splice() calls.
int32_t ConstantMultiFieldModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
                                          UErrorCode &status) const {
    int32_t delta = output.insert(leftIndex, fPrefix, status);
    if (fOverwrite) {
        // The segment has been shifted right by the prefix just inserted.
        const int32_t segmentStart = leftIndex + delta;
        const int32_t segmentEnd = rightIndex + delta;
        delta += output.splice(
            segmentStart,
            segmentEnd,
            UnicodeString(), 0, 0,
            kUndefinedField, status);
    }
    const int32_t suffixAt = rightIndex + delta;
    delta += output.insert(suffixAt, fSuffix, status);
    return delta;
}
300
301
0
// Returns the length of the prefix builder, as reported by fPrefix.length().
int32_t ConstantMultiFieldModifier::getPrefixLength() const {
    const int32_t prefixUnits = fPrefix.length();
    return prefixUnits;
}
304
305
0
// Returns the combined code point count of the prefix and the suffix.
int32_t ConstantMultiFieldModifier::getCodePointCount() const {
    const int32_t prefixCodePoints = fPrefix.codePointCount();
    const int32_t suffixCodePoints = fSuffix.codePointCount();
    return prefixCodePoints + suffixCodePoints;
}
308
309
0
bool ConstantMultiFieldModifier::isStrong() const {
310
0
    return fStrong;
311
0
}
312
313
0
// Returns true if `field` occurs in the prefix or, failing that, in the suffix.
bool ConstantMultiFieldModifier::containsField(Field field) const {
    if (fPrefix.containsField(field)) {
        return true;
    }
    return fSuffix.containsField(field);
}
316
317
0
// Copies this modifier's stored Parameters into `output`.
void ConstantMultiFieldModifier::getParameters(Parameters& output) const {
    output = fParameters;
}
320
321
0
bool ConstantMultiFieldModifier::semanticallyEquivalent(const Modifier& other) const {
322
0
    auto* _other = dynamic_cast<const ConstantMultiFieldModifier*>(&other);
323
0
    if (_other == nullptr) {
324
0
        return false;
325
0
    }
326
0
    if (fParameters.obj != nullptr) {
327
0
        return fParameters.obj == _other->fParameters.obj;
328
0
    }
329
0
    return fPrefix.contentEquals(_other->fPrefix)
330
0
        && fSuffix.contentEquals(_other->fSuffix)
331
0
        && fOverwrite == _other->fOverwrite
332
0
        && fStrong == _other->fStrong;
333
0
}
334
335
336
// Builds the modifier and decides, separately for the prefix and the suffix,
// whether currency spacing may apply:
//  - the affix must be non-empty and its boundary position (end of prefix /
//    start of suffix) must carry the currency field, and
//  - the boundary code point must be in the IN_CURRENCY set from `symbols`.
// When both hold, the IN_NUMBER set (frozen) and the insert string are stored;
// otherwise both members are set to bogus, which apply() checks for on the set.
CurrencySpacingEnabledModifier::CurrencySpacingEnabledModifier(const FormattedStringBuilder &prefix,
                                                               const FormattedStringBuilder &suffix,
                                                               bool overwrite,
                                                               bool strong,
                                                               const DecimalFormatSymbols &symbols,
                                                               UErrorCode &status)
        : ConstantMultiFieldModifier(prefix, suffix, overwrite, strong) {
    // Check for currency spacing. Do not build the UnicodeSets unless there is
    // a currency code point at a boundary.
    const Field currencyField(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD);

    bool spaceAfterPrefix = false;
    if (prefix.length() > 0 && prefix.fieldAt(prefix.length() - 1) == currencyField) {
        const int lastPrefixCp = prefix.getLastCodePoint();
        const UnicodeSet currencyMatch = getUnicodeSet(symbols, IN_CURRENCY, PREFIX, status);
        spaceAfterPrefix = currencyMatch.contains(lastPrefixCp);
    }
    if (spaceAfterPrefix) {
        fAfterPrefixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, PREFIX, status);
        fAfterPrefixUnicodeSet.freeze();
        fAfterPrefixInsert = getInsertString(symbols, PREFIX, status);
    } else {
        fAfterPrefixUnicodeSet.setToBogus();
        fAfterPrefixInsert.setToBogus();
    }

    bool spaceBeforeSuffix = false;
    if (suffix.length() > 0 && suffix.fieldAt(0) == currencyField) {
        const int firstSuffixCp = suffix.getFirstCodePoint();
        const UnicodeSet currencyMatch = getUnicodeSet(symbols, IN_CURRENCY, SUFFIX, status);
        spaceBeforeSuffix = currencyMatch.contains(firstSuffixCp);
    }
    if (spaceBeforeSuffix) {
        fBeforeSuffixUnicodeSet = getUnicodeSet(symbols, IN_NUMBER, SUFFIX, status);
        fBeforeSuffixUnicodeSet.freeze();
        fBeforeSuffixInsert = getInsertString(symbols, SUFFIX, status);
    } else {
        fBeforeSuffixUnicodeSet.setToBogus();
        fBeforeSuffixInsert.setToBogus();
    }
}
376
377
// Applies currency spacing around the number in [leftIndex, rightIndex) and
// then delegates to ConstantMultiFieldModifier::apply() for the affixes.
//
// Spacing is only considered when the segment is non-empty and the matching
// UnicodeSet is not bogus:
//  - after the prefix: if the first code point of the number is in
//    fAfterPrefixUnicodeSet, fAfterPrefixInsert is inserted at leftIndex;
//  - before the suffix: if the last code point of the number is in
//    fBeforeSuffixUnicodeSet, fBeforeSuffixInsert is inserted at the end of
//    the number.
//
// Returns the accumulated results of the insert() calls plus the result of
// the base-class apply().
int32_t CurrencySpacingEnabledModifier::apply(FormattedStringBuilder &output, int leftIndex, int rightIndex,
                                              UErrorCode &status) const {
    // Currency spacing logic
    int32_t length = 0;
    const bool hasNumber = (rightIndex - leftIndex > 0);
    if (hasNumber && !fAfterPrefixUnicodeSet.isBogus() &&
        fAfterPrefixUnicodeSet.contains(output.codePointAt(leftIndex))) {
        // TODO: Should we use the CURRENCY field here?
        length += output.insert(
            leftIndex,
            fAfterPrefixInsert,
            kUndefinedField,
            status);
    }
    // The end of the number has moved right by whatever was inserted above, so
    // the last code point of the number must be read at rightIndex + length
    // (the same offset that is used for the insertion position below).
    if (hasNumber && !fBeforeSuffixUnicodeSet.isBogus() &&
        fBeforeSuffixUnicodeSet.contains(output.codePointBefore(rightIndex + length))) {
        // TODO: Should we use the CURRENCY field here?
        length += output.insert(
            rightIndex + length,
            fBeforeSuffixInsert,
            kUndefinedField,
            status);
    }

    // Call super for the remaining logic
    length += ConstantMultiFieldModifier::apply(output, leftIndex, rightIndex + length, status);
    return length;
}
404
405
// Applies currency spacing to a string that already has its affixes attached.
// The prefix occupies [prefixStart, prefixStart + prefixLen) and the suffix
// starts at suffixStart; the number lies between them. Nothing is done when
// the number is empty. Returns the sum of the applyCurrencySpacingAffix() results.
int32_t
CurrencySpacingEnabledModifier::applyCurrencySpacing(FormattedStringBuilder &output, int32_t prefixStart,
                                                     int32_t prefixLen, int32_t suffixStart,
                                                     int32_t suffixLen,
                                                     const DecimalFormatSymbols &symbols,
                                                     UErrorCode &status) {
    const bool hasNumber = (suffixStart - prefixStart - prefixLen > 0); // could be empty string
    if (!hasNumber) {
        return 0;
    }

    int32_t added = 0;
    if (prefixLen > 0) {
        added += applyCurrencySpacingAffix(output, prefixStart + prefixLen, PREFIX, symbols, status);
    }
    if (suffixLen > 0) {
        // The suffix has shifted right by whatever was inserted after the prefix.
        added += applyCurrencySpacingAffix(output, suffixStart + added, SUFFIX, symbols, status);
    }
    return added;
}
423
424
// Inserts the currency spacing string at `index`, the boundary between an
// affix and the number, if all of the following hold:
//  - the affix position adjacent to the boundary carries the currency field;
//  - the affix code point at the boundary is in the IN_CURRENCY set;
//  - the number code point at the boundary is in the IN_NUMBER set.
// Returns the insert() result, or 0 when no spacing is inserted.
int32_t
CurrencySpacingEnabledModifier::applyCurrencySpacingAffix(FormattedStringBuilder &output, int32_t index,
                                                          EAffix affix,
                                                          const DecimalFormatSymbols &symbols,
                                                          UErrorCode &status) {
    const bool isPrefix = (affix == PREFIX);

    // NOTE: For prefix, output.fieldAt(index-1) gets the last field type in the prefix.
    // This works even if the last code point in the prefix is 2 code units because the
    // field value gets populated to both indices in the field array.
    const Field boundaryField = isPrefix ? output.fieldAt(index - 1) : output.fieldAt(index);
    if (boundaryField != Field(UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD)) {
        return 0;
    }

    // The affix-side code point sits before the boundary for a prefix, after it for a suffix.
    const int affixSideCp = isPrefix ? output.codePointBefore(index) : output.codePointAt(index);
    const UnicodeSet currencyMatch = getUnicodeSet(symbols, IN_CURRENCY, affix, status);
    if (!currencyMatch.contains(affixSideCp)) {
        return 0;
    }

    // The number-side code point is on the opposite side of the boundary.
    const int numberSideCp = isPrefix ? output.codePointAt(index) : output.codePointBefore(index);
    const UnicodeSet surroundingMatch = getUnicodeSet(symbols, IN_NUMBER, affix, status);
    if (!surroundingMatch.contains(numberSideCp)) {
        return 0;
    }

    const UnicodeString spacing = getInsertString(symbols, affix, status);

    // NOTE: This next line *inserts* the spacing string, triggering an arraycopy.
    // It would be more efficient if this could be done before affixes were attached,
    // so that it could be prepended/appended instead of inserted.
    // However, the build code path is more efficient, and this is the most natural
    // place to put currency spacing in the non-build code path.
    // TODO: Should we use the CURRENCY field here?
    return output.insert(index, spacing, kUndefinedField, status);
}
456
457
// Returns (by value) the UnicodeSet for the requested currency spacing pattern
// in `symbols`. The two default CLDR patterns are served from the
// pre-computed static sets; any other pattern is parsed into a new set.
// If the static defaults cannot be initialized, or `status` already indicates
// failure, an empty UnicodeSet is returned.
UnicodeSet
CurrencySpacingEnabledModifier::getUnicodeSet(const DecimalFormatSymbols &symbols, EPosition position,
                                              EAffix affix, UErrorCode &status) {
    // Ensure the static defaults are initialized:
    umtx_initOnce(gDefaultCurrencySpacingInitOnce, &initDefaultCurrencySpacing, status);
    if (U_FAILURE(status)) {
        return UnicodeSet();
    }

    const bool inCurrency = (position == IN_CURRENCY);
    const bool isSuffix = (affix == SUFFIX);
    const UnicodeString& pattern = symbols.getPatternForCurrencySpacing(
            inCurrency ? UNUM_CURRENCY_MATCH : UNUM_CURRENCY_SURROUNDING_MATCH,
            isSuffix,
            status);

    if (pattern.compare(u"[:digit:]", -1) == 0) {
        return *UNISET_DIGIT;
    }
    if (pattern.compare(u"[[:^S:]&[:^Z:]]", -1) == 0) {
        return *UNISET_NOTSZ;
    }
    return UnicodeSet(pattern, status);
}
478
479
// Returns a copy of the UNUM_CURRENCY_INSERT spacing pattern from `symbols`
// for the given affix side.
UnicodeString
CurrencySpacingEnabledModifier::getInsertString(const DecimalFormatSymbols &symbols, EAffix affix,
                                                UErrorCode &status) {
    const bool isSuffix = (affix == SUFFIX);
    const UnicodeString& insertPattern =
            symbols.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, isSuffix, status);
    return insertPattern;
}
484
485
#endif /* #if !UCONFIG_NO_FORMATTING */