Coverage Report

Created: 2025-06-13 06:38

/src/icu/icu4c/source/i18n/numparse_currency.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2018 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include "unicode/utypes.h"
5
6
#if !UCONFIG_NO_FORMATTING
7
8
// Allow implicit conversion from char16_t* to UnicodeString for this file:
9
// Helpful in toString methods and elsewhere.
10
#define UNISTR_FROM_STRING_EXPLICIT
11
12
#include "numparse_types.h"
13
#include "numparse_currency.h"
14
#include "ucurrimp.h"
15
#include "unicode/errorcode.h"
16
#include "numparse_utils.h"
17
#include "string_segment.h"
18
19
using namespace icu;
20
using namespace icu::numparse;
21
using namespace icu::numparse::impl;
22
23
24
// Builds a matcher for the currency described by currencySymbols.
//
// Captures the currency symbol, the international currency symbol, the two
// currency-spacing insert strings taken from dfs, the locale name, and the ISO
// code. Failures from the underlying calls are reported through status.
//
// Foreign-currency lookup (fUseFullCurrencyData) is enabled exactly when
// PARSE_FLAG_NO_FOREIGN_CURRENCY is absent from parseFlags.
CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs,
                                                 parse_flags_t parseFlags, UErrorCode& status)
        : fCurrency1(currencySymbols.getCurrencySymbol(status)),
          fCurrency2(currencySymbols.getIntlCurrencySymbol(status)),
          fUseFullCurrencyData((parseFlags & PARSE_FLAG_NO_FOREIGN_CURRENCY) == 0),
          afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)),
          beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)),
          fLocaleName(dfs.getLocale().getName(), -1, status) {
    utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());

    // When the full currency data is not going to be consulted, cache the long
    // (plural) names of this locale's currency up front, one per plural form.
    if (!fUseFullCurrencyData) {
        for (int32_t formIndex = 0; formIndex < StandardPlural::COUNT; ++formIndex) {
            fLocalLongNames[formIndex] = currencySymbols.getPluralName(
                    static_cast<StandardPlural::Form>(formIndex), status);
        }
    }

    // TODO: Figure out how to make this faster and re-enable.
    // Computing the "lead code points" set for fastpathing is too slow to use in production.
    // See https://unicode-org.atlassian.net/browse/ICU-13584
//    // Compute the full set of characters that could be the first in a currency to allow for
//    // efficient smoke test.
//    fLeadCodePoints.add(fCurrency1.char32At(0));
//    fLeadCodePoints.add(fCurrency2.char32At(0));
//    fLeadCodePoints.add(beforeSuffixInsert.char32At(0));
//    uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status);
//    // Always apply case mapping closure for currencies
//    fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS);
//    fLeadCodePoints.freeze();
}
56
57
bool
58
722k
CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const {
59
722k
    if (result.currencyCode[0] != 0) {
60
748
        return false;
61
748
    }
62
63
    // Try to match a currency spacing separator.
64
721k
    int32_t initialOffset = segment.getOffset();
65
721k
    bool maybeMore = false;
66
721k
    if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) {
67
16.9k
        int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);
68
16.9k
        if (overlap == beforeSuffixInsert.length()) {
69
4
            segment.adjustOffset(overlap);
70
            // Note: let currency spacing be a weak match. Don't update chars consumed.
71
4
        }
72
16.9k
        maybeMore = maybeMore || overlap == segment.length();
73
16.9k
    }
74
75
    // Match the currency string, and reset if we didn't find one.
76
721k
    maybeMore = maybeMore || matchCurrency(segment, result, status);
77
721k
    if (result.currencyCode[0] == 0) {
78
721k
        segment.setOffset(initialOffset);
79
721k
        return maybeMore;
80
721k
    }
81
82
    // Try to match a currency spacing separator.
83
764
    if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) {
84
558
        int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);
85
558
        if (overlap == afterPrefixInsert.length()) {
86
2
            segment.adjustOffset(overlap);
87
            // Note: let currency spacing be a weak match. Don't update chars consumed.
88
2
        }
89
558
        maybeMore = maybeMore || overlap == segment.length();
90
558
    }
91
92
764
    return maybeMore;
93
721k
}
94
95
bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result,
96
721k
                                            UErrorCode& status) const {
97
721k
    bool maybeMore = false;
98
99
721k
    int32_t overlap1;
100
721k
    if (!fCurrency1.isEmpty()) {
101
721k
        overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);
102
721k
    } else {
103
0
        overlap1 = -1;
104
0
    }
105
721k
    maybeMore = maybeMore || overlap1 == segment.length();
106
721k
    if (overlap1 == fCurrency1.length()) {
107
761
        utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
108
761
        segment.adjustOffset(overlap1);
109
761
        result.setCharsConsumed(segment);
110
761
        return maybeMore;
111
761
    }
112
113
721k
    int32_t overlap2;
114
721k
    if (!fCurrency2.isEmpty()) {
115
        // ISO codes should be accepted case-insensitive.
116
        // https://unicode-org.atlassian.net/browse/ICU-13696
117
721k
        overlap2 = segment.getCommonPrefixLength(fCurrency2);
118
721k
    } else {
119
0
        overlap2 = -1;
120
0
    }
121
721k
    maybeMore = maybeMore || overlap2 == segment.length();
122
721k
    if (overlap2 == fCurrency2.length()) {
123
3
        utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
124
3
        segment.adjustOffset(overlap2);
125
3
        result.setCharsConsumed(segment);
126
3
        return maybeMore;
127
3
    }
128
129
721k
    if (fUseFullCurrencyData) {
130
        // Use the full currency data.
131
        // NOTE: This call site should be improved with #13584.
132
0
        const UnicodeString segmentString = segment.toTempUnicodeString();
133
134
        // Try to parse the currency
135
0
        ParsePosition ppos(0);
136
0
        int32_t partialMatchLen = 0;
137
0
        uprv_parseCurrency(
138
0
                fLocaleName.data(),
139
0
                segmentString,
140
0
                ppos,
141
0
                UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME
142
0
                &partialMatchLen,
143
0
                result.currencyCode,
144
0
                status);
145
0
        maybeMore = maybeMore || partialMatchLen == segment.length();
146
147
0
        if (U_SUCCESS(status) && ppos.getIndex() != 0) {
148
            // Complete match.
149
            // NOTE: The currency code should already be saved in the ParsedNumber.
150
0
            segment.adjustOffset(ppos.getIndex());
151
0
            result.setCharsConsumed(segment);
152
0
            return maybeMore;
153
0
        }
154
155
721k
    } else {
156
        // Use the locale long names.
157
721k
        int32_t longestFullMatch = 0;
158
6.49M
        for (int32_t i=0; i<StandardPlural::COUNT; i++) {
159
5.76M
            const UnicodeString& name = fLocalLongNames[i];
160
5.76M
            int32_t overlap = segment.getCommonPrefixLength(name);
161
5.76M
            if (overlap == name.length() && name.length() > longestFullMatch) {
162
0
                longestFullMatch = name.length();
163
0
            }
164
5.76M
            maybeMore = maybeMore || overlap > 0;
165
5.76M
        }
166
721k
        if (longestFullMatch > 0) {
167
0
            utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);
168
0
            segment.adjustOffset(longestFullMatch);
169
0
            result.setCharsConsumed(segment);
170
0
            return maybeMore;
171
0
        }
172
721k
    }
173
174
    // No match found.
175
721k
    return maybeMore;
176
721k
}
177
178
726k
// Always returns true, so this check never filters out a segment.
//
// TODO: See constructor. The lead-code-point set that this check would use is
// currently not computed there; the intended check is kept below for when it
// is re-enabled.
bool CombinedCurrencyMatcher::smokeTest(const StringSegment& /*segment*/) const {
    //return segment.startsWith(fLeadCodePoints);
    return true;
}
183
184
0
// Returns a fixed label identifying this matcher type.
UnicodeString CombinedCurrencyMatcher::toString() const {
    return UnicodeString(u"<CombinedCurrencyMatcher>");
}
187
188
189
#endif /* #if !UCONFIG_NO_FORMATTING */