/src/icu/source/i18n/numparse_currency.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2018 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  |  | 
4  |  | #include "unicode/utypes.h"  | 
5  |  |  | 
6  |  | #if !UCONFIG_NO_FORMATTING  | 
7  |  |  | 
8  |  | // Allow implicit conversion from char16_t* to UnicodeString for this file:  | 
9  |  | // Helpful in toString methods and elsewhere.  | 
10  |  | #define UNISTR_FROM_STRING_EXPLICIT  | 
11  |  |  | 
12  |  | #include "numparse_types.h"  | 
13  |  | #include "numparse_currency.h"  | 
14  |  | #include "ucurrimp.h"  | 
15  |  | #include "unicode/errorcode.h"  | 
16  |  | #include "numparse_utils.h"  | 
17  |  | #include "string_segment.h"  | 
18  |  |  | 
19  |  | using namespace icu;  | 
20  |  | using namespace icu::numparse;  | 
21  |  | using namespace icu::numparse::impl;  | 
22  |  |  | 
23  |  |  | 
24  |  | CombinedCurrencyMatcher::CombinedCurrencyMatcher(const CurrencySymbols& currencySymbols, const DecimalFormatSymbols& dfs,  | 
25  |  |                                                  parse_flags_t parseFlags, UErrorCode& status)  | 
26  | 0  |         : fCurrency1(currencySymbols.getCurrencySymbol(status)),  | 
27  | 0  |           fCurrency2(currencySymbols.getIntlCurrencySymbol(status)),  | 
28  | 0  |           fUseFullCurrencyData(0 == (parseFlags & PARSE_FLAG_NO_FOREIGN_CURRENCY)),  | 
29  | 0  |           afterPrefixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, false, status)),  | 
30  | 0  |           beforeSuffixInsert(dfs.getPatternForCurrencySpacing(UNUM_CURRENCY_INSERT, true, status)),  | 
31  | 0  |           fLocaleName(dfs.getLocale().getName(), -1, status) { | 
32  | 0  |     utils::copyCurrencyCode(fCurrencyCode, currencySymbols.getIsoCode());  | 
33  |  |  | 
34  |  |     // Pre-load the long names for the current locale and currency  | 
35  |  |     // if we are parsing without the full currency data.  | 
36  | 0  |     if (!fUseFullCurrencyData) { | 
37  | 0  |         for (int32_t i=0; i<StandardPlural::COUNT; i++) { | 
38  | 0  |             auto plural = static_cast<StandardPlural::Form>(i);  | 
39  | 0  |             fLocalLongNames[i] = currencySymbols.getPluralName(plural, status);  | 
40  | 0  |         }  | 
41  | 0  |     }  | 
42  |  |  | 
43  |  |     // TODO: Figure out how to make this faster and re-enable.  | 
44  |  |     // Computing the "lead code points" set for fastpathing is too slow to use in production.  | 
45  |  |     // See http://bugs.icu-project.org/trac/ticket/13584  | 
46  |  | //    // Compute the full set of characters that could be the first in a currency to allow for  | 
47  |  | //    // efficient smoke test.  | 
48  |  | //    fLeadCodePoints.add(fCurrency1.char32At(0));  | 
49  |  | //    fLeadCodePoints.add(fCurrency2.char32At(0));  | 
50  |  | //    fLeadCodePoints.add(beforeSuffixInsert.char32At(0));  | 
51  |  | //    uprv_currencyLeads(fLocaleName.data(), fLeadCodePoints, status);  | 
52  |  | //    // Always apply case mapping closure for currencies  | 
53  |  | //    fLeadCodePoints.closeOver(USET_ADD_CASE_MAPPINGS);  | 
54  |  | //    fLeadCodePoints.freeze();  | 
55  | 0  | }  | 
56  |  |  | 
57  |  | bool  | 
58  | 0  | CombinedCurrencyMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { | 
59  | 0  |     if (result.currencyCode[0] != 0) { | 
60  | 0  |         return false;  | 
61  | 0  |     }  | 
62  |  |  | 
63  |  |     // Try to match a currency spacing separator.  | 
64  | 0  |     int32_t initialOffset = segment.getOffset();  | 
65  | 0  |     bool maybeMore = false;  | 
66  | 0  |     if (result.seenNumber() && !beforeSuffixInsert.isEmpty()) { | 
67  | 0  |         int32_t overlap = segment.getCommonPrefixLength(beforeSuffixInsert);  | 
68  | 0  |         if (overlap == beforeSuffixInsert.length()) { | 
69  | 0  |             segment.adjustOffset(overlap);  | 
70  |  |             // Note: let currency spacing be a weak match. Don't update chars consumed.  | 
71  | 0  |         }  | 
72  | 0  |         maybeMore = maybeMore || overlap == segment.length();  | 
73  | 0  |     }  | 
74  |  |  | 
75  |  |     // Match the currency string, and reset if we didn't find one.  | 
76  | 0  |     maybeMore = maybeMore || matchCurrency(segment, result, status);  | 
77  | 0  |     if (result.currencyCode[0] == 0) { | 
78  | 0  |         segment.setOffset(initialOffset);  | 
79  | 0  |         return maybeMore;  | 
80  | 0  |     }  | 
81  |  |  | 
82  |  |     // Try to match a currency spacing separator.  | 
83  | 0  |     if (!result.seenNumber() && !afterPrefixInsert.isEmpty()) { | 
84  | 0  |         int32_t overlap = segment.getCommonPrefixLength(afterPrefixInsert);  | 
85  | 0  |         if (overlap == afterPrefixInsert.length()) { | 
86  | 0  |             segment.adjustOffset(overlap);  | 
87  |  |             // Note: let currency spacing be a weak match. Don't update chars consumed.  | 
88  | 0  |         }  | 
89  | 0  |         maybeMore = maybeMore || overlap == segment.length();  | 
90  | 0  |     }  | 
91  |  | 
  | 
92  | 0  |     return maybeMore;  | 
93  | 0  | }  | 
94  |  |  | 
95  |  | bool CombinedCurrencyMatcher::matchCurrency(StringSegment& segment, ParsedNumber& result,  | 
96  | 0  |                                             UErrorCode& status) const { | 
97  | 0  |     bool maybeMore = false;  | 
98  |  | 
  | 
99  | 0  |     int32_t overlap1;  | 
100  | 0  |     if (!fCurrency1.isEmpty()) { | 
101  | 0  |         overlap1 = segment.getCaseSensitivePrefixLength(fCurrency1);  | 
102  | 0  |     } else { | 
103  | 0  |         overlap1 = -1;  | 
104  | 0  |     }  | 
105  | 0  |     maybeMore = maybeMore || overlap1 == segment.length();  | 
106  | 0  |     if (overlap1 == fCurrency1.length()) { | 
107  | 0  |         utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);  | 
108  | 0  |         segment.adjustOffset(overlap1);  | 
109  | 0  |         result.setCharsConsumed(segment);  | 
110  | 0  |         return maybeMore;  | 
111  | 0  |     }  | 
112  |  |  | 
113  | 0  |     int32_t overlap2;  | 
114  | 0  |     if (!fCurrency2.isEmpty()) { | 
115  |  |         // ISO codes should be accepted case-insensitive.  | 
116  |  |         // https://unicode-org.atlassian.net/browse/ICU-13696  | 
117  | 0  |         overlap2 = segment.getCommonPrefixLength(fCurrency2);  | 
118  | 0  |     } else { | 
119  | 0  |         overlap2 = -1;  | 
120  | 0  |     }  | 
121  | 0  |     maybeMore = maybeMore || overlap2 == segment.length();  | 
122  | 0  |     if (overlap2 == fCurrency2.length()) { | 
123  | 0  |         utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);  | 
124  | 0  |         segment.adjustOffset(overlap2);  | 
125  | 0  |         result.setCharsConsumed(segment);  | 
126  | 0  |         return maybeMore;  | 
127  | 0  |     }  | 
128  |  |  | 
129  | 0  |     if (fUseFullCurrencyData) { | 
130  |  |         // Use the full currency data.  | 
131  |  |         // NOTE: This call site should be improved with #13584.  | 
132  | 0  |         const UnicodeString segmentString = segment.toTempUnicodeString();  | 
133  |  |  | 
134  |  |         // Try to parse the currency  | 
135  | 0  |         ParsePosition ppos(0);  | 
136  | 0  |         int32_t partialMatchLen = 0;  | 
137  | 0  |         uprv_parseCurrency(  | 
138  | 0  |                 fLocaleName.data(),  | 
139  | 0  |                 segmentString,  | 
140  | 0  |                 ppos,  | 
141  | 0  |                 UCURR_SYMBOL_NAME, // checks for both UCURR_SYMBOL_NAME and UCURR_LONG_NAME  | 
142  | 0  |                 &partialMatchLen,  | 
143  | 0  |                 result.currencyCode,  | 
144  | 0  |                 status);  | 
145  | 0  |         maybeMore = maybeMore || partialMatchLen == segment.length();  | 
146  |  | 
  | 
147  | 0  |         if (U_SUCCESS(status) && ppos.getIndex() != 0) { | 
148  |  |             // Complete match.  | 
149  |  |             // NOTE: The currency code should already be saved in the ParsedNumber.  | 
150  | 0  |             segment.adjustOffset(ppos.getIndex());  | 
151  | 0  |             result.setCharsConsumed(segment);  | 
152  | 0  |             return maybeMore;  | 
153  | 0  |         }  | 
154  |  | 
  | 
155  | 0  |     } else { | 
156  |  |         // Use the locale long names.  | 
157  | 0  |         int32_t longestFullMatch = 0;  | 
158  | 0  |         for (int32_t i=0; i<StandardPlural::COUNT; i++) { | 
159  | 0  |             const UnicodeString& name = fLocalLongNames[i];  | 
160  | 0  |             int32_t overlap = segment.getCommonPrefixLength(name);  | 
161  | 0  |             if (overlap == name.length() && name.length() > longestFullMatch) { | 
162  | 0  |                 longestFullMatch = name.length();  | 
163  | 0  |             }  | 
164  | 0  |             maybeMore = maybeMore || overlap > 0;  | 
165  | 0  |         }  | 
166  | 0  |         if (longestFullMatch > 0) { | 
167  | 0  |             utils::copyCurrencyCode(result.currencyCode, fCurrencyCode);  | 
168  | 0  |             segment.adjustOffset(longestFullMatch);  | 
169  | 0  |             result.setCharsConsumed(segment);  | 
170  | 0  |             return maybeMore;  | 
171  | 0  |         }  | 
172  | 0  |     }  | 
173  |  |  | 
174  |  |     // No match found.  | 
175  | 0  |     return maybeMore;  | 
176  | 0  | }  | 
177  |  |  | 
178  | 0  | bool CombinedCurrencyMatcher::smokeTest(const StringSegment&) const { | 
179  |  |     // TODO: See constructor  | 
180  | 0  |     return true;  | 
181  |  |     //return segment.startsWith(fLeadCodePoints);  | 
182  | 0  | }  | 
183  |  |  | 
184  | 0  | UnicodeString CombinedCurrencyMatcher::toString() const { | 
185  | 0  |     return u"<CombinedCurrencyMatcher>";  | 
186  | 0  | }  | 
187  |  |  | 
188  |  |  | 
189  |  | #endif /* #if !UCONFIG_NO_FORMATTING */  |