/src/icu/source/i18n/number_compact.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2017 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  |  | 
4  |  | #include "unicode/utypes.h"  | 
5  |  |  | 
6  |  | #if !UCONFIG_NO_FORMATTING  | 
7  |  |  | 
8  |  | #include "unicode/ustring.h"  | 
9  |  | #include "unicode/ures.h"  | 
10  |  | #include "cstring.h"  | 
11  |  | #include "charstr.h"  | 
12  |  | #include "resource.h"  | 
13  |  | #include "number_compact.h"  | 
14  |  | #include "number_microprops.h"  | 
15  |  | #include "uresimp.h"  | 
16  |  |  | 
17  |  | using namespace icu;  | 
18  |  | using namespace icu::number;  | 
19  |  | using namespace icu::number::impl;  | 
20  |  |  | 
21  |  | namespace { | 
22  |  |  | 
23  |  | // A dummy object used when a "0" compact decimal entry is encountered. This is necessary  | 
24  |  | // in order to prevent falling back to root. Object equality ("==") is intended. | 
25  |  | const UChar *USE_FALLBACK = u"<USE FALLBACK>";  | 
26  |  |  | 
27  |  | /** Produces a string like "NumberElements/latn/patternsShort/decimalFormat". */  | 
28  |  | void getResourceBundleKey(const char *nsName, CompactStyle compactStyle, CompactType compactType,  | 
29  | 0  |                                  CharString &sb, UErrorCode &status) { | 
30  | 0  |     sb.clear();  | 
31  | 0  |     sb.append("NumberElements/", status); | 
32  | 0  |     sb.append(nsName, status);  | 
33  | 0  |     sb.append(compactStyle == CompactStyle::UNUM_SHORT ? "/patternsShort" : "/patternsLong", status);  | 
34  | 0  |     sb.append(compactType == CompactType::TYPE_DECIMAL ? "/decimalFormat" : "/currencyFormat", status);  | 
35  | 0  | }  | 
36  |  |  | 
37  | 0  | int32_t getIndex(int32_t magnitude, StandardPlural::Form plural) { | 
38  | 0  |     return magnitude * StandardPlural::COUNT + plural;  | 
39  | 0  | }  | 
40  |  |  | 
41  | 0  | int32_t countZeros(const UChar *patternString, int32_t patternLength) { | 
42  |  |     // NOTE: This strategy for computing the number of zeros is a hack for efficiency.  | 
43  |  |     // It could break if there are any 0s that aren't part of the main pattern.  | 
44  | 0  |     int32_t numZeros = 0;  | 
45  | 0  |     for (int32_t i = 0; i < patternLength; i++) { | 
46  | 0  |         if (patternString[i] == u'0') { | 
47  | 0  |             numZeros++;  | 
48  | 0  |         } else if (numZeros > 0) { | 
49  | 0  |             break; // zeros should always be contiguous  | 
50  | 0  |         }  | 
51  | 0  |     }  | 
52  | 0  |     return numZeros;  | 
53  | 0  | }  | 
54  |  |  | 
55  |  | } // namespace  | 
56  |  |  | 
57  |  | // NOTE: patterns and multipliers both get zero-initialized.  | 
58  | 0  | CompactData::CompactData() : patterns(), multipliers(), largestMagnitude(0), isEmpty(true) { | 
59  | 0  | }  | 
60  |  |  | 
61  |  | void CompactData::populate(const Locale &locale, const char *nsName, CompactStyle compactStyle,  | 
62  | 0  |                            CompactType compactType, UErrorCode &status) { | 
63  | 0  |     CompactDataSink sink(*this);  | 
64  | 0  |     LocalUResourceBundlePointer rb(ures_open(nullptr, locale.getName(), &status));  | 
65  | 0  |     if (U_FAILURE(status)) { return; } | 
66  |  |  | 
67  | 0  |     bool nsIsLatn = strcmp(nsName, "latn") == 0;  | 
68  | 0  |     bool compactIsShort = compactStyle == CompactStyle::UNUM_SHORT;  | 
69  |  |  | 
70  |  |     // Fall back to latn numbering system and/or short compact style.  | 
71  | 0  |     CharString resourceKey;  | 
72  | 0  |     getResourceBundleKey(nsName, compactStyle, compactType, resourceKey, status);  | 
73  | 0  |     UErrorCode localStatus = U_ZERO_ERROR;  | 
74  | 0  |     ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);  | 
75  | 0  |     if (isEmpty && !nsIsLatn) { | 
76  | 0  |         getResourceBundleKey("latn", compactStyle, compactType, resourceKey, status); | 
77  | 0  |         localStatus = U_ZERO_ERROR;  | 
78  | 0  |         ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);  | 
79  | 0  |     }  | 
80  | 0  |     if (isEmpty && !compactIsShort) { | 
81  | 0  |         getResourceBundleKey(nsName, CompactStyle::UNUM_SHORT, compactType, resourceKey, status);  | 
82  | 0  |         localStatus = U_ZERO_ERROR;  | 
83  | 0  |         ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);  | 
84  | 0  |     }  | 
85  | 0  |     if (isEmpty && !nsIsLatn && !compactIsShort) { | 
86  | 0  |         getResourceBundleKey("latn", CompactStyle::UNUM_SHORT, compactType, resourceKey, status); | 
87  | 0  |         localStatus = U_ZERO_ERROR;  | 
88  | 0  |         ures_getAllItemsWithFallback(rb.getAlias(), resourceKey.data(), sink, localStatus);  | 
89  | 0  |     }  | 
90  |  |  | 
91  |  |     // The last fallback should be guaranteed to return data.  | 
92  | 0  |     if (isEmpty) { | 
93  | 0  |         status = U_INTERNAL_PROGRAM_ERROR;  | 
94  | 0  |     }  | 
95  | 0  | }  | 
96  |  |  | 
97  | 0  | int32_t CompactData::getMultiplier(int32_t magnitude) const { | 
98  | 0  |     if (magnitude < 0) { | 
99  | 0  |         return 0;  | 
100  | 0  |     }  | 
101  | 0  |     if (magnitude > largestMagnitude) { | 
102  | 0  |         magnitude = largestMagnitude;  | 
103  | 0  |     }  | 
104  | 0  |     return multipliers[magnitude];  | 
105  | 0  | }  | 
106  |  |  | 
107  |  | const UChar *CompactData::getPattern(  | 
108  |  |         int32_t magnitude,  | 
109  |  |         const PluralRules *rules,  | 
110  | 0  |         const DecimalQuantity &dq) const { | 
111  | 0  |     if (magnitude < 0) { | 
112  | 0  |         return nullptr;  | 
113  | 0  |     }  | 
114  | 0  |     if (magnitude > largestMagnitude) { | 
115  | 0  |         magnitude = largestMagnitude;  | 
116  | 0  |     }  | 
117  | 0  |     const UChar *patternString = nullptr;  | 
118  | 0  |     if (dq.hasIntegerValue()) { | 
119  | 0  |         int64_t i = dq.toLong(true);  | 
120  | 0  |         if (i == 0) { | 
121  | 0  |             patternString = patterns[getIndex(magnitude, StandardPlural::Form::EQ_0)];  | 
122  | 0  |         } else if (i == 1) { | 
123  | 0  |             patternString = patterns[getIndex(magnitude, StandardPlural::Form::EQ_1)];  | 
124  | 0  |         }  | 
125  | 0  |         if (patternString != nullptr) { | 
126  | 0  |             return patternString;  | 
127  | 0  |         }  | 
128  | 0  |     }  | 
129  | 0  |     StandardPlural::Form plural = utils::getStandardPlural(rules, dq);  | 
130  | 0  |     patternString = patterns[getIndex(magnitude, plural)];  | 
131  | 0  |     if (patternString == nullptr && plural != StandardPlural::OTHER) { | 
132  |  |         // Fall back to "other" plural variant  | 
133  | 0  |         patternString = patterns[getIndex(magnitude, StandardPlural::OTHER)];  | 
134  | 0  |     }  | 
135  | 0  |     if (patternString == USE_FALLBACK) { // == is intended | 
136  |  |         // Return null if USE_FALLBACK is present  | 
137  | 0  |         patternString = nullptr;  | 
138  | 0  |     }  | 
139  | 0  |     return patternString;  | 
140  | 0  | }  | 
141  |  |  | 
142  | 0  | void CompactData::getUniquePatterns(UVector &output, UErrorCode &status) const { | 
143  | 0  |     U_ASSERT(output.isEmpty());  | 
144  |  |     // NOTE: In C++, this is done more manually with a UVector.  | 
145  |  |     // In Java, we can take advantage of JDK HashSet.  | 
146  | 0  |     for (auto pattern : patterns) { | 
147  | 0  |         if (pattern == nullptr || pattern == USE_FALLBACK) { | 
148  | 0  |             continue;  | 
149  | 0  |         }  | 
150  |  |  | 
151  |  |         // Insert pattern into the UVector if the UVector does not already contain the pattern.  | 
152  |  |         // Search the UVector from the end since identical patterns are likely to be adjacent.  | 
153  | 0  |         for (int32_t i = output.size() - 1; i >= 0; i--) { | 
154  | 0  |             if (u_strcmp(pattern, static_cast<const UChar *>(output[i])) == 0) { | 
155  | 0  |                 goto continue_outer;  | 
156  | 0  |             }  | 
157  | 0  |         }  | 
158  |  |  | 
159  |  |         // The string was not found; add it to the UVector.  | 
160  |  |         // ANDY: This requires a const_cast.  Why?  | 
161  | 0  |         output.addElementX(const_cast<UChar *>(pattern), status);  | 
162  |  | 
  | 
163  | 0  |         continue_outer:  | 
164  | 0  |         continue;  | 
165  | 0  |     }  | 
166  | 0  | }  | 
167  |  |  | 
168  |  | void CompactData::CompactDataSink::put(const char *key, ResourceValue &value, UBool /*noFallback*/,  | 
169  | 0  |                                        UErrorCode &status) { | 
170  |  |     // traverse into the table of powers of ten  | 
171  | 0  |     ResourceTable powersOfTenTable = value.getTable(status);  | 
172  | 0  |     if (U_FAILURE(status)) { return; } | 
173  | 0  |     for (int i3 = 0; powersOfTenTable.getKeyAndValue(i3, key, value); ++i3) { | 
174  |  |  | 
175  |  |         // Assumes that the keys are always of the form "10000" where the magnitude is the  | 
176  |  |         // length of the key minus one.  We expect magnitudes to be less than MAX_DIGITS.  | 
177  | 0  |         auto magnitude = static_cast<int8_t> (strlen(key) - 1);  | 
178  | 0  |         int8_t multiplier = data.multipliers[magnitude];  | 
179  | 0  |         U_ASSERT(magnitude < COMPACT_MAX_DIGITS);  | 
180  |  |  | 
181  |  |         // Iterate over the plural variants ("one", "other", etc) | 
182  | 0  |         ResourceTable pluralVariantsTable = value.getTable(status);  | 
183  | 0  |         if (U_FAILURE(status)) { return; } | 
184  | 0  |         for (int i4 = 0; pluralVariantsTable.getKeyAndValue(i4, key, value); ++i4) { | 
185  |  |             // Skip this magnitude/plural if we already have it from a child locale.  | 
186  |  |             // Note: This also skips USE_FALLBACK entries.  | 
187  | 0  |             StandardPlural::Form plural = StandardPlural::fromString(key, status);  | 
188  | 0  |             if (U_FAILURE(status)) { return; } | 
189  | 0  |             if (data.patterns[getIndex(magnitude, plural)] != nullptr) { | 
190  | 0  |                 continue;  | 
191  | 0  |             }  | 
192  |  |  | 
193  |  |             // The value "0" means that we need to use the default pattern and not fall back  | 
194  |  |             // to parent locales. Example locale where this is relevant: 'it'.  | 
195  | 0  |             int32_t patternLength;  | 
196  | 0  |             const UChar *patternString = value.getString(patternLength, status);  | 
197  | 0  |             if (U_FAILURE(status)) { return; } | 
198  | 0  |             if (u_strcmp(patternString, u"0") == 0) { | 
199  | 0  |                 patternString = USE_FALLBACK;  | 
200  | 0  |                 patternLength = 0;  | 
201  | 0  |             }  | 
202  |  |  | 
203  |  |             // Save the pattern string. We will parse it lazily.  | 
204  | 0  |             data.patterns[getIndex(magnitude, plural)] = patternString;  | 
205  |  |  | 
206  |  |             // If necessary, compute the multiplier: the difference between the magnitude  | 
207  |  |             // and the number of zeros in the pattern.  | 
208  | 0  |             if (multiplier == 0) { | 
209  | 0  |                 int32_t numZeros = countZeros(patternString, patternLength);  | 
210  | 0  |                 if (numZeros > 0) { // numZeros==0 in certain cases, like Somali "Kun" | 
211  | 0  |                     multiplier = static_cast<int8_t> (numZeros - magnitude - 1);  | 
212  | 0  |                 }  | 
213  | 0  |             }  | 
214  | 0  |         }  | 
215  |  |  | 
216  |  |         // Save the multiplier.  | 
217  | 0  |         if (data.multipliers[magnitude] == 0) { | 
218  | 0  |             data.multipliers[magnitude] = multiplier;  | 
219  | 0  |             if (magnitude > data.largestMagnitude) { | 
220  | 0  |                 data.largestMagnitude = magnitude;  | 
221  | 0  |             }  | 
222  | 0  |             data.isEmpty = false;  | 
223  | 0  |         } else { | 
224  | 0  |             U_ASSERT(data.multipliers[magnitude] == multiplier);  | 
225  | 0  |         }  | 
226  | 0  |     }  | 
227  | 0  | }  | 
228  |  |  | 
229  |  | ///////////////////////////////////////////////////////////  | 
230  |  | /// END OF CompactData.java; BEGIN CompactNotation.java ///  | 
231  |  | ///////////////////////////////////////////////////////////  | 
232  |  |  | 
233  |  | CompactHandler::CompactHandler(  | 
234  |  |         CompactStyle compactStyle,  | 
235  |  |         const Locale &locale,  | 
236  |  |         const char *nsName,  | 
237  |  |         CompactType compactType,  | 
238  |  |         const PluralRules *rules,  | 
239  |  |         MutablePatternModifier *buildReference,  | 
240  |  |         bool safe,  | 
241  |  |         const MicroPropsGenerator *parent,  | 
242  |  |         UErrorCode &status)  | 
243  | 0  |         : rules(rules), parent(parent), safe(safe) { | 
244  | 0  |     data.populate(locale, nsName, compactStyle, compactType, status);  | 
245  | 0  |     if (safe) { | 
246  |  |         // Safe code path  | 
247  | 0  |         precomputeAllModifiers(*buildReference, status);  | 
248  | 0  |     } else { | 
249  |  |         // Unsafe code path  | 
250  |  |         // Store the MutablePatternModifier reference.  | 
251  | 0  |         unsafePatternModifier = buildReference;  | 
252  | 0  |     }  | 
253  | 0  | }  | 
254  |  |  | 
255  | 0  | CompactHandler::~CompactHandler() { | 
256  | 0  |     for (int32_t i = 0; i < precomputedModsLength; i++) { | 
257  | 0  |         delete precomputedMods[i].mod;  | 
258  | 0  |     }  | 
259  | 0  | }  | 
260  |  |  | 
261  | 0  | void CompactHandler::precomputeAllModifiers(MutablePatternModifier &buildReference, UErrorCode &status) { | 
262  | 0  |     if (U_FAILURE(status)) { return; } | 
263  |  |  | 
264  |  |     // Initial capacity of 12 for 0K, 00K, 000K, ...M, ...B, and ...T  | 
265  | 0  |     UVector allPatterns(12, status);  | 
266  | 0  |     if (U_FAILURE(status)) { return; } | 
267  | 0  |     data.getUniquePatterns(allPatterns, status);  | 
268  | 0  |     if (U_FAILURE(status)) { return; } | 
269  |  |  | 
270  |  |     // C++ only: ensure that precomputedMods has room.  | 
271  | 0  |     precomputedModsLength = allPatterns.size();  | 
272  | 0  |     if (precomputedMods.getCapacity() < precomputedModsLength) { | 
273  | 0  |         precomputedMods.resize(allPatterns.size(), status);  | 
274  | 0  |         if (U_FAILURE(status)) { return; } | 
275  | 0  |     }  | 
276  |  |  | 
277  | 0  |     for (int32_t i = 0; i < precomputedModsLength; i++) { | 
278  | 0  |         auto patternString = static_cast<const UChar *>(allPatterns[i]);  | 
279  | 0  |         UnicodeString hello(patternString);  | 
280  | 0  |         CompactModInfo &info = precomputedMods[i];  | 
281  | 0  |         ParsedPatternInfo patternInfo;  | 
282  | 0  |         PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status);  | 
283  | 0  |         if (U_FAILURE(status)) { return; } | 
284  | 0  |         buildReference.setPatternInfo(&patternInfo, {UFIELD_CATEGORY_NUMBER, UNUM_COMPACT_FIELD}); | 
285  | 0  |         info.mod = buildReference.createImmutable(status);  | 
286  | 0  |         if (U_FAILURE(status)) { return; } | 
287  | 0  |         info.patternString = patternString;  | 
288  | 0  |     }  | 
289  | 0  | }  | 
290  |  |  | 
291  |  | void CompactHandler::processQuantity(DecimalQuantity &quantity, MicroProps µs,  | 
292  | 0  |                                      UErrorCode &status) const { | 
293  | 0  |     parent->processQuantity(quantity, micros, status);  | 
294  | 0  |     if (U_FAILURE(status)) { return; } | 
295  |  |  | 
296  |  |     // Treat zero, NaN, and infinity as if they had magnitude 0  | 
297  | 0  |     int32_t magnitude;  | 
298  | 0  |     int32_t multiplier = 0;  | 
299  | 0  |     if (quantity.isZeroish()) { | 
300  | 0  |         magnitude = 0;  | 
301  | 0  |         micros.rounder.apply(quantity, status);  | 
302  | 0  |     } else { | 
303  |  |         // TODO: Revisit chooseMultiplierAndApply  | 
304  | 0  |         multiplier = micros.rounder.chooseMultiplierAndApply(quantity, data, status);  | 
305  | 0  |         magnitude = quantity.isZeroish() ? 0 : quantity.getMagnitude();  | 
306  | 0  |         magnitude -= multiplier;  | 
307  | 0  |     }  | 
308  |  | 
  | 
309  | 0  |     const UChar *patternString = data.getPattern(magnitude, rules, quantity);  | 
310  | 0  |     if (patternString == nullptr) { | 
311  |  |         // Use the default (non-compact) modifier.  | 
312  |  |         // No need to take any action.  | 
313  | 0  |     } else if (safe) { | 
314  |  |         // Safe code path.  | 
315  |  |         // Java uses a hash set here for O(1) lookup.  C++ uses a linear search.  | 
316  |  |         // TODO: Benchmark this and maybe change to a binary search or hash table.  | 
317  | 0  |         int32_t i = 0;  | 
318  | 0  |         for (; i < precomputedModsLength; i++) { | 
319  | 0  |             const CompactModInfo &info = precomputedMods[i];  | 
320  | 0  |             if (u_strcmp(patternString, info.patternString) == 0) { | 
321  | 0  |                 info.mod->applyToMicros(micros, quantity, status);  | 
322  | 0  |                 break;  | 
323  | 0  |             }  | 
324  | 0  |         }  | 
325  |  |         // It should be guaranteed that we found the entry.  | 
326  | 0  |         U_ASSERT(i < precomputedModsLength);  | 
327  | 0  |     } else { | 
328  |  |         // Unsafe code path.  | 
329  |  |         // Overwrite the PatternInfo in the existing modMiddle.  | 
330  |  |         // C++ Note: Use unsafePatternInfo for proper lifecycle.  | 
331  | 0  |         ParsedPatternInfo &patternInfo = const_cast<CompactHandler *>(this)->unsafePatternInfo;  | 
332  | 0  |         PatternParser::parseToPatternInfo(UnicodeString(patternString), patternInfo, status);  | 
333  | 0  |         unsafePatternModifier->setPatternInfo(  | 
334  | 0  |             &unsafePatternInfo,  | 
335  | 0  |             {UFIELD_CATEGORY_NUMBER, UNUM_COMPACT_FIELD}); | 
336  | 0  |         unsafePatternModifier->setNumberProperties(quantity.signum(), StandardPlural::Form::COUNT);  | 
337  | 0  |         micros.modMiddle = unsafePatternModifier;  | 
338  | 0  |     }  | 
339  |  |  | 
340  |  |     // Change the exponent only after we select appropriate plural form  | 
341  |  |     // for formatting purposes so that we preserve expected formatted  | 
342  |  |     // string behavior.  | 
343  | 0  |     quantity.adjustExponent(-1 * multiplier);  | 
344  |  |  | 
345  |  |     // We already performed rounding. Do not perform it again.  | 
346  | 0  |     micros.rounder = RoundingImpl::passThrough();  | 
347  | 0  | }  | 
348  |  |  | 
349  |  | #endif /* #if !UCONFIG_NO_FORMATTING */  |