/src/icu/source/i18n/measunit_extra.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2020 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  |  | 
4  |  | // Extra functions for MeasureUnit not needed for all clients.  | 
5  |  | // Separate .o file so that it can be removed for modularity.  | 
6  |  |  | 
7  |  | #include "unicode/utypes.h"  | 
8  |  |  | 
9  |  | #if !UCONFIG_NO_FORMATTING  | 
10  |  |  | 
11  |  | // Allow implicit conversion from char16_t* to UnicodeString for this file:  | 
12  |  | // Helpful in toString methods and elsewhere.  | 
13  |  | #define UNISTR_FROM_STRING_EXPLICIT  | 
14  |  |  | 
15  |  | #include "charstr.h"  | 
16  |  | #include "cmemory.h"  | 
17  |  | #include "cstring.h"  | 
18  |  | #include "measunit_impl.h"  | 
19  |  | #include "resource.h"  | 
20  |  | #include "uarrsort.h"  | 
21  |  | #include "uassert.h"  | 
22  |  | #include "ucln_in.h"  | 
23  |  | #include "umutex.h"  | 
24  |  | #include "unicode/bytestrie.h"  | 
25  |  | #include "unicode/bytestriebuilder.h"  | 
26  |  | #include "unicode/localpointer.h"  | 
27  |  | #include "unicode/measunit.h"  | 
28  |  | #include "unicode/stringpiece.h"  | 
29  |  | #include "unicode/stringtriebuilder.h"  | 
30  |  | #include "unicode/ures.h"  | 
31  |  | #include "unicode/ustringtrie.h"  | 
32  |  | #include "uresimp.h"  | 
33  |  | #include "util.h"  | 
34  |  | #include <cstdlib>  | 
35  |  |  | 
36  |  | U_NAMESPACE_BEGIN  | 
37  |  |  | 
38  |  |  | 
39  |  | namespace { | 
40  |  |  | 
41  |  | // TODO: Propose a new error code for this?  | 
42  |  | constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;  | 
43  |  |  | 
44  |  | // Trie value offset for SI or binary prefixes. This is big enough to ensure we only  | 
45  |  | // insert positive integers into the trie.  | 
46  |  | constexpr int32_t kPrefixOffset = 64;  | 
47  |  | static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_BIN > 0,  | 
48  |  |               "kPrefixOffset is too small for minimum UMeasurePrefix value");  | 
49  |  | static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_SI > 0,  | 
50  |  |               "kPrefixOffset is too small for minimum UMeasurePrefix value");  | 
51  |  |  | 
52  |  | // Trie value offset for compound parts, e.g. "-per-", "-", "-and-".  | 
53  |  | constexpr int32_t kCompoundPartOffset = 128;  | 
54  |  | static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_BIN,  | 
55  |  |               "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens");  | 
56  |  | static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_SI,  | 
57  |  |               "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens");  | 
58  |  |  | 
59  |  | enum CompoundPart { | 
60  |  |     // Represents "-per-"  | 
61  |  |     COMPOUND_PART_PER = kCompoundPartOffset,  | 
62  |  |     // Represents "-"  | 
63  |  |     COMPOUND_PART_TIMES,  | 
64  |  |     // Represents "-and-"  | 
65  |  |     COMPOUND_PART_AND,  | 
66  |  | };  | 
67  |  |  | 
// First trie value used for compound parts that may start an identifier.
constexpr int32_t kInitialCompoundPartOffset = 192;

// Compound parts that are allowed at the very beginning of an identifier.
enum InitialCompoundPart {
    // "per-" is the only such part.
    INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
};
76  |  |  | 
// Base trie value for power prefixes such as "square-", "cubic-", "pow2-".
// A power token's value is this offset plus the exponent.
constexpr int32_t kPowerPartOffset = 256;

// One enumerator per supported exponent (2 through 15).
enum PowerPart {
    POWER_PART_P2 = kPowerPartOffset + 2,
    POWER_PART_P3 = kPowerPartOffset + 3,
    POWER_PART_P4 = kPowerPartOffset + 4,
    POWER_PART_P5 = kPowerPartOffset + 5,
    POWER_PART_P6 = kPowerPartOffset + 6,
    POWER_PART_P7 = kPowerPartOffset + 7,
    POWER_PART_P8 = kPowerPartOffset + 8,
    POWER_PART_P9 = kPowerPartOffset + 9,
    POWER_PART_P10 = kPowerPartOffset + 10,
    POWER_PART_P11 = kPowerPartOffset + 11,
    POWER_PART_P12 = kPowerPartOffset + 12,
    POWER_PART_P13 = kPowerPartOffset + 13,
    POWER_PART_P14 = kPowerPartOffset + 14,
    POWER_PART_P15 = kPowerPartOffset + 15,
};

// Base trie value for simple units, e.g. "gram", "nautical-mile",
// "fluid-ounce-imperial".
constexpr int32_t kSimpleUnitOffset = 512;
100  |  |  | 
101  |  | const struct UnitPrefixStrings { | 
102  |  |     const char* const string;  | 
103  |  |     UMeasurePrefix value;  | 
104  |  | } gUnitPrefixStrings[] = { | 
105  |  |     // SI prefixes  | 
106  |  |     { "yotta", UMEASURE_PREFIX_YOTTA }, | 
107  |  |     { "zetta", UMEASURE_PREFIX_ZETTA }, | 
108  |  |     { "exa", UMEASURE_PREFIX_EXA }, | 
109  |  |     { "peta", UMEASURE_PREFIX_PETA }, | 
110  |  |     { "tera", UMEASURE_PREFIX_TERA }, | 
111  |  |     { "giga", UMEASURE_PREFIX_GIGA }, | 
112  |  |     { "mega", UMEASURE_PREFIX_MEGA }, | 
113  |  |     { "kilo", UMEASURE_PREFIX_KILO }, | 
114  |  |     { "hecto", UMEASURE_PREFIX_HECTO }, | 
115  |  |     { "deka", UMEASURE_PREFIX_DEKA }, | 
116  |  |     { "deci", UMEASURE_PREFIX_DECI }, | 
117  |  |     { "centi", UMEASURE_PREFIX_CENTI }, | 
118  |  |     { "milli", UMEASURE_PREFIX_MILLI }, | 
119  |  |     { "micro", UMEASURE_PREFIX_MICRO }, | 
120  |  |     { "nano", UMEASURE_PREFIX_NANO }, | 
121  |  |     { "pico", UMEASURE_PREFIX_PICO }, | 
122  |  |     { "femto", UMEASURE_PREFIX_FEMTO }, | 
123  |  |     { "atto", UMEASURE_PREFIX_ATTO }, | 
124  |  |     { "zepto", UMEASURE_PREFIX_ZEPTO }, | 
125  |  |     { "yocto", UMEASURE_PREFIX_YOCTO }, | 
126  |  |     // Binary prefixes  | 
127  |  |     { "yobi", UMEASURE_PREFIX_YOBI }, | 
128  |  |     { "zebi", UMEASURE_PREFIX_ZEBI }, | 
129  |  |     { "exbi", UMEASURE_PREFIX_EXBI }, | 
130  |  |     { "pebi", UMEASURE_PREFIX_PEBI }, | 
131  |  |     { "tebi", UMEASURE_PREFIX_TEBI }, | 
132  |  |     { "gibi", UMEASURE_PREFIX_GIBI }, | 
133  |  |     { "mebi", UMEASURE_PREFIX_MEBI }, | 
134  |  |     { "kibi", UMEASURE_PREFIX_KIBI }, | 
135  |  | };  | 
136  |  |  | 
137  |  | /**  | 
138  |  |  * A ResourceSink that collects simple unit identifiers from the keys of the  | 
139  |  |  * convertUnits table into an array, and adds these values to a TrieBuilder,  | 
140  |  |  * with associated values being their index into this array plus a specified  | 
141  |  |  * offset.  | 
142  |  |  *  | 
143  |  |  * Example code:  | 
144  |  |  *  | 
145  |  |  *     UErrorCode status = U_ZERO_ERROR;  | 
146  |  |  *     BytesTrieBuilder b(status);  | 
147  |  |  *     int32_t ARR_SIZE = 200;  | 
148  |  |  *     const char *unitIdentifiers[ARR_SIZE];  | 
149  |  |  *     int32_t *unitCategories[ARR_SIZE];  | 
150  |  |  *     SimpleUnitIdentifiersSink identifierSink(gSerializedUnitCategoriesTrie, unitIdentifiers,  | 
151  |  |  *                                              unitCategories, ARR_SIZE, b, kTrieValueOffset);  | 
152  |  |  *     LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));  | 
153  |  |  *     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);  | 
154  |  |  */  | 
155  |  | class SimpleUnitIdentifiersSink : public icu::ResourceSink { | 
156  |  |   public:  | 
157  |  |     /**  | 
158  |  |      * Constructor.  | 
159  |  |      * @param quantitiesTrieData The data for constructing a quantitiesTrie,  | 
160  |  |      *     which maps from a simple unit identifier to an index into the  | 
161  |  |      *     gCategories array.  | 
162  |  |      * @param out Array of char* to which pointers to the simple unit  | 
163  |  |      *     identifiers will be saved. (Does not take ownership.)  | 
164  |  |      * @param outCategories Array of int32_t to which category indexes will be  | 
165  |  |      *     saved: this corresponds to simple unit IDs saved to `out`, mapping  | 
166  |  |      *     from the ID to the value produced by the quantitiesTrie (which is an  | 
167  |  |      *     index into the gCategories array).  | 
168  |  |      * @param outSize The size of `out` and `outCategories`.  | 
169  |  |      * @param trieBuilder The trie builder to which the simple unit identifier  | 
170  |  |      *     should be added. The trie builder must outlive this resource sink.  | 
171  |  |      * @param trieValueOffset This is added to the index of the identifier in  | 
172  |  |      *     the `out` array, before adding to `trieBuilder` as the value  | 
173  |  |      *     associated with the identifier.  | 
174  |  |      */  | 
175  |  |     explicit SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData, const char **out,  | 
176  |  |                                        int32_t *outCategories, int32_t outSize,  | 
177  |  |                                        BytesTrieBuilder &trieBuilder, int32_t trieValueOffset)  | 
178  | 0  |         : outArray(out), outCategories(outCategories), outSize(outSize), trieBuilder(trieBuilder),  | 
179  | 0  |           trieValueOffset(trieValueOffset), quantitiesTrieData(quantitiesTrieData), outIndex(0) {} | 
180  |  |  | 
181  |  |     /**  | 
182  |  |      * Adds the table keys found in value to the output vector.  | 
183  |  |      * @param key The key of the resource passed to `value`: the second  | 
184  |  |      *     parameter of the ures_getAllItemsWithFallback() call.  | 
185  |  |      * @param value Should be a ResourceTable value, if  | 
186  |  |      *     ures_getAllItemsWithFallback() was called correctly for this sink.  | 
187  |  |      * @param noFallback Ignored.  | 
188  |  |      * @param status The standard ICU error code output parameter.  | 
189  |  |      */  | 
190  | 0  |     void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) { | 
191  | 0  |         ResourceTable table = value.getTable(status);  | 
192  | 0  |         if (U_FAILURE(status)) return;  | 
193  |  |  | 
194  | 0  |         if (outIndex + table.getSize() > outSize) { | 
195  | 0  |             status = U_INDEX_OUTOFBOUNDS_ERROR;  | 
196  | 0  |             return;  | 
197  | 0  |         }  | 
198  |  |  | 
199  | 0  |         BytesTrie quantitiesTrie(quantitiesTrieData.data());  | 
200  |  |  | 
201  |  |         // Collect keys from the table resource.  | 
202  | 0  |         const char *simpleUnitID;  | 
203  | 0  |         for (int32_t i = 0; table.getKeyAndValue(i, simpleUnitID, value); ++i) { | 
204  | 0  |             U_ASSERT(i < table.getSize());  | 
205  | 0  |             U_ASSERT(outIndex < outSize);  | 
206  | 0  |             if (uprv_strcmp(simpleUnitID, "kilogram") == 0) { | 
207  |  |                 // For parsing, we use "gram", the prefixless metric mass unit. We  | 
208  |  |                 // thus ignore the SI Base Unit of Mass: it exists due to being the  | 
209  |  |                 // mass conversion target unit, but not needed for MeasureUnit  | 
210  |  |                 // parsing.  | 
211  | 0  |                 continue;  | 
212  | 0  |             }  | 
213  | 0  |             outArray[outIndex] = simpleUnitID;  | 
214  | 0  |             trieBuilder.add(simpleUnitID, trieValueOffset + outIndex, status);  | 
215  |  |  | 
216  |  |             // Find the base target unit for this simple unit  | 
217  | 0  |             ResourceTable table = value.getTable(status);  | 
218  | 0  |             if (U_FAILURE(status)) { return; } | 
219  | 0  |             if (!table.findValue("target", value)) { | 
220  | 0  |                 status = U_INVALID_FORMAT_ERROR;  | 
221  | 0  |                 break;  | 
222  | 0  |             }  | 
223  | 0  |             int32_t len;  | 
224  | 0  |             const UChar* uTarget = value.getString(len, status);  | 
225  | 0  |             CharString target;  | 
226  | 0  |             target.appendInvariantChars(uTarget, len, status);  | 
227  | 0  |             if (U_FAILURE(status)) { return; } | 
228  | 0  |             quantitiesTrie.reset();  | 
229  | 0  |             UStringTrieResult result = quantitiesTrie.next(target.data(), target.length());  | 
230  | 0  |             if (!USTRINGTRIE_HAS_VALUE(result)) { | 
231  | 0  |                 status = U_INVALID_FORMAT_ERROR;  | 
232  | 0  |                 break;  | 
233  | 0  |             }  | 
234  | 0  |             outCategories[outIndex] = quantitiesTrie.getValue();  | 
235  |  | 
  | 
236  | 0  |             outIndex++;  | 
237  | 0  |         }  | 
238  | 0  |     }  | 
239  |  |  | 
240  |  |   private:  | 
241  |  |     const char **outArray;  | 
242  |  |     int32_t *outCategories;  | 
243  |  |     int32_t outSize;  | 
244  |  |     BytesTrieBuilder &trieBuilder;  | 
245  |  |     int32_t trieValueOffset;  | 
246  |  |  | 
247  |  |     StringPiece quantitiesTrieData;  | 
248  |  |  | 
249  |  |     int32_t outIndex;  | 
250  |  | };  | 
251  |  |  | 
252  |  | /**  | 
253  |  |  * A ResourceSink that collects information from `unitQuantities` in the `units`  | 
254  |  |  * resource to provide key->value lookups from base unit to category, as well as  | 
255  |  |  * preserving ordering information for these categories. See `units.txt`.  | 
256  |  |  *  | 
257  |  |  * For example: "kilogram" -> "mass", "meter-per-second" -> "speed".  | 
258  |  |  *  | 
259  |  |  * In C++ unitQuantity values are collected in order into a UChar* array, while  | 
260  |  |  * unitQuantity keys are added added to a TrieBuilder, with associated values  | 
261  |  |  * being the index into the aforementioned UChar* array.  | 
262  |  |  */  | 
263  |  | class CategoriesSink : public icu::ResourceSink { | 
264  |  |   public:  | 
265  |  |     /**  | 
266  |  |      * Constructor.  | 
267  |  |      * @param out Array of UChar* to which unitQuantity values will be saved.  | 
268  |  |      *     The pointers returned  not owned: they point directly at the resource  | 
269  |  |      *     strings in static memory.  | 
270  |  |      * @param outSize The size of the `out` array.  | 
271  |  |      * @param trieBuilder The trie builder to which the keys (base units) of  | 
272  |  |      *     each unitQuantity will be added, each with value being the offset  | 
273  |  |      *     into `out`.  | 
274  |  |      */  | 
275  |  |     explicit CategoriesSink(const UChar **out, int32_t &outSize, BytesTrieBuilder &trieBuilder)  | 
276  | 0  |         : outQuantitiesArray(out), outSize(outSize), trieBuilder(trieBuilder), outIndex(0) {} | 
277  |  |  | 
278  | 0  |     void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) { | 
279  | 0  |         ResourceArray array = value.getArray(status);  | 
280  | 0  |         if (U_FAILURE(status)) { | 
281  | 0  |             return;  | 
282  | 0  |         }  | 
283  |  |  | 
284  | 0  |         if (outIndex + array.getSize() > outSize) { | 
285  | 0  |             status = U_INDEX_OUTOFBOUNDS_ERROR;  | 
286  | 0  |             return;  | 
287  | 0  |         }  | 
288  |  |  | 
289  | 0  |         for (int32_t i = 0; array.getValue(i, value); ++i) { | 
290  | 0  |             U_ASSERT(outIndex < outSize);  | 
291  | 0  |             ResourceTable table = value.getTable(status);  | 
292  | 0  |             if (U_FAILURE(status)) { | 
293  | 0  |                 return;  | 
294  | 0  |             }  | 
295  | 0  |             if (table.getSize() != 1) { | 
296  | 0  |                 status = U_INVALID_FORMAT_ERROR;  | 
297  | 0  |                 return;  | 
298  | 0  |             }  | 
299  | 0  |             const char *key;  | 
300  | 0  |             table.getKeyAndValue(0, key, value);  | 
301  | 0  |             int32_t uTmpLen;  | 
302  | 0  |             outQuantitiesArray[outIndex] = value.getString(uTmpLen, status);  | 
303  | 0  |             trieBuilder.add(key, outIndex, status);  | 
304  | 0  |             outIndex++;  | 
305  | 0  |         }  | 
306  | 0  |     }  | 
307  |  |  | 
308  |  |   private:  | 
309  |  |     const UChar **outQuantitiesArray;  | 
310  |  |     int32_t &outSize;  | 
311  |  |     BytesTrieBuilder &trieBuilder;  | 
312  |  |  | 
313  |  |     int32_t outIndex;  | 
314  |  | };  | 
315  |  |  | 
316  |  | icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER;  | 
317  |  |  | 
318  |  | // Array of simple unit IDs.  | 
319  |  | //  | 
320  |  | // The array memory itself is owned by this pointer, but the individual char* in  | 
321  |  | // that array point at static memory. (Note that these char* are also returned  | 
322  |  | // by SingleUnitImpl::getSimpleUnitID().)  | 
323  |  | const char **gSimpleUnits = nullptr;  | 
324  |  |  | 
325  |  | // Maps from the value associated with each simple unit ID to an index into the  | 
326  |  | // gCategories array.  | 
327  |  | int32_t *gSimpleUnitCategories = nullptr;  | 
328  |  |  | 
329  |  | char *gSerializedUnitExtrasStemTrie = nullptr;  | 
330  |  |  | 
331  |  | // Array of UChar* pointing at the unit categories (aka "quantities", aka  | 
332  |  | // "types"), as found in the `unitQuantities` resource. The array memory itself  | 
333  |  | // is owned by this pointer, but the individual UChar* in that array point at  | 
334  |  | // static memory.  | 
335  |  | const UChar **gCategories = nullptr;  | 
336  |  | // Number of items in `gCategories`.  | 
337  |  | int32_t gCategoriesCount = 0;  | 
338  |  | // TODO: rather save an index into gCategories?  | 
339  |  | const char *kConsumption = "consumption";  | 
340  |  | size_t kConsumptionLen = strlen("consumption"); | 
341  |  | // Serialized BytesTrie for mapping from base units to indices into gCategories.  | 
342  |  | char *gSerializedUnitCategoriesTrie = nullptr;  | 
343  |  |  | 
344  | 0  | UBool U_CALLCONV cleanupUnitExtras() { | 
345  | 0  |     uprv_free(gSerializedUnitCategoriesTrie);  | 
346  | 0  |     gSerializedUnitCategoriesTrie = nullptr;  | 
347  | 0  |     uprv_free(gCategories);  | 
348  | 0  |     gCategories = nullptr;  | 
349  | 0  |     uprv_free(gSerializedUnitExtrasStemTrie);  | 
350  | 0  |     gSerializedUnitExtrasStemTrie = nullptr;  | 
351  | 0  |     uprv_free(gSimpleUnitCategories);  | 
352  | 0  |     gSimpleUnitCategories = nullptr;  | 
353  | 0  |     uprv_free(gSimpleUnits);  | 
354  | 0  |     gSimpleUnits = nullptr;  | 
355  | 0  |     gUnitExtrasInitOnce.reset();  | 
356  | 0  |     return TRUE;  | 
357  | 0  | }  | 
358  |  |  | 
359  | 0  | void U_CALLCONV initUnitExtras(UErrorCode& status) { | 
360  | 0  |     ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);  | 
361  | 0  |     LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));  | 
362  |  |  | 
363  |  |     // Collect unitQuantities information into gSerializedUnitCategoriesTrie and gCategories.  | 
364  | 0  |     const char *CATEGORY_TABLE_NAME = "unitQuantities";  | 
365  | 0  |     LocalUResourceBundlePointer unitQuantities(  | 
366  | 0  |         ures_getByKey(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, nullptr, &status));  | 
367  | 0  |     if (U_FAILURE(status)) { return; } | 
368  | 0  |     gCategoriesCount = unitQuantities.getAlias()->fSize;  | 
369  | 0  |     size_t quantitiesMallocSize = sizeof(UChar *) * gCategoriesCount;  | 
370  | 0  |     gCategories = static_cast<const UChar **>(uprv_malloc(quantitiesMallocSize));  | 
371  | 0  |     if (gCategories == nullptr) { | 
372  | 0  |         status = U_MEMORY_ALLOCATION_ERROR;  | 
373  | 0  |         return;  | 
374  | 0  |     }  | 
375  | 0  |     uprv_memset(gCategories, 0, quantitiesMallocSize);  | 
376  | 0  |     BytesTrieBuilder quantitiesBuilder(status);  | 
377  | 0  |     CategoriesSink categoriesSink(gCategories, gCategoriesCount, quantitiesBuilder);  | 
378  | 0  |     ures_getAllItemsWithFallback(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, categoriesSink, status);  | 
379  | 0  |     StringPiece resultQuantities = quantitiesBuilder.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);  | 
380  | 0  |     if (U_FAILURE(status)) { return; } | 
381  |  |     // Copy the result into the global constant pointer  | 
382  | 0  |     size_t numBytesQuantities = resultQuantities.length();  | 
383  | 0  |     gSerializedUnitCategoriesTrie = static_cast<char *>(uprv_malloc(numBytesQuantities));  | 
384  | 0  |     if (gSerializedUnitCategoriesTrie == nullptr) { | 
385  | 0  |         status = U_MEMORY_ALLOCATION_ERROR;  | 
386  | 0  |         return;  | 
387  | 0  |     }  | 
388  | 0  |     uprv_memcpy(gSerializedUnitCategoriesTrie, resultQuantities.data(), numBytesQuantities);  | 
389  |  |  | 
390  |  |     // Build the BytesTrie that Parser needs for parsing unit identifiers.  | 
391  |  | 
  | 
392  | 0  |     BytesTrieBuilder b(status);  | 
393  | 0  |     if (U_FAILURE(status)) { return; } | 
394  |  |  | 
395  |  |     // Add SI and binary prefixes  | 
396  | 0  |     for (const auto& unitPrefixInfo : gUnitPrefixStrings) { | 
397  | 0  |         b.add(unitPrefixInfo.string, unitPrefixInfo.value + kPrefixOffset, status);  | 
398  | 0  |     }  | 
399  | 0  |     if (U_FAILURE(status)) { return; } | 
400  |  |  | 
401  |  |     // Add syntax parts (compound, power prefixes)  | 
402  | 0  |     b.add("-per-", COMPOUND_PART_PER, status); | 
403  | 0  |     b.add("-", COMPOUND_PART_TIMES, status); | 
404  | 0  |     b.add("-and-", COMPOUND_PART_AND, status); | 
405  | 0  |     b.add("per-", INITIAL_COMPOUND_PART_PER, status); | 
406  | 0  |     b.add("square-", POWER_PART_P2, status); | 
407  | 0  |     b.add("cubic-", POWER_PART_P3, status); | 
408  | 0  |     b.add("pow2-", POWER_PART_P2, status); | 
409  | 0  |     b.add("pow3-", POWER_PART_P3, status); | 
410  | 0  |     b.add("pow4-", POWER_PART_P4, status); | 
411  | 0  |     b.add("pow5-", POWER_PART_P5, status); | 
412  | 0  |     b.add("pow6-", POWER_PART_P6, status); | 
413  | 0  |     b.add("pow7-", POWER_PART_P7, status); | 
414  | 0  |     b.add("pow8-", POWER_PART_P8, status); | 
415  | 0  |     b.add("pow9-", POWER_PART_P9, status); | 
416  | 0  |     b.add("pow10-", POWER_PART_P10, status); | 
417  | 0  |     b.add("pow11-", POWER_PART_P11, status); | 
418  | 0  |     b.add("pow12-", POWER_PART_P12, status); | 
419  | 0  |     b.add("pow13-", POWER_PART_P13, status); | 
420  | 0  |     b.add("pow14-", POWER_PART_P14, status); | 
421  | 0  |     b.add("pow15-", POWER_PART_P15, status); | 
422  | 0  |     if (U_FAILURE(status)) { return; } | 
423  |  |  | 
424  |  |     // Add sanctioned simple units by offset: simple units all have entries in  | 
425  |  |     // units/convertUnits resources.  | 
426  | 0  |     LocalUResourceBundlePointer convertUnits(  | 
427  | 0  |         ures_getByKey(unitsBundle.getAlias(), "convertUnits", nullptr, &status));  | 
428  | 0  |     if (U_FAILURE(status)) { return; } | 
429  |  |  | 
430  |  |     // Allocate enough space: with identifierSink below skipping kilogram, we're  | 
431  |  |     // probably allocating one more than needed.  | 
432  | 0  |     int32_t simpleUnitsCount = convertUnits.getAlias()->fSize;  | 
433  | 0  |     int32_t arrayMallocSize = sizeof(char *) * simpleUnitsCount;  | 
434  | 0  |     gSimpleUnits = static_cast<const char **>(uprv_malloc(arrayMallocSize));  | 
435  | 0  |     if (gSimpleUnits == nullptr) { | 
436  | 0  |         status = U_MEMORY_ALLOCATION_ERROR;  | 
437  | 0  |         return;  | 
438  | 0  |     }  | 
439  | 0  |     uprv_memset(gSimpleUnits, 0, arrayMallocSize);  | 
440  | 0  |     arrayMallocSize = sizeof(int32_t) * simpleUnitsCount;  | 
441  | 0  |     gSimpleUnitCategories = static_cast<int32_t *>(uprv_malloc(arrayMallocSize));  | 
442  | 0  |     if (gSimpleUnitCategories == nullptr) { | 
443  | 0  |         status = U_MEMORY_ALLOCATION_ERROR;  | 
444  | 0  |         return;  | 
445  | 0  |     }  | 
446  | 0  |     uprv_memset(gSimpleUnitCategories, 0, arrayMallocSize);  | 
447  |  |  | 
448  |  |     // Populate gSimpleUnits and build the associated trie.  | 
449  | 0  |     SimpleUnitIdentifiersSink identifierSink(resultQuantities, gSimpleUnits, gSimpleUnitCategories,  | 
450  | 0  |                                              simpleUnitsCount, b, kSimpleUnitOffset);  | 
451  | 0  |     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);  | 
452  |  |  | 
453  |  |     // Build the CharsTrie  | 
454  |  |     // TODO: Use SLOW or FAST here?  | 
455  | 0  |     StringPiece result = b.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);  | 
456  | 0  |     if (U_FAILURE(status)) { return; } | 
457  |  |  | 
458  |  |     // Copy the result into the global constant pointer  | 
459  | 0  |     size_t numBytes = result.length();  | 
460  | 0  |     gSerializedUnitExtrasStemTrie = static_cast<char *>(uprv_malloc(numBytes));  | 
461  | 0  |     if (gSerializedUnitExtrasStemTrie == nullptr) { | 
462  | 0  |         status = U_MEMORY_ALLOCATION_ERROR;  | 
463  | 0  |         return;  | 
464  | 0  |     }  | 
465  | 0  |     uprv_memcpy(gSerializedUnitExtrasStemTrie, result.data(), numBytes);  | 
466  | 0  | }  | 
467  |  |  | 
468  |  | class Token { | 
469  |  | public:  | 
470  | 0  |     Token(int32_t match) : fMatch(match) {} | 
471  |  |  | 
472  |  |     enum Type { | 
473  |  |         TYPE_UNDEFINED,  | 
474  |  |         TYPE_PREFIX,  | 
475  |  |         // Token type for "-per-", "-", and "-and-".  | 
476  |  |         TYPE_COMPOUND_PART,  | 
477  |  |         // Token type for "per-".  | 
478  |  |         TYPE_INITIAL_COMPOUND_PART,  | 
479  |  |         TYPE_POWER_PART,  | 
480  |  |         TYPE_SIMPLE_UNIT,  | 
481  |  |     };  | 
482  |  |  | 
483  |  |     // Calling getType() is invalid, resulting in an assertion failure, if Token  | 
484  |  |     // value isn't positive.  | 
485  | 0  |     Type getType() const { | 
486  | 0  |         U_ASSERT(fMatch > 0);  | 
487  | 0  |         if (fMatch < kCompoundPartOffset) { | 
488  | 0  |             return TYPE_PREFIX;  | 
489  | 0  |         }  | 
490  | 0  |         if (fMatch < kInitialCompoundPartOffset) { | 
491  | 0  |             return TYPE_COMPOUND_PART;  | 
492  | 0  |         }  | 
493  | 0  |         if (fMatch < kPowerPartOffset) { | 
494  | 0  |             return TYPE_INITIAL_COMPOUND_PART;  | 
495  | 0  |         }  | 
496  | 0  |         if (fMatch < kSimpleUnitOffset) { | 
497  | 0  |             return TYPE_POWER_PART;  | 
498  | 0  |         }  | 
499  | 0  |         return TYPE_SIMPLE_UNIT;  | 
500  | 0  |     }  | 
501  |  |  | 
502  | 0  |     UMeasurePrefix getUnitPrefix() const { | 
503  | 0  |         U_ASSERT(getType() == TYPE_PREFIX);  | 
504  | 0  |         return static_cast<UMeasurePrefix>(fMatch - kPrefixOffset);  | 
505  | 0  |     }  | 
506  |  |  | 
507  |  |     // Valid only for tokens with type TYPE_COMPOUND_PART.  | 
508  | 0  |     int32_t getMatch() const { | 
509  | 0  |         U_ASSERT(getType() == TYPE_COMPOUND_PART);  | 
510  | 0  |         return fMatch;  | 
511  | 0  |     }  | 
512  |  |  | 
513  | 0  |     int32_t getInitialCompoundPart() const { | 
514  | 0  |         // Even if there is only one InitialCompoundPart value, we have this  | 
515  | 0  |         // function for the simplicity of code consistency.  | 
516  | 0  |         U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);  | 
517  | 0  |         // Defensive: if this assert fails, code using this function also needs  | 
518  | 0  |         // to change.  | 
519  | 0  |         U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);  | 
520  | 0  |         return fMatch;  | 
521  | 0  |     }  | 
522  |  |  | 
523  | 0  |     int8_t getPower() const { | 
524  | 0  |         U_ASSERT(getType() == TYPE_POWER_PART);  | 
525  | 0  |         return static_cast<int8_t>(fMatch - kPowerPartOffset);  | 
526  | 0  |     }  | 
527  |  |  | 
528  | 0  |     int32_t getSimpleUnitIndex() const { | 
529  | 0  |         U_ASSERT(getType() == TYPE_SIMPLE_UNIT);  | 
530  | 0  |         return fMatch - kSimpleUnitOffset;  | 
531  | 0  |     }  | 
532  |  |  | 
533  |  | private:  | 
534  |  |     int32_t fMatch;  | 
535  |  | };  | 
536  |  |  | 
537  |  | class Parser { | 
538  |  | public:  | 
539  |  |     /**  | 
540  |  |      * Factory function for parsing the given identifier.  | 
541  |  |      *  | 
542  |  |      * @param source The identifier to parse. This function does not make a copy  | 
543  |  |      * of source: the underlying string that source points at, must outlive the  | 
544  |  |      * parser.  | 
545  |  |      * @param status ICU error code.  | 
546  |  |      */  | 
547  | 0  |     static Parser from(StringPiece source, UErrorCode& status) { | 
548  | 0  |         if (U_FAILURE(status)) { | 
549  | 0  |             return Parser();  | 
550  | 0  |         }  | 
551  | 0  |         umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);  | 
552  | 0  |         if (U_FAILURE(status)) { | 
553  | 0  |             return Parser();  | 
554  | 0  |         }  | 
555  | 0  |         return Parser(source);  | 
556  | 0  |     }  | 
557  |  |  | 
558  | 0  |     MeasureUnitImpl parse(UErrorCode& status) { | 
559  | 0  |         MeasureUnitImpl result;  | 
560  |  | 
  | 
561  | 0  |         if (U_FAILURE(status)) { | 
562  | 0  |             return result;  | 
563  | 0  |         }  | 
564  | 0  |         if (fSource.empty()) { | 
565  |  |             // The dimensionless unit: nothing to parse. Leave result as is.  | 
566  | 0  |             return result;  | 
567  | 0  |         }  | 
568  |  |  | 
569  | 0  |         while (hasNext()) { | 
570  | 0  |             bool sawAnd = false;  | 
571  |  | 
  | 
572  | 0  |             SingleUnitImpl singleUnit = nextSingleUnit(sawAnd, status);  | 
573  | 0  |             if (U_FAILURE(status)) { | 
574  | 0  |                 return result;  | 
575  | 0  |             }  | 
576  |  |  | 
577  | 0  |             bool added = result.appendSingleUnit(singleUnit, status);  | 
578  | 0  |             if (U_FAILURE(status)) { | 
579  | 0  |                 return result;  | 
580  | 0  |             }  | 
581  |  |  | 
582  | 0  |             if (sawAnd && !added) { | 
583  |  |                 // Two similar units are not allowed in a mixed unit.  | 
584  | 0  |                 status = kUnitIdentifierSyntaxError;  | 
585  | 0  |                 return result;  | 
586  | 0  |             }  | 
587  |  |  | 
588  | 0  |             if (result.singleUnits.length() >= 2) { | 
589  |  |                 // nextSingleUnit fails appropriately for "per" and "and" in the  | 
590  |  |                 // same identifier. It doesn't fail for other compound units  | 
591  |  |                 // (COMPOUND_PART_TIMES). Consequently we take care of that  | 
592  |  |                 // here.  | 
593  | 0  |                 UMeasureUnitComplexity complexity =  | 
594  | 0  |                     sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;  | 
595  | 0  |                 if (result.singleUnits.length() == 2) { | 
596  |  |                     // After appending two singleUnits, the complexity will be `UMEASURE_UNIT_COMPOUND`  | 
597  | 0  |                     U_ASSERT(result.complexity == UMEASURE_UNIT_COMPOUND);  | 
598  | 0  |                     result.complexity = complexity;  | 
599  | 0  |                 } else if (result.complexity != complexity) { | 
600  |  |                     // Can't have mixed compound units  | 
601  | 0  |                     status = kUnitIdentifierSyntaxError;  | 
602  | 0  |                     return result;  | 
603  | 0  |                 }  | 
604  | 0  |             }  | 
605  | 0  |         }  | 
606  |  |  | 
607  | 0  |         return result;  | 
608  | 0  |     }  | 
609  |  |  | 
610  |  | private:  | 
611  |  |     // Tracks parser progress: the offset into fSource.  | 
612  |  |     int32_t fIndex = 0;  | 
613  |  |  | 
614  |  |     // Since we're not owning this memory, whatever is passed to the constructor  | 
615  |  |     // should live longer than this Parser - and the parser shouldn't return any  | 
616  |  |     // references to that string.  | 
617  |  |     StringPiece fSource;  | 
618  |  |     BytesTrie fTrie;  | 
619  |  |  | 
620  |  |     // Set to true when we've seen a "-per-" or a "per-", after which all units  | 
621  |  |     // are in the denominator. Until we find an "-and-", at which point the  | 
622  |  |     // identifier is invalid pending TODO(CLDR-13700).  | 
623  |  |     bool fAfterPer = false;  | 
624  |  |  | 
625  | 0  |     Parser() : fSource(""), fTrie(u"") {} | 
626  |  |  | 
627  |  |     Parser(StringPiece source)  | 
628  | 0  |         : fSource(source), fTrie(gSerializedUnitExtrasStemTrie) {} | 
629  |  |  | 
630  | 0  |     inline bool hasNext() const { | 
631  | 0  |         return fIndex < fSource.length();  | 
632  | 0  |     }  | 
633  |  |  | 
634  |  |     // Returns the next Token parsed from fSource, advancing fIndex to the end  | 
635  |  |     // of that token in fSource. In case of U_FAILURE(status), the token  | 
636  |  |     // returned will cause an abort if getType() is called on it.  | 
637  | 0  |     Token nextToken(UErrorCode& status) { | 
638  | 0  |         fTrie.reset();  | 
639  | 0  |         int32_t match = -1;  | 
640  |  |         // Saves the position in the fSource string for the end of the most  | 
641  |  |         // recent matching token.  | 
642  | 0  |         int32_t previ = -1;  | 
643  |  |         // Find the longest token that matches a value in the trie:  | 
644  | 0  |         while (fIndex < fSource.length()) { | 
645  | 0  |             auto result = fTrie.next(fSource.data()[fIndex++]);  | 
646  | 0  |             if (result == USTRINGTRIE_NO_MATCH) { | 
647  | 0  |                 break;  | 
648  | 0  |             } else if (result == USTRINGTRIE_NO_VALUE) { | 
649  | 0  |                 continue;  | 
650  | 0  |             }  | 
651  | 0  |             U_ASSERT(USTRINGTRIE_HAS_VALUE(result));  | 
652  | 0  |             match = fTrie.getValue();  | 
653  | 0  |             previ = fIndex;  | 
654  | 0  |             if (result == USTRINGTRIE_FINAL_VALUE) { | 
655  | 0  |                 break;  | 
656  | 0  |             }  | 
657  | 0  |             U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);  | 
658  |  |             // continue;  | 
659  | 0  |         }  | 
660  |  | 
  | 
661  | 0  |         if (match < 0) { | 
662  | 0  |             status = kUnitIdentifierSyntaxError;  | 
663  | 0  |         } else { | 
664  | 0  |             fIndex = previ;  | 
665  | 0  |         }  | 
666  | 0  |         return Token(match);  | 
667  | 0  |     }  | 
668  |  |  | 
669  |  |     /**  | 
670  |  |      * Returns the next "single unit" via result.  | 
671  |  |      *  | 
672  |  |      * If a "-per-" was parsed, the result will have appropriate negative  | 
673  |  |      * dimensionality.  | 
674  |  |      *  | 
675  |  |      * Returns an error if we parse both compound units and "-and-", since mixed  | 
676  |  |      * compound units are not yet supported - TODO(CLDR-13700).  | 
677  |  |      *  | 
678  |  |      * @param result Will be overwritten by the result, if status shows success.  | 
679  |  |      * @param sawAnd If an "-and-" was parsed prior to finding the "single  | 
680  |  |      * unit", sawAnd is set to true. If not, it is left as is.  | 
681  |  |      * @param status ICU error code.  | 
682  |  |      */  | 
683  | 0  |     SingleUnitImpl nextSingleUnit(bool &sawAnd, UErrorCode &status) { | 
684  | 0  |         SingleUnitImpl result;  | 
685  | 0  |         if (U_FAILURE(status)) { | 
686  | 0  |             return result;  | 
687  | 0  |         }  | 
688  |  |  | 
689  |  |         // state:  | 
690  |  |         // 0 = no tokens seen yet (will accept power, SI or binary prefix, or simple unit)  | 
691  |  |         // 1 = power token seen (will not accept another power token)  | 
692  |  |         // 2 = SI or binary prefix token seen (will not accept a power, or SI or binary prefix token)  | 
693  | 0  |         int32_t state = 0;  | 
694  |  | 
  | 
695  | 0  |         bool atStart = fIndex == 0;  | 
696  | 0  |         Token token = nextToken(status);  | 
697  | 0  |         if (U_FAILURE(status)) { | 
698  | 0  |             return result;  | 
699  | 0  |         }  | 
700  |  |  | 
701  | 0  |         if (atStart) { | 
702  |  |             // Identifiers optionally start with "per-".  | 
703  | 0  |             if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) { | 
704  | 0  |                 U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);  | 
705  | 0  |                 fAfterPer = true;  | 
706  | 0  |                 result.dimensionality = -1;  | 
707  |  | 
  | 
708  | 0  |                 token = nextToken(status);  | 
709  | 0  |                 if (U_FAILURE(status)) { | 
710  | 0  |                     return result;  | 
711  | 0  |                 }  | 
712  | 0  |             }  | 
713  | 0  |         } else { | 
714  |  |             // All other SingleUnit's are separated from previous SingleUnit's  | 
715  |  |             // via a compound part:  | 
716  | 0  |             if (token.getType() != Token::TYPE_COMPOUND_PART) { | 
717  | 0  |                 status = kUnitIdentifierSyntaxError;  | 
718  | 0  |                 return result;  | 
719  | 0  |             }  | 
720  |  |  | 
721  | 0  |             switch (token.getMatch()) { | 
722  | 0  |             case COMPOUND_PART_PER:  | 
723  | 0  |                 if (sawAnd) { | 
724  |  |                     // Mixed compound units not yet supported,  | 
725  |  |                     // TODO(CLDR-13700).  | 
726  | 0  |                     status = kUnitIdentifierSyntaxError;  | 
727  | 0  |                     return result;  | 
728  | 0  |                 }  | 
729  | 0  |                 fAfterPer = true;  | 
730  | 0  |                 result.dimensionality = -1;  | 
731  | 0  |                 break;  | 
732  |  |  | 
733  | 0  |             case COMPOUND_PART_TIMES:  | 
734  | 0  |                 if (fAfterPer) { | 
735  | 0  |                     result.dimensionality = -1;  | 
736  | 0  |                 }  | 
737  | 0  |                 break;  | 
738  |  |  | 
739  | 0  |             case COMPOUND_PART_AND:  | 
740  | 0  |                 if (fAfterPer) { | 
741  |  |                     // Can't start with "-and-", and mixed compound units  | 
742  |  |                     // not yet supported, TODO(CLDR-13700).  | 
743  | 0  |                     status = kUnitIdentifierSyntaxError;  | 
744  | 0  |                     return result;  | 
745  | 0  |                 }  | 
746  | 0  |                 sawAnd = true;  | 
747  | 0  |                 break;  | 
748  | 0  |             }  | 
749  |  |  | 
750  | 0  |             token = nextToken(status);  | 
751  | 0  |             if (U_FAILURE(status)) { | 
752  | 0  |                 return result;  | 
753  | 0  |             }  | 
754  | 0  |         }  | 
755  |  |  | 
756  |  |         // Read tokens until we have a complete SingleUnit or we reach the end.  | 
757  | 0  |         while (true) { | 
758  | 0  |             switch (token.getType()) { | 
759  | 0  |                 case Token::TYPE_POWER_PART:  | 
760  | 0  |                     if (state > 0) { | 
761  | 0  |                         status = kUnitIdentifierSyntaxError;  | 
762  | 0  |                         return result;  | 
763  | 0  |                     }  | 
764  | 0  |                     result.dimensionality *= token.getPower();  | 
765  | 0  |                     state = 1;  | 
766  | 0  |                     break;  | 
767  |  |  | 
768  | 0  |                 case Token::TYPE_PREFIX:  | 
769  | 0  |                     if (state > 1) { | 
770  | 0  |                         status = kUnitIdentifierSyntaxError;  | 
771  | 0  |                         return result;  | 
772  | 0  |                     }  | 
773  | 0  |                     result.unitPrefix = token.getUnitPrefix();  | 
774  | 0  |                     state = 2;  | 
775  | 0  |                     break;  | 
776  |  |  | 
777  | 0  |                 case Token::TYPE_SIMPLE_UNIT:  | 
778  | 0  |                     result.index = token.getSimpleUnitIndex();  | 
779  | 0  |                     return result;  | 
780  |  |  | 
781  | 0  |                 default:  | 
782  | 0  |                     status = kUnitIdentifierSyntaxError;  | 
783  | 0  |                     return result;  | 
784  | 0  |             }  | 
785  |  |  | 
786  | 0  |             if (!hasNext()) { | 
787  |  |                 // We ran out of tokens before finding a complete single unit.  | 
788  | 0  |                 status = kUnitIdentifierSyntaxError;  | 
789  | 0  |                 return result;  | 
790  | 0  |             }  | 
791  | 0  |             token = nextToken(status);  | 
792  | 0  |             if (U_FAILURE(status)) { | 
793  | 0  |                 return result;  | 
794  | 0  |             }  | 
795  | 0  |         }  | 
796  |  |  | 
797  | 0  |         return result;  | 
798  | 0  |     }  | 
799  |  | };  | 
800  |  |  | 
801  |  | // Sorting function wrapping SingleUnitImpl::compareTo for use with uprv_sortArray.  | 
802  |  | int32_t U_CALLCONV  | 
803  | 0  | compareSingleUnits(const void* /*context*/, const void* left, const void* right) { | 
804  | 0  |     auto realLeft = static_cast<const SingleUnitImpl* const*>(left);  | 
805  | 0  |     auto realRight = static_cast<const SingleUnitImpl* const*>(right);  | 
806  | 0  |     return (*realLeft)->compareTo(**realRight);  | 
807  | 0  | }  | 
808  |  |  | 
809  |  | // Returns an index into the gCategories array, for the "unitQuantity" (aka  | 
810  |  | // "type" or "category") associated with the given base unit identifier. Returns  | 
811  |  | // -1 on failure, together with U_UNSUPPORTED_ERROR.  | 
812  | 0  | int32_t getUnitCategoryIndex(StringPiece baseUnitIdentifier, UErrorCode &status) { | 
813  | 0  |     umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);  | 
814  | 0  |     if (U_FAILURE(status)) { | 
815  | 0  |         return -1;  | 
816  | 0  |     }  | 
817  | 0  |     BytesTrie trie(gSerializedUnitCategoriesTrie);  | 
818  | 0  |     UStringTrieResult result = trie.next(baseUnitIdentifier.data(), baseUnitIdentifier.length());  | 
819  | 0  |     if (!USTRINGTRIE_HAS_VALUE(result)) { | 
820  | 0  |         status = U_UNSUPPORTED_ERROR;  | 
821  | 0  |         return -1;  | 
822  | 0  |     }  | 
823  | 0  |     return trie.getValue();  | 
824  | 0  | }  | 
825  |  |  | 
826  |  | } // namespace  | 
827  |  |  | 
828  |  | U_CAPI int32_t U_EXPORT2  | 
829  | 0  | umeas_getPrefixPower(UMeasurePrefix unitPrefix) { | 
830  | 0  |     if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN &&  | 
831  | 0  |         unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) { | 
832  | 0  |         return unitPrefix - UMEASURE_PREFIX_INTERNAL_ONE_BIN;  | 
833  | 0  |     }  | 
834  | 0  |     U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI &&  | 
835  | 0  |              unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI);  | 
836  | 0  |     return unitPrefix - UMEASURE_PREFIX_ONE;  | 
837  | 0  | }  | 
838  |  |  | 
839  |  | U_CAPI int32_t U_EXPORT2  | 
840  | 0  | umeas_getPrefixBase(UMeasurePrefix unitPrefix) { | 
841  | 0  |     if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN &&  | 
842  | 0  |         unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) { | 
843  | 0  |         return 1024;  | 
844  | 0  |     }  | 
845  | 0  |     U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI &&  | 
846  | 0  |              unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI);  | 
847  | 0  |     return 10;  | 
848  | 0  | }  | 
849  |  |  | 
850  | 0  | CharString U_I18N_API getUnitQuantity(StringPiece baseUnitIdentifier, UErrorCode &status) { | 
851  | 0  |     CharString result;  | 
852  | 0  |     U_ASSERT(result.length() == 0);  | 
853  | 0  |     if (U_FAILURE(status)) { | 
854  | 0  |         return result;  | 
855  | 0  |     }  | 
856  | 0  |     UErrorCode localStatus = U_ZERO_ERROR;  | 
857  | 0  |     int32_t idx = getUnitCategoryIndex(baseUnitIdentifier, localStatus);  | 
858  | 0  |     if (U_FAILURE(localStatus)) { | 
859  |  |         // TODO(icu-units#130): support inverting any unit, with correct  | 
860  |  |         // fallback logic: inversion and fallback may depend on presence or  | 
861  |  |         // absence of a usage for that category.  | 
862  | 0  |         if (uprv_strcmp(baseUnitIdentifier.data(), "meter-per-cubic-meter") == 0) { | 
863  | 0  |             result.append(kConsumption, (int32_t)kConsumptionLen, status);  | 
864  | 0  |             return result;  | 
865  | 0  |         }  | 
866  | 0  |         status = U_INVALID_FORMAT_ERROR;  | 
867  | 0  |         return result;  | 
868  | 0  |     }  | 
869  | 0  |     if (idx < 0 || idx >= gCategoriesCount) { | 
870  | 0  |         status = U_INVALID_FORMAT_ERROR;  | 
871  | 0  |         return result;  | 
872  | 0  |     }  | 
873  | 0  |     result.appendInvariantChars(gCategories[idx], u_strlen(gCategories[idx]), status);  | 
874  | 0  |     return result;  | 
875  | 0  | }  | 
876  |  |  | 
877  |  | // In ICU4J, this is MeasureUnit.getSingleUnitImpl().  | 
878  | 0  | SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) { | 
879  | 0  |     MeasureUnitImpl temp;  | 
880  | 0  |     const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status);  | 
881  | 0  |     if (U_FAILURE(status)) { | 
882  | 0  |         return {}; | 
883  | 0  |     }  | 
884  | 0  |     if (impl.singleUnits.length() == 0) { | 
885  | 0  |         return {}; | 
886  | 0  |     }  | 
887  | 0  |     if (impl.singleUnits.length() == 1) { | 
888  | 0  |         return *impl.singleUnits[0];  | 
889  | 0  |     }  | 
890  | 0  |     status = U_ILLEGAL_ARGUMENT_ERROR;  | 
891  | 0  |     return {}; | 
892  | 0  | }  | 
893  |  |  | 
894  | 0  | MeasureUnit SingleUnitImpl::build(UErrorCode& status) const { | 
895  | 0  |     MeasureUnitImpl temp;  | 
896  | 0  |     temp.appendSingleUnit(*this, status);  | 
897  |  |     // TODO(icu-units#28): the MeasureUnitImpl::build() method uses  | 
898  |  |     // findBySubtype, which is relatively slow.  | 
899  |  |     // - At the time of loading the simple unit IDs, we could also save a  | 
900  |  |     //   mapping to the builtin MeasureUnit type and subtype they correspond to.  | 
901  |  |     // - This method could then check dimensionality and index, and if both are  | 
902  |  |     //   1, directly return MeasureUnit instances very quickly.  | 
903  | 0  |     return std::move(temp).build(status);  | 
904  | 0  | }  | 
905  |  |  | 
906  | 0  | const char *SingleUnitImpl::getSimpleUnitID() const { | 
907  | 0  |     return gSimpleUnits[index];  | 
908  | 0  | }  | 
909  |  |  | 
910  | 0  | void SingleUnitImpl::appendNeutralIdentifier(CharString &result, UErrorCode &status) const { | 
911  | 0  |     int32_t absPower = std::abs(this->dimensionality);  | 
912  |  | 
  | 
913  | 0  |     U_ASSERT(absPower > 0); // "this function does not support the dimensionless single units";  | 
914  |  |       | 
915  | 0  |     if (absPower == 1) { | 
916  |  |         // no-op  | 
917  | 0  |     } else if (absPower == 2) { | 
918  | 0  |         result.append(StringPiece("square-"), status); | 
919  | 0  |     } else if (absPower == 3) { | 
920  | 0  |         result.append(StringPiece("cubic-"), status); | 
921  | 0  |     } else if (absPower <= 15) { | 
922  | 0  |         result.append(StringPiece("pow"), status); | 
923  | 0  |         result.appendNumber(absPower, status);  | 
924  | 0  |         result.append(StringPiece("-"), status); | 
925  | 0  |     } else { | 
926  | 0  |         status = U_ILLEGAL_ARGUMENT_ERROR; // Unit Identifier Syntax Error  | 
927  | 0  |         return;  | 
928  | 0  |     }  | 
929  |  |  | 
930  | 0  |     if (U_FAILURE(status)) { | 
931  | 0  |         return;  | 
932  | 0  |     }  | 
933  |  |  | 
934  | 0  |     if (this->unitPrefix != UMEASURE_PREFIX_ONE) { | 
935  | 0  |         bool found = false;  | 
936  | 0  |         for (const auto &unitPrefixInfo : gUnitPrefixStrings) { | 
937  |  |             // TODO: consider using binary search? If we do this, add a unit  | 
938  |  |             // test to ensure gUnitPrefixStrings is sorted?  | 
939  | 0  |             if (unitPrefixInfo.value == this->unitPrefix) { | 
940  | 0  |                 result.append(unitPrefixInfo.string, status);  | 
941  | 0  |                 found = true;  | 
942  | 0  |                 break;  | 
943  | 0  |             }  | 
944  | 0  |         }  | 
945  | 0  |         if (!found) { | 
946  | 0  |             status = U_UNSUPPORTED_ERROR;  | 
947  | 0  |             return;  | 
948  | 0  |         }  | 
949  | 0  |     }  | 
950  |  |  | 
951  | 0  |     result.append(StringPiece(this->getSimpleUnitID()), status);  | 
952  | 0  | }  | 
953  |  |  | 
954  | 0  | int32_t SingleUnitImpl::getUnitCategoryIndex() const { | 
955  | 0  |     return gSimpleUnitCategories[index];  | 
956  | 0  | }  | 
957  |  |  | 
958  | 0  | MeasureUnitImpl::MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status) { | 
959  | 0  |     this->appendSingleUnit(singleUnit, status);  | 
960  | 0  | }  | 
961  |  |  | 
962  | 0  | MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) { | 
963  | 0  |     return Parser::from(identifier, status).parse(status);  | 
964  | 0  | }  | 
965  |  |  | 
966  |  | const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit(  | 
967  | 0  |         const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) { | 
968  | 0  |     if (measureUnit.fImpl) { | 
969  | 0  |         return *measureUnit.fImpl;  | 
970  | 0  |     } else { | 
971  | 0  |         memory = Parser::from(measureUnit.getIdentifier(), status).parse(status);  | 
972  | 0  |         return memory;  | 
973  | 0  |     }  | 
974  | 0  | }  | 
975  |  |  | 
976  |  | MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy(  | 
977  | 0  |         const MeasureUnit& measureUnit, UErrorCode& status) { | 
978  | 0  |     if (measureUnit.fImpl) { | 
979  | 0  |         return measureUnit.fImpl->copy(status);  | 
980  | 0  |     } else { | 
981  | 0  |         return Parser::from(measureUnit.getIdentifier(), status).parse(status);  | 
982  | 0  |     }  | 
983  | 0  | }  | 
984  |  |  | 
985  | 0  | void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) { | 
986  | 0  |     identifier.clear();  | 
987  | 0  |     for (int32_t i = 0; i < singleUnits.length(); i++) { | 
988  | 0  |         singleUnits[i]->dimensionality *= -1;  | 
989  | 0  |     }  | 
990  | 0  | }  | 
991  |  |  | 
992  | 0  | bool MeasureUnitImpl::appendSingleUnit(const SingleUnitImpl &singleUnit, UErrorCode &status) { | 
993  | 0  |     identifier.clear();  | 
994  |  | 
  | 
995  | 0  |     if (singleUnit.isDimensionless()) { | 
996  |  |         // Do not append dimensionless units.  | 
997  | 0  |         return false;  | 
998  | 0  |     }  | 
999  |  |  | 
1000  |  |     // Find a similar unit that already exists, to attempt to coalesce  | 
1001  | 0  |     SingleUnitImpl *oldUnit = nullptr;  | 
1002  | 0  |     for (int32_t i = 0; i < this->singleUnits.length(); i++) { | 
1003  | 0  |         auto *candidate = this->singleUnits[i];  | 
1004  | 0  |         if (candidate->isCompatibleWith(singleUnit)) { | 
1005  | 0  |             oldUnit = candidate;  | 
1006  | 0  |         }  | 
1007  | 0  |     }  | 
1008  |  | 
  | 
1009  | 0  |     if (oldUnit) { | 
1010  |  |         // Both dimensionalities will be positive, or both will be negative, by  | 
1011  |  |         // virtue of isCompatibleWith().  | 
1012  | 0  |         oldUnit->dimensionality += singleUnit.dimensionality;  | 
1013  |  | 
  | 
1014  | 0  |         return false;  | 
1015  | 0  |     }  | 
1016  |  |  | 
1017  |  |     // Add a copy of singleUnit  | 
1018  |  |     // NOTE: MaybeStackVector::emplaceBackAndCheckErrorCode creates new copy of  singleUnit.  | 
1019  | 0  |     this->singleUnits.emplaceBackAndCheckErrorCode(status, singleUnit);  | 
1020  | 0  |     if (U_FAILURE(status)) { | 
1021  | 0  |         return false;  | 
1022  | 0  |     }  | 
1023  |  |  | 
1024  |  |     // If the MeasureUnitImpl is `UMEASURE_UNIT_SINGLE` and after the appending a unit, the `singleUnits`  | 
1025  |  |     // contains more than one. thus means the complexity should be `UMEASURE_UNIT_COMPOUND`  | 
1026  | 0  |     if (this->singleUnits.length() > 1 &&  | 
1027  | 0  |         this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_SINGLE) { | 
1028  | 0  |         this->complexity = UMeasureUnitComplexity::UMEASURE_UNIT_COMPOUND;  | 
1029  | 0  |     }  | 
1030  |  | 
  | 
1031  | 0  |     return true;  | 
1032  | 0  | }  | 
1033  |  |  | 
1034  |  | MaybeStackVector<MeasureUnitImplWithIndex>  | 
1035  | 0  | MeasureUnitImpl::extractIndividualUnitsWithIndices(UErrorCode &status) const { | 
1036  | 0  |     MaybeStackVector<MeasureUnitImplWithIndex> result;  | 
1037  |  | 
  | 
1038  | 0  |     if (this->complexity != UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { | 
1039  | 0  |         result.emplaceBackAndCheckErrorCode(status, 0, *this, status);  | 
1040  | 0  |         return result;  | 
1041  | 0  |     }  | 
1042  |  |  | 
1043  | 0  |     for (int32_t i = 0; i < singleUnits.length(); ++i) { | 
1044  | 0  |         result.emplaceBackAndCheckErrorCode(status, i, *singleUnits[i], status);  | 
1045  | 0  |         if (U_FAILURE(status)) { | 
1046  | 0  |             return result;  | 
1047  | 0  |         }  | 
1048  | 0  |     }  | 
1049  |  |  | 
1050  | 0  |     return result;  | 
1051  | 0  | }  | 
1052  |  |  | 
1053  |  | /**  | 
1054  |  |  * Normalize a MeasureUnitImpl and generate the identifier string in place.  | 
1055  |  |  */  | 
1056  | 0  | void MeasureUnitImpl::serialize(UErrorCode &status) { | 
1057  | 0  |     if (U_FAILURE(status)) { | 
1058  | 0  |         return;  | 
1059  | 0  |     }  | 
1060  |  |  | 
1061  | 0  |     if (this->singleUnits.length() == 0) { | 
1062  |  |         // Dimensionless, constructed by the default constructor.  | 
1063  | 0  |         return;  | 
1064  | 0  |     }  | 
1065  |  |  | 
1066  | 0  |     if (this->complexity == UMEASURE_UNIT_COMPOUND) { | 
1067  |  |         // Note: don't sort a MIXED unit  | 
1068  | 0  |         uprv_sortArray(this->singleUnits.getAlias(), this->singleUnits.length(),  | 
1069  | 0  |                        sizeof(this->singleUnits[0]), compareSingleUnits, nullptr, false, &status);  | 
1070  | 0  |         if (U_FAILURE(status)) { | 
1071  | 0  |             return;  | 
1072  | 0  |         }  | 
1073  | 0  |     }  | 
1074  |  |  | 
1075  | 0  |     CharString result;  | 
1076  | 0  |     bool beforePer = true;  | 
1077  | 0  |     bool firstTimeNegativeDimension = false;  | 
1078  | 0  |     for (int32_t i = 0; i < this->singleUnits.length(); i++) { | 
1079  | 0  |         if (beforePer && (*this->singleUnits[i]).dimensionality < 0) { | 
1080  | 0  |             beforePer = false;  | 
1081  | 0  |             firstTimeNegativeDimension = true;  | 
1082  | 0  |         } else if ((*this->singleUnits[i]).dimensionality < 0) { | 
1083  | 0  |             firstTimeNegativeDimension = false;  | 
1084  | 0  |         }  | 
1085  |  | 
  | 
1086  | 0  |         if (U_FAILURE(status)) { | 
1087  | 0  |             return;  | 
1088  | 0  |         }  | 
1089  |  |  | 
1090  | 0  |         if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { | 
1091  | 0  |             if (result.length() != 0) { | 
1092  | 0  |                 result.append(StringPiece("-and-"), status); | 
1093  | 0  |             }  | 
1094  | 0  |         } else { | 
1095  | 0  |             if (firstTimeNegativeDimension) { | 
1096  | 0  |                 if (result.length() == 0) { | 
1097  | 0  |                     result.append(StringPiece("per-"), status); | 
1098  | 0  |                 } else { | 
1099  | 0  |                     result.append(StringPiece("-per-"), status); | 
1100  | 0  |                 }  | 
1101  | 0  |             } else { | 
1102  | 0  |                 if (result.length() != 0) { | 
1103  | 0  |                     result.append(StringPiece("-"), status); | 
1104  | 0  |                 }  | 
1105  | 0  |             }  | 
1106  | 0  |         }  | 
1107  |  | 
  | 
1108  | 0  |         this->singleUnits[i]->appendNeutralIdentifier(result, status);  | 
1109  | 0  |     }  | 
1110  |  |  | 
1111  | 0  |     this->identifier = CharString(result, status);  | 
1112  | 0  | }  | 
1113  |  |  | 
1114  | 0  | MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && { | 
1115  | 0  |     this->serialize(status);  | 
1116  | 0  |     return MeasureUnit(std::move(*this));  | 
1117  | 0  | }  | 
1118  |  |  | 
1119  | 0  | MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) { | 
1120  | 0  |     return Parser::from(identifier, status).parse(status).build(status);  | 
1121  | 0  | }  | 
1122  |  |  | 
1123  | 0  | UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const { | 
1124  | 0  |     MeasureUnitImpl temp;  | 
1125  | 0  |     return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity;  | 
1126  | 0  | }  | 
1127  |  |  | 
1128  | 0  | UMeasurePrefix MeasureUnit::getPrefix(UErrorCode& status) const { | 
1129  | 0  |     return SingleUnitImpl::forMeasureUnit(*this, status).unitPrefix;  | 
1130  | 0  | }  | 
1131  |  |  | 
1132  | 0  | MeasureUnit MeasureUnit::withPrefix(UMeasurePrefix prefix, UErrorCode& status) const { | 
1133  | 0  |     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);  | 
1134  | 0  |     singleUnit.unitPrefix = prefix;  | 
1135  | 0  |     return singleUnit.build(status);  | 
1136  | 0  | }  | 
1137  |  |  | 
1138  | 0  | int32_t MeasureUnit::getDimensionality(UErrorCode& status) const { | 
1139  | 0  |     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);  | 
1140  | 0  |     if (U_FAILURE(status)) { return 0; } | 
1141  | 0  |     if (singleUnit.isDimensionless()) { | 
1142  | 0  |         return 0;  | 
1143  | 0  |     }  | 
1144  | 0  |     return singleUnit.dimensionality;  | 
1145  | 0  | }  | 
1146  |  |  | 
1147  | 0  | MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const { | 
1148  | 0  |     SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status);  | 
1149  | 0  |     singleUnit.dimensionality = dimensionality;  | 
1150  | 0  |     return singleUnit.build(status);  | 
1151  | 0  | }  | 
1152  |  |  | 
1153  | 0  | MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const { | 
1154  | 0  |     MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);  | 
1155  | 0  |     impl.takeReciprocal(status);  | 
1156  | 0  |     return std::move(impl).build(status);  | 
1157  | 0  | }  | 
1158  |  |  | 
1159  | 0  | MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const { | 
1160  | 0  |     MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);  | 
1161  | 0  |     MeasureUnitImpl temp;  | 
1162  | 0  |     const MeasureUnitImpl& otherImpl = MeasureUnitImpl::forMeasureUnit(other, temp, status);  | 
1163  | 0  |     if (impl.complexity == UMEASURE_UNIT_MIXED || otherImpl.complexity == UMEASURE_UNIT_MIXED) { | 
1164  | 0  |         status = U_ILLEGAL_ARGUMENT_ERROR;  | 
1165  | 0  |         return {}; | 
1166  | 0  |     }  | 
1167  | 0  |     for (int32_t i = 0; i < otherImpl.singleUnits.length(); i++) { | 
1168  | 0  |         impl.appendSingleUnit(*otherImpl.singleUnits[i], status);  | 
1169  | 0  |     }  | 
1170  | 0  |     if (impl.singleUnits.length() > 1) { | 
1171  | 0  |         impl.complexity = UMEASURE_UNIT_COMPOUND;  | 
1172  | 0  |     }  | 
1173  | 0  |     return std::move(impl).build(status);  | 
1174  | 0  | }  | 
1175  |  |  | 
1176  | 0  | LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const { | 
1177  | 0  |     MeasureUnitImpl temp;  | 
1178  | 0  |     const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status);  | 
1179  | 0  |     outCount = impl.singleUnits.length();  | 
1180  | 0  |     MeasureUnit* arr = new MeasureUnit[outCount];  | 
1181  | 0  |     if (arr == nullptr) { | 
1182  | 0  |         status = U_MEMORY_ALLOCATION_ERROR;  | 
1183  | 0  |         return LocalArray<MeasureUnit>();  | 
1184  | 0  |     }  | 
1185  | 0  |     for (int32_t i = 0; i < outCount; i++) { | 
1186  | 0  |         arr[i] = impl.singleUnits[i]->build(status);  | 
1187  | 0  |     }  | 
1188  | 0  |     return LocalArray<MeasureUnit>(arr, status);  | 
1189  | 0  | }  | 
1190  |  |  | 
1191  |  |  | 
1192  |  | U_NAMESPACE_END  | 
1193  |  |  | 
1194  |  | #endif /* !UCONFIG_NO_FORMATTING */  | 