/src/icu/icu4c/source/i18n/measunit_extra.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2020 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | // Extra functions for MeasureUnit not needed for all clients. |
5 | | // Separate .o file so that it can be removed for modularity. |
6 | | |
7 | | #include "unicode/utypes.h" |
8 | | |
9 | | #if !UCONFIG_NO_FORMATTING |
10 | | |
11 | | // Allow implicit conversion from char16_t* to UnicodeString for this file: |
12 | | // Helpful in toString methods and elsewhere. |
13 | | #define UNISTR_FROM_STRING_EXPLICIT |
14 | | |
15 | | #include "charstr.h" |
16 | | #include "cmemory.h" |
17 | | #include "cstring.h" |
18 | | #include "measunit_impl.h" |
19 | | #include "resource.h" |
20 | | #include "uarrsort.h" |
21 | | #include "uassert.h" |
22 | | #include "ucln_in.h" |
23 | | #include "umutex.h" |
24 | | #include "unicode/bytestrie.h" |
25 | | #include "unicode/bytestriebuilder.h" |
26 | | #include "unicode/localpointer.h" |
27 | | #include "unicode/stringpiece.h" |
28 | | #include "unicode/stringtriebuilder.h" |
29 | | #include "unicode/ures.h" |
30 | | #include "unicode/ustringtrie.h" |
31 | | #include "uresimp.h" |
32 | | #include "util.h" |
33 | | #include <cstdlib> |
34 | | |
35 | | U_NAMESPACE_BEGIN |
36 | | |
37 | | |
38 | | namespace { |
39 | | |
40 | | // TODO: Propose a new error code for this? |
41 | | constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR; |
42 | | |
43 | | // Trie value offset for SI or binary prefixes. This is big enough to ensure we only |
44 | | // insert positive integers into the trie. |
45 | | constexpr int32_t kPrefixOffset = 64; |
46 | | static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_BIN > 0, |
47 | | "kPrefixOffset is too small for minimum UMeasurePrefix value"); |
48 | | static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_SI > 0, |
49 | | "kPrefixOffset is too small for minimum UMeasurePrefix value"); |
50 | | |
51 | | // Trie value offset for compound parts, e.g. "-per-", "-", "-and-". |
52 | | constexpr int32_t kCompoundPartOffset = 128; |
53 | | static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_BIN, |
54 | | "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens"); |
55 | | static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_SI, |
56 | | "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens"); |
57 | | |
58 | | enum CompoundPart { |
59 | | // Represents "-per-" |
60 | | COMPOUND_PART_PER = kCompoundPartOffset, |
61 | | // Represents "-" |
62 | | COMPOUND_PART_TIMES, |
63 | | // Represents "-and-" |
64 | | COMPOUND_PART_AND, |
65 | | }; |
66 | | |
// First trie value used for compound parts that open an identifier ("per-").
constexpr int32_t kInitialCompoundPartOffset = 192;

// Compound parts that are only valid at the very start of an identifier.
// "per-" is currently the only one.
enum InitialCompoundPart {
    INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,  // "per-"
};
75 | | |
// First trie value used for power tokens such as "square-", "cubic-", "pow2-".
constexpr int32_t kPowerPartOffset = 256;

// Trie values for power tokens. Each enumerator equals kPowerPartOffset plus
// the exponent it represents, so the exponent can be recovered by subtracting
// kPowerPartOffset again.
enum PowerPart {
    POWER_PART_P2 = kPowerPartOffset + 2,
    POWER_PART_P3 = kPowerPartOffset + 3,
    POWER_PART_P4 = kPowerPartOffset + 4,
    POWER_PART_P5 = kPowerPartOffset + 5,
    POWER_PART_P6 = kPowerPartOffset + 6,
    POWER_PART_P7 = kPowerPartOffset + 7,
    POWER_PART_P8 = kPowerPartOffset + 8,
    POWER_PART_P9 = kPowerPartOffset + 9,
    POWER_PART_P10 = kPowerPartOffset + 10,
    POWER_PART_P11 = kPowerPartOffset + 11,
    POWER_PART_P12 = kPowerPartOffset + 12,
    POWER_PART_P13 = kPowerPartOffset + 13,
    POWER_PART_P14 = kPowerPartOffset + 14,
    POWER_PART_P15 = kPowerPartOffset + 15,
};
95 | | |
// First trie value used for simple units ("gram", "nautical-mile",
// "fluid-ounce-imperial", ...).
constexpr int32_t kSimpleUnitOffset{512};
99 | | |
100 | | const struct UnitPrefixStrings { |
101 | | const char* const string; |
102 | | UMeasurePrefix value; |
103 | | } gUnitPrefixStrings[] = { |
104 | | // SI prefixes |
105 | | { "yotta", UMEASURE_PREFIX_YOTTA }, |
106 | | { "zetta", UMEASURE_PREFIX_ZETTA }, |
107 | | { "exa", UMEASURE_PREFIX_EXA }, |
108 | | { "peta", UMEASURE_PREFIX_PETA }, |
109 | | { "tera", UMEASURE_PREFIX_TERA }, |
110 | | { "giga", UMEASURE_PREFIX_GIGA }, |
111 | | { "mega", UMEASURE_PREFIX_MEGA }, |
112 | | { "kilo", UMEASURE_PREFIX_KILO }, |
113 | | { "hecto", UMEASURE_PREFIX_HECTO }, |
114 | | { "deka", UMEASURE_PREFIX_DEKA }, |
115 | | { "deci", UMEASURE_PREFIX_DECI }, |
116 | | { "centi", UMEASURE_PREFIX_CENTI }, |
117 | | { "milli", UMEASURE_PREFIX_MILLI }, |
118 | | { "micro", UMEASURE_PREFIX_MICRO }, |
119 | | { "nano", UMEASURE_PREFIX_NANO }, |
120 | | { "pico", UMEASURE_PREFIX_PICO }, |
121 | | { "femto", UMEASURE_PREFIX_FEMTO }, |
122 | | { "atto", UMEASURE_PREFIX_ATTO }, |
123 | | { "zepto", UMEASURE_PREFIX_ZEPTO }, |
124 | | { "yocto", UMEASURE_PREFIX_YOCTO }, |
125 | | // Binary prefixes |
126 | | { "yobi", UMEASURE_PREFIX_YOBI }, |
127 | | { "zebi", UMEASURE_PREFIX_ZEBI }, |
128 | | { "exbi", UMEASURE_PREFIX_EXBI }, |
129 | | { "pebi", UMEASURE_PREFIX_PEBI }, |
130 | | { "tebi", UMEASURE_PREFIX_TEBI }, |
131 | | { "gibi", UMEASURE_PREFIX_GIBI }, |
132 | | { "mebi", UMEASURE_PREFIX_MEBI }, |
133 | | { "kibi", UMEASURE_PREFIX_KIBI }, |
134 | | }; |
135 | | |
136 | | /** |
137 | | * A ResourceSink that collects simple unit identifiers from the keys of the |
138 | | * convertUnits table into an array, and adds these values to a TrieBuilder, |
139 | | * with associated values being their index into this array plus a specified |
140 | | * offset. |
141 | | * |
142 | | * Example code: |
143 | | * |
144 | | * UErrorCode status = U_ZERO_ERROR; |
145 | | * BytesTrieBuilder b(status); |
146 | | * int32_t ARR_SIZE = 200; |
147 | | * const char *unitIdentifiers[ARR_SIZE]; |
148 | | * int32_t *unitCategories[ARR_SIZE]; |
149 | | * SimpleUnitIdentifiersSink identifierSink(gSerializedUnitCategoriesTrie, unitIdentifiers, |
150 | | * unitCategories, ARR_SIZE, b, kTrieValueOffset); |
151 | | * LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status)); |
152 | | * ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status); |
153 | | */ |
154 | | class SimpleUnitIdentifiersSink : public icu::ResourceSink { |
155 | | public: |
156 | | /** |
157 | | * Constructor. |
158 | | * @param quantitiesTrieData The data for constructing a quantitiesTrie, |
159 | | * which maps from a simple unit identifier to an index into the |
160 | | * gCategories array. |
161 | | * @param out Array of char* to which pointers to the simple unit |
162 | | * identifiers will be saved. (Does not take ownership.) |
163 | | * @param outCategories Array of int32_t to which category indexes will be |
164 | | * saved: this corresponds to simple unit IDs saved to `out`, mapping |
165 | | * from the ID to the value produced by the quantitiesTrie (which is an |
166 | | * index into the gCategories array). |
167 | | * @param outSize The size of `out` and `outCategories`. |
168 | | * @param trieBuilder The trie builder to which the simple unit identifier |
169 | | * should be added. The trie builder must outlive this resource sink. |
170 | | * @param trieValueOffset This is added to the index of the identifier in |
171 | | * the `out` array, before adding to `trieBuilder` as the value |
172 | | * associated with the identifier. |
173 | | */ |
174 | | explicit SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData, const char **out, |
175 | | int32_t *outCategories, int32_t outSize, |
176 | | BytesTrieBuilder &trieBuilder, int32_t trieValueOffset) |
177 | | : outArray(out), outCategories(outCategories), outSize(outSize), trieBuilder(trieBuilder), |
178 | 0 | trieValueOffset(trieValueOffset), quantitiesTrieData(quantitiesTrieData), outIndex(0) {} |
179 | | |
180 | | /** |
181 | | * Adds the table keys found in value to the output vector. |
182 | | * @param key The key of the resource passed to `value`: the second |
183 | | * parameter of the ures_getAllItemsWithFallback() call. |
184 | | * @param value Should be a ResourceTable value, if |
185 | | * ures_getAllItemsWithFallback() was called correctly for this sink. |
186 | | * @param noFallback Ignored. |
187 | | * @param status The standard ICU error code output parameter. |
188 | | */ |
189 | 0 | void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override { |
190 | 0 | ResourceTable table = value.getTable(status); |
191 | 0 | if (U_FAILURE(status)) return; |
192 | | |
193 | 0 | if (outIndex + table.getSize() > outSize) { |
194 | 0 | status = U_INDEX_OUTOFBOUNDS_ERROR; |
195 | 0 | return; |
196 | 0 | } |
197 | | |
198 | 0 | BytesTrie quantitiesTrie(quantitiesTrieData.data()); |
199 | | |
200 | | // Collect keys from the table resource. |
201 | 0 | const char *simpleUnitID; |
202 | 0 | for (int32_t i = 0; table.getKeyAndValue(i, simpleUnitID, value); ++i) { |
203 | 0 | U_ASSERT(i < table.getSize()); |
204 | 0 | U_ASSERT(outIndex < outSize); |
205 | 0 | if (uprv_strcmp(simpleUnitID, "kilogram") == 0) { |
206 | | // For parsing, we use "gram", the prefixless metric mass unit. We |
207 | | // thus ignore the SI Base Unit of Mass: it exists due to being the |
208 | | // mass conversion target unit, but not needed for MeasureUnit |
209 | | // parsing. |
210 | 0 | continue; |
211 | 0 | } |
212 | 0 | outArray[outIndex] = simpleUnitID; |
213 | 0 | trieBuilder.add(simpleUnitID, trieValueOffset + outIndex, status); |
214 | | |
215 | | // Find the base target unit for this simple unit |
216 | 0 | ResourceTable table = value.getTable(status); |
217 | 0 | if (U_FAILURE(status)) { return; } |
218 | 0 | if (!table.findValue("target", value)) { |
219 | 0 | status = U_INVALID_FORMAT_ERROR; |
220 | 0 | break; |
221 | 0 | } |
222 | 0 | int32_t len; |
223 | 0 | const char16_t* uTarget = value.getString(len, status); |
224 | 0 | CharString target; |
225 | 0 | target.appendInvariantChars(uTarget, len, status); |
226 | 0 | if (U_FAILURE(status)) { return; } |
227 | 0 | quantitiesTrie.reset(); |
228 | 0 | UStringTrieResult result = quantitiesTrie.next(target.data(), target.length()); |
229 | 0 | if (!USTRINGTRIE_HAS_VALUE(result)) { |
230 | 0 | status = U_INVALID_FORMAT_ERROR; |
231 | 0 | break; |
232 | 0 | } |
233 | 0 | outCategories[outIndex] = quantitiesTrie.getValue(); |
234 | |
|
235 | 0 | outIndex++; |
236 | 0 | } |
237 | 0 | } |
238 | | |
239 | | private: |
240 | | const char **outArray; |
241 | | int32_t *outCategories; |
242 | | int32_t outSize; |
243 | | BytesTrieBuilder &trieBuilder; |
244 | | int32_t trieValueOffset; |
245 | | |
246 | | StringPiece quantitiesTrieData; |
247 | | |
248 | | int32_t outIndex; |
249 | | }; |
250 | | |
251 | | /** |
252 | | * A ResourceSink that collects information from `unitQuantities` in the `units` |
253 | | * resource to provide key->value lookups from base unit to category, as well as |
254 | | * preserving ordering information for these categories. See `units.txt`. |
255 | | * |
256 | | * For example: "kilogram" -> "mass", "meter-per-second" -> "speed". |
257 | | * |
258 | | * In C++ unitQuantity values are collected in order into a char16_t* array, while |
259 | | * unitQuantity keys are added added to a TrieBuilder, with associated values |
260 | | * being the index into the aforementioned char16_t* array. |
261 | | */ |
262 | | class CategoriesSink : public icu::ResourceSink { |
263 | | public: |
264 | | /** |
265 | | * Constructor. |
266 | | * @param out Array of char16_t* to which unitQuantity values will be saved. |
267 | | * The pointers returned not owned: they point directly at the resource |
268 | | * strings in static memory. |
269 | | * @param outSize The size of the `out` array. |
270 | | * @param trieBuilder The trie builder to which the keys (base units) of |
271 | | * each unitQuantity will be added, each with value being the offset |
272 | | * into `out`. |
273 | | */ |
274 | | explicit CategoriesSink(const char16_t **out, int32_t &outSize, BytesTrieBuilder &trieBuilder) |
275 | 0 | : outQuantitiesArray(out), outSize(outSize), trieBuilder(trieBuilder), outIndex(0) {} |
276 | | |
277 | 0 | void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override { |
278 | 0 | ResourceArray array = value.getArray(status); |
279 | 0 | if (U_FAILURE(status)) { |
280 | 0 | return; |
281 | 0 | } |
282 | | |
283 | 0 | if (outIndex + array.getSize() > outSize) { |
284 | 0 | status = U_INDEX_OUTOFBOUNDS_ERROR; |
285 | 0 | return; |
286 | 0 | } |
287 | | |
288 | 0 | for (int32_t i = 0; array.getValue(i, value); ++i) { |
289 | 0 | U_ASSERT(outIndex < outSize); |
290 | 0 | ResourceTable table = value.getTable(status); |
291 | 0 | if (U_FAILURE(status)) { |
292 | 0 | return; |
293 | 0 | } |
294 | 0 | if (table.getSize() != 1) { |
295 | 0 | status = U_INVALID_FORMAT_ERROR; |
296 | 0 | return; |
297 | 0 | } |
298 | 0 | const char *key; |
299 | 0 | table.getKeyAndValue(0, key, value); |
300 | 0 | int32_t uTmpLen; |
301 | 0 | outQuantitiesArray[outIndex] = value.getString(uTmpLen, status); |
302 | 0 | trieBuilder.add(key, outIndex, status); |
303 | 0 | outIndex++; |
304 | 0 | } |
305 | 0 | } |
306 | | |
307 | | private: |
308 | | const char16_t **outQuantitiesArray; |
309 | | int32_t &outSize; |
310 | | BytesTrieBuilder &trieBuilder; |
311 | | |
312 | | int32_t outIndex; |
313 | | }; |
314 | | |
315 | | icu::UInitOnce gUnitExtrasInitOnce {}; |
316 | | |
317 | | // Array of simple unit IDs. |
318 | | // |
319 | | // The array memory itself is owned by this pointer, but the individual char* in |
320 | | // that array point at static memory. (Note that these char* are also returned |
321 | | // by SingleUnitImpl::getSimpleUnitID().) |
322 | | const char **gSimpleUnits = nullptr; |
323 | | |
324 | | // Maps from the value associated with each simple unit ID to an index into the |
325 | | // gCategories array. |
326 | | int32_t *gSimpleUnitCategories = nullptr; |
327 | | |
328 | | char *gSerializedUnitExtrasStemTrie = nullptr; |
329 | | |
330 | | // Array of char16_t* pointing at the unit categories (aka "quantities", aka |
331 | | // "types"), as found in the `unitQuantities` resource. The array memory itself |
332 | | // is owned by this pointer, but the individual char16_t* in that array point at |
333 | | // static memory. |
334 | | const char16_t **gCategories = nullptr; |
335 | | // Number of items in `gCategories`. |
336 | | int32_t gCategoriesCount = 0; |
337 | | // Serialized BytesTrie for mapping from base units to indices into gCategories. |
338 | | char *gSerializedUnitCategoriesTrie = nullptr; |
339 | | |
340 | 0 | UBool U_CALLCONV cleanupUnitExtras() { |
341 | 0 | uprv_free(gSerializedUnitCategoriesTrie); |
342 | 0 | gSerializedUnitCategoriesTrie = nullptr; |
343 | 0 | uprv_free(gCategories); |
344 | 0 | gCategories = nullptr; |
345 | 0 | uprv_free(gSerializedUnitExtrasStemTrie); |
346 | 0 | gSerializedUnitExtrasStemTrie = nullptr; |
347 | 0 | uprv_free(gSimpleUnitCategories); |
348 | 0 | gSimpleUnitCategories = nullptr; |
349 | 0 | uprv_free(gSimpleUnits); |
350 | 0 | gSimpleUnits = nullptr; |
351 | 0 | gUnitExtrasInitOnce.reset(); |
352 | 0 | return true; |
353 | 0 | } |
354 | | |
355 | 0 | void U_CALLCONV initUnitExtras(UErrorCode& status) { |
356 | 0 | ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras); |
357 | 0 | LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status)); |
358 | | |
359 | | // Collect unitQuantities information into gSerializedUnitCategoriesTrie and gCategories. |
360 | 0 | const char *CATEGORY_TABLE_NAME = "unitQuantities"; |
361 | 0 | LocalUResourceBundlePointer unitQuantities( |
362 | 0 | ures_getByKey(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, nullptr, &status)); |
363 | 0 | if (U_FAILURE(status)) { return; } |
364 | 0 | gCategoriesCount = unitQuantities.getAlias()->fSize; |
365 | 0 | size_t quantitiesMallocSize = sizeof(char16_t *) * gCategoriesCount; |
366 | 0 | gCategories = static_cast<const char16_t **>(uprv_malloc(quantitiesMallocSize)); |
367 | 0 | if (gCategories == nullptr) { |
368 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
369 | 0 | return; |
370 | 0 | } |
371 | 0 | uprv_memset(gCategories, 0, quantitiesMallocSize); |
372 | 0 | BytesTrieBuilder quantitiesBuilder(status); |
373 | 0 | CategoriesSink categoriesSink(gCategories, gCategoriesCount, quantitiesBuilder); |
374 | 0 | ures_getAllItemsWithFallback(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, categoriesSink, status); |
375 | 0 | StringPiece resultQuantities = quantitiesBuilder.buildStringPiece(USTRINGTRIE_BUILD_FAST, status); |
376 | 0 | if (U_FAILURE(status)) { return; } |
377 | | // Copy the result into the global constant pointer |
378 | 0 | size_t numBytesQuantities = resultQuantities.length(); |
379 | 0 | gSerializedUnitCategoriesTrie = static_cast<char *>(uprv_malloc(numBytesQuantities)); |
380 | 0 | if (gSerializedUnitCategoriesTrie == nullptr) { |
381 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
382 | 0 | return; |
383 | 0 | } |
384 | 0 | uprv_memcpy(gSerializedUnitCategoriesTrie, resultQuantities.data(), numBytesQuantities); |
385 | | |
386 | | // Build the BytesTrie that Parser needs for parsing unit identifiers. |
387 | |
|
388 | 0 | BytesTrieBuilder b(status); |
389 | 0 | if (U_FAILURE(status)) { return; } |
390 | | |
391 | | // Add SI and binary prefixes |
392 | 0 | for (const auto& unitPrefixInfo : gUnitPrefixStrings) { |
393 | 0 | b.add(unitPrefixInfo.string, unitPrefixInfo.value + kPrefixOffset, status); |
394 | 0 | } |
395 | 0 | if (U_FAILURE(status)) { return; } |
396 | | |
397 | | // Add syntax parts (compound, power prefixes) |
398 | 0 | b.add("-per-", COMPOUND_PART_PER, status); |
399 | 0 | b.add("-", COMPOUND_PART_TIMES, status); |
400 | 0 | b.add("-and-", COMPOUND_PART_AND, status); |
401 | 0 | b.add("per-", INITIAL_COMPOUND_PART_PER, status); |
402 | 0 | b.add("square-", POWER_PART_P2, status); |
403 | 0 | b.add("cubic-", POWER_PART_P3, status); |
404 | 0 | b.add("pow2-", POWER_PART_P2, status); |
405 | 0 | b.add("pow3-", POWER_PART_P3, status); |
406 | 0 | b.add("pow4-", POWER_PART_P4, status); |
407 | 0 | b.add("pow5-", POWER_PART_P5, status); |
408 | 0 | b.add("pow6-", POWER_PART_P6, status); |
409 | 0 | b.add("pow7-", POWER_PART_P7, status); |
410 | 0 | b.add("pow8-", POWER_PART_P8, status); |
411 | 0 | b.add("pow9-", POWER_PART_P9, status); |
412 | 0 | b.add("pow10-", POWER_PART_P10, status); |
413 | 0 | b.add("pow11-", POWER_PART_P11, status); |
414 | 0 | b.add("pow12-", POWER_PART_P12, status); |
415 | 0 | b.add("pow13-", POWER_PART_P13, status); |
416 | 0 | b.add("pow14-", POWER_PART_P14, status); |
417 | 0 | b.add("pow15-", POWER_PART_P15, status); |
418 | 0 | if (U_FAILURE(status)) { return; } |
419 | | |
420 | | // Add sanctioned simple units by offset: simple units all have entries in |
421 | | // units/convertUnits resources. |
422 | 0 | LocalUResourceBundlePointer convertUnits( |
423 | 0 | ures_getByKey(unitsBundle.getAlias(), "convertUnits", nullptr, &status)); |
424 | 0 | if (U_FAILURE(status)) { return; } |
425 | | |
426 | | // Allocate enough space: with identifierSink below skipping kilogram, we're |
427 | | // probably allocating one more than needed. |
428 | 0 | int32_t simpleUnitsCount = convertUnits.getAlias()->fSize; |
429 | 0 | int32_t arrayMallocSize = sizeof(char *) * simpleUnitsCount; |
430 | 0 | gSimpleUnits = static_cast<const char **>(uprv_malloc(arrayMallocSize)); |
431 | 0 | if (gSimpleUnits == nullptr) { |
432 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
433 | 0 | return; |
434 | 0 | } |
435 | 0 | uprv_memset(gSimpleUnits, 0, arrayMallocSize); |
436 | 0 | arrayMallocSize = sizeof(int32_t) * simpleUnitsCount; |
437 | 0 | gSimpleUnitCategories = static_cast<int32_t *>(uprv_malloc(arrayMallocSize)); |
438 | 0 | if (gSimpleUnitCategories == nullptr) { |
439 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
440 | 0 | return; |
441 | 0 | } |
442 | 0 | uprv_memset(gSimpleUnitCategories, 0, arrayMallocSize); |
443 | | |
444 | | // Populate gSimpleUnits and build the associated trie. |
445 | 0 | SimpleUnitIdentifiersSink identifierSink(resultQuantities, gSimpleUnits, gSimpleUnitCategories, |
446 | 0 | simpleUnitsCount, b, kSimpleUnitOffset); |
447 | 0 | ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status); |
448 | | |
449 | | // Build the CharsTrie |
450 | | // TODO: Use SLOW or FAST here? |
451 | 0 | StringPiece result = b.buildStringPiece(USTRINGTRIE_BUILD_FAST, status); |
452 | 0 | if (U_FAILURE(status)) { return; } |
453 | | |
454 | | // Copy the result into the global constant pointer |
455 | 0 | size_t numBytes = result.length(); |
456 | 0 | gSerializedUnitExtrasStemTrie = static_cast<char *>(uprv_malloc(numBytes)); |
457 | 0 | if (gSerializedUnitExtrasStemTrie == nullptr) { |
458 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
459 | 0 | return; |
460 | 0 | } |
461 | 0 | uprv_memcpy(gSerializedUnitExtrasStemTrie, result.data(), numBytes); |
462 | 0 | } |
463 | | |
464 | | class Token { |
465 | | public: |
466 | 0 | Token(int32_t match) : fMatch(match) {} |
467 | | |
468 | | enum Type { |
469 | | TYPE_UNDEFINED, |
470 | | TYPE_PREFIX, |
471 | | // Token type for "-per-", "-", and "-and-". |
472 | | TYPE_COMPOUND_PART, |
473 | | // Token type for "per-". |
474 | | TYPE_INITIAL_COMPOUND_PART, |
475 | | TYPE_POWER_PART, |
476 | | TYPE_SIMPLE_UNIT, |
477 | | }; |
478 | | |
479 | | // Calling getType() is invalid, resulting in an assertion failure, if Token |
480 | | // value isn't positive. |
481 | 0 | Type getType() const { |
482 | 0 | U_ASSERT(fMatch > 0); |
483 | 0 | if (fMatch < kCompoundPartOffset) { |
484 | 0 | return TYPE_PREFIX; |
485 | 0 | } |
486 | 0 | if (fMatch < kInitialCompoundPartOffset) { |
487 | 0 | return TYPE_COMPOUND_PART; |
488 | 0 | } |
489 | 0 | if (fMatch < kPowerPartOffset) { |
490 | 0 | return TYPE_INITIAL_COMPOUND_PART; |
491 | 0 | } |
492 | 0 | if (fMatch < kSimpleUnitOffset) { |
493 | 0 | return TYPE_POWER_PART; |
494 | 0 | } |
495 | 0 | return TYPE_SIMPLE_UNIT; |
496 | 0 | } |
497 | | |
498 | 0 | UMeasurePrefix getUnitPrefix() const { |
499 | 0 | U_ASSERT(getType() == TYPE_PREFIX); |
500 | 0 | return static_cast<UMeasurePrefix>(fMatch - kPrefixOffset); |
501 | 0 | } |
502 | | |
503 | | // Valid only for tokens with type TYPE_COMPOUND_PART. |
504 | 0 | int32_t getMatch() const { |
505 | 0 | U_ASSERT(getType() == TYPE_COMPOUND_PART); |
506 | 0 | return fMatch; |
507 | 0 | } |
508 | | |
509 | 0 | int32_t getInitialCompoundPart() const { |
510 | 0 | // Even if there is only one InitialCompoundPart value, we have this |
511 | 0 | // function for the simplicity of code consistency. |
512 | 0 | U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART); |
513 | 0 | // Defensive: if this assert fails, code using this function also needs |
514 | 0 | // to change. |
515 | 0 | U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER); |
516 | 0 | return fMatch; |
517 | 0 | } |
518 | | |
519 | 0 | int8_t getPower() const { |
520 | 0 | U_ASSERT(getType() == TYPE_POWER_PART); |
521 | 0 | return static_cast<int8_t>(fMatch - kPowerPartOffset); |
522 | 0 | } |
523 | | |
524 | 0 | int32_t getSimpleUnitIndex() const { |
525 | 0 | U_ASSERT(getType() == TYPE_SIMPLE_UNIT); |
526 | 0 | return fMatch - kSimpleUnitOffset; |
527 | 0 | } |
528 | | |
529 | | private: |
530 | | int32_t fMatch; |
531 | | }; |
532 | | |
533 | | class Parser { |
534 | | public: |
535 | | /** |
536 | | * Factory function for parsing the given identifier. |
537 | | * |
538 | | * @param source The identifier to parse. This function does not make a copy |
539 | | * of source: the underlying string that source points at, must outlive the |
540 | | * parser. |
541 | | * @param status ICU error code. |
542 | | */ |
543 | 0 | static Parser from(StringPiece source, UErrorCode& status) { |
544 | 0 | if (U_FAILURE(status)) { |
545 | 0 | return Parser(); |
546 | 0 | } |
547 | 0 | umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); |
548 | 0 | if (U_FAILURE(status)) { |
549 | 0 | return Parser(); |
550 | 0 | } |
551 | 0 | return Parser(source); |
552 | 0 | } |
553 | | |
554 | 0 | MeasureUnitImpl parse(UErrorCode& status) { |
555 | 0 | MeasureUnitImpl result; |
556 | |
|
557 | 0 | if (U_FAILURE(status)) { |
558 | 0 | return result; |
559 | 0 | } |
560 | 0 | if (fSource.empty()) { |
561 | | // The dimenionless unit: nothing to parse. leave result as is. |
562 | 0 | return result; |
563 | 0 | } |
564 | | |
565 | 0 | while (hasNext()) { |
566 | 0 | bool sawAnd = false; |
567 | |
|
568 | 0 | SingleUnitImpl singleUnit = nextSingleUnit(sawAnd, status); |
569 | 0 | if (U_FAILURE(status)) { |
570 | 0 | return result; |
571 | 0 | } |
572 | | |
573 | 0 | bool added = result.appendSingleUnit(singleUnit, status); |
574 | 0 | if (U_FAILURE(status)) { |
575 | 0 | return result; |
576 | 0 | } |
577 | | |
578 | 0 | if (sawAnd && !added) { |
579 | | // Two similar units are not allowed in a mixed unit. |
580 | 0 | status = kUnitIdentifierSyntaxError; |
581 | 0 | return result; |
582 | 0 | } |
583 | | |
584 | 0 | if (result.singleUnits.length() >= 2) { |
585 | | // nextSingleUnit fails appropriately for "per" and "and" in the |
586 | | // same identifier. It doesn't fail for other compound units |
587 | | // (COMPOUND_PART_TIMES). Consequently we take care of that |
588 | | // here. |
589 | 0 | UMeasureUnitComplexity complexity = |
590 | 0 | sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND; |
591 | 0 | if (result.singleUnits.length() == 2) { |
592 | | // After appending two singleUnits, the complexity will be `UMEASURE_UNIT_COMPOUND` |
593 | 0 | U_ASSERT(result.complexity == UMEASURE_UNIT_COMPOUND); |
594 | 0 | result.complexity = complexity; |
595 | 0 | } else if (result.complexity != complexity) { |
596 | | // Can't have mixed compound units |
597 | 0 | status = kUnitIdentifierSyntaxError; |
598 | 0 | return result; |
599 | 0 | } |
600 | 0 | } |
601 | 0 | } |
602 | | |
603 | 0 | return result; |
604 | 0 | } |
605 | | |
606 | | private: |
607 | | // Tracks parser progress: the offset into fSource. |
608 | | int32_t fIndex = 0; |
609 | | |
610 | | // Since we're not owning this memory, whatever is passed to the constructor |
611 | | // should live longer than this Parser - and the parser shouldn't return any |
612 | | // references to that string. |
613 | | StringPiece fSource; |
614 | | BytesTrie fTrie; |
615 | | |
616 | | // Set to true when we've seen a "-per-" or a "per-", after which all units |
617 | | // are in the denominator. Until we find an "-and-", at which point the |
618 | | // identifier is invalid pending TODO(CLDR-13701). |
619 | | bool fAfterPer = false; |
620 | | |
621 | 0 | Parser() : fSource(""), fTrie(u"") {} |
622 | | |
623 | | Parser(StringPiece source) |
624 | 0 | : fSource(source), fTrie(gSerializedUnitExtrasStemTrie) {} |
625 | | |
626 | 0 | inline bool hasNext() const { |
627 | 0 | return fIndex < fSource.length(); |
628 | 0 | } |
629 | | |
630 | | // Returns the next Token parsed from fSource, advancing fIndex to the end |
631 | | // of that token in fSource. In case of U_FAILURE(status), the token |
632 | | // returned will cause an abort if getType() is called on it. |
633 | 0 | Token nextToken(UErrorCode& status) { |
634 | 0 | fTrie.reset(); |
635 | 0 | int32_t match = -1; |
636 | | // Saves the position in the fSource string for the end of the most |
637 | | // recent matching token. |
638 | 0 | int32_t previ = -1; |
639 | | // Find the longest token that matches a value in the trie: |
640 | 0 | while (fIndex < fSource.length()) { |
641 | 0 | auto result = fTrie.next(fSource.data()[fIndex++]); |
642 | 0 | if (result == USTRINGTRIE_NO_MATCH) { |
643 | 0 | break; |
644 | 0 | } else if (result == USTRINGTRIE_NO_VALUE) { |
645 | 0 | continue; |
646 | 0 | } |
647 | 0 | U_ASSERT(USTRINGTRIE_HAS_VALUE(result)); |
648 | 0 | match = fTrie.getValue(); |
649 | 0 | previ = fIndex; |
650 | 0 | if (result == USTRINGTRIE_FINAL_VALUE) { |
651 | 0 | break; |
652 | 0 | } |
653 | 0 | U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE); |
654 | | // continue; |
655 | 0 | } |
656 | |
|
657 | 0 | if (match < 0) { |
658 | 0 | status = kUnitIdentifierSyntaxError; |
659 | 0 | } else { |
660 | 0 | fIndex = previ; |
661 | 0 | } |
662 | 0 | return Token(match); |
663 | 0 | } |
664 | | |
665 | | /** |
666 | | * Returns the next "single unit" via result. |
667 | | * |
668 | | * If a "-per-" was parsed, the result will have appropriate negative |
669 | | * dimensionality. |
670 | | * |
671 | | * Returns an error if we parse both compound units and "-and-", since mixed |
672 | | * compound units are not yet supported - TODO(CLDR-13701). |
673 | | * |
674 | | * @param result Will be overwritten by the result, if status shows success. |
675 | | * @param sawAnd If an "-and-" was parsed prior to finding the "single |
676 | | * unit", sawAnd is set to true. If not, it is left as is. |
677 | | * @param status ICU error code. |
678 | | */ |
679 | 0 | SingleUnitImpl nextSingleUnit(bool &sawAnd, UErrorCode &status) { |
680 | 0 | SingleUnitImpl result; |
681 | 0 | if (U_FAILURE(status)) { |
682 | 0 | return result; |
683 | 0 | } |
684 | | |
685 | | // state: |
686 | | // 0 = no tokens seen yet (will accept power, SI or binary prefix, or simple unit) |
687 | | // 1 = power token seen (will not accept another power token) |
688 | | // 2 = SI or binary prefix token seen (will not accept a power, or SI or binary prefix token) |
689 | 0 | int32_t state = 0; |
690 | |
|
691 | 0 | bool atStart = fIndex == 0; |
692 | 0 | Token token = nextToken(status); |
693 | 0 | if (U_FAILURE(status)) { |
694 | 0 | return result; |
695 | 0 | } |
696 | | |
697 | 0 | if (atStart) { |
698 | | // Identifiers optionally start with "per-". |
699 | 0 | if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) { |
700 | 0 | U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER); |
701 | 0 | fAfterPer = true; |
702 | 0 | result.dimensionality = -1; |
703 | |
|
704 | 0 | token = nextToken(status); |
705 | 0 | if (U_FAILURE(status)) { |
706 | 0 | return result; |
707 | 0 | } |
708 | 0 | } |
709 | 0 | } else { |
710 | | // All other SingleUnit's are separated from previous SingleUnit's |
711 | | // via a compound part: |
712 | 0 | if (token.getType() != Token::TYPE_COMPOUND_PART) { |
713 | 0 | status = kUnitIdentifierSyntaxError; |
714 | 0 | return result; |
715 | 0 | } |
716 | | |
717 | 0 | switch (token.getMatch()) { |
718 | 0 | case COMPOUND_PART_PER: |
719 | 0 | if (sawAnd) { |
720 | | // Mixed compound units not yet supported, |
721 | | // TODO(CLDR-13701). |
722 | 0 | status = kUnitIdentifierSyntaxError; |
723 | 0 | return result; |
724 | 0 | } |
725 | 0 | fAfterPer = true; |
726 | 0 | result.dimensionality = -1; |
727 | 0 | break; |
728 | | |
729 | 0 | case COMPOUND_PART_TIMES: |
730 | 0 | if (fAfterPer) { |
731 | 0 | result.dimensionality = -1; |
732 | 0 | } |
733 | 0 | break; |
734 | | |
735 | 0 | case COMPOUND_PART_AND: |
736 | 0 | if (fAfterPer) { |
737 | | // Can't start with "-and-", and mixed compound units |
738 | | // not yet supported, TODO(CLDR-13701). |
739 | 0 | status = kUnitIdentifierSyntaxError; |
740 | 0 | return result; |
741 | 0 | } |
742 | 0 | sawAnd = true; |
743 | 0 | break; |
744 | 0 | } |
745 | | |
746 | 0 | token = nextToken(status); |
747 | 0 | if (U_FAILURE(status)) { |
748 | 0 | return result; |
749 | 0 | } |
750 | 0 | } |
751 | | |
752 | | // Read tokens until we have a complete SingleUnit or we reach the end. |
753 | 0 | while (true) { |
754 | 0 | switch (token.getType()) { |
755 | 0 | case Token::TYPE_POWER_PART: |
756 | 0 | if (state > 0) { |
757 | 0 | status = kUnitIdentifierSyntaxError; |
758 | 0 | return result; |
759 | 0 | } |
760 | 0 | result.dimensionality *= token.getPower(); |
761 | 0 | state = 1; |
762 | 0 | break; |
763 | | |
764 | 0 | case Token::TYPE_PREFIX: |
765 | 0 | if (state > 1) { |
766 | 0 | status = kUnitIdentifierSyntaxError; |
767 | 0 | return result; |
768 | 0 | } |
769 | 0 | result.unitPrefix = token.getUnitPrefix(); |
770 | 0 | state = 2; |
771 | 0 | break; |
772 | | |
773 | 0 | case Token::TYPE_SIMPLE_UNIT: |
774 | 0 | result.index = token.getSimpleUnitIndex(); |
775 | 0 | return result; |
776 | | |
777 | 0 | default: |
778 | 0 | status = kUnitIdentifierSyntaxError; |
779 | 0 | return result; |
780 | 0 | } |
781 | | |
782 | 0 | if (!hasNext()) { |
783 | | // We ran out of tokens before finding a complete single unit. |
784 | 0 | status = kUnitIdentifierSyntaxError; |
785 | 0 | return result; |
786 | 0 | } |
787 | 0 | token = nextToken(status); |
788 | 0 | if (U_FAILURE(status)) { |
789 | 0 | return result; |
790 | 0 | } |
791 | 0 | } |
792 | | |
793 | 0 | return result; |
794 | 0 | } |
795 | | }; |
796 | | |
797 | | // Sorting function wrapping SingleUnitImpl::compareTo for use with uprv_sortArray. |
798 | | int32_t U_CALLCONV |
799 | 0 | compareSingleUnits(const void* /*context*/, const void* left, const void* right) { |
800 | 0 | auto realLeft = static_cast<const SingleUnitImpl* const*>(left); |
801 | 0 | auto realRight = static_cast<const SingleUnitImpl* const*>(right); |
802 | 0 | return (*realLeft)->compareTo(**realRight); |
803 | 0 | } |
804 | | |
805 | | // Returns an index into the gCategories array, for the "unitQuantity" (aka |
806 | | // "type" or "category") associated with the given base unit identifier. Returns |
807 | | // -1 on failure, together with U_UNSUPPORTED_ERROR. |
808 | 0 | int32_t getUnitCategoryIndex(BytesTrie &trie, StringPiece baseUnitIdentifier, UErrorCode &status) { |
809 | 0 | UStringTrieResult result = trie.reset().next(baseUnitIdentifier.data(), baseUnitIdentifier.length()); |
810 | 0 | if (!USTRINGTRIE_HAS_VALUE(result)) { |
811 | 0 | status = U_UNSUPPORTED_ERROR; |
812 | 0 | return -1; |
813 | 0 | } |
814 | | |
815 | 0 | return trie.getValue(); |
816 | 0 | } |
817 | | |
818 | | } // namespace |
819 | | |
820 | | U_CAPI int32_t U_EXPORT2 |
821 | 0 | umeas_getPrefixPower(UMeasurePrefix unitPrefix) { |
822 | 0 | if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN && |
823 | 0 | unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) { |
824 | 0 | return unitPrefix - UMEASURE_PREFIX_INTERNAL_ONE_BIN; |
825 | 0 | } |
826 | 0 | U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI && |
827 | 0 | unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI); |
828 | 0 | return unitPrefix - UMEASURE_PREFIX_ONE; |
829 | 0 | } |
830 | | |
831 | | U_CAPI int32_t U_EXPORT2 |
832 | 0 | umeas_getPrefixBase(UMeasurePrefix unitPrefix) { |
833 | 0 | if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN && |
834 | 0 | unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) { |
835 | 0 | return 1024; |
836 | 0 | } |
837 | 0 | U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI && |
838 | 0 | unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI); |
839 | 0 | return 10; |
840 | 0 | } |
841 | | |
842 | 0 | CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status) { |
843 | 0 | CharString result; |
844 | 0 | MeasureUnitImpl baseUnitImpl = baseMeasureUnitImpl.copy(status); |
845 | 0 | UErrorCode localStatus = U_ZERO_ERROR; |
846 | 0 | umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); |
847 | 0 | if (U_FAILURE(status)) { |
848 | 0 | return result; |
849 | 0 | } |
850 | 0 | BytesTrie trie(gSerializedUnitCategoriesTrie); |
851 | |
|
852 | 0 | baseUnitImpl.serialize(status); |
853 | 0 | StringPiece identifier = baseUnitImpl.identifier.data(); |
854 | 0 | int32_t idx = getUnitCategoryIndex(trie, identifier, localStatus); |
855 | 0 | if (U_FAILURE(status)) { |
856 | 0 | return result; |
857 | 0 | } |
858 | | |
859 | | // In case the base unit identifier did not match any entry. |
860 | 0 | if (U_FAILURE(localStatus)) { |
861 | 0 | localStatus = U_ZERO_ERROR; |
862 | 0 | baseUnitImpl.takeReciprocal(status); |
863 | 0 | baseUnitImpl.serialize(status); |
864 | 0 | identifier.set(baseUnitImpl.identifier.data()); |
865 | 0 | idx = getUnitCategoryIndex(trie, identifier, localStatus); |
866 | |
|
867 | 0 | if (U_FAILURE(status)) { |
868 | 0 | return result; |
869 | 0 | } |
870 | 0 | } |
871 | | |
872 | | // In case the reciprocal of the base unit identifier did not match any entry. |
873 | 0 | MeasureUnitImpl simplifiedUnit = baseMeasureUnitImpl.copyAndSimplify(status); |
874 | 0 | if (U_FAILURE(status)) { |
875 | 0 | return result; |
876 | 0 | } |
877 | 0 | if (U_FAILURE(localStatus)) { |
878 | 0 | localStatus = U_ZERO_ERROR; |
879 | 0 | simplifiedUnit.serialize(status); |
880 | 0 | identifier.set(simplifiedUnit.identifier.data()); |
881 | 0 | idx = getUnitCategoryIndex(trie, identifier, localStatus); |
882 | |
|
883 | 0 | if (U_FAILURE(status)) { |
884 | 0 | return result; |
885 | 0 | } |
886 | 0 | } |
887 | | |
888 | | // In case the simplified base unit identifier did not match any entry. |
889 | 0 | if (U_FAILURE(localStatus)) { |
890 | 0 | localStatus = U_ZERO_ERROR; |
891 | 0 | simplifiedUnit.takeReciprocal(status); |
892 | 0 | simplifiedUnit.serialize(status); |
893 | 0 | identifier.set(simplifiedUnit.identifier.data()); |
894 | 0 | idx = getUnitCategoryIndex(trie, identifier, localStatus); |
895 | |
|
896 | 0 | if (U_FAILURE(status)) { |
897 | 0 | return result; |
898 | 0 | } |
899 | 0 | } |
900 | | |
901 | | // If there is no match at all, throw an exception. |
902 | 0 | if (U_FAILURE(localStatus)) { |
903 | 0 | status = U_INVALID_FORMAT_ERROR; |
904 | 0 | return result; |
905 | 0 | } |
906 | | |
907 | 0 | if (idx < 0 || idx >= gCategoriesCount) { |
908 | 0 | status = U_INVALID_FORMAT_ERROR; |
909 | 0 | return result; |
910 | 0 | } |
911 | | |
912 | 0 | result.appendInvariantChars(gCategories[idx], u_strlen(gCategories[idx]), status); |
913 | 0 | return result; |
914 | 0 | } |
915 | | |
916 | | // In ICU4J, this is MeasureUnit.getSingleUnitImpl(). |
917 | 0 | SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) { |
918 | 0 | MeasureUnitImpl temp; |
919 | 0 | const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status); |
920 | 0 | if (U_FAILURE(status)) { |
921 | 0 | return {}; |
922 | 0 | } |
923 | 0 | if (impl.singleUnits.length() == 0) { |
924 | 0 | return {}; |
925 | 0 | } |
926 | 0 | if (impl.singleUnits.length() == 1) { |
927 | 0 | return *impl.singleUnits[0]; |
928 | 0 | } |
929 | 0 | status = U_ILLEGAL_ARGUMENT_ERROR; |
930 | 0 | return {}; |
931 | 0 | } |
932 | | |
933 | 0 | MeasureUnit SingleUnitImpl::build(UErrorCode& status) const { |
934 | 0 | MeasureUnitImpl temp; |
935 | 0 | temp.appendSingleUnit(*this, status); |
936 | | // TODO(icu-units#28): the MeasureUnitImpl::build() method uses |
937 | | // findBySubtype, which is relatively slow. |
938 | | // - At the time of loading the simple unit IDs, we could also save a |
939 | | // mapping to the builtin MeasureUnit type and subtype they correspond to. |
940 | | // - This method could then check dimensionality and index, and if both are |
941 | | // 1, directly return MeasureUnit instances very quickly. |
942 | 0 | return std::move(temp).build(status); |
943 | 0 | } |
944 | | |
945 | 0 | const char *SingleUnitImpl::getSimpleUnitID() const { |
946 | 0 | return gSimpleUnits[index]; |
947 | 0 | } |
948 | | |
949 | 0 | void SingleUnitImpl::appendNeutralIdentifier(CharString &result, UErrorCode &status) const UPRV_NO_SANITIZE_UNDEFINED { |
950 | 0 | int32_t absPower = std::abs(this->dimensionality); |
951 | |
|
952 | 0 | U_ASSERT(absPower > 0); // "this function does not support the dimensionless single units"; |
953 | | |
954 | 0 | if (absPower == 1) { |
955 | | // no-op |
956 | 0 | } else if (absPower == 2) { |
957 | 0 | result.append(StringPiece("square-"), status); |
958 | 0 | } else if (absPower == 3) { |
959 | 0 | result.append(StringPiece("cubic-"), status); |
960 | 0 | } else if (absPower <= 15) { |
961 | 0 | result.append(StringPiece("pow"), status); |
962 | 0 | result.appendNumber(absPower, status); |
963 | 0 | result.append(StringPiece("-"), status); |
964 | 0 | } else { |
965 | 0 | status = U_ILLEGAL_ARGUMENT_ERROR; // Unit Identifier Syntax Error |
966 | 0 | return; |
967 | 0 | } |
968 | | |
969 | 0 | if (U_FAILURE(status)) { |
970 | 0 | return; |
971 | 0 | } |
972 | | |
973 | 0 | if (this->unitPrefix != UMEASURE_PREFIX_ONE) { |
974 | 0 | bool found = false; |
975 | 0 | for (const auto &unitPrefixInfo : gUnitPrefixStrings) { |
976 | | // TODO: consider using binary search? If we do this, add a unit |
977 | | // test to ensure gUnitPrefixStrings is sorted? |
978 | 0 | if (unitPrefixInfo.value == this->unitPrefix) { |
979 | 0 | result.append(unitPrefixInfo.string, status); |
980 | 0 | found = true; |
981 | 0 | break; |
982 | 0 | } |
983 | 0 | } |
984 | 0 | if (!found) { |
985 | 0 | status = U_UNSUPPORTED_ERROR; |
986 | 0 | return; |
987 | 0 | } |
988 | 0 | } |
989 | | |
990 | 0 | result.append(StringPiece(this->getSimpleUnitID()), status); |
991 | 0 | } |
992 | | |
993 | 0 | int32_t SingleUnitImpl::getUnitCategoryIndex() const { |
994 | 0 | return gSimpleUnitCategories[index]; |
995 | 0 | } |
996 | | |
997 | 0 | MeasureUnitImpl::MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status) { |
998 | 0 | this->appendSingleUnit(singleUnit, status); |
999 | 0 | } |
1000 | | |
1001 | 0 | MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) { |
1002 | 0 | return Parser::from(identifier, status).parse(status); |
1003 | 0 | } |
1004 | | |
1005 | | const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit( |
1006 | 0 | const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) { |
1007 | 0 | if (measureUnit.fImpl) { |
1008 | 0 | return *measureUnit.fImpl; |
1009 | 0 | } else { |
1010 | 0 | memory = Parser::from(measureUnit.getIdentifier(), status).parse(status); |
1011 | 0 | return memory; |
1012 | 0 | } |
1013 | 0 | } |
1014 | | |
1015 | | MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy( |
1016 | 0 | const MeasureUnit& measureUnit, UErrorCode& status) { |
1017 | 0 | if (measureUnit.fImpl) { |
1018 | 0 | return measureUnit.fImpl->copy(status); |
1019 | 0 | } else { |
1020 | 0 | return Parser::from(measureUnit.getIdentifier(), status).parse(status); |
1021 | 0 | } |
1022 | 0 | } |
1023 | | |
1024 | 0 | void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) { |
1025 | 0 | identifier.clear(); |
1026 | 0 | for (int32_t i = 0; i < singleUnits.length(); i++) { |
1027 | 0 | singleUnits[i]->dimensionality *= -1; |
1028 | 0 | } |
1029 | 0 | } |
1030 | | |
1031 | 0 | MeasureUnitImpl MeasureUnitImpl::copyAndSimplify(UErrorCode &status) const { |
1032 | 0 | MeasureUnitImpl result; |
1033 | 0 | for (int32_t i = 0; i < singleUnits.length(); i++) { |
1034 | 0 | const SingleUnitImpl &singleUnit = *this->singleUnits[i]; |
1035 | | |
1036 | | // The following `for` loop will cause time complexity to be O(n^2). |
1037 | | // However, n is very small (number of units, generally, at maximum equal to 10) |
1038 | 0 | bool unitExist = false; |
1039 | 0 | for (int32_t j = 0; j < result.singleUnits.length(); j++) { |
1040 | 0 | if (uprv_strcmp(result.singleUnits[j]->getSimpleUnitID(), singleUnit.getSimpleUnitID()) == |
1041 | 0 | 0 && |
1042 | 0 | result.singleUnits[j]->unitPrefix == singleUnit.unitPrefix) { |
1043 | 0 | unitExist = true; |
1044 | 0 | result.singleUnits[j]->dimensionality = |
1045 | 0 | result.singleUnits[j]->dimensionality + singleUnit.dimensionality; |
1046 | 0 | break; |
1047 | 0 | } |
1048 | 0 | } |
1049 | |
|
1050 | 0 | if (!unitExist) { |
1051 | 0 | result.appendSingleUnit(singleUnit, status); |
1052 | 0 | } |
1053 | 0 | } |
1054 | |
|
1055 | 0 | return result; |
1056 | 0 | } |
1057 | | |
1058 | 0 | bool MeasureUnitImpl::appendSingleUnit(const SingleUnitImpl &singleUnit, UErrorCode &status) { |
1059 | 0 | identifier.clear(); |
1060 | |
|
1061 | 0 | if (singleUnit.isDimensionless()) { |
1062 | | // Do not append dimensionless units. |
1063 | 0 | return false; |
1064 | 0 | } |
1065 | | |
1066 | | // Find a similar unit that already exists, to attempt to coalesce |
1067 | 0 | SingleUnitImpl *oldUnit = nullptr; |
1068 | 0 | for (int32_t i = 0; i < this->singleUnits.length(); i++) { |
1069 | 0 | auto *candidate = this->singleUnits[i]; |
1070 | 0 | if (candidate->isCompatibleWith(singleUnit)) { |
1071 | 0 | oldUnit = candidate; |
1072 | 0 | } |
1073 | 0 | } |
1074 | |
|
1075 | 0 | if (oldUnit) { |
1076 | | // Both dimensionalities will be positive, or both will be negative, by |
1077 | | // virtue of isCompatibleWith(). |
1078 | 0 | oldUnit->dimensionality += singleUnit.dimensionality; |
1079 | |
|
1080 | 0 | return false; |
1081 | 0 | } |
1082 | | |
1083 | | // Add a copy of singleUnit |
1084 | | // NOTE: MaybeStackVector::emplaceBackAndCheckErrorCode creates new copy of singleUnit. |
1085 | 0 | this->singleUnits.emplaceBackAndCheckErrorCode(status, singleUnit); |
1086 | 0 | if (U_FAILURE(status)) { |
1087 | 0 | return false; |
1088 | 0 | } |
1089 | | |
1090 | | // If the MeasureUnitImpl is `UMEASURE_UNIT_SINGLE` and after the appending a unit, the `singleUnits` |
1091 | | // contains more than one. thus means the complexity should be `UMEASURE_UNIT_COMPOUND` |
1092 | 0 | if (this->singleUnits.length() > 1 && |
1093 | 0 | this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_SINGLE) { |
1094 | 0 | this->complexity = UMeasureUnitComplexity::UMEASURE_UNIT_COMPOUND; |
1095 | 0 | } |
1096 | |
|
1097 | 0 | return true; |
1098 | 0 | } |
1099 | | |
1100 | | MaybeStackVector<MeasureUnitImplWithIndex> |
1101 | 0 | MeasureUnitImpl::extractIndividualUnitsWithIndices(UErrorCode &status) const { |
1102 | 0 | MaybeStackVector<MeasureUnitImplWithIndex> result; |
1103 | |
|
1104 | 0 | if (this->complexity != UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { |
1105 | 0 | result.emplaceBackAndCheckErrorCode(status, 0, *this, status); |
1106 | 0 | return result; |
1107 | 0 | } |
1108 | | |
1109 | 0 | for (int32_t i = 0; i < singleUnits.length(); ++i) { |
1110 | 0 | result.emplaceBackAndCheckErrorCode(status, i, *singleUnits[i], status); |
1111 | 0 | if (U_FAILURE(status)) { |
1112 | 0 | return result; |
1113 | 0 | } |
1114 | 0 | } |
1115 | | |
1116 | 0 | return result; |
1117 | 0 | } |
1118 | | |
1119 | | /** |
1120 | | * Normalize a MeasureUnitImpl and generate the identifier string in place. |
1121 | | */ |
1122 | 0 | void MeasureUnitImpl::serialize(UErrorCode &status) { |
1123 | 0 | if (U_FAILURE(status)) { |
1124 | 0 | return; |
1125 | 0 | } |
1126 | | |
1127 | 0 | if (this->singleUnits.length() == 0) { |
1128 | | // Dimensionless, constructed by the default constructor. |
1129 | 0 | return; |
1130 | 0 | } |
1131 | | |
1132 | 0 | if (this->complexity == UMEASURE_UNIT_COMPOUND) { |
1133 | | // Note: don't sort a MIXED unit |
1134 | 0 | uprv_sortArray(this->singleUnits.getAlias(), this->singleUnits.length(), |
1135 | 0 | sizeof(this->singleUnits[0]), compareSingleUnits, nullptr, false, &status); |
1136 | 0 | if (U_FAILURE(status)) { |
1137 | 0 | return; |
1138 | 0 | } |
1139 | 0 | } |
1140 | | |
1141 | 0 | CharString result; |
1142 | 0 | bool beforePer = true; |
1143 | 0 | bool firstTimeNegativeDimension = false; |
1144 | 0 | for (int32_t i = 0; i < this->singleUnits.length(); i++) { |
1145 | 0 | if (beforePer && (*this->singleUnits[i]).dimensionality < 0) { |
1146 | 0 | beforePer = false; |
1147 | 0 | firstTimeNegativeDimension = true; |
1148 | 0 | } else if ((*this->singleUnits[i]).dimensionality < 0) { |
1149 | 0 | firstTimeNegativeDimension = false; |
1150 | 0 | } |
1151 | |
|
1152 | 0 | if (U_FAILURE(status)) { |
1153 | 0 | return; |
1154 | 0 | } |
1155 | | |
1156 | 0 | if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { |
1157 | 0 | if (result.length() != 0) { |
1158 | 0 | result.append(StringPiece("-and-"), status); |
1159 | 0 | } |
1160 | 0 | } else { |
1161 | 0 | if (firstTimeNegativeDimension) { |
1162 | 0 | if (result.length() == 0) { |
1163 | 0 | result.append(StringPiece("per-"), status); |
1164 | 0 | } else { |
1165 | 0 | result.append(StringPiece("-per-"), status); |
1166 | 0 | } |
1167 | 0 | } else { |
1168 | 0 | if (result.length() != 0) { |
1169 | 0 | result.append(StringPiece("-"), status); |
1170 | 0 | } |
1171 | 0 | } |
1172 | 0 | } |
1173 | |
|
1174 | 0 | this->singleUnits[i]->appendNeutralIdentifier(result, status); |
1175 | 0 | } |
1176 | | |
1177 | 0 | this->identifier = CharString(result, status); |
1178 | 0 | } |
1179 | | |
1180 | 0 | MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && { |
1181 | 0 | this->serialize(status); |
1182 | 0 | return MeasureUnit(std::move(*this)); |
1183 | 0 | } |
1184 | | |
1185 | 0 | MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) { |
1186 | 0 | return Parser::from(identifier, status).parse(status).build(status); |
1187 | 0 | } |
1188 | | |
1189 | 0 | UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const { |
1190 | 0 | MeasureUnitImpl temp; |
1191 | 0 | return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity; |
1192 | 0 | } |
1193 | | |
1194 | 0 | UMeasurePrefix MeasureUnit::getPrefix(UErrorCode& status) const { |
1195 | 0 | return SingleUnitImpl::forMeasureUnit(*this, status).unitPrefix; |
1196 | 0 | } |
1197 | | |
1198 | 0 | MeasureUnit MeasureUnit::withPrefix(UMeasurePrefix prefix, UErrorCode& status) const UPRV_NO_SANITIZE_UNDEFINED { |
1199 | 0 | SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); |
1200 | 0 | singleUnit.unitPrefix = prefix; |
1201 | 0 | return singleUnit.build(status); |
1202 | 0 | } |
1203 | | |
1204 | 0 | int32_t MeasureUnit::getDimensionality(UErrorCode& status) const { |
1205 | 0 | SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); |
1206 | 0 | if (U_FAILURE(status)) { return 0; } |
1207 | 0 | if (singleUnit.isDimensionless()) { |
1208 | 0 | return 0; |
1209 | 0 | } |
1210 | 0 | return singleUnit.dimensionality; |
1211 | 0 | } |
1212 | | |
1213 | 0 | MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const { |
1214 | 0 | SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); |
1215 | 0 | singleUnit.dimensionality = dimensionality; |
1216 | 0 | return singleUnit.build(status); |
1217 | 0 | } |
1218 | | |
1219 | 0 | MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const { |
1220 | 0 | MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status); |
1221 | 0 | impl.takeReciprocal(status); |
1222 | 0 | return std::move(impl).build(status); |
1223 | 0 | } |
1224 | | |
1225 | 0 | MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const { |
1226 | 0 | MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status); |
1227 | 0 | MeasureUnitImpl temp; |
1228 | 0 | const MeasureUnitImpl& otherImpl = MeasureUnitImpl::forMeasureUnit(other, temp, status); |
1229 | 0 | if (impl.complexity == UMEASURE_UNIT_MIXED || otherImpl.complexity == UMEASURE_UNIT_MIXED) { |
1230 | 0 | status = U_ILLEGAL_ARGUMENT_ERROR; |
1231 | 0 | return {}; |
1232 | 0 | } |
1233 | 0 | for (int32_t i = 0; i < otherImpl.singleUnits.length(); i++) { |
1234 | 0 | impl.appendSingleUnit(*otherImpl.singleUnits[i], status); |
1235 | 0 | } |
1236 | 0 | if (impl.singleUnits.length() > 1) { |
1237 | 0 | impl.complexity = UMEASURE_UNIT_COMPOUND; |
1238 | 0 | } |
1239 | 0 | return std::move(impl).build(status); |
1240 | 0 | } |
1241 | | |
1242 | 0 | LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const { |
1243 | 0 | MeasureUnitImpl temp; |
1244 | 0 | const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status); |
1245 | 0 | outCount = impl.singleUnits.length(); |
1246 | 0 | MeasureUnit* arr = new MeasureUnit[outCount]; |
1247 | 0 | if (arr == nullptr) { |
1248 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
1249 | 0 | return LocalArray<MeasureUnit>(); |
1250 | 0 | } |
1251 | 0 | for (int32_t i = 0; i < outCount; i++) { |
1252 | 0 | arr[i] = impl.singleUnits[i]->build(status); |
1253 | 0 | } |
1254 | 0 | return LocalArray<MeasureUnit>(arr, status); |
1255 | 0 | } |
1256 | | |
1257 | | |
1258 | | U_NAMESPACE_END |
1259 | | |
1260 | | #endif /* !UCONFIG_NO_FORMATTING */ |