/src/icu/source/i18n/measunit_extra.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2020 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | // Extra functions for MeasureUnit not needed for all clients. |
5 | | // Separate .o file so that it can be removed for modularity. |
6 | | |
7 | | #include "unicode/utypes.h" |
8 | | |
9 | | #if !UCONFIG_NO_FORMATTING |
10 | | |
11 | | // Allow implicit conversion from char16_t* to UnicodeString for this file: |
12 | | // Helpful in toString methods and elsewhere. |
13 | | #define UNISTR_FROM_STRING_EXPLICIT |
14 | | |
15 | | #include "charstr.h" |
16 | | #include "cmemory.h" |
17 | | #include "cstring.h" |
18 | | #include "measunit_impl.h" |
19 | | #include "resource.h" |
20 | | #include "uarrsort.h" |
21 | | #include "uassert.h" |
22 | | #include "ucln_in.h" |
23 | | #include "umutex.h" |
24 | | #include "unicode/bytestrie.h" |
25 | | #include "unicode/bytestriebuilder.h" |
26 | | #include "unicode/localpointer.h" |
27 | | #include "unicode/measunit.h" |
28 | | #include "unicode/stringpiece.h" |
29 | | #include "unicode/stringtriebuilder.h" |
30 | | #include "unicode/ures.h" |
31 | | #include "unicode/ustringtrie.h" |
32 | | #include "uresimp.h" |
33 | | #include "util.h" |
34 | | #include <cstdlib> |
35 | | |
36 | | U_NAMESPACE_BEGIN |
37 | | |
38 | | |
39 | | namespace { |
40 | | |
41 | | // TODO: Propose a new error code for this? |
42 | | constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR; |
43 | | |
44 | | // Trie value offset for SI or binary prefixes. This is big enough to ensure we only |
45 | | // insert positive integers into the trie. |
46 | | constexpr int32_t kPrefixOffset = 64; |
47 | | static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_BIN > 0, |
48 | | "kPrefixOffset is too small for minimum UMeasurePrefix value"); |
49 | | static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_SI > 0, |
50 | | "kPrefixOffset is too small for minimum UMeasurePrefix value"); |
51 | | |
52 | | // Trie value offset for compound parts, e.g. "-per-", "-", "-and-". |
53 | | constexpr int32_t kCompoundPartOffset = 128; |
54 | | static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_BIN, |
55 | | "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens"); |
56 | | static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_SI, |
57 | | "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens"); |
58 | | |
59 | | enum CompoundPart { |
60 | | // Represents "-per-" |
61 | | COMPOUND_PART_PER = kCompoundPartOffset, |
62 | | // Represents "-" |
63 | | COMPOUND_PART_TIMES, |
64 | | // Represents "-and-" |
65 | | COMPOUND_PART_AND, |
66 | | }; |
67 | | |
68 | | // Trie value offset for "per-". |
69 | | constexpr int32_t kInitialCompoundPartOffset = 192; |
70 | | |
71 | | enum InitialCompoundPart { |
72 | | // Represents "per-", the only compound part that can appear at the start of |
73 | | // an identifier. |
74 | | INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset, |
75 | | }; |
76 | | |
77 | | // Trie value offset for powers like "square-", "cubic-", "pow2-" etc. |
78 | | constexpr int32_t kPowerPartOffset = 256; |
79 | | |
80 | | enum PowerPart { |
81 | | POWER_PART_P2 = kPowerPartOffset + 2, |
82 | | POWER_PART_P3, |
83 | | POWER_PART_P4, |
84 | | POWER_PART_P5, |
85 | | POWER_PART_P6, |
86 | | POWER_PART_P7, |
87 | | POWER_PART_P8, |
88 | | POWER_PART_P9, |
89 | | POWER_PART_P10, |
90 | | POWER_PART_P11, |
91 | | POWER_PART_P12, |
92 | | POWER_PART_P13, |
93 | | POWER_PART_P14, |
94 | | POWER_PART_P15, |
95 | | }; |
96 | | |
97 | | // Trie value offset for simple units, e.g. "gram", "nautical-mile", |
98 | | // "fluid-ounce-imperial". |
99 | | constexpr int32_t kSimpleUnitOffset = 512; |
100 | | |
101 | | const struct UnitPrefixStrings { |
102 | | const char* const string; |
103 | | UMeasurePrefix value; |
104 | | } gUnitPrefixStrings[] = { |
105 | | // SI prefixes |
106 | | { "yotta", UMEASURE_PREFIX_YOTTA }, |
107 | | { "zetta", UMEASURE_PREFIX_ZETTA }, |
108 | | { "exa", UMEASURE_PREFIX_EXA }, |
109 | | { "peta", UMEASURE_PREFIX_PETA }, |
110 | | { "tera", UMEASURE_PREFIX_TERA }, |
111 | | { "giga", UMEASURE_PREFIX_GIGA }, |
112 | | { "mega", UMEASURE_PREFIX_MEGA }, |
113 | | { "kilo", UMEASURE_PREFIX_KILO }, |
114 | | { "hecto", UMEASURE_PREFIX_HECTO }, |
115 | | { "deka", UMEASURE_PREFIX_DEKA }, |
116 | | { "deci", UMEASURE_PREFIX_DECI }, |
117 | | { "centi", UMEASURE_PREFIX_CENTI }, |
118 | | { "milli", UMEASURE_PREFIX_MILLI }, |
119 | | { "micro", UMEASURE_PREFIX_MICRO }, |
120 | | { "nano", UMEASURE_PREFIX_NANO }, |
121 | | { "pico", UMEASURE_PREFIX_PICO }, |
122 | | { "femto", UMEASURE_PREFIX_FEMTO }, |
123 | | { "atto", UMEASURE_PREFIX_ATTO }, |
124 | | { "zepto", UMEASURE_PREFIX_ZEPTO }, |
125 | | { "yocto", UMEASURE_PREFIX_YOCTO }, |
126 | | // Binary prefixes |
127 | | { "yobi", UMEASURE_PREFIX_YOBI }, |
128 | | { "zebi", UMEASURE_PREFIX_ZEBI }, |
129 | | { "exbi", UMEASURE_PREFIX_EXBI }, |
130 | | { "pebi", UMEASURE_PREFIX_PEBI }, |
131 | | { "tebi", UMEASURE_PREFIX_TEBI }, |
132 | | { "gibi", UMEASURE_PREFIX_GIBI }, |
133 | | { "mebi", UMEASURE_PREFIX_MEBI }, |
134 | | { "kibi", UMEASURE_PREFIX_KIBI }, |
135 | | }; |
136 | | |
137 | | /** |
138 | | * A ResourceSink that collects simple unit identifiers from the keys of the |
139 | | * convertUnits table into an array, and adds these values to a TrieBuilder, |
140 | | * with associated values being their index into this array plus a specified |
141 | | * offset. |
142 | | * |
143 | | * Example code: |
144 | | * |
145 | | * UErrorCode status = U_ZERO_ERROR; |
146 | | * BytesTrieBuilder b(status); |
147 | | * int32_t ARR_SIZE = 200; |
148 | | * const char *unitIdentifiers[ARR_SIZE]; |
148 | | * int32_t unitCategories[ARR_SIZE]; |
150 | | * SimpleUnitIdentifiersSink identifierSink(gSerializedUnitCategoriesTrie, unitIdentifiers, |
151 | | * unitCategories, ARR_SIZE, b, kTrieValueOffset); |
152 | | * LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status)); |
153 | | * ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status); |
154 | | */ |
155 | | class SimpleUnitIdentifiersSink : public icu::ResourceSink { |
156 | | public: |
157 | | /** |
158 | | * Constructor. |
159 | | * @param quantitiesTrieData The data for constructing a quantitiesTrie, |
160 | | * which maps from a simple unit identifier to an index into the |
161 | | * gCategories array. |
162 | | * @param out Array of char* to which pointers to the simple unit |
163 | | * identifiers will be saved. (Does not take ownership.) |
164 | | * @param outCategories Array of int32_t to which category indexes will be |
165 | | * saved: this corresponds to simple unit IDs saved to `out`, mapping |
166 | | * from the ID to the value produced by the quantitiesTrie (which is an |
167 | | * index into the gCategories array). |
168 | | * @param outSize The size of `out` and `outCategories`. |
169 | | * @param trieBuilder The trie builder to which the simple unit identifier |
170 | | * should be added. The trie builder must outlive this resource sink. |
171 | | * @param trieValueOffset This is added to the index of the identifier in |
172 | | * the `out` array, before adding to `trieBuilder` as the value |
173 | | * associated with the identifier. |
174 | | */ |
175 | | explicit SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData, const char **out, |
176 | | int32_t *outCategories, int32_t outSize, |
177 | | BytesTrieBuilder &trieBuilder, int32_t trieValueOffset) |
178 | 0 | : outArray(out), outCategories(outCategories), outSize(outSize), trieBuilder(trieBuilder), |
179 | 0 | trieValueOffset(trieValueOffset), quantitiesTrieData(quantitiesTrieData), outIndex(0) {} |
180 | | |
181 | | /** |
182 | | * Adds the table keys found in value to the output array and the trie builder. |
183 | | * @param key The key of the resource passed to `value`: the second |
184 | | * parameter of the ures_getAllItemsWithFallback() call. |
185 | | * @param value Should be a ResourceTable value, if |
186 | | * ures_getAllItemsWithFallback() was called correctly for this sink. |
187 | | * @param noFallback Ignored. |
188 | | * @param status The standard ICU error code output parameter. |
189 | | */ |
190 | 0 | void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) { |
191 | 0 | ResourceTable table = value.getTable(status); |
192 | 0 | if (U_FAILURE(status)) return; |
193 | | |
194 | 0 | if (outIndex + table.getSize() > outSize) { |
195 | 0 | status = U_INDEX_OUTOFBOUNDS_ERROR; |
196 | 0 | return; |
197 | 0 | } |
198 | | |
199 | 0 | BytesTrie quantitiesTrie(quantitiesTrieData.data()); |
200 | | |
201 | | // Collect keys from the table resource. |
202 | 0 | const char *simpleUnitID; |
203 | 0 | for (int32_t i = 0; table.getKeyAndValue(i, simpleUnitID, value); ++i) { |
204 | 0 | U_ASSERT(i < table.getSize()); |
205 | 0 | U_ASSERT(outIndex < outSize); |
206 | 0 | if (uprv_strcmp(simpleUnitID, "kilogram") == 0) { |
207 | | // For parsing, we use "gram", the prefixless metric mass unit. We |
208 | | // thus ignore the SI Base Unit of Mass: it exists due to being the |
209 | | // mass conversion target unit, but not needed for MeasureUnit |
210 | | // parsing. |
211 | 0 | continue; |
212 | 0 | } |
213 | 0 | outArray[outIndex] = simpleUnitID; |
214 | 0 | trieBuilder.add(simpleUnitID, trieValueOffset + outIndex, status); |
215 | | |
216 | | // Find the base target unit for this simple unit |
217 | 0 | ResourceTable table = value.getTable(status); |
218 | 0 | if (U_FAILURE(status)) { return; } |
219 | 0 | if (!table.findValue("target", value)) { |
220 | 0 | status = U_INVALID_FORMAT_ERROR; |
221 | 0 | break; |
222 | 0 | } |
223 | 0 | int32_t len; |
224 | 0 | const UChar* uTarget = value.getString(len, status); |
225 | 0 | CharString target; |
226 | 0 | target.appendInvariantChars(uTarget, len, status); |
227 | 0 | if (U_FAILURE(status)) { return; } |
228 | 0 | quantitiesTrie.reset(); |
229 | 0 | UStringTrieResult result = quantitiesTrie.next(target.data(), target.length()); |
230 | 0 | if (!USTRINGTRIE_HAS_VALUE(result)) { |
231 | 0 | status = U_INVALID_FORMAT_ERROR; |
232 | 0 | break; |
233 | 0 | } |
234 | 0 | outCategories[outIndex] = quantitiesTrie.getValue(); |
235 | |
|
236 | 0 | outIndex++; |
237 | 0 | } |
238 | 0 | } |
239 | | |
240 | | private: |
241 | | const char **outArray; |
242 | | int32_t *outCategories; |
243 | | int32_t outSize; |
244 | | BytesTrieBuilder &trieBuilder; |
245 | | int32_t trieValueOffset; |
246 | | |
247 | | StringPiece quantitiesTrieData; |
248 | | |
249 | | int32_t outIndex; |
250 | | }; |
251 | | |
252 | | /** |
253 | | * A ResourceSink that collects information from `unitQuantities` in the `units` |
254 | | * resource to provide key->value lookups from base unit to category, as well as |
255 | | * preserving ordering information for these categories. See `units.txt`. |
256 | | * |
257 | | * For example: "kilogram" -> "mass", "meter-per-second" -> "speed". |
258 | | * |
259 | | * In C++ unitQuantity values are collected in order into a UChar* array, while |
260 | | * unitQuantity keys are added to a TrieBuilder, with associated values |
261 | | * being the index into the aforementioned UChar* array. |
262 | | */ |
263 | | class CategoriesSink : public icu::ResourceSink { |
264 | | public: |
265 | | /** |
266 | | * Constructor. |
267 | | * @param out Array of UChar* to which unitQuantity values will be saved. |
268 | | * The pointers returned are not owned: they point directly at the resource |
269 | | * strings in static memory. |
270 | | * @param outSize The size of the `out` array. |
271 | | * @param trieBuilder The trie builder to which the keys (base units) of |
272 | | * each unitQuantity will be added, each with value being the offset |
273 | | * into `out`. |
274 | | */ |
275 | | explicit CategoriesSink(const UChar **out, int32_t &outSize, BytesTrieBuilder &trieBuilder) |
276 | 0 | : outQuantitiesArray(out), outSize(outSize), trieBuilder(trieBuilder), outIndex(0) {} |
277 | | |
278 | 0 | void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) { |
279 | 0 | ResourceArray array = value.getArray(status); |
280 | 0 | if (U_FAILURE(status)) { |
281 | 0 | return; |
282 | 0 | } |
283 | | |
284 | 0 | if (outIndex + array.getSize() > outSize) { |
285 | 0 | status = U_INDEX_OUTOFBOUNDS_ERROR; |
286 | 0 | return; |
287 | 0 | } |
288 | | |
289 | 0 | for (int32_t i = 0; array.getValue(i, value); ++i) { |
290 | 0 | U_ASSERT(outIndex < outSize); |
291 | 0 | ResourceTable table = value.getTable(status); |
292 | 0 | if (U_FAILURE(status)) { |
293 | 0 | return; |
294 | 0 | } |
295 | 0 | if (table.getSize() != 1) { |
296 | 0 | status = U_INVALID_FORMAT_ERROR; |
297 | 0 | return; |
298 | 0 | } |
299 | 0 | const char *key; |
300 | 0 | table.getKeyAndValue(0, key, value); |
301 | 0 | int32_t uTmpLen; |
302 | 0 | outQuantitiesArray[outIndex] = value.getString(uTmpLen, status); |
303 | 0 | trieBuilder.add(key, outIndex, status); |
304 | 0 | outIndex++; |
305 | 0 | } |
306 | 0 | } |
307 | | |
308 | | private: |
309 | | const UChar **outQuantitiesArray; |
310 | | int32_t &outSize; |
311 | | BytesTrieBuilder &trieBuilder; |
312 | | |
313 | | int32_t outIndex; |
314 | | }; |
315 | | |
316 | | icu::UInitOnce gUnitExtrasInitOnce = U_INITONCE_INITIALIZER; |
317 | | |
318 | | // Array of simple unit IDs. |
319 | | // |
320 | | // The array memory itself is owned by this pointer, but the individual char* in |
321 | | // that array point at static memory. (Note that these char* are also returned |
322 | | // by SingleUnitImpl::getSimpleUnitID().) |
323 | | const char **gSimpleUnits = nullptr; |
324 | | |
325 | | // Maps from the value associated with each simple unit ID to an index into the |
326 | | // gCategories array. |
327 | | int32_t *gSimpleUnitCategories = nullptr; |
328 | | |
329 | | char *gSerializedUnitExtrasStemTrie = nullptr; |
330 | | |
331 | | // Array of UChar* pointing at the unit categories (aka "quantities", aka |
332 | | // "types"), as found in the `unitQuantities` resource. The array memory itself |
333 | | // is owned by this pointer, but the individual UChar* in that array point at |
334 | | // static memory. |
335 | | const UChar **gCategories = nullptr; |
336 | | // Number of items in `gCategories`. |
337 | | int32_t gCategoriesCount = 0; |
338 | | // TODO: rather save an index into gCategories? |
339 | | const char *kConsumption = "consumption"; |
340 | | size_t kConsumptionLen = strlen("consumption"); |
341 | | // Serialized BytesTrie for mapping from base units to indices into gCategories. |
342 | | char *gSerializedUnitCategoriesTrie = nullptr; |
343 | | |
344 | 0 | UBool U_CALLCONV cleanupUnitExtras() { |
345 | 0 | uprv_free(gSerializedUnitCategoriesTrie); |
346 | 0 | gSerializedUnitCategoriesTrie = nullptr; |
347 | 0 | uprv_free(gCategories); |
348 | 0 | gCategories = nullptr; |
349 | 0 | uprv_free(gSerializedUnitExtrasStemTrie); |
350 | 0 | gSerializedUnitExtrasStemTrie = nullptr; |
351 | 0 | uprv_free(gSimpleUnitCategories); |
352 | 0 | gSimpleUnitCategories = nullptr; |
353 | 0 | uprv_free(gSimpleUnits); |
354 | 0 | gSimpleUnits = nullptr; |
355 | 0 | gUnitExtrasInitOnce.reset(); |
356 | 0 | return TRUE; |
357 | 0 | } |
358 | | |
359 | 0 | void U_CALLCONV initUnitExtras(UErrorCode& status) { |
360 | 0 | ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras); |
361 | 0 | LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status)); |
362 | | |
363 | | // Collect unitQuantities information into gSerializedUnitCategoriesTrie and gCategories. |
364 | 0 | const char *CATEGORY_TABLE_NAME = "unitQuantities"; |
365 | 0 | LocalUResourceBundlePointer unitQuantities( |
366 | 0 | ures_getByKey(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, nullptr, &status)); |
367 | 0 | if (U_FAILURE(status)) { return; } |
368 | 0 | gCategoriesCount = unitQuantities.getAlias()->fSize; |
369 | 0 | size_t quantitiesMallocSize = sizeof(UChar *) * gCategoriesCount; |
370 | 0 | gCategories = static_cast<const UChar **>(uprv_malloc(quantitiesMallocSize)); |
371 | 0 | if (gCategories == nullptr) { |
372 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
373 | 0 | return; |
374 | 0 | } |
375 | 0 | uprv_memset(gCategories, 0, quantitiesMallocSize); |
376 | 0 | BytesTrieBuilder quantitiesBuilder(status); |
377 | 0 | CategoriesSink categoriesSink(gCategories, gCategoriesCount, quantitiesBuilder); |
378 | 0 | ures_getAllItemsWithFallback(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, categoriesSink, status); |
379 | 0 | StringPiece resultQuantities = quantitiesBuilder.buildStringPiece(USTRINGTRIE_BUILD_FAST, status); |
380 | 0 | if (U_FAILURE(status)) { return; } |
381 | | // Copy the result into the global constant pointer |
382 | 0 | size_t numBytesQuantities = resultQuantities.length(); |
383 | 0 | gSerializedUnitCategoriesTrie = static_cast<char *>(uprv_malloc(numBytesQuantities)); |
384 | 0 | if (gSerializedUnitCategoriesTrie == nullptr) { |
385 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
386 | 0 | return; |
387 | 0 | } |
388 | 0 | uprv_memcpy(gSerializedUnitCategoriesTrie, resultQuantities.data(), numBytesQuantities); |
389 | | |
390 | | // Build the BytesTrie that Parser needs for parsing unit identifiers. |
391 | |
|
392 | 0 | BytesTrieBuilder b(status); |
393 | 0 | if (U_FAILURE(status)) { return; } |
394 | | |
395 | | // Add SI and binary prefixes |
396 | 0 | for (const auto& unitPrefixInfo : gUnitPrefixStrings) { |
397 | 0 | b.add(unitPrefixInfo.string, unitPrefixInfo.value + kPrefixOffset, status); |
398 | 0 | } |
399 | 0 | if (U_FAILURE(status)) { return; } |
400 | | |
401 | | // Add syntax parts (compound, power prefixes) |
402 | 0 | b.add("-per-", COMPOUND_PART_PER, status); |
403 | 0 | b.add("-", COMPOUND_PART_TIMES, status); |
404 | 0 | b.add("-and-", COMPOUND_PART_AND, status); |
405 | 0 | b.add("per-", INITIAL_COMPOUND_PART_PER, status); |
406 | 0 | b.add("square-", POWER_PART_P2, status); |
407 | 0 | b.add("cubic-", POWER_PART_P3, status); |
408 | 0 | b.add("pow2-", POWER_PART_P2, status); |
409 | 0 | b.add("pow3-", POWER_PART_P3, status); |
410 | 0 | b.add("pow4-", POWER_PART_P4, status); |
411 | 0 | b.add("pow5-", POWER_PART_P5, status); |
412 | 0 | b.add("pow6-", POWER_PART_P6, status); |
413 | 0 | b.add("pow7-", POWER_PART_P7, status); |
414 | 0 | b.add("pow8-", POWER_PART_P8, status); |
415 | 0 | b.add("pow9-", POWER_PART_P9, status); |
416 | 0 | b.add("pow10-", POWER_PART_P10, status); |
417 | 0 | b.add("pow11-", POWER_PART_P11, status); |
418 | 0 | b.add("pow12-", POWER_PART_P12, status); |
419 | 0 | b.add("pow13-", POWER_PART_P13, status); |
420 | 0 | b.add("pow14-", POWER_PART_P14, status); |
421 | 0 | b.add("pow15-", POWER_PART_P15, status); |
422 | 0 | if (U_FAILURE(status)) { return; } |
423 | | |
424 | | // Add sanctioned simple units by offset: simple units all have entries in |
425 | | // units/convertUnits resources. |
426 | 0 | LocalUResourceBundlePointer convertUnits( |
427 | 0 | ures_getByKey(unitsBundle.getAlias(), "convertUnits", nullptr, &status)); |
428 | 0 | if (U_FAILURE(status)) { return; } |
429 | | |
430 | | // Allocate enough space: with identifierSink below skipping kilogram, we're |
431 | | // probably allocating one more than needed. |
432 | 0 | int32_t simpleUnitsCount = convertUnits.getAlias()->fSize; |
433 | 0 | int32_t arrayMallocSize = sizeof(char *) * simpleUnitsCount; |
434 | 0 | gSimpleUnits = static_cast<const char **>(uprv_malloc(arrayMallocSize)); |
435 | 0 | if (gSimpleUnits == nullptr) { |
436 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
437 | 0 | return; |
438 | 0 | } |
439 | 0 | uprv_memset(gSimpleUnits, 0, arrayMallocSize); |
440 | 0 | arrayMallocSize = sizeof(int32_t) * simpleUnitsCount; |
441 | 0 | gSimpleUnitCategories = static_cast<int32_t *>(uprv_malloc(arrayMallocSize)); |
442 | 0 | if (gSimpleUnitCategories == nullptr) { |
443 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
444 | 0 | return; |
445 | 0 | } |
446 | 0 | uprv_memset(gSimpleUnitCategories, 0, arrayMallocSize); |
447 | | |
448 | | // Populate gSimpleUnits and build the associated trie. |
449 | 0 | SimpleUnitIdentifiersSink identifierSink(resultQuantities, gSimpleUnits, gSimpleUnitCategories, |
450 | 0 | simpleUnitsCount, b, kSimpleUnitOffset); |
451 | 0 | ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status); |
452 | | |
453 | | // Build the BytesTrie |
454 | | // TODO: Use SLOW or FAST here? |
455 | 0 | StringPiece result = b.buildStringPiece(USTRINGTRIE_BUILD_FAST, status); |
456 | 0 | if (U_FAILURE(status)) { return; } |
457 | | |
458 | | // Copy the result into the global constant pointer |
459 | 0 | size_t numBytes = result.length(); |
460 | 0 | gSerializedUnitExtrasStemTrie = static_cast<char *>(uprv_malloc(numBytes)); |
461 | 0 | if (gSerializedUnitExtrasStemTrie == nullptr) { |
462 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
463 | 0 | return; |
464 | 0 | } |
465 | 0 | uprv_memcpy(gSerializedUnitExtrasStemTrie, result.data(), numBytes); |
466 | 0 | } |
467 | | |
468 | | class Token { |
469 | | public: |
470 | 0 | Token(int32_t match) : fMatch(match) {} |
471 | | |
472 | | enum Type { |
473 | | TYPE_UNDEFINED, |
474 | | TYPE_PREFIX, |
475 | | // Token type for "-per-", "-", and "-and-". |
476 | | TYPE_COMPOUND_PART, |
477 | | // Token type for "per-". |
478 | | TYPE_INITIAL_COMPOUND_PART, |
479 | | TYPE_POWER_PART, |
480 | | TYPE_SIMPLE_UNIT, |
481 | | }; |
482 | | |
483 | | // Calling getType() is invalid, resulting in an assertion failure, if Token |
484 | | // value isn't positive. |
485 | 0 | Type getType() const { |
486 | 0 | U_ASSERT(fMatch > 0); |
487 | 0 | if (fMatch < kCompoundPartOffset) { |
488 | 0 | return TYPE_PREFIX; |
489 | 0 | } |
490 | 0 | if (fMatch < kInitialCompoundPartOffset) { |
491 | 0 | return TYPE_COMPOUND_PART; |
492 | 0 | } |
493 | 0 | if (fMatch < kPowerPartOffset) { |
494 | 0 | return TYPE_INITIAL_COMPOUND_PART; |
495 | 0 | } |
496 | 0 | if (fMatch < kSimpleUnitOffset) { |
497 | 0 | return TYPE_POWER_PART; |
498 | 0 | } |
499 | 0 | return TYPE_SIMPLE_UNIT; |
500 | 0 | } |
501 | | |
502 | 0 | UMeasurePrefix getUnitPrefix() const { |
503 | 0 | U_ASSERT(getType() == TYPE_PREFIX); |
504 | 0 | return static_cast<UMeasurePrefix>(fMatch - kPrefixOffset); |
505 | 0 | } |
506 | | |
507 | | // Valid only for tokens with type TYPE_COMPOUND_PART. |
508 | 0 | int32_t getMatch() const { |
509 | 0 | U_ASSERT(getType() == TYPE_COMPOUND_PART); |
510 | 0 | return fMatch; |
511 | 0 | } |
512 | | |
513 | 0 | int32_t getInitialCompoundPart() const { |
514 | 0 | // Even if there is only one InitialCompoundPart value, we have this |
515 | 0 | // function for the simplicity of code consistency. |
516 | 0 | U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART); |
517 | 0 | // Defensive: if this assert fails, code using this function also needs |
518 | 0 | // to change. |
519 | 0 | U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER); |
520 | 0 | return fMatch; |
521 | 0 | } |
522 | | |
523 | 0 | int8_t getPower() const { |
524 | 0 | U_ASSERT(getType() == TYPE_POWER_PART); |
525 | 0 | return static_cast<int8_t>(fMatch - kPowerPartOffset); |
526 | 0 | } |
527 | | |
528 | 0 | int32_t getSimpleUnitIndex() const { |
529 | 0 | U_ASSERT(getType() == TYPE_SIMPLE_UNIT); |
530 | 0 | return fMatch - kSimpleUnitOffset; |
531 | 0 | } |
532 | | |
533 | | private: |
534 | | int32_t fMatch; |
535 | | }; |
536 | | |
537 | | class Parser { |
538 | | public: |
539 | | /** |
540 | | * Factory function for parsing the given identifier. |
541 | | * |
542 | | * @param source The identifier to parse. This function does not make a copy |
543 | | * of source: the underlying string that source points at, must outlive the |
544 | | * parser. |
545 | | * @param status ICU error code. |
546 | | */ |
547 | 0 | static Parser from(StringPiece source, UErrorCode& status) { |
548 | 0 | if (U_FAILURE(status)) { |
549 | 0 | return Parser(); |
550 | 0 | } |
551 | 0 | umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); |
552 | 0 | if (U_FAILURE(status)) { |
553 | 0 | return Parser(); |
554 | 0 | } |
555 | 0 | return Parser(source); |
556 | 0 | } |
557 | | |
558 | 0 | MeasureUnitImpl parse(UErrorCode& status) { |
559 | 0 | MeasureUnitImpl result; |
560 | |
|
561 | 0 | if (U_FAILURE(status)) { |
562 | 0 | return result; |
563 | 0 | } |
564 | 0 | if (fSource.empty()) { |
565 | | // The dimensionless unit: nothing to parse. Leave result as is. |
566 | 0 | return result; |
567 | 0 | } |
568 | | |
569 | 0 | while (hasNext()) { |
570 | 0 | bool sawAnd = false; |
571 | |
|
572 | 0 | SingleUnitImpl singleUnit = nextSingleUnit(sawAnd, status); |
573 | 0 | if (U_FAILURE(status)) { |
574 | 0 | return result; |
575 | 0 | } |
576 | | |
577 | 0 | bool added = result.appendSingleUnit(singleUnit, status); |
578 | 0 | if (U_FAILURE(status)) { |
579 | 0 | return result; |
580 | 0 | } |
581 | | |
582 | 0 | if (sawAnd && !added) { |
583 | | // Two similar units are not allowed in a mixed unit. |
584 | 0 | status = kUnitIdentifierSyntaxError; |
585 | 0 | return result; |
586 | 0 | } |
587 | | |
588 | 0 | if (result.singleUnits.length() >= 2) { |
589 | | // nextSingleUnit fails appropriately for "per" and "and" in the |
590 | | // same identifier. It doesn't fail for other compound units |
591 | | // (COMPOUND_PART_TIMES). Consequently we take care of that |
592 | | // here. |
593 | 0 | UMeasureUnitComplexity complexity = |
594 | 0 | sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND; |
595 | 0 | if (result.singleUnits.length() == 2) { |
596 | | // After appending two singleUnits, the complexity will be `UMEASURE_UNIT_COMPOUND` |
597 | 0 | U_ASSERT(result.complexity == UMEASURE_UNIT_COMPOUND); |
598 | 0 | result.complexity = complexity; |
599 | 0 | } else if (result.complexity != complexity) { |
600 | | // Can't have mixed compound units |
601 | 0 | status = kUnitIdentifierSyntaxError; |
602 | 0 | return result; |
603 | 0 | } |
604 | 0 | } |
605 | 0 | } |
606 | | |
607 | 0 | return result; |
608 | 0 | } |
609 | | |
610 | | private: |
611 | | // Tracks parser progress: the offset into fSource. |
612 | | int32_t fIndex = 0; |
613 | | |
614 | | // Since we're not owning this memory, whatever is passed to the constructor |
615 | | // should live longer than this Parser - and the parser shouldn't return any |
616 | | // references to that string. |
617 | | StringPiece fSource; |
618 | | BytesTrie fTrie; |
619 | | |
620 | | // Set to true when we've seen a "-per-" or a "per-", after which all units |
621 | | // are in the denominator. Until we find an "-and-", at which point the |
622 | | // identifier is invalid pending TODO(CLDR-13700). |
623 | | bool fAfterPer = false; |
624 | | |
625 | 0 | Parser() : fSource(""), fTrie(u"") {} |
626 | | |
627 | | Parser(StringPiece source) |
628 | 0 | : fSource(source), fTrie(gSerializedUnitExtrasStemTrie) {} |
629 | | |
630 | 0 | inline bool hasNext() const { |
631 | 0 | return fIndex < fSource.length(); |
632 | 0 | } |
633 | | |
634 | | // Returns the next Token parsed from fSource, advancing fIndex to the end |
635 | | // of that token in fSource. In case of U_FAILURE(status), the token |
636 | | // returned will cause an abort if getType() is called on it. |
637 | 0 | Token nextToken(UErrorCode& status) { |
638 | 0 | fTrie.reset(); |
639 | 0 | int32_t match = -1; |
640 | | // Saves the position in the fSource string for the end of the most |
641 | | // recent matching token. |
642 | 0 | int32_t previ = -1; |
643 | | // Find the longest token that matches a value in the trie: |
644 | 0 | while (fIndex < fSource.length()) { |
645 | 0 | auto result = fTrie.next(fSource.data()[fIndex++]); |
646 | 0 | if (result == USTRINGTRIE_NO_MATCH) { |
647 | 0 | break; |
648 | 0 | } else if (result == USTRINGTRIE_NO_VALUE) { |
649 | 0 | continue; |
650 | 0 | } |
651 | 0 | U_ASSERT(USTRINGTRIE_HAS_VALUE(result)); |
652 | 0 | match = fTrie.getValue(); |
653 | 0 | previ = fIndex; |
654 | 0 | if (result == USTRINGTRIE_FINAL_VALUE) { |
655 | 0 | break; |
656 | 0 | } |
657 | 0 | U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE); |
658 | | // continue; |
659 | 0 | } |
660 | |
|
661 | 0 | if (match < 0) { |
662 | 0 | status = kUnitIdentifierSyntaxError; |
663 | 0 | } else { |
664 | 0 | fIndex = previ; |
665 | 0 | } |
666 | 0 | return Token(match); |
667 | 0 | } |
668 | | |
669 | | /** |
670 | | * Parses and returns the next "single unit". |
671 | | * |
672 | | * If a "-per-" was parsed, the result will have appropriate negative |
673 | | * dimensionality. |
674 | | * |
675 | | * Returns an error if we parse both compound units and "-and-", since mixed |
676 | | * compound units are not yet supported - TODO(CLDR-13700). |
677 | | * |
678 | | * @return The parsed single unit; meaningful only if status shows success. |
679 | | * @param sawAnd If an "-and-" was parsed prior to finding the "single |
680 | | * unit", sawAnd is set to true. If not, it is left as is. |
681 | | * @param status ICU error code. |
682 | | */ |
683 | 0 | SingleUnitImpl nextSingleUnit(bool &sawAnd, UErrorCode &status) { |
684 | 0 | SingleUnitImpl result; |
685 | 0 | if (U_FAILURE(status)) { |
686 | 0 | return result; |
687 | 0 | } |
688 | | |
689 | | // state: |
690 | | // 0 = no tokens seen yet (will accept power, SI or binary prefix, or simple unit) |
691 | | // 1 = power token seen (will not accept another power token) |
692 | | // 2 = SI or binary prefix token seen (will not accept a power, or SI or binary prefix token) |
693 | 0 | int32_t state = 0; |
694 | |
|
695 | 0 | bool atStart = fIndex == 0; |
696 | 0 | Token token = nextToken(status); |
697 | 0 | if (U_FAILURE(status)) { |
698 | 0 | return result; |
699 | 0 | } |
700 | | |
701 | 0 | if (atStart) { |
702 | | // Identifiers optionally start with "per-". |
703 | 0 | if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) { |
704 | 0 | U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER); |
705 | 0 | fAfterPer = true; |
706 | 0 | result.dimensionality = -1; |
707 | |
|
708 | 0 | token = nextToken(status); |
709 | 0 | if (U_FAILURE(status)) { |
710 | 0 | return result; |
711 | 0 | } |
712 | 0 | } |
713 | 0 | } else { |
714 | | // All other SingleUnit's are separated from previous SingleUnit's |
715 | | // via a compound part: |
716 | 0 | if (token.getType() != Token::TYPE_COMPOUND_PART) { |
717 | 0 | status = kUnitIdentifierSyntaxError; |
718 | 0 | return result; |
719 | 0 | } |
720 | | |
721 | 0 | switch (token.getMatch()) { |
722 | 0 | case COMPOUND_PART_PER: |
723 | 0 | if (sawAnd) { |
724 | | // Mixed compound units not yet supported, |
725 | | // TODO(CLDR-13700). |
726 | 0 | status = kUnitIdentifierSyntaxError; |
727 | 0 | return result; |
728 | 0 | } |
729 | 0 | fAfterPer = true; |
730 | 0 | result.dimensionality = -1; |
731 | 0 | break; |
732 | | |
733 | 0 | case COMPOUND_PART_TIMES: |
734 | 0 | if (fAfterPer) { |
735 | 0 | result.dimensionality = -1; |
736 | 0 | } |
737 | 0 | break; |
738 | | |
739 | 0 | case COMPOUND_PART_AND: |
740 | 0 | if (fAfterPer) { |
741 | | // Can't start with "-and-", and mixed compound units |
742 | | // not yet supported, TODO(CLDR-13700). |
743 | 0 | status = kUnitIdentifierSyntaxError; |
744 | 0 | return result; |
745 | 0 | } |
746 | 0 | sawAnd = true; |
747 | 0 | break; |
748 | 0 | } |
749 | | |
750 | 0 | token = nextToken(status); |
751 | 0 | if (U_FAILURE(status)) { |
752 | 0 | return result; |
753 | 0 | } |
754 | 0 | } |
755 | | |
756 | | // Read tokens until we have a complete SingleUnit or we reach the end. |
757 | 0 | while (true) { |
758 | 0 | switch (token.getType()) { |
759 | 0 | case Token::TYPE_POWER_PART: |
760 | 0 | if (state > 0) { |
761 | 0 | status = kUnitIdentifierSyntaxError; |
762 | 0 | return result; |
763 | 0 | } |
764 | 0 | result.dimensionality *= token.getPower(); |
765 | 0 | state = 1; |
766 | 0 | break; |
767 | | |
768 | 0 | case Token::TYPE_PREFIX: |
769 | 0 | if (state > 1) { |
770 | 0 | status = kUnitIdentifierSyntaxError; |
771 | 0 | return result; |
772 | 0 | } |
773 | 0 | result.unitPrefix = token.getUnitPrefix(); |
774 | 0 | state = 2; |
775 | 0 | break; |
776 | | |
777 | 0 | case Token::TYPE_SIMPLE_UNIT: |
778 | 0 | result.index = token.getSimpleUnitIndex(); |
779 | 0 | return result; |
780 | | |
781 | 0 | default: |
782 | 0 | status = kUnitIdentifierSyntaxError; |
783 | 0 | return result; |
784 | 0 | } |
785 | | |
786 | 0 | if (!hasNext()) { |
787 | | // We ran out of tokens before finding a complete single unit. |
788 | 0 | status = kUnitIdentifierSyntaxError; |
789 | 0 | return result; |
790 | 0 | } |
791 | 0 | token = nextToken(status); |
792 | 0 | if (U_FAILURE(status)) { |
793 | 0 | return result; |
794 | 0 | } |
795 | 0 | } |
796 | | |
797 | 0 | return result; |
798 | 0 | } |
799 | | }; |
800 | | |
801 | | // Sorting function wrapping SingleUnitImpl::compareTo for use with uprv_sortArray. |
802 | | int32_t U_CALLCONV |
803 | 0 | compareSingleUnits(const void* /*context*/, const void* left, const void* right) { |
804 | 0 | auto realLeft = static_cast<const SingleUnitImpl* const*>(left); |
805 | 0 | auto realRight = static_cast<const SingleUnitImpl* const*>(right); |
806 | 0 | return (*realLeft)->compareTo(**realRight); |
807 | 0 | } |
808 | | |
809 | | // Returns an index into the gCategories array, for the "unitQuantity" (aka |
810 | | // "type" or "category") associated with the given base unit identifier. Returns |
811 | | // -1 on failure, together with U_UNSUPPORTED_ERROR. |
812 | 0 | int32_t getUnitCategoryIndex(StringPiece baseUnitIdentifier, UErrorCode &status) { |
813 | 0 | umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status); |
814 | 0 | if (U_FAILURE(status)) { |
815 | 0 | return -1; |
816 | 0 | } |
817 | 0 | BytesTrie trie(gSerializedUnitCategoriesTrie); |
818 | 0 | UStringTrieResult result = trie.next(baseUnitIdentifier.data(), baseUnitIdentifier.length()); |
819 | 0 | if (!USTRINGTRIE_HAS_VALUE(result)) { |
820 | 0 | status = U_UNSUPPORTED_ERROR; |
821 | 0 | return -1; |
822 | 0 | } |
823 | 0 | return trie.getValue(); |
824 | 0 | } |
825 | | |
826 | | } // namespace |
827 | | |
828 | | U_CAPI int32_t U_EXPORT2 |
829 | 0 | umeas_getPrefixPower(UMeasurePrefix unitPrefix) { |
830 | 0 | if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN && |
831 | 0 | unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) { |
832 | 0 | return unitPrefix - UMEASURE_PREFIX_INTERNAL_ONE_BIN; |
833 | 0 | } |
834 | 0 | U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI && |
835 | 0 | unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI); |
836 | 0 | return unitPrefix - UMEASURE_PREFIX_ONE; |
837 | 0 | } |
838 | | |
839 | | U_CAPI int32_t U_EXPORT2 |
840 | 0 | umeas_getPrefixBase(UMeasurePrefix unitPrefix) { |
841 | 0 | if (unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN && |
842 | 0 | unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN) { |
843 | 0 | return 1024; |
844 | 0 | } |
845 | 0 | U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI && |
846 | 0 | unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI); |
847 | 0 | return 10; |
848 | 0 | } |
849 | | |
850 | 0 | CharString U_I18N_API getUnitQuantity(StringPiece baseUnitIdentifier, UErrorCode &status) { |
851 | 0 | CharString result; |
852 | 0 | U_ASSERT(result.length() == 0); |
853 | 0 | if (U_FAILURE(status)) { |
854 | 0 | return result; |
855 | 0 | } |
856 | 0 | UErrorCode localStatus = U_ZERO_ERROR; |
857 | 0 | int32_t idx = getUnitCategoryIndex(baseUnitIdentifier, localStatus); |
858 | 0 | if (U_FAILURE(localStatus)) { |
859 | | // TODO(icu-units#130): support inverting any unit, with correct |
860 | | // fallback logic: inversion and fallback may depend on presence or |
861 | | // absence of a usage for that category. |
862 | 0 | if (uprv_strcmp(baseUnitIdentifier.data(), "meter-per-cubic-meter") == 0) { |
863 | 0 | result.append(kConsumption, (int32_t)kConsumptionLen, status); |
864 | 0 | return result; |
865 | 0 | } |
866 | 0 | status = U_INVALID_FORMAT_ERROR; |
867 | 0 | return result; |
868 | 0 | } |
869 | 0 | if (idx < 0 || idx >= gCategoriesCount) { |
870 | 0 | status = U_INVALID_FORMAT_ERROR; |
871 | 0 | return result; |
872 | 0 | } |
873 | 0 | result.appendInvariantChars(gCategories[idx], u_strlen(gCategories[idx]), status); |
874 | 0 | return result; |
875 | 0 | } |
876 | | |
877 | | // In ICU4J, this is MeasureUnit.getSingleUnitImpl(). |
878 | 0 | SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) { |
879 | 0 | MeasureUnitImpl temp; |
880 | 0 | const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status); |
881 | 0 | if (U_FAILURE(status)) { |
882 | 0 | return {}; |
883 | 0 | } |
884 | 0 | if (impl.singleUnits.length() == 0) { |
885 | 0 | return {}; |
886 | 0 | } |
887 | 0 | if (impl.singleUnits.length() == 1) { |
888 | 0 | return *impl.singleUnits[0]; |
889 | 0 | } |
890 | 0 | status = U_ILLEGAL_ARGUMENT_ERROR; |
891 | 0 | return {}; |
892 | 0 | } |
893 | | |
894 | 0 | MeasureUnit SingleUnitImpl::build(UErrorCode& status) const { |
895 | 0 | MeasureUnitImpl temp; |
896 | 0 | temp.appendSingleUnit(*this, status); |
897 | | // TODO(icu-units#28): the MeasureUnitImpl::build() method uses |
898 | | // findBySubtype, which is relatively slow. |
899 | | // - At the time of loading the simple unit IDs, we could also save a |
900 | | // mapping to the builtin MeasureUnit type and subtype they correspond to. |
901 | | // - This method could then check dimensionality and index, and if both are |
902 | | // 1, directly return MeasureUnit instances very quickly. |
903 | 0 | return std::move(temp).build(status); |
904 | 0 | } |
905 | | |
906 | 0 | const char *SingleUnitImpl::getSimpleUnitID() const { |
907 | 0 | return gSimpleUnits[index]; |
908 | 0 | } |
909 | | |
910 | 0 | void SingleUnitImpl::appendNeutralIdentifier(CharString &result, UErrorCode &status) const { |
911 | 0 | int32_t absPower = std::abs(this->dimensionality); |
912 | |
|
913 | 0 | U_ASSERT(absPower > 0); // "this function does not support the dimensionless single units"; |
914 | | |
915 | 0 | if (absPower == 1) { |
916 | | // no-op |
917 | 0 | } else if (absPower == 2) { |
918 | 0 | result.append(StringPiece("square-"), status); |
919 | 0 | } else if (absPower == 3) { |
920 | 0 | result.append(StringPiece("cubic-"), status); |
921 | 0 | } else if (absPower <= 15) { |
922 | 0 | result.append(StringPiece("pow"), status); |
923 | 0 | result.appendNumber(absPower, status); |
924 | 0 | result.append(StringPiece("-"), status); |
925 | 0 | } else { |
926 | 0 | status = U_ILLEGAL_ARGUMENT_ERROR; // Unit Identifier Syntax Error |
927 | 0 | return; |
928 | 0 | } |
929 | | |
930 | 0 | if (U_FAILURE(status)) { |
931 | 0 | return; |
932 | 0 | } |
933 | | |
934 | 0 | if (this->unitPrefix != UMEASURE_PREFIX_ONE) { |
935 | 0 | bool found = false; |
936 | 0 | for (const auto &unitPrefixInfo : gUnitPrefixStrings) { |
937 | | // TODO: consider using binary search? If we do this, add a unit |
938 | | // test to ensure gUnitPrefixStrings is sorted? |
939 | 0 | if (unitPrefixInfo.value == this->unitPrefix) { |
940 | 0 | result.append(unitPrefixInfo.string, status); |
941 | 0 | found = true; |
942 | 0 | break; |
943 | 0 | } |
944 | 0 | } |
945 | 0 | if (!found) { |
946 | 0 | status = U_UNSUPPORTED_ERROR; |
947 | 0 | return; |
948 | 0 | } |
949 | 0 | } |
950 | | |
951 | 0 | result.append(StringPiece(this->getSimpleUnitID()), status); |
952 | 0 | } |
953 | | |
954 | 0 | int32_t SingleUnitImpl::getUnitCategoryIndex() const { |
955 | 0 | return gSimpleUnitCategories[index]; |
956 | 0 | } |
957 | | |
958 | 0 | MeasureUnitImpl::MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status) { |
959 | 0 | this->appendSingleUnit(singleUnit, status); |
960 | 0 | } |
961 | | |
962 | 0 | MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) { |
963 | 0 | return Parser::from(identifier, status).parse(status); |
964 | 0 | } |
965 | | |
966 | | const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit( |
967 | 0 | const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) { |
968 | 0 | if (measureUnit.fImpl) { |
969 | 0 | return *measureUnit.fImpl; |
970 | 0 | } else { |
971 | 0 | memory = Parser::from(measureUnit.getIdentifier(), status).parse(status); |
972 | 0 | return memory; |
973 | 0 | } |
974 | 0 | } |
975 | | |
976 | | MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy( |
977 | 0 | const MeasureUnit& measureUnit, UErrorCode& status) { |
978 | 0 | if (measureUnit.fImpl) { |
979 | 0 | return measureUnit.fImpl->copy(status); |
980 | 0 | } else { |
981 | 0 | return Parser::from(measureUnit.getIdentifier(), status).parse(status); |
982 | 0 | } |
983 | 0 | } |
984 | | |
985 | 0 | void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) { |
986 | 0 | identifier.clear(); |
987 | 0 | for (int32_t i = 0; i < singleUnits.length(); i++) { |
988 | 0 | singleUnits[i]->dimensionality *= -1; |
989 | 0 | } |
990 | 0 | } |
991 | | |
992 | 0 | bool MeasureUnitImpl::appendSingleUnit(const SingleUnitImpl &singleUnit, UErrorCode &status) { |
993 | 0 | identifier.clear(); |
994 | |
|
995 | 0 | if (singleUnit.isDimensionless()) { |
996 | | // Do not append dimensionless units. |
997 | 0 | return false; |
998 | 0 | } |
999 | | |
1000 | | // Find a similar unit that already exists, to attempt to coalesce |
1001 | 0 | SingleUnitImpl *oldUnit = nullptr; |
1002 | 0 | for (int32_t i = 0; i < this->singleUnits.length(); i++) { |
1003 | 0 | auto *candidate = this->singleUnits[i]; |
1004 | 0 | if (candidate->isCompatibleWith(singleUnit)) { |
1005 | 0 | oldUnit = candidate; |
1006 | 0 | } |
1007 | 0 | } |
1008 | |
|
1009 | 0 | if (oldUnit) { |
1010 | | // Both dimensionalities will be positive, or both will be negative, by |
1011 | | // virtue of isCompatibleWith(). |
1012 | 0 | oldUnit->dimensionality += singleUnit.dimensionality; |
1013 | |
|
1014 | 0 | return false; |
1015 | 0 | } |
1016 | | |
1017 | | // Add a copy of singleUnit |
1018 | | // NOTE: MaybeStackVector::emplaceBackAndCheckErrorCode creates new copy of singleUnit. |
1019 | 0 | this->singleUnits.emplaceBackAndCheckErrorCode(status, singleUnit); |
1020 | 0 | if (U_FAILURE(status)) { |
1021 | 0 | return false; |
1022 | 0 | } |
1023 | | |
1024 | | // If the MeasureUnitImpl is `UMEASURE_UNIT_SINGLE` and after the appending a unit, the `singleUnits` |
1025 | | // contains more than one. thus means the complexity should be `UMEASURE_UNIT_COMPOUND` |
1026 | 0 | if (this->singleUnits.length() > 1 && |
1027 | 0 | this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_SINGLE) { |
1028 | 0 | this->complexity = UMeasureUnitComplexity::UMEASURE_UNIT_COMPOUND; |
1029 | 0 | } |
1030 | |
|
1031 | 0 | return true; |
1032 | 0 | } |
1033 | | |
1034 | | MaybeStackVector<MeasureUnitImplWithIndex> |
1035 | 0 | MeasureUnitImpl::extractIndividualUnitsWithIndices(UErrorCode &status) const { |
1036 | 0 | MaybeStackVector<MeasureUnitImplWithIndex> result; |
1037 | |
|
1038 | 0 | if (this->complexity != UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { |
1039 | 0 | result.emplaceBackAndCheckErrorCode(status, 0, *this, status); |
1040 | 0 | return result; |
1041 | 0 | } |
1042 | | |
1043 | 0 | for (int32_t i = 0; i < singleUnits.length(); ++i) { |
1044 | 0 | result.emplaceBackAndCheckErrorCode(status, i, *singleUnits[i], status); |
1045 | 0 | if (U_FAILURE(status)) { |
1046 | 0 | return result; |
1047 | 0 | } |
1048 | 0 | } |
1049 | | |
1050 | 0 | return result; |
1051 | 0 | } |
1052 | | |
1053 | | /** |
1054 | | * Normalize a MeasureUnitImpl and generate the identifier string in place. |
1055 | | */ |
1056 | 0 | void MeasureUnitImpl::serialize(UErrorCode &status) { |
1057 | 0 | if (U_FAILURE(status)) { |
1058 | 0 | return; |
1059 | 0 | } |
1060 | | |
1061 | 0 | if (this->singleUnits.length() == 0) { |
1062 | | // Dimensionless, constructed by the default constructor. |
1063 | 0 | return; |
1064 | 0 | } |
1065 | | |
1066 | 0 | if (this->complexity == UMEASURE_UNIT_COMPOUND) { |
1067 | | // Note: don't sort a MIXED unit |
1068 | 0 | uprv_sortArray(this->singleUnits.getAlias(), this->singleUnits.length(), |
1069 | 0 | sizeof(this->singleUnits[0]), compareSingleUnits, nullptr, false, &status); |
1070 | 0 | if (U_FAILURE(status)) { |
1071 | 0 | return; |
1072 | 0 | } |
1073 | 0 | } |
1074 | | |
1075 | 0 | CharString result; |
1076 | 0 | bool beforePer = true; |
1077 | 0 | bool firstTimeNegativeDimension = false; |
1078 | 0 | for (int32_t i = 0; i < this->singleUnits.length(); i++) { |
1079 | 0 | if (beforePer && (*this->singleUnits[i]).dimensionality < 0) { |
1080 | 0 | beforePer = false; |
1081 | 0 | firstTimeNegativeDimension = true; |
1082 | 0 | } else if ((*this->singleUnits[i]).dimensionality < 0) { |
1083 | 0 | firstTimeNegativeDimension = false; |
1084 | 0 | } |
1085 | |
|
1086 | 0 | if (U_FAILURE(status)) { |
1087 | 0 | return; |
1088 | 0 | } |
1089 | | |
1090 | 0 | if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) { |
1091 | 0 | if (result.length() != 0) { |
1092 | 0 | result.append(StringPiece("-and-"), status); |
1093 | 0 | } |
1094 | 0 | } else { |
1095 | 0 | if (firstTimeNegativeDimension) { |
1096 | 0 | if (result.length() == 0) { |
1097 | 0 | result.append(StringPiece("per-"), status); |
1098 | 0 | } else { |
1099 | 0 | result.append(StringPiece("-per-"), status); |
1100 | 0 | } |
1101 | 0 | } else { |
1102 | 0 | if (result.length() != 0) { |
1103 | 0 | result.append(StringPiece("-"), status); |
1104 | 0 | } |
1105 | 0 | } |
1106 | 0 | } |
1107 | |
|
1108 | 0 | this->singleUnits[i]->appendNeutralIdentifier(result, status); |
1109 | 0 | } |
1110 | | |
1111 | 0 | this->identifier = CharString(result, status); |
1112 | 0 | } |
1113 | | |
1114 | 0 | MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && { |
1115 | 0 | this->serialize(status); |
1116 | 0 | return MeasureUnit(std::move(*this)); |
1117 | 0 | } |
1118 | | |
1119 | 0 | MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) { |
1120 | 0 | return Parser::from(identifier, status).parse(status).build(status); |
1121 | 0 | } |
1122 | | |
1123 | 0 | UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const { |
1124 | 0 | MeasureUnitImpl temp; |
1125 | 0 | return MeasureUnitImpl::forMeasureUnit(*this, temp, status).complexity; |
1126 | 0 | } |
1127 | | |
1128 | 0 | UMeasurePrefix MeasureUnit::getPrefix(UErrorCode& status) const { |
1129 | 0 | return SingleUnitImpl::forMeasureUnit(*this, status).unitPrefix; |
1130 | 0 | } |
1131 | | |
1132 | 0 | MeasureUnit MeasureUnit::withPrefix(UMeasurePrefix prefix, UErrorCode& status) const { |
1133 | 0 | SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); |
1134 | 0 | singleUnit.unitPrefix = prefix; |
1135 | 0 | return singleUnit.build(status); |
1136 | 0 | } |
1137 | | |
1138 | 0 | int32_t MeasureUnit::getDimensionality(UErrorCode& status) const { |
1139 | 0 | SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); |
1140 | 0 | if (U_FAILURE(status)) { return 0; } |
1141 | 0 | if (singleUnit.isDimensionless()) { |
1142 | 0 | return 0; |
1143 | 0 | } |
1144 | 0 | return singleUnit.dimensionality; |
1145 | 0 | } |
1146 | | |
1147 | 0 | MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const { |
1148 | 0 | SingleUnitImpl singleUnit = SingleUnitImpl::forMeasureUnit(*this, status); |
1149 | 0 | singleUnit.dimensionality = dimensionality; |
1150 | 0 | return singleUnit.build(status); |
1151 | 0 | } |
1152 | | |
1153 | 0 | MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const { |
1154 | 0 | MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status); |
1155 | 0 | impl.takeReciprocal(status); |
1156 | 0 | return std::move(impl).build(status); |
1157 | 0 | } |
1158 | | |
1159 | 0 | MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const { |
1160 | 0 | MeasureUnitImpl impl = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status); |
1161 | 0 | MeasureUnitImpl temp; |
1162 | 0 | const MeasureUnitImpl& otherImpl = MeasureUnitImpl::forMeasureUnit(other, temp, status); |
1163 | 0 | if (impl.complexity == UMEASURE_UNIT_MIXED || otherImpl.complexity == UMEASURE_UNIT_MIXED) { |
1164 | 0 | status = U_ILLEGAL_ARGUMENT_ERROR; |
1165 | 0 | return {}; |
1166 | 0 | } |
1167 | 0 | for (int32_t i = 0; i < otherImpl.singleUnits.length(); i++) { |
1168 | 0 | impl.appendSingleUnit(*otherImpl.singleUnits[i], status); |
1169 | 0 | } |
1170 | 0 | if (impl.singleUnits.length() > 1) { |
1171 | 0 | impl.complexity = UMEASURE_UNIT_COMPOUND; |
1172 | 0 | } |
1173 | 0 | return std::move(impl).build(status); |
1174 | 0 | } |
1175 | | |
1176 | 0 | LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const { |
1177 | 0 | MeasureUnitImpl temp; |
1178 | 0 | const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, temp, status); |
1179 | 0 | outCount = impl.singleUnits.length(); |
1180 | 0 | MeasureUnit* arr = new MeasureUnit[outCount]; |
1181 | 0 | if (arr == nullptr) { |
1182 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
1183 | 0 | return LocalArray<MeasureUnit>(); |
1184 | 0 | } |
1185 | 0 | for (int32_t i = 0; i < outCount; i++) { |
1186 | 0 | arr[i] = impl.singleUnits[i]->build(status); |
1187 | 0 | } |
1188 | 0 | return LocalArray<MeasureUnit>(arr, status); |
1189 | 0 | } |
1190 | | |
1191 | | |
1192 | | U_NAMESPACE_END |
1193 | | |
1194 | | #endif /* !UCONFIG_NO_FORMATTING */