Coverage Report

Created: 2023-03-29 06:15

/src/icu/icu4c/source/i18n/measunit_extra.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2020 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
// Extra functions for MeasureUnit not needed for all clients.
5
// Separate .o file so that it can be removed for modularity.
6
7
#include "unicode/utypes.h"
8
9
#if !UCONFIG_NO_FORMATTING
10
11
// Allow implicit conversion from char16_t* to UnicodeString for this file:
12
// Helpful in toString methods and elsewhere.
13
#define UNISTR_FROM_STRING_EXPLICIT
14
15
#include "charstr.h"
16
#include "cmemory.h"
17
#include "cstring.h"
18
#include "measunit_impl.h"
19
#include "resource.h"
20
#include "uarrsort.h"
21
#include "uassert.h"
22
#include "ucln_in.h"
23
#include "umutex.h"
24
#include "unicode/bytestrie.h"
25
#include "unicode/bytestriebuilder.h"
26
#include "unicode/localpointer.h"
27
#include "unicode/stringpiece.h"
28
#include "unicode/stringtriebuilder.h"
29
#include "unicode/ures.h"
30
#include "unicode/ustringtrie.h"
31
#include "uresimp.h"
32
#include "util.h"
33
#include <cstdlib>
34
35
U_NAMESPACE_BEGIN
36
37
38
namespace {
39
40
// TODO: Propose a new error code for this?
41
constexpr UErrorCode kUnitIdentifierSyntaxError = U_ILLEGAL_ARGUMENT_ERROR;
42
43
// Trie value offset for SI or binary prefixes. This is big enough to ensure we only
44
// insert positive integers into the trie.
45
constexpr int32_t kPrefixOffset = 64;
46
static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_BIN > 0,
47
              "kPrefixOffset is too small for minimum UMeasurePrefix value");
48
static_assert(kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MIN_SI > 0,
49
              "kPrefixOffset is too small for minimum UMeasurePrefix value");
50
51
// Trie value offset for compound parts, e.g. "-per-", "-", "-and-".
52
constexpr int32_t kCompoundPartOffset = 128;
53
static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_BIN,
54
              "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens");
55
static_assert(kCompoundPartOffset > kPrefixOffset + UMEASURE_PREFIX_INTERNAL_MAX_SI,
56
              "Ambiguous token values: prefix tokens are overlapping with CompoundPart tokens");
57
58
enum CompoundPart {
59
    // Represents "-per-"
60
    COMPOUND_PART_PER = kCompoundPartOffset,
61
    // Represents "-"
62
    COMPOUND_PART_TIMES,
63
    // Represents "-and-"
64
    COMPOUND_PART_AND,
65
};
66
67
// First trie value used for compound parts that are only valid at the very
// start of an identifier.
constexpr int32_t kInitialCompoundPartOffset{192};

enum InitialCompoundPart {
    // "per-": the only compound part that can appear at the start of an
    // identifier.
    INITIAL_COMPOUND_PART_PER = kInitialCompoundPartOffset,
};
75
76
// First trie value used for power tokens such as "square-", "cubic-",
// "pow2-" etc. A power token's trie value is kPowerPartOffset + exponent.
constexpr int32_t kPowerPartOffset{256};

enum PowerPart {
    POWER_PART_P2 = kPowerPartOffset + 2,
    POWER_PART_P3 = kPowerPartOffset + 3,
    POWER_PART_P4 = kPowerPartOffset + 4,
    POWER_PART_P5 = kPowerPartOffset + 5,
    POWER_PART_P6 = kPowerPartOffset + 6,
    POWER_PART_P7 = kPowerPartOffset + 7,
    POWER_PART_P8 = kPowerPartOffset + 8,
    POWER_PART_P9 = kPowerPartOffset + 9,
    POWER_PART_P10 = kPowerPartOffset + 10,
    POWER_PART_P11 = kPowerPartOffset + 11,
    POWER_PART_P12 = kPowerPartOffset + 12,
    POWER_PART_P13 = kPowerPartOffset + 13,
    POWER_PART_P14 = kPowerPartOffset + 14,
    POWER_PART_P15 = kPowerPartOffset + 15,
};

// First trie value used for simple units, e.g. "gram", "nautical-mile",
// "fluid-ounce-imperial".
constexpr int32_t kSimpleUnitOffset{512};
99
100
const struct UnitPrefixStrings {
101
    const char* const string;
102
    UMeasurePrefix value;
103
} gUnitPrefixStrings[] = {
104
    // SI prefixes
105
    { "yotta", UMEASURE_PREFIX_YOTTA },
106
    { "zetta", UMEASURE_PREFIX_ZETTA },
107
    { "exa", UMEASURE_PREFIX_EXA },
108
    { "peta", UMEASURE_PREFIX_PETA },
109
    { "tera", UMEASURE_PREFIX_TERA },
110
    { "giga", UMEASURE_PREFIX_GIGA },
111
    { "mega", UMEASURE_PREFIX_MEGA },
112
    { "kilo", UMEASURE_PREFIX_KILO },
113
    { "hecto", UMEASURE_PREFIX_HECTO },
114
    { "deka", UMEASURE_PREFIX_DEKA },
115
    { "deci", UMEASURE_PREFIX_DECI },
116
    { "centi", UMEASURE_PREFIX_CENTI },
117
    { "milli", UMEASURE_PREFIX_MILLI },
118
    { "micro", UMEASURE_PREFIX_MICRO },
119
    { "nano", UMEASURE_PREFIX_NANO },
120
    { "pico", UMEASURE_PREFIX_PICO },
121
    { "femto", UMEASURE_PREFIX_FEMTO },
122
    { "atto", UMEASURE_PREFIX_ATTO },
123
    { "zepto", UMEASURE_PREFIX_ZEPTO },
124
    { "yocto", UMEASURE_PREFIX_YOCTO },
125
    // Binary prefixes
126
    { "yobi", UMEASURE_PREFIX_YOBI },
127
    { "zebi", UMEASURE_PREFIX_ZEBI },
128
    { "exbi", UMEASURE_PREFIX_EXBI },
129
    { "pebi", UMEASURE_PREFIX_PEBI },
130
    { "tebi", UMEASURE_PREFIX_TEBI },
131
    { "gibi", UMEASURE_PREFIX_GIBI },
132
    { "mebi", UMEASURE_PREFIX_MEBI },
133
    { "kibi", UMEASURE_PREFIX_KIBI },
134
};
135
136
/**
137
 * A ResourceSink that collects simple unit identifiers from the keys of the
138
 * convertUnits table into an array, and adds these values to a TrieBuilder,
139
 * with associated values being their index into this array plus a specified
140
 * offset.
141
 *
142
 * Example code:
143
 *
144
 *     UErrorCode status = U_ZERO_ERROR;
145
 *     BytesTrieBuilder b(status);
146
 *     int32_t ARR_SIZE = 200;
147
 *     const char *unitIdentifiers[ARR_SIZE];
148
 *     int32_t *unitCategories[ARR_SIZE];
149
 *     SimpleUnitIdentifiersSink identifierSink(gSerializedUnitCategoriesTrie, unitIdentifiers,
150
 *                                              unitCategories, ARR_SIZE, b, kTrieValueOffset);
151
 *     LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
152
 *     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
153
 */
154
class SimpleUnitIdentifiersSink : public icu::ResourceSink {
155
  public:
156
    /**
157
     * Constructor.
158
     * @param quantitiesTrieData The data for constructing a quantitiesTrie,
159
     *     which maps from a simple unit identifier to an index into the
160
     *     gCategories array.
161
     * @param out Array of char* to which pointers to the simple unit
162
     *     identifiers will be saved. (Does not take ownership.)
163
     * @param outCategories Array of int32_t to which category indexes will be
164
     *     saved: this corresponds to simple unit IDs saved to `out`, mapping
165
     *     from the ID to the value produced by the quantitiesTrie (which is an
166
     *     index into the gCategories array).
167
     * @param outSize The size of `out` and `outCategories`.
168
     * @param trieBuilder The trie builder to which the simple unit identifier
169
     *     should be added. The trie builder must outlive this resource sink.
170
     * @param trieValueOffset This is added to the index of the identifier in
171
     *     the `out` array, before adding to `trieBuilder` as the value
172
     *     associated with the identifier.
173
     */
174
    explicit SimpleUnitIdentifiersSink(StringPiece quantitiesTrieData, const char **out,
175
                                       int32_t *outCategories, int32_t outSize,
176
                                       BytesTrieBuilder &trieBuilder, int32_t trieValueOffset)
177
        : outArray(out), outCategories(outCategories), outSize(outSize), trieBuilder(trieBuilder),
178
0
          trieValueOffset(trieValueOffset), quantitiesTrieData(quantitiesTrieData), outIndex(0) {}
179
180
    /**
181
     * Adds the table keys found in value to the output vector.
182
     * @param key The key of the resource passed to `value`: the second
183
     *     parameter of the ures_getAllItemsWithFallback() call.
184
     * @param value Should be a ResourceTable value, if
185
     *     ures_getAllItemsWithFallback() was called correctly for this sink.
186
     * @param noFallback Ignored.
187
     * @param status The standard ICU error code output parameter.
188
     */
189
0
    void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
190
0
        ResourceTable table = value.getTable(status);
191
0
        if (U_FAILURE(status)) return;
192
193
0
        if (outIndex + table.getSize() > outSize) {
194
0
            status = U_INDEX_OUTOFBOUNDS_ERROR;
195
0
            return;
196
0
        }
197
198
0
        BytesTrie quantitiesTrie(quantitiesTrieData.data());
199
200
        // Collect keys from the table resource.
201
0
        const char *simpleUnitID;
202
0
        for (int32_t i = 0; table.getKeyAndValue(i, simpleUnitID, value); ++i) {
203
0
            U_ASSERT(i < table.getSize());
204
0
            U_ASSERT(outIndex < outSize);
205
0
            if (uprv_strcmp(simpleUnitID, "kilogram") == 0) {
206
                // For parsing, we use "gram", the prefixless metric mass unit. We
207
                // thus ignore the SI Base Unit of Mass: it exists due to being the
208
                // mass conversion target unit, but not needed for MeasureUnit
209
                // parsing.
210
0
                continue;
211
0
            }
212
0
            outArray[outIndex] = simpleUnitID;
213
0
            trieBuilder.add(simpleUnitID, trieValueOffset + outIndex, status);
214
215
            // Find the base target unit for this simple unit
216
0
            ResourceTable table = value.getTable(status);
217
0
            if (U_FAILURE(status)) { return; }
218
0
            if (!table.findValue("target", value)) {
219
0
                status = U_INVALID_FORMAT_ERROR;
220
0
                break;
221
0
            }
222
0
            int32_t len;
223
0
            const char16_t* uTarget = value.getString(len, status);
224
0
            CharString target;
225
0
            target.appendInvariantChars(uTarget, len, status);
226
0
            if (U_FAILURE(status)) { return; }
227
0
            quantitiesTrie.reset();
228
0
            UStringTrieResult result = quantitiesTrie.next(target.data(), target.length());
229
0
            if (!USTRINGTRIE_HAS_VALUE(result)) {
230
0
                status = U_INVALID_FORMAT_ERROR;
231
0
                break;
232
0
            }
233
0
            outCategories[outIndex] = quantitiesTrie.getValue();
234
235
0
            outIndex++;
236
0
        }
237
0
    }
238
239
  private:
240
    const char **outArray;
241
    int32_t *outCategories;
242
    int32_t outSize;
243
    BytesTrieBuilder &trieBuilder;
244
    int32_t trieValueOffset;
245
246
    StringPiece quantitiesTrieData;
247
248
    int32_t outIndex;
249
};
250
251
/**
252
 * A ResourceSink that collects information from `unitQuantities` in the `units`
253
 * resource to provide key->value lookups from base unit to category, as well as
254
 * preserving ordering information for these categories. See `units.txt`.
255
 *
256
 * For example: "kilogram" -> "mass", "meter-per-second" -> "speed".
257
 *
258
 * In C++ unitQuantity values are collected in order into a char16_t* array, while
259
 * unitQuantity keys are added added to a TrieBuilder, with associated values
260
 * being the index into the aforementioned char16_t* array.
261
 */
262
class CategoriesSink : public icu::ResourceSink {
263
  public:
264
    /**
265
     * Constructor.
266
     * @param out Array of char16_t* to which unitQuantity values will be saved.
267
     *     The pointers returned  not owned: they point directly at the resource
268
     *     strings in static memory.
269
     * @param outSize The size of the `out` array.
270
     * @param trieBuilder The trie builder to which the keys (base units) of
271
     *     each unitQuantity will be added, each with value being the offset
272
     *     into `out`.
273
     */
274
    explicit CategoriesSink(const char16_t **out, int32_t &outSize, BytesTrieBuilder &trieBuilder)
275
0
        : outQuantitiesArray(out), outSize(outSize), trieBuilder(trieBuilder), outIndex(0) {}
276
277
0
    void put(const char * /*key*/, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) override {
278
0
        ResourceArray array = value.getArray(status);
279
0
        if (U_FAILURE(status)) {
280
0
            return;
281
0
        }
282
283
0
        if (outIndex + array.getSize() > outSize) {
284
0
            status = U_INDEX_OUTOFBOUNDS_ERROR;
285
0
            return;
286
0
        }
287
288
0
        for (int32_t i = 0; array.getValue(i, value); ++i) {
289
0
            U_ASSERT(outIndex < outSize);
290
0
            ResourceTable table = value.getTable(status);
291
0
            if (U_FAILURE(status)) {
292
0
                return;
293
0
            }
294
0
            if (table.getSize() != 1) {
295
0
                status = U_INVALID_FORMAT_ERROR;
296
0
                return;
297
0
            }
298
0
            const char *key;
299
0
            table.getKeyAndValue(0, key, value);
300
0
            int32_t uTmpLen;
301
0
            outQuantitiesArray[outIndex] = value.getString(uTmpLen, status);
302
0
            trieBuilder.add(key, outIndex, status);
303
0
            outIndex++;
304
0
        }
305
0
    }
306
307
  private:
308
    const char16_t **outQuantitiesArray;
309
    int32_t &outSize;
310
    BytesTrieBuilder &trieBuilder;
311
312
    int32_t outIndex;
313
};
314
315
// Guards the one-time execution of initUnitExtras().
icu::UInitOnce gUnitExtrasInitOnce{};

// Array of simple unit IDs.
//
// This pointer owns the array memory itself; the individual char* entries are
// not owned and point at static memory. (Note that these char* are also
// returned by SingleUnitImpl::getSimpleUnitID().)
const char **gSimpleUnits{nullptr};

// For each entry of gSimpleUnits (same index), an index into the gCategories
// array.
int32_t *gSimpleUnitCategories{nullptr};

// Serialized BytesTrie used by Parser to tokenize unit identifiers.
char *gSerializedUnitExtrasStemTrie{nullptr};

// Array of char16_t* pointing at the unit categories (aka "quantities", aka
// "types"), as found in the `unitQuantities` resource. This pointer owns the
// array memory itself; the individual char16_t* entries are not owned and
// point at static memory.
const char16_t **gCategories{nullptr};
// Number of items in `gCategories`.
int32_t gCategoriesCount{0};
// Serialized BytesTrie for mapping from base units to indices into gCategories.
char *gSerializedUnitCategoriesTrie{nullptr};
339
340
0
// Frees a uprv_malloc'ed buffer and clears the pointer that referred to it.
template <typename T>
void freeAndReset(T *&buffer) {
    uprv_free(buffer);
    buffer = nullptr;
}

/**
 * Cleanup callback registered by initUnitExtras(): releases every buffer
 * allocated there and resets the init-once guard.
 *
 * @return Always true.
 */
UBool U_CALLCONV cleanupUnitExtras() {
    freeAndReset(gSerializedUnitCategoriesTrie);
    freeAndReset(gCategories);
    freeAndReset(gSerializedUnitExtrasStemTrie);
    freeAndReset(gSimpleUnitCategories);
    freeAndReset(gSimpleUnits);
    gUnitExtrasInitOnce.reset();
    return true;
}
354
355
0
/**
 * One-time initializer (run via gUnitExtrasInitOnce) for the data used when
 * parsing unit identifiers. From the "units" resource bundle it builds:
 *
 *   - gCategories / gCategoriesCount and gSerializedUnitCategoriesTrie, from
 *     the `unitQuantities` resource;
 *   - gSimpleUnits and gSimpleUnitCategories, from the `convertUnits` resource;
 *   - gSerializedUnitExtrasStemTrie, the trie of all tokens (prefixes, compound
 *     parts, power parts and simple units) used by Parser.
 *
 * The cleanup function is registered first, so buffers allocated before a
 * failure are still released by cleanupUnitExtras().
 *
 * @param status ICU error code. On failure the function returns early and the
 *     globals may be only partially populated.
 */
void U_CALLCONV initUnitExtras(UErrorCode& status) {
    ucln_i18n_registerCleanup(UCLN_I18N_UNIT_EXTRAS, cleanupUnitExtras);
    LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));

    // Collect unitQuantities information into gSerializedUnitCategoriesTrie and gCategories.
    const char *CATEGORY_TABLE_NAME = "unitQuantities";
    LocalUResourceBundlePointer unitQuantities(
        ures_getByKey(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, nullptr, &status));
    if (U_FAILURE(status)) { return; }
    gCategoriesCount = unitQuantities.getAlias()->fSize;
    size_t quantitiesMallocSize = sizeof(char16_t *) * gCategoriesCount;
    gCategories = static_cast<const char16_t **>(uprv_malloc(quantitiesMallocSize));
    if (gCategories == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memset(gCategories, 0, quantitiesMallocSize);
    BytesTrieBuilder quantitiesBuilder(status);
    CategoriesSink categoriesSink(gCategories, gCategoriesCount, quantitiesBuilder);
    ures_getAllItemsWithFallback(unitsBundle.getAlias(), CATEGORY_TABLE_NAME, categoriesSink, status);
    // resultQuantities points into quantitiesBuilder's storage; it is also
    // handed to the identifier sink below, so the builder must stay alive
    // until the end of this function.
    StringPiece resultQuantities = quantitiesBuilder.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
    if (U_FAILURE(status)) { return; }
    // Copy the result into the global constant pointer
    size_t numBytesQuantities = resultQuantities.length();
    gSerializedUnitCategoriesTrie = static_cast<char *>(uprv_malloc(numBytesQuantities));
    if (gSerializedUnitCategoriesTrie == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memcpy(gSerializedUnitCategoriesTrie, resultQuantities.data(), numBytesQuantities);

    // Build the BytesTrie that Parser needs for parsing unit identifiers.

    BytesTrieBuilder b(status);
    if (U_FAILURE(status)) { return; }

    // Add SI and binary prefixes
    for (const auto& unitPrefixInfo : gUnitPrefixStrings) {
        b.add(unitPrefixInfo.string, unitPrefixInfo.value + kPrefixOffset, status);
    }
    if (U_FAILURE(status)) { return; }

    // Add syntax parts (compound, power prefixes)
    b.add("-per-", COMPOUND_PART_PER, status);
    b.add("-", COMPOUND_PART_TIMES, status);
    b.add("-and-", COMPOUND_PART_AND, status);
    b.add("per-", INITIAL_COMPOUND_PART_PER, status);
    b.add("square-", POWER_PART_P2, status);
    b.add("cubic-", POWER_PART_P3, status);
    b.add("pow2-", POWER_PART_P2, status);
    b.add("pow3-", POWER_PART_P3, status);
    b.add("pow4-", POWER_PART_P4, status);
    b.add("pow5-", POWER_PART_P5, status);
    b.add("pow6-", POWER_PART_P6, status);
    b.add("pow7-", POWER_PART_P7, status);
    b.add("pow8-", POWER_PART_P8, status);
    b.add("pow9-", POWER_PART_P9, status);
    b.add("pow10-", POWER_PART_P10, status);
    b.add("pow11-", POWER_PART_P11, status);
    b.add("pow12-", POWER_PART_P12, status);
    b.add("pow13-", POWER_PART_P13, status);
    b.add("pow14-", POWER_PART_P14, status);
    b.add("pow15-", POWER_PART_P15, status);
    if (U_FAILURE(status)) { return; }

    // Add sanctioned simple units by offset: simple units all have entries in
    // units/convertUnits resources.
    LocalUResourceBundlePointer convertUnits(
        ures_getByKey(unitsBundle.getAlias(), "convertUnits", nullptr, &status));
    if (U_FAILURE(status)) { return; }

    // Allocate enough space: with identifierSink below skipping kilogram, we're
    // probably allocating one more than needed.
    //
    // The byte counts are kept in size_t (the type of the sizeof product and
    // of the allocator's size parameter) rather than being narrowed to
    // int32_t, and each buffer gets its own size variable.
    int32_t simpleUnitsCount = convertUnits.getAlias()->fSize;
    size_t simpleUnitsMallocSize = sizeof(char *) * simpleUnitsCount;
    gSimpleUnits = static_cast<const char **>(uprv_malloc(simpleUnitsMallocSize));
    if (gSimpleUnits == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memset(gSimpleUnits, 0, simpleUnitsMallocSize);
    size_t simpleUnitCategoriesMallocSize = sizeof(int32_t) * simpleUnitsCount;
    gSimpleUnitCategories = static_cast<int32_t *>(uprv_malloc(simpleUnitCategoriesMallocSize));
    if (gSimpleUnitCategories == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memset(gSimpleUnitCategories, 0, simpleUnitCategoriesMallocSize);

    // Populate gSimpleUnits and build the associated trie.
    SimpleUnitIdentifiersSink identifierSink(resultQuantities, gSimpleUnits, gSimpleUnitCategories,
                                             simpleUnitsCount, b, kSimpleUnitOffset);
    ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);

    // Build the BytesTrie
    // TODO: Use SLOW or FAST here?
    StringPiece result = b.buildStringPiece(USTRINGTRIE_BUILD_FAST, status);
    if (U_FAILURE(status)) { return; }

    // Copy the result into the global constant pointer
    size_t numBytes = result.length();
    gSerializedUnitExtrasStemTrie = static_cast<char *>(uprv_malloc(numBytes));
    if (gSerializedUnitExtrasStemTrie == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return;
    }
    uprv_memcpy(gSerializedUnitExtrasStemTrie, result.data(), numBytes);
}
463
464
class Token {
465
public:
466
0
    Token(int32_t match) : fMatch(match) {}
467
468
    enum Type {
469
        TYPE_UNDEFINED,
470
        TYPE_PREFIX,
471
        // Token type for "-per-", "-", and "-and-".
472
        TYPE_COMPOUND_PART,
473
        // Token type for "per-".
474
        TYPE_INITIAL_COMPOUND_PART,
475
        TYPE_POWER_PART,
476
        TYPE_SIMPLE_UNIT,
477
    };
478
479
    // Calling getType() is invalid, resulting in an assertion failure, if Token
480
    // value isn't positive.
481
0
    Type getType() const {
482
0
        U_ASSERT(fMatch > 0);
483
0
        if (fMatch < kCompoundPartOffset) {
484
0
            return TYPE_PREFIX;
485
0
        }
486
0
        if (fMatch < kInitialCompoundPartOffset) {
487
0
            return TYPE_COMPOUND_PART;
488
0
        }
489
0
        if (fMatch < kPowerPartOffset) {
490
0
            return TYPE_INITIAL_COMPOUND_PART;
491
0
        }
492
0
        if (fMatch < kSimpleUnitOffset) {
493
0
            return TYPE_POWER_PART;
494
0
        }
495
0
        return TYPE_SIMPLE_UNIT;
496
0
    }
497
498
0
    UMeasurePrefix getUnitPrefix() const {
499
0
        U_ASSERT(getType() == TYPE_PREFIX);
500
0
        return static_cast<UMeasurePrefix>(fMatch - kPrefixOffset);
501
0
    }
502
503
    // Valid only for tokens with type TYPE_COMPOUND_PART.
504
0
    int32_t getMatch() const {
505
0
        U_ASSERT(getType() == TYPE_COMPOUND_PART);
506
0
        return fMatch;
507
0
    }
508
509
0
    int32_t getInitialCompoundPart() const {
510
0
        // Even if there is only one InitialCompoundPart value, we have this
511
0
        // function for the simplicity of code consistency.
512
0
        U_ASSERT(getType() == TYPE_INITIAL_COMPOUND_PART);
513
0
        // Defensive: if this assert fails, code using this function also needs
514
0
        // to change.
515
0
        U_ASSERT(fMatch == INITIAL_COMPOUND_PART_PER);
516
0
        return fMatch;
517
0
    }
518
519
0
    int8_t getPower() const {
520
0
        U_ASSERT(getType() == TYPE_POWER_PART);
521
0
        return static_cast<int8_t>(fMatch - kPowerPartOffset);
522
0
    }
523
524
0
    int32_t getSimpleUnitIndex() const {
525
0
        U_ASSERT(getType() == TYPE_SIMPLE_UNIT);
526
0
        return fMatch - kSimpleUnitOffset;
527
0
    }
528
529
private:
530
    int32_t fMatch;
531
};
532
533
class Parser {
534
public:
535
    /**
536
     * Factory function for parsing the given identifier.
537
     *
538
     * @param source The identifier to parse. This function does not make a copy
539
     * of source: the underlying string that source points at, must outlive the
540
     * parser.
541
     * @param status ICU error code.
542
     */
543
0
    static Parser from(StringPiece source, UErrorCode& status) {
544
0
        if (U_FAILURE(status)) {
545
0
            return Parser();
546
0
        }
547
0
        umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
548
0
        if (U_FAILURE(status)) {
549
0
            return Parser();
550
0
        }
551
0
        return Parser(source);
552
0
    }
553
554
0
    MeasureUnitImpl parse(UErrorCode& status) {
555
0
        MeasureUnitImpl result;
556
557
0
        if (U_FAILURE(status)) {
558
0
            return result;
559
0
        }
560
0
        if (fSource.empty()) {
561
            // The dimenionless unit: nothing to parse. leave result as is.
562
0
            return result;
563
0
        }
564
565
0
        while (hasNext()) {
566
0
            bool sawAnd = false;
567
568
0
            SingleUnitImpl singleUnit = nextSingleUnit(sawAnd, status);
569
0
            if (U_FAILURE(status)) {
570
0
                return result;
571
0
            }
572
573
0
            bool added = result.appendSingleUnit(singleUnit, status);
574
0
            if (U_FAILURE(status)) {
575
0
                return result;
576
0
            }
577
578
0
            if (sawAnd && !added) {
579
                // Two similar units are not allowed in a mixed unit.
580
0
                status = kUnitIdentifierSyntaxError;
581
0
                return result;
582
0
            }
583
584
0
            if (result.singleUnits.length() >= 2) {
585
                // nextSingleUnit fails appropriately for "per" and "and" in the
586
                // same identifier. It doesn't fail for other compound units
587
                // (COMPOUND_PART_TIMES). Consequently we take care of that
588
                // here.
589
0
                UMeasureUnitComplexity complexity =
590
0
                    sawAnd ? UMEASURE_UNIT_MIXED : UMEASURE_UNIT_COMPOUND;
591
0
                if (result.singleUnits.length() == 2) {
592
                    // After appending two singleUnits, the complexity will be `UMEASURE_UNIT_COMPOUND`
593
0
                    U_ASSERT(result.complexity == UMEASURE_UNIT_COMPOUND);
594
0
                    result.complexity = complexity;
595
0
                } else if (result.complexity != complexity) {
596
                    // Can't have mixed compound units
597
0
                    status = kUnitIdentifierSyntaxError;
598
0
                    return result;
599
0
                }
600
0
            }
601
0
        }
602
603
0
        return result;
604
0
    }
605
606
private:
607
    // Tracks parser progress: the offset into fSource.
608
    int32_t fIndex = 0;
609
610
    // Since we're not owning this memory, whatever is passed to the constructor
611
    // should live longer than this Parser - and the parser shouldn't return any
612
    // references to that string.
613
    StringPiece fSource;
614
    BytesTrie fTrie;
615
616
    // Set to true when we've seen a "-per-" or a "per-", after which all units
617
    // are in the denominator. Until we find an "-and-", at which point the
618
    // identifier is invalid pending TODO(CLDR-13701).
619
    bool fAfterPer = false;
620
621
0
    Parser() : fSource(""), fTrie(u"") {}
622
623
    Parser(StringPiece source)
624
0
        : fSource(source), fTrie(gSerializedUnitExtrasStemTrie) {}
625
626
0
    inline bool hasNext() const {
627
0
        return fIndex < fSource.length();
628
0
    }
629
630
    // Returns the next Token parsed from fSource, advancing fIndex to the end
631
    // of that token in fSource. In case of U_FAILURE(status), the token
632
    // returned will cause an abort if getType() is called on it.
633
0
    Token nextToken(UErrorCode& status) {
634
0
        fTrie.reset();
635
0
        int32_t match = -1;
636
        // Saves the position in the fSource string for the end of the most
637
        // recent matching token.
638
0
        int32_t previ = -1;
639
        // Find the longest token that matches a value in the trie:
640
0
        while (fIndex < fSource.length()) {
641
0
            auto result = fTrie.next(fSource.data()[fIndex++]);
642
0
            if (result == USTRINGTRIE_NO_MATCH) {
643
0
                break;
644
0
            } else if (result == USTRINGTRIE_NO_VALUE) {
645
0
                continue;
646
0
            }
647
0
            U_ASSERT(USTRINGTRIE_HAS_VALUE(result));
648
0
            match = fTrie.getValue();
649
0
            previ = fIndex;
650
0
            if (result == USTRINGTRIE_FINAL_VALUE) {
651
0
                break;
652
0
            }
653
0
            U_ASSERT(result == USTRINGTRIE_INTERMEDIATE_VALUE);
654
            // continue;
655
0
        }
656
657
0
        if (match < 0) {
658
0
            status = kUnitIdentifierSyntaxError;
659
0
        } else {
660
0
            fIndex = previ;
661
0
        }
662
0
        return Token(match);
663
0
    }
664
665
    /**
666
     * Returns the next "single unit" via result.
667
     *
668
     * If a "-per-" was parsed, the result will have appropriate negative
669
     * dimensionality.
670
     *
671
     * Returns an error if we parse both compound units and "-and-", since mixed
672
     * compound units are not yet supported - TODO(CLDR-13701).
673
     *
674
     * @param result Will be overwritten by the result, if status shows success.
675
     * @param sawAnd If an "-and-" was parsed prior to finding the "single
676
     * unit", sawAnd is set to true. If not, it is left as is.
677
     * @param status ICU error code.
678
     */
679
0
    SingleUnitImpl nextSingleUnit(bool &sawAnd, UErrorCode &status) {
680
0
        SingleUnitImpl result;
681
0
        if (U_FAILURE(status)) {
682
0
            return result;
683
0
        }
684
685
        // state:
686
        // 0 = no tokens seen yet (will accept power, SI or binary prefix, or simple unit)
687
        // 1 = power token seen (will not accept another power token)
688
        // 2 = SI or binary prefix token seen (will not accept a power, or SI or binary prefix token)
689
0
        int32_t state = 0;
690
691
0
        bool atStart = fIndex == 0;
692
0
        Token token = nextToken(status);
693
0
        if (U_FAILURE(status)) {
694
0
            return result;
695
0
        }
696
697
0
        if (atStart) {
698
            // Identifiers optionally start with "per-".
699
0
            if (token.getType() == Token::TYPE_INITIAL_COMPOUND_PART) {
700
0
                U_ASSERT(token.getInitialCompoundPart() == INITIAL_COMPOUND_PART_PER);
701
0
                fAfterPer = true;
702
0
                result.dimensionality = -1;
703
704
0
                token = nextToken(status);
705
0
                if (U_FAILURE(status)) {
706
0
                    return result;
707
0
                }
708
0
            }
709
0
        } else {
710
            // All other SingleUnit's are separated from previous SingleUnit's
711
            // via a compound part:
712
0
            if (token.getType() != Token::TYPE_COMPOUND_PART) {
713
0
                status = kUnitIdentifierSyntaxError;
714
0
                return result;
715
0
            }
716
717
0
            switch (token.getMatch()) {
718
0
            case COMPOUND_PART_PER:
719
0
                if (sawAnd) {
720
                    // Mixed compound units not yet supported,
721
                    // TODO(CLDR-13701).
722
0
                    status = kUnitIdentifierSyntaxError;
723
0
                    return result;
724
0
                }
725
0
                fAfterPer = true;
726
0
                result.dimensionality = -1;
727
0
                break;
728
729
0
            case COMPOUND_PART_TIMES:
730
0
                if (fAfterPer) {
731
0
                    result.dimensionality = -1;
732
0
                }
733
0
                break;
734
735
0
            case COMPOUND_PART_AND:
736
0
                if (fAfterPer) {
737
                    // Can't start with "-and-", and mixed compound units
738
                    // not yet supported, TODO(CLDR-13701).
739
0
                    status = kUnitIdentifierSyntaxError;
740
0
                    return result;
741
0
                }
742
0
                sawAnd = true;
743
0
                break;
744
0
            }
745
746
0
            token = nextToken(status);
747
0
            if (U_FAILURE(status)) {
748
0
                return result;
749
0
            }
750
0
        }
751
752
        // Read tokens until we have a complete SingleUnit or we reach the end.
753
0
        while (true) {
754
0
            switch (token.getType()) {
755
0
                case Token::TYPE_POWER_PART:
756
0
                    if (state > 0) {
757
0
                        status = kUnitIdentifierSyntaxError;
758
0
                        return result;
759
0
                    }
760
0
                    result.dimensionality *= token.getPower();
761
0
                    state = 1;
762
0
                    break;
763
764
0
                case Token::TYPE_PREFIX:
765
0
                    if (state > 1) {
766
0
                        status = kUnitIdentifierSyntaxError;
767
0
                        return result;
768
0
                    }
769
0
                    result.unitPrefix = token.getUnitPrefix();
770
0
                    state = 2;
771
0
                    break;
772
773
0
                case Token::TYPE_SIMPLE_UNIT:
774
0
                    result.index = token.getSimpleUnitIndex();
775
0
                    return result;
776
777
0
                default:
778
0
                    status = kUnitIdentifierSyntaxError;
779
0
                    return result;
780
0
            }
781
782
0
            if (!hasNext()) {
783
                // We ran out of tokens before finding a complete single unit.
784
0
                status = kUnitIdentifierSyntaxError;
785
0
                return result;
786
0
            }
787
0
            token = nextToken(status);
788
0
            if (U_FAILURE(status)) {
789
0
                return result;
790
0
            }
791
0
        }
792
793
0
        return result;
794
0
    }
795
};
796
797
// Sorting function wrapping SingleUnitImpl::compareTo for use with uprv_sortArray.
798
int32_t U_CALLCONV
799
0
compareSingleUnits(const void* /*context*/, const void* left, const void* right) {
800
0
    auto realLeft = static_cast<const SingleUnitImpl* const*>(left);
801
0
    auto realRight = static_cast<const SingleUnitImpl* const*>(right);
802
0
    return (*realLeft)->compareTo(**realRight);
803
0
}
804
805
// Returns an index into the gCategories array, for the "unitQuantity" (aka
806
// "type" or "category") associated with the given base unit identifier. Returns
807
// -1 on failure, together with U_UNSUPPORTED_ERROR.
808
0
int32_t getUnitCategoryIndex(BytesTrie &trie, StringPiece baseUnitIdentifier, UErrorCode &status) {
809
0
    UStringTrieResult result = trie.reset().next(baseUnitIdentifier.data(), baseUnitIdentifier.length());
810
0
    if (!USTRINGTRIE_HAS_VALUE(result)) {
811
0
        status = U_UNSUPPORTED_ERROR;
812
0
        return -1;
813
0
    }
814
815
0
    return trie.getValue();
816
0
}
817
818
} // namespace
819
820
// Returns the exponent associated with `unitPrefix`: the offset of the enum
// value from the "one" constant of its family (UMEASURE_PREFIX_INTERNAL_ONE_BIN
// for binary prefixes, UMEASURE_PREFIX_ONE for SI prefixes).
U_CAPI int32_t U_EXPORT2
umeas_getPrefixPower(UMeasurePrefix unitPrefix) {
    const bool isBinaryPrefix = unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN &&
                                unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN;
    if (!isBinaryPrefix) {
        // Anything outside the binary range is expected to be an SI prefix.
        U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI &&
                 unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI);
        return unitPrefix - UMEASURE_PREFIX_ONE;
    }
    return unitPrefix - UMEASURE_PREFIX_INTERNAL_ONE_BIN;
}
830
831
// Returns the base that the power of `unitPrefix` applies to: 1024 for
// prefixes in the binary range, 10 otherwise (SI prefixes).
U_CAPI int32_t U_EXPORT2
umeas_getPrefixBase(UMeasurePrefix unitPrefix) {
    const bool isBinaryPrefix = unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_BIN &&
                                unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_BIN;
    if (!isBinaryPrefix) {
        // Anything outside the binary range is expected to be an SI prefix.
        U_ASSERT(unitPrefix >= UMEASURE_PREFIX_INTERNAL_MIN_SI &&
                 unitPrefix <= UMEASURE_PREFIX_INTERNAL_MAX_SI);
        return 10;
    }
    return 1024;
}
841
842
0
// Returns the unit quantity (category) name for the given base unit.
//
// The serialized identifier is looked up in the unit-categories trie using, in
// order:
//   1. the unit as given,
//   2. its reciprocal,
//   3. its simplified form (see MeasureUnitImpl::copyAndSimplify),
//   4. the reciprocal of the simplified form.
// The first lookup that matches wins.
//
// On failure an empty CharString is returned and `status` is set:
//   - to whatever error a helper reported through `status`, or
//   - to U_INVALID_FORMAT_ERROR when none of the four lookups matched, or when
//     the matched index is outside [0, gCategoriesCount).
CharString U_I18N_API getUnitQuantity(const MeasureUnitImpl &baseMeasureUnitImpl, UErrorCode &status) {
    CharString result;
    MeasureUnitImpl baseUnitImpl = baseMeasureUnitImpl.copy(status);
    // Tracks only "did this lookup match?"; real errors travel through `status`.
    UErrorCode localStatus = U_ZERO_ERROR;
    umtx_initOnce(gUnitExtrasInitOnce, &initUnitExtras, status);
    if (U_FAILURE(status)) {
        return result;
    }
    BytesTrie trie(gSerializedUnitCategoriesTrie);

    // Attempt 1: the unit exactly as given.
    baseUnitImpl.serialize(status);
    StringPiece identifier = baseUnitImpl.identifier.data();
    int32_t idx = getUnitCategoryIndex(trie, identifier, localStatus);
    if (U_FAILURE(status)) {
        return result;
    }

    // Attempt 2: the reciprocal of the unit.
    if (U_FAILURE(localStatus)) {
        localStatus = U_ZERO_ERROR;
        baseUnitImpl.takeReciprocal(status);
        baseUnitImpl.serialize(status);
        identifier.set(baseUnitImpl.identifier.data());
        idx = getUnitCategoryIndex(trie, identifier, localStatus);

        if (U_FAILURE(status)) {
            return result;
        }
    }

    // Attempts 3 and 4 need a simplified copy of the unit. Build it only when
    // the cheaper lookups above missed, so that the common path neither pays
    // for the copy nor fails because of it.
    if (U_FAILURE(localStatus)) {
        MeasureUnitImpl simplifiedUnit = baseMeasureUnitImpl.copyAndSimplify(status);
        if (U_FAILURE(status)) {
            return result;
        }

        // Attempt 3: the simplified unit.
        localStatus = U_ZERO_ERROR;
        simplifiedUnit.serialize(status);
        identifier.set(simplifiedUnit.identifier.data());
        idx = getUnitCategoryIndex(trie, identifier, localStatus);

        if (U_FAILURE(status)) {
            return result;
        }

        // Attempt 4: the reciprocal of the simplified unit.
        if (U_FAILURE(localStatus)) {
            localStatus = U_ZERO_ERROR;
            simplifiedUnit.takeReciprocal(status);
            simplifiedUnit.serialize(status);
            identifier.set(simplifiedUnit.identifier.data());
            idx = getUnitCategoryIndex(trie, identifier, localStatus);

            if (U_FAILURE(status)) {
                return result;
            }
        }
    }

    // No lookup matched at all: report it to the caller.
    if (U_FAILURE(localStatus)) {
        status = U_INVALID_FORMAT_ERROR;
        return result;
    }

    // Guard the gCategories access against an out-of-range trie value.
    if (idx < 0 || idx >= gCategoriesCount) {
        status = U_INVALID_FORMAT_ERROR;
        return result;
    }

    result.appendInvariantChars(gCategories[idx], u_strlen(gCategories[idx]), status);
    return result;
}
915
916
// In ICU4J, this is MeasureUnit.getSingleUnitImpl().
917
0
SingleUnitImpl SingleUnitImpl::forMeasureUnit(const MeasureUnit& measureUnit, UErrorCode& status) {
918
0
    MeasureUnitImpl temp;
919
0
    const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(measureUnit, temp, status);
920
0
    if (U_FAILURE(status)) {
921
0
        return {};
922
0
    }
923
0
    if (impl.singleUnits.length() == 0) {
924
0
        return {};
925
0
    }
926
0
    if (impl.singleUnits.length() == 1) {
927
0
        return *impl.singleUnits[0];
928
0
    }
929
0
    status = U_ILLEGAL_ARGUMENT_ERROR;
930
0
    return {};
931
0
}
932
933
0
// Builds a MeasureUnit that contains just this single unit.
MeasureUnit SingleUnitImpl::build(UErrorCode& status) const {
    // The (SingleUnitImpl, UErrorCode) constructor appends this unit to an
    // otherwise empty MeasureUnitImpl.
    MeasureUnitImpl wrapper(*this, status);
    // TODO(icu-units#28): the MeasureUnitImpl::build() method uses
    // findBySubtype, which is relatively slow.
    // - At the time of loading the simple unit IDs, we could also save a
    //   mapping to the builtin MeasureUnit type and subtype they correspond to.
    // - This method could then check dimensionality and index, and if both are
    //   1, directly return MeasureUnit instances very quickly.
    return std::move(wrapper).build(status);
}
944
945
0
const char *SingleUnitImpl::getSimpleUnitID() const {
946
0
    return gSimpleUnits[index];
947
0
}
948
949
0
// Appends the identifier of this single unit to `result`, ignoring the sign of
// its dimensionality: an optional power part ("square-", "cubic-" or "powN-"),
// then an optional prefix string, then the simple unit ID.
//
// Sets `status` to U_ILLEGAL_ARGUMENT_ERROR when the absolute power is greater
// than 15, and to U_UNSUPPORTED_ERROR when the prefix has no entry in
// gUnitPrefixStrings. Nothing further is appended once `status` is a failure.
void SingleUnitImpl::appendNeutralIdentifier(CharString &result, UErrorCode &status) const UPRV_NO_SANITIZE_UNDEFINED {
    const int32_t absPower = std::abs(this->dimensionality);

    U_ASSERT(absPower > 0); // "this function does not support the dimensionless single units";

    // Power part.
    switch (absPower) {
        case 1:
            // A power of one is implicit: nothing to write.
            break;
        case 2:
            result.append(StringPiece("square-"), status);
            break;
        case 3:
            result.append(StringPiece("cubic-"), status);
            break;
        default:
            if (absPower > 15) {
                status = U_ILLEGAL_ARGUMENT_ERROR; // Unit Identifier Syntax Error
                return;
            }
            result.append(StringPiece("pow"), status);
            result.appendNumber(absPower, status);
            result.append(StringPiece("-"), status);
            break;
    }

    if (U_FAILURE(status)) {
        return;
    }

    // Prefix part.
    if (this->unitPrefix != UMEASURE_PREFIX_ONE) {
        bool prefixKnown = false;
        // TODO: consider using binary search? If we do this, add a unit
        // test to ensure gUnitPrefixStrings is sorted?
        for (const auto &entry : gUnitPrefixStrings) {
            if (entry.value != this->unitPrefix) {
                continue;
            }
            result.append(entry.string, status);
            prefixKnown = true;
            break;
        }
        if (!prefixKnown) {
            status = U_UNSUPPORTED_ERROR;
            return;
        }
    }

    // Simple unit part.
    result.append(StringPiece(this->getSimpleUnitID()), status);
}
992
993
0
// Returns the entry of gSimpleUnitCategories selected by this unit's `index`.
// No bounds check is performed on `index`.
int32_t SingleUnitImpl::getUnitCategoryIndex() const {
    const int32_t categoryIndex = gSimpleUnitCategories[this->index];
    return categoryIndex;
}
996
997
0
// Constructs a MeasureUnitImpl by appending `singleUnit` to it. The boolean
// returned by appendSingleUnit is not inspected; errors are reported through
// `status`.
MeasureUnitImpl::MeasureUnitImpl(const SingleUnitImpl &singleUnit, UErrorCode &status) {
    appendSingleUnit(singleUnit, status);
}
1000
1001
0
// Parses `identifier` into a MeasureUnitImpl. Errors from creating the parser
// or from parsing are reported through `status`.
MeasureUnitImpl MeasureUnitImpl::forIdentifier(StringPiece identifier, UErrorCode& status) {
    MeasureUnitImpl parsed = Parser::from(identifier, status).parse(status);
    return parsed;
}
1004
1005
// Returns the MeasureUnitImpl describing `measureUnit` without copying when
// possible: if the MeasureUnit already holds one (fImpl), that object is
// returned; otherwise the unit's identifier is parsed into `memory` and a
// reference to `memory` is returned. Parse errors are reported via `status`.
const MeasureUnitImpl& MeasureUnitImpl::forMeasureUnit(
        const MeasureUnit& measureUnit, MeasureUnitImpl& memory, UErrorCode& status) {
    if (!measureUnit.fImpl) {
        memory = Parser::from(measureUnit.getIdentifier(), status).parse(status);
        return memory;
    }
    return *measureUnit.fImpl;
}
1014
1015
// Returns a MeasureUnitImpl owned by the caller: a copy of the one held by
// `measureUnit` (fImpl) when present, otherwise the result of parsing the
// unit's identifier. Errors are reported via `status`.
MeasureUnitImpl MeasureUnitImpl::forMeasureUnitMaybeCopy(
        const MeasureUnit& measureUnit, UErrorCode& status) {
    if (!measureUnit.fImpl) {
        return Parser::from(measureUnit.getIdentifier(), status).parse(status);
    }
    return measureUnit.fImpl->copy(status);
}
1023
1024
0
// Turns this unit into its reciprocal in place by negating the dimensionality
// of every single unit. The cached identifier is cleared because it no longer
// describes the unit. The status parameter is unused.
void MeasureUnitImpl::takeReciprocal(UErrorCode& /*status*/) {
    identifier.clear();
    const int32_t unitCount = singleUnits.length();
    for (int32_t i = 0; i < unitCount; ++i) {
        SingleUnitImpl *unit = singleUnits[i];
        unit->dimensionality *= -1;
    }
}
1030
1031
0
// Returns a new MeasureUnitImpl in which single units sharing the same simple
// unit ID and the same prefix are merged into one entry whose dimensionality
// is the sum of the merged dimensionalities. Units without such a twin are
// appended through appendSingleUnit.
MeasureUnitImpl MeasureUnitImpl::copyAndSimplify(UErrorCode &status) const {
    MeasureUnitImpl simplified;
    const int32_t sourceCount = singleUnits.length();
    for (int32_t i = 0; i < sourceCount; i++) {
        const SingleUnitImpl &incoming = *this->singleUnits[i];

        // The search below makes the whole method O(n^2). However, n is very
        // small (number of units, generally, at maximum equal to 10).
        SingleUnitImpl *twin = nullptr;
        for (int32_t j = 0; j < simplified.singleUnits.length(); j++) {
            SingleUnitImpl *existing = simplified.singleUnits[j];
            const bool sameSimpleUnit =
                uprv_strcmp(existing->getSimpleUnitID(), incoming.getSimpleUnitID()) == 0;
            if (sameSimpleUnit && existing->unitPrefix == incoming.unitPrefix) {
                twin = existing;
                break;
            }
        }

        if (twin != nullptr) {
            // Fold the incoming unit into the entry already present.
            twin->dimensionality = twin->dimensionality + incoming.dimensionality;
        } else {
            simplified.appendSingleUnit(incoming, status);
        }
    }

    return simplified;
}
1057
1058
0
bool MeasureUnitImpl::appendSingleUnit(const SingleUnitImpl &singleUnit, UErrorCode &status) {
1059
0
    identifier.clear();
1060
1061
0
    if (singleUnit.isDimensionless()) {
1062
        // Do not append dimensionless units.
1063
0
        return false;
1064
0
    }
1065
1066
    // Find a similar unit that already exists, to attempt to coalesce
1067
0
    SingleUnitImpl *oldUnit = nullptr;
1068
0
    for (int32_t i = 0; i < this->singleUnits.length(); i++) {
1069
0
        auto *candidate = this->singleUnits[i];
1070
0
        if (candidate->isCompatibleWith(singleUnit)) {
1071
0
            oldUnit = candidate;
1072
0
        }
1073
0
    }
1074
1075
0
    if (oldUnit) {
1076
        // Both dimensionalities will be positive, or both will be negative, by
1077
        // virtue of isCompatibleWith().
1078
0
        oldUnit->dimensionality += singleUnit.dimensionality;
1079
1080
0
        return false;
1081
0
    }
1082
1083
    // Add a copy of singleUnit
1084
    // NOTE: MaybeStackVector::emplaceBackAndCheckErrorCode creates new copy of  singleUnit.
1085
0
    this->singleUnits.emplaceBackAndCheckErrorCode(status, singleUnit);
1086
0
    if (U_FAILURE(status)) {
1087
0
        return false;
1088
0
    }
1089
1090
    // If the MeasureUnitImpl is `UMEASURE_UNIT_SINGLE` and after the appending a unit, the `singleUnits`
1091
    // contains more than one. thus means the complexity should be `UMEASURE_UNIT_COMPOUND`
1092
0
    if (this->singleUnits.length() > 1 &&
1093
0
        this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_SINGLE) {
1094
0
        this->complexity = UMeasureUnitComplexity::UMEASURE_UNIT_COMPOUND;
1095
0
    }
1096
1097
0
    return true;
1098
0
}
1099
1100
// Splits this unit into MeasureUnitImplWithIndex entries.
//
// For a unit that is not UMEASURE_UNIT_MIXED, the result has a single entry
// built from index 0 and the whole unit. For a mixed unit, there is one entry
// per single unit, built from its position in `singleUnits` and that single
// unit; extraction stops at the first failure reported through `status`.
MaybeStackVector<MeasureUnitImplWithIndex>
MeasureUnitImpl::extractIndividualUnitsWithIndices(UErrorCode &status) const {
    MaybeStackVector<MeasureUnitImplWithIndex> result;

    const bool isMixed = this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED;
    if (!isMixed) {
        result.emplaceBackAndCheckErrorCode(status, 0, *this, status);
        return result;
    }

    for (int32_t i = 0; i < singleUnits.length(); ++i) {
        result.emplaceBackAndCheckErrorCode(status, i, *singleUnits[i], status);
        if (U_FAILURE(status)) {
            break;
        }
    }

    return result;
}
1118
1119
/**
1120
 * Normalize a MeasureUnitImpl and generate the identifier string in place.
1121
 */
1122
0
void MeasureUnitImpl::serialize(UErrorCode &status) {
1123
0
    if (U_FAILURE(status)) {
1124
0
        return;
1125
0
    }
1126
1127
0
    if (this->singleUnits.length() == 0) {
1128
        // Dimensionless, constructed by the default constructor.
1129
0
        return;
1130
0
    }
1131
1132
0
    if (this->complexity == UMEASURE_UNIT_COMPOUND) {
1133
        // Note: don't sort a MIXED unit
1134
0
        uprv_sortArray(this->singleUnits.getAlias(), this->singleUnits.length(),
1135
0
                       sizeof(this->singleUnits[0]), compareSingleUnits, nullptr, false, &status);
1136
0
        if (U_FAILURE(status)) {
1137
0
            return;
1138
0
        }
1139
0
    }
1140
1141
0
    CharString result;
1142
0
    bool beforePer = true;
1143
0
    bool firstTimeNegativeDimension = false;
1144
0
    for (int32_t i = 0; i < this->singleUnits.length(); i++) {
1145
0
        if (beforePer && (*this->singleUnits[i]).dimensionality < 0) {
1146
0
            beforePer = false;
1147
0
            firstTimeNegativeDimension = true;
1148
0
        } else if ((*this->singleUnits[i]).dimensionality < 0) {
1149
0
            firstTimeNegativeDimension = false;
1150
0
        }
1151
1152
0
        if (U_FAILURE(status)) {
1153
0
            return;
1154
0
        }
1155
1156
0
        if (this->complexity == UMeasureUnitComplexity::UMEASURE_UNIT_MIXED) {
1157
0
            if (result.length() != 0) {
1158
0
                result.append(StringPiece("-and-"), status);
1159
0
            }
1160
0
        } else {
1161
0
            if (firstTimeNegativeDimension) {
1162
0
                if (result.length() == 0) {
1163
0
                    result.append(StringPiece("per-"), status);
1164
0
                } else {
1165
0
                    result.append(StringPiece("-per-"), status);
1166
0
                }
1167
0
            } else {
1168
0
                if (result.length() != 0) {
1169
0
                    result.append(StringPiece("-"), status);
1170
0
                }
1171
0
            }
1172
0
        }
1173
1174
0
        this->singleUnits[i]->appendNeutralIdentifier(result, status);
1175
0
    }
1176
1177
0
    this->identifier = CharString(result, status);
1178
0
}
1179
1180
0
// Consumes this MeasureUnitImpl: serializes it to generate its identifier,
// then moves it into a newly constructed MeasureUnit.
MeasureUnit MeasureUnitImpl::build(UErrorCode& status) && {
    serialize(status);
    MeasureUnitImpl &self = *this;
    return MeasureUnit(std::move(self));
}
1184
1185
0
// Creates a MeasureUnit from a unit identifier: the identifier is parsed into
// a MeasureUnitImpl, which is then built into a MeasureUnit. Errors are
// reported through `status`.
MeasureUnit MeasureUnit::forIdentifier(StringPiece identifier, UErrorCode& status) {
    MeasureUnitImpl parsed = Parser::from(identifier, status).parse(status);
    return std::move(parsed).build(status);
}
1188
1189
0
// Returns the complexity recorded in the MeasureUnitImpl for this unit.
UMeasureUnitComplexity MeasureUnit::getComplexity(UErrorCode& status) const {
    // Storage used only when this unit has no MeasureUnitImpl of its own.
    MeasureUnitImpl scratch;
    const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, scratch, status);
    return impl.complexity;
}
1193
1194
0
// Returns the prefix of this unit, taken from its SingleUnitImpl. See
// SingleUnitImpl::forMeasureUnit for the error cases reported via `status`.
UMeasurePrefix MeasureUnit::getPrefix(UErrorCode& status) const {
    const SingleUnitImpl unit = SingleUnitImpl::forMeasureUnit(*this, status);
    return unit.unitPrefix;
}
1197
1198
0
// Returns a MeasureUnit built from this unit's SingleUnitImpl with its prefix
// replaced by `prefix`. See SingleUnitImpl::forMeasureUnit for the error cases
// reported via `status`.
MeasureUnit MeasureUnit::withPrefix(UMeasurePrefix prefix, UErrorCode& status) const UPRV_NO_SANITIZE_UNDEFINED {
    SingleUnitImpl prefixed = SingleUnitImpl::forMeasureUnit(*this, status);
    prefixed.unitPrefix = prefix;
    return prefixed.build(status);
}
1203
1204
0
// Returns the dimensionality (power) of this unit, taken from its
// SingleUnitImpl. Returns 0 when `status` is a failure or when the unit is
// dimensionless.
int32_t MeasureUnit::getDimensionality(UErrorCode& status) const {
    const SingleUnitImpl unit = SingleUnitImpl::forMeasureUnit(*this, status);
    if (U_FAILURE(status) || unit.isDimensionless()) {
        return 0;
    }
    return unit.dimensionality;
}
1212
1213
0
// Returns a MeasureUnit built from this unit's SingleUnitImpl with its
// dimensionality replaced by `dimensionality`. See
// SingleUnitImpl::forMeasureUnit for the error cases reported via `status`.
MeasureUnit MeasureUnit::withDimensionality(int32_t dimensionality, UErrorCode& status) const {
    SingleUnitImpl repowered = SingleUnitImpl::forMeasureUnit(*this, status);
    repowered.dimensionality = dimensionality;
    return repowered.build(status);
}
1218
1219
0
// Returns the reciprocal of this unit. Works on a MeasureUnitImpl owned by
// this call, so the receiver is left unchanged.
MeasureUnit MeasureUnit::reciprocal(UErrorCode& status) const {
    MeasureUnitImpl inverted = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
    inverted.takeReciprocal(status);
    return std::move(inverted).build(status);
}
1224
1225
0
// Returns the product of this unit and `other`: every single unit of `other`
// is appended to a copy of this unit (appendSingleUnit coalesces compatible
// units), and the result is marked COMPOUND when it holds more than one single
// unit.
//
// If either operand is a MIXED unit, `status` is set to
// U_ILLEGAL_ARGUMENT_ERROR and a default-constructed MeasureUnit is returned.
MeasureUnit MeasureUnit::product(const MeasureUnit& other, UErrorCode& status) const {
    MeasureUnitImpl combined = MeasureUnitImpl::forMeasureUnitMaybeCopy(*this, status);
    // Storage used only when `other` has no MeasureUnitImpl of its own.
    MeasureUnitImpl scratch;
    const MeasureUnitImpl& otherImpl = MeasureUnitImpl::forMeasureUnit(other, scratch, status);

    const bool anyMixed = combined.complexity == UMEASURE_UNIT_MIXED ||
                          otherImpl.complexity == UMEASURE_UNIT_MIXED;
    if (anyMixed) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return {};
    }

    const int32_t otherCount = otherImpl.singleUnits.length();
    for (int32_t i = 0; i < otherCount; i++) {
        combined.appendSingleUnit(*otherImpl.singleUnits[i], status);
    }
    if (combined.singleUnits.length() > 1) {
        combined.complexity = UMEASURE_UNIT_COMPOUND;
    }
    return std::move(combined).build(status);
}
1241
1242
0
// Splits this unit into its single units, each built into its own MeasureUnit.
// `outCount` receives the number of single units; the returned LocalArray owns
// an array of that many elements.
//
// If the array cannot be allocated, `status` is set to
// U_MEMORY_ALLOCATION_ERROR and an empty LocalArray is returned.
LocalArray<MeasureUnit> MeasureUnit::splitToSingleUnitsImpl(int32_t& outCount, UErrorCode& status) const {
    // Storage used only when this unit has no MeasureUnitImpl of its own.
    MeasureUnitImpl scratch;
    const MeasureUnitImpl& impl = MeasureUnitImpl::forMeasureUnit(*this, scratch, status);
    outCount = impl.singleUnits.length();

    // NOTE(review): the null check presumes this `new` reports failure by
    // returning nullptr rather than throwing - confirm against MeasureUnit's
    // allocation operators.
    MeasureUnit* units = new MeasureUnit[outCount];
    if (!units) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return LocalArray<MeasureUnit>();
    }

    for (int32_t i = 0; i < outCount; ++i) {
        units[i] = impl.singleUnits[i]->build(status);
    }
    return LocalArray<MeasureUnit>(units, status);
}
1256
1257
1258
U_NAMESPACE_END
1259
1260
#endif /* !UCONFIG_NO_FORMATTING */