Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/i18n/number_longnames.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2017 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include "unicode/utypes.h"
5
6
#if !UCONFIG_NO_FORMATTING
7
8
#include <cstdlib>
9
10
#include "unicode/simpleformatter.h"
11
#include "unicode/ures.h"
12
#include "ureslocs.h"
13
#include "charstr.h"
14
#include "uresimp.h"
15
#include "measunit_impl.h"
16
#include "number_longnames.h"
17
#include "number_microprops.h"
18
#include <algorithm>
19
#include "cstring.h"
20
#include "util.h"
21
22
using namespace icu;
23
using namespace icu::number;
24
using namespace icu::number::impl;
25
26
namespace {
27
28
/**
29
 * Display Name (this format has no placeholder).
30
 *
31
 * Used as an index into the LongNameHandler::simpleFormats array. Units
32
 * resources cover the normal set of PluralRules keys, as well as `dnam` and
33
 * `per` forms.
34
 */
35
constexpr int32_t DNAM_INDEX = StandardPlural::Form::COUNT;
36
/**
37
 * "per" form (e.g. "{0} per day" is day's "per" form).
38
 *
39
 * Used as an index into the LongNameHandler::simpleFormats array. Units
40
 * resources cover the normal set of PluralRules keys, as well as `dnam` and
41
 * `per` forms.
42
 */
43
constexpr int32_t PER_INDEX = StandardPlural::Form::COUNT + 1;
44
/**
45
 * Gender of the word, in languages with grammatical gender.
46
 */
47
constexpr int32_t GENDER_INDEX = StandardPlural::Form::COUNT + 2;
48
// Number of keys in the array populated by PluralTableSink.
49
constexpr int32_t ARRAY_LENGTH = StandardPlural::Form::COUNT + 3;
50
51
// TODO(icu-units#28): load this list from resources, after creating a "&set"
// function for use in ldml2icu rules.
//
// Grammatical gender identifiers known to this file.
//
// INVARIANT: the entries must remain in ascending strcmp() order, because
// getGenderString() looks entries up with a binary search. The pointers
// themselves are const so the table cannot be re-pointed at runtime.
const int32_t GENDER_COUNT = 7;
const char *const gGenders[GENDER_COUNT] = {"animate",   "common", "feminine", "inanimate",
                                            "masculine", "neuter", "personal"};
56
57
// Converts a UnicodeString to a const char*, either pointing to a string in
58
// gGenders, or pointing to an empty string if an appropriate string was not
59
// found.
60
0
const char *getGenderString(UnicodeString uGender, UErrorCode status) {
61
0
    if (uGender.length() == 0) {
62
0
        return "";
63
0
    }
64
0
    CharString gender;
65
0
    gender.appendInvariantChars(uGender, status);
66
0
    if (U_FAILURE(status)) {
67
0
        return "";
68
0
    }
69
0
    int32_t first = 0;
70
0
    int32_t last = GENDER_COUNT;
71
0
    while (first < last) {
72
0
        int32_t mid = (first + last) / 2;
73
0
        int32_t cmp = uprv_strcmp(gender.data(), gGenders[mid]);
74
0
        if (cmp == 0) {
75
0
            return gGenders[mid];
76
0
        } else if (cmp > 0) {
77
0
            first = mid + 1;
78
0
        } else if (cmp < 0) {
79
0
            last = mid;
80
0
        }
81
0
    }
82
    // We don't return an error in case our gGenders list is incomplete in
83
    // production.
84
    //
85
    // TODO(icu-units#28): a unit test checking all locales' genders are covered
86
    // by gGenders? Else load a complete list of genders found in
87
    // grammaticalFeatures in an initOnce.
88
0
    return "";
89
0
}
90
91
// Returns the array index that corresponds to the given pluralKeyword.
92
0
static int32_t getIndex(const char* pluralKeyword, UErrorCode& status) {
93
    // pluralKeyword can also be "dnam", "per", or "gender"
94
0
    switch (*pluralKeyword) {
95
0
    case 'd':
96
0
        if (uprv_strcmp(pluralKeyword + 1, "nam") == 0) {
97
0
            return DNAM_INDEX;
98
0
        }
99
0
        break;
100
0
    case 'g':
101
0
        if (uprv_strcmp(pluralKeyword + 1, "ender") == 0) {
102
0
            return GENDER_INDEX;
103
0
        }
104
0
        break;
105
0
    case 'p':
106
0
        if (uprv_strcmp(pluralKeyword + 1, "er") == 0) {
107
0
            return PER_INDEX;
108
0
        }
109
0
        break;
110
0
    default:
111
0
        break;
112
0
    }
113
0
    StandardPlural::Form plural = StandardPlural::fromString(pluralKeyword, status);
114
0
    return plural;
115
0
}
116
117
// Selects a string out of the `strings` array which corresponds to the
118
// specified plural form, with fallback to the OTHER form.
119
//
120
// The `strings` array must have ARRAY_LENGTH items: one corresponding to each
121
// of the plural forms, plus a display name ("dnam") and a "per" form.
122
static UnicodeString getWithPlural(
123
        const UnicodeString* strings,
124
        StandardPlural::Form plural,
125
0
        UErrorCode& status) {
126
0
    UnicodeString result = strings[plural];
127
0
    if (result.isBogus()) {
128
0
        result = strings[StandardPlural::Form::OTHER];
129
0
    }
130
0
    if (result.isBogus()) {
131
        // There should always be data in the "other" plural variant.
132
0
        status = U_INTERNAL_PROGRAM_ERROR;
133
0
    }
134
0
    return result;
135
0
}
136
137
// Location of the "{0}" placeholder within a unit pattern.
// extractCorePattern() reports PH_BEGINNING, PH_END, PH_MIDDLE or PH_NONE.
// NOTE(review): PH_EMPTY is not assigned by extractCorePattern(); presumably
// callers use it for a missing/empty pattern - confirm against its users.
enum PlaceholderPosition { PH_EMPTY, PH_NONE, PH_BEGINNING, PH_MIDDLE, PH_END };
138
139
/**
140
 * Returns three outputs extracted from pattern.
141
 *
142
 * @param coreUnit is extracted as per Extract(...) in the spec:
143
 *   https://unicode.org/reports/tr35/tr35-general.html#compound-units
144
 * @param PlaceholderPosition indicates where in the string the placeholder was
145
 *   found.
146
 * @param joinerChar Iff the placeholder was at the beginning or end, joinerChar
147
 *   contains the space character (if any) that separated the placeholder from
148
 *   the rest of the pattern. Otherwise, joinerChar is set to NUL. Only one
149
 *   space character is considered.
150
 */
151
void extractCorePattern(const UnicodeString &pattern,
152
                        UnicodeString &coreUnit,
153
                        PlaceholderPosition &placeholderPosition,
154
0
                        UChar &joinerChar) {
155
0
    joinerChar = 0;
156
0
    int32_t len = pattern.length();
157
0
    if (pattern.startsWith(u"{0}", 3)) {
158
0
        placeholderPosition = PH_BEGINNING;
159
0
        if (u_isJavaSpaceChar(pattern[3])) {
160
0
            joinerChar = pattern[3];
161
0
            coreUnit.setTo(pattern, 4, len - 4);
162
0
        } else {
163
0
            coreUnit.setTo(pattern, 3, len - 3);
164
0
        }
165
0
    } else if (pattern.endsWith(u"{0}", 3)) {
166
0
        placeholderPosition = PH_END;
167
0
        if (u_isJavaSpaceChar(pattern[len - 4])) {
168
0
            coreUnit.setTo(pattern, 0, len - 4);
169
0
            joinerChar = pattern[len - 4];
170
0
        } else {
171
0
            coreUnit.setTo(pattern, 0, len - 3);
172
0
        }
173
0
    } else if (pattern.indexOf(u"{0}", 3, 1, len - 2) == -1) {
174
0
        placeholderPosition = PH_NONE;
175
0
        coreUnit = pattern;
176
0
    } else {
177
0
        placeholderPosition = PH_MIDDLE;
178
0
        coreUnit = pattern;
179
0
    }
180
0
}
181
182
//////////////////////////
183
/// BEGIN DATA LOADING ///
184
//////////////////////////
185
186
// Gets the gender of a built-in unit: unit must be a built-in. Returns an empty
187
// string both in case of unknown gender and in case of unknown unit.
188
UnicodeString
189
0
getGenderForBuiltin(const Locale &locale, const MeasureUnit &builtinUnit, UErrorCode &status) {
190
0
    LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
191
0
    if (U_FAILURE(status)) { return {}; }
192
193
    // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
194
    // TODO(ICU-20400): Get duration-*-person data properly with aliases.
195
0
    StringPiece subtypeForResource;
196
0
    int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(builtinUnit.getSubtype()));
197
0
    if (subtypeLen > 7 && uprv_strcmp(builtinUnit.getSubtype() + subtypeLen - 7, "-person") == 0) {
198
0
        subtypeForResource = {builtinUnit.getSubtype(), subtypeLen - 7};
199
0
    } else {
200
0
        subtypeForResource = builtinUnit.getSubtype();
201
0
    }
202
203
0
    CharString key;
204
0
    key.append("units/", status);
205
0
    key.append(builtinUnit.getType(), status);
206
0
    key.append("/", status);
207
0
    key.append(subtypeForResource, status);
208
0
    key.append("/gender", status);
209
210
0
    UErrorCode localStatus = status;
211
0
    int32_t resultLen = 0;
212
0
    const UChar *result =
213
0
        ures_getStringByKeyWithFallback(unitsBundle.getAlias(), key.data(), &resultLen, &localStatus);
214
0
    if (U_SUCCESS(localStatus)) {
215
0
        status = localStatus;
216
0
        return UnicodeString(true, result, resultLen);
217
0
    } else {
218
        // TODO(icu-units#28): "$unitRes/gender" does not exist. Do we want to
219
        // check whether the parent "$unitRes" exists? Then we could return
220
        // U_MISSING_RESOURCE_ERROR for incorrect usage (e.g. builtinUnit not
221
        // being a builtin).
222
0
        return {};
223
0
    }
224
0
}
225
226
// Loads data from a resource tree with paths matching
227
// $key/$pluralForm/$gender/$case, with lateral inheritance for missing cases
228
// and genders.
229
//
230
// An InflectedPluralSink is configured to load data for a specific gender and
231
// case. It loads all plural forms, because selection between plural forms is
232
// dependent upon the value being formatted.
233
//
234
// See data/unit/de.txt and data/unit/fr.txt for examples - take a look at
235
// units/compound/power2: German has case, French has differences for gender,
236
// but no case.
237
//
238
// TODO(icu-units#138): Conceptually similar to PluralTableSink, however the
239
// tree structures are different. After homogenizing the structures, we may be
240
// able to unify the two classes.
241
//
242
// TODO: Spec violation: expects presence of "count" - does not fallback to an
243
// absent "count"! If this fallback were added, getCompoundValue could be
244
// superseded?
245
class InflectedPluralSink : public ResourceSink {
246
  public:
247
    // Accepts `char*` rather than StringPiece because
248
    // ResourceTable::findValue(...) requires a null-terminated `char*`.
249
    //
250
    // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
251
    // checking is performed.
252
    explicit InflectedPluralSink(const char *gender, const char *caseVariant, UnicodeString *outArray)
253
0
        : gender(gender), caseVariant(caseVariant), outArray(outArray) {
254
        // Initialize the array to bogus strings.
255
0
        for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
256
0
            outArray[i].setToBogus();
257
0
        }
258
0
    }
259
260
    // See ResourceSink::put().
261
0
    void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE {
262
0
        ResourceTable pluralsTable = value.getTable(status);
263
0
        if (U_FAILURE(status)) { return; }
264
0
        for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) {
265
0
            int32_t pluralIndex = getIndex(key, status);
266
0
            if (U_FAILURE(status)) { return; }
267
0
            if (!outArray[pluralIndex].isBogus()) {
268
                // We already have a pattern
269
0
                continue;
270
0
            }
271
0
            ResourceTable genderTable = value.getTable(status);
272
0
            ResourceTable caseTable; // This instance has to outlive `value`
273
0
            if (loadForPluralForm(genderTable, caseTable, value, status)) {
274
0
                outArray[pluralIndex] = value.getUnicodeString(status);
275
0
            }
276
0
        }
277
0
    }
278
279
  private:
280
    // Tries to load data for the configured gender from `genderTable`. Returns
281
    // true if found, returning the data in `value`. The returned data will be
282
    // for the configured gender if found, falling back to "neuter" and
283
    // no-gender if not. The caseTable parameter holds the intermediate
284
    // ResourceTable for the sake of lifetime management.
285
    bool loadForPluralForm(const ResourceTable &genderTable,
286
                           ResourceTable &caseTable,
287
                           ResourceValue &value,
288
0
                           UErrorCode &status) {
289
0
        if (uprv_strcmp(gender, "") != 0) {
290
0
            if (loadForGender(genderTable, gender, caseTable, value, status)) {
291
0
                return true;
292
0
            }
293
0
            if (uprv_strcmp(gender, "neuter") != 0 &&
294
0
                loadForGender(genderTable, "neuter", caseTable, value, status)) {
295
0
                return true;
296
0
            }
297
0
        }
298
0
        if (loadForGender(genderTable, "_", caseTable, value, status)) {
299
0
            return true;
300
0
        }
301
0
        return false;
302
0
    }
303
304
    // Tries to load data for the given gender from `genderTable`. Returns true
305
    // if found, returning the data in `value`. The returned data will be for
306
    // the configured case if found, falling back to "nominative" and no-case if
307
    // not.
308
    bool loadForGender(const ResourceTable &genderTable,
309
                       const char *genderVal,
310
                       ResourceTable &caseTable,
311
                       ResourceValue &value,
312
0
                       UErrorCode &status) {
313
0
        if (!genderTable.findValue(genderVal, value)) {
314
0
            return false;
315
0
        }
316
0
        caseTable = value.getTable(status);
317
0
        if (uprv_strcmp(caseVariant, "") != 0) {
318
0
            if (loadForCase(caseTable, caseVariant, value)) {
319
0
                return true;
320
0
            }
321
0
            if (uprv_strcmp(caseVariant, "nominative") != 0 &&
322
0
                loadForCase(caseTable, "nominative", value)) {
323
0
                return true;
324
0
            }
325
0
        }
326
0
        if (loadForCase(caseTable, "_", value)) {
327
0
            return true;
328
0
        }
329
0
        return false;
330
0
    }
331
332
    // Tries to load data for the given case from `caseTable`. Returns true if
333
    // found, returning the data in `value`.
334
0
    bool loadForCase(const ResourceTable &caseTable, const char *caseValue, ResourceValue &value) {
335
0
        if (!caseTable.findValue(caseValue, value)) {
336
0
            return false;
337
0
        }
338
0
        return true;
339
0
    }
340
341
    const char *gender;
342
    const char *caseVariant;
343
    UnicodeString *outArray;
344
};
345
346
// Fetches localised formatting patterns for the given subKey. See documentation
347
// for InflectedPluralSink for details.
348
//
349
// Data is loaded for the appropriate unit width, with missing data filled in
350
// from unitsShort.
351
void getInflectedMeasureData(StringPiece subKey,
352
                             const Locale &locale,
353
                             const UNumberUnitWidth &width,
354
                             const char *gender,
355
                             const char *caseVariant,
356
                             UnicodeString *outArray,
357
0
                             UErrorCode &status) {
358
0
    InflectedPluralSink sink(gender, caseVariant, outArray);
359
0
    LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
360
0
    if (U_FAILURE(status)) { return; }
361
362
0
    CharString key;
363
0
    key.append("units", status);
364
0
    if (width == UNUM_UNIT_WIDTH_NARROW) {
365
0
        key.append("Narrow", status);
366
0
    } else if (width == UNUM_UNIT_WIDTH_SHORT) {
367
0
        key.append("Short", status);
368
0
    }
369
0
    key.append("/", status);
370
0
    key.append(subKey, status);
371
372
0
    UErrorCode localStatus = status;
373
0
    ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
374
0
    if (width == UNUM_UNIT_WIDTH_SHORT) {
375
0
        status = localStatus;
376
0
        return;
377
0
    }
378
379
    // TODO(ICU-13353): The fallback to short does not work in ICU4C.
380
    // Manually fall back to short (this is done automatically in Java).
381
0
    key.clear();
382
0
    key.append("unitsShort/", status);
383
0
    key.append(subKey, status);
384
0
    ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status);
385
0
}
386
387
class PluralTableSink : public ResourceSink {
388
  public:
389
    // NOTE: outArray MUST have a length of at least ARRAY_LENGTH. No bounds
390
    // checking is performed.
391
0
    explicit PluralTableSink(UnicodeString *outArray) : outArray(outArray) {
392
        // Initialize the array to bogus strings.
393
0
        for (int32_t i = 0; i < ARRAY_LENGTH; i++) {
394
0
            outArray[i].setToBogus();
395
0
        }
396
0
    }
397
398
0
    void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) U_OVERRIDE {
399
0
        ResourceTable pluralsTable = value.getTable(status);
400
0
        if (U_FAILURE(status)) { return; }
401
0
        for (int32_t i = 0; pluralsTable.getKeyAndValue(i, key, value); ++i) {
402
0
            if (uprv_strcmp(key, "case") == 0) {
403
0
                continue;
404
0
            }
405
0
            int32_t index = getIndex(key, status);
406
0
            if (U_FAILURE(status)) { return; }
407
0
            if (!outArray[index].isBogus()) {
408
0
                continue;
409
0
            }
410
0
            outArray[index] = value.getUnicodeString(status);
411
0
            if (U_FAILURE(status)) { return; }
412
0
        }
413
0
    }
414
415
  private:
416
    UnicodeString *outArray;
417
};
418
419
/**
420
 * Populates outArray with `locale`-specific values for `unit` through use of
421
 * PluralTableSink. Only the set of basic units are supported!
422
 *
423
 * Reading from resources *unitsNarrow* and *unitsShort* (for width
424
 * UNUM_UNIT_WIDTH_NARROW), or just *unitsShort* (for width
425
 * UNUM_UNIT_WIDTH_SHORT). For other widths, it reads just "units".
426
 *
427
 * @param unit must be a built-in unit, i.e. must have a type and subtype,
428
 *     listed in gTypes and gSubTypes in measunit.cpp.
429
 * @param unitDisplayCase the empty string and "nominative" are treated the
430
 *     same. For other cases, strings for the requested case are used if found.
431
 *     (For any missing case-specific data, we fall back to nominative.)
432
 * @param outArray must be of fixed length ARRAY_LENGTH.
433
 */
434
void getMeasureData(const Locale &locale,
435
                    const MeasureUnit &unit,
436
                    const UNumberUnitWidth &width,
437
                    const char *unitDisplayCase,
438
                    UnicodeString *outArray,
439
0
                    UErrorCode &status) {
440
0
    PluralTableSink sink(outArray);
441
0
    LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
442
0
    if (U_FAILURE(status)) { return; }
443
444
0
    CharString subKey;
445
0
    subKey.append("/", status);
446
0
    subKey.append(unit.getType(), status);
447
0
    subKey.append("/", status);
448
449
    // Map duration-year-person, duration-week-person, etc. to duration-year, duration-week, ...
450
    // TODO(ICU-20400): Get duration-*-person data properly with aliases.
451
0
    int32_t subtypeLen = static_cast<int32_t>(uprv_strlen(unit.getSubtype()));
452
0
    if (subtypeLen > 7 && uprv_strcmp(unit.getSubtype() + subtypeLen - 7, "-person") == 0) {
453
0
        subKey.append({unit.getSubtype(), subtypeLen - 7}, status);
454
0
    } else {
455
0
        subKey.append({unit.getSubtype(), subtypeLen}, status);
456
0
    }
457
458
0
    if (width != UNUM_UNIT_WIDTH_FULL_NAME) {
459
0
        UErrorCode localStatus = status;
460
0
        CharString genderKey;
461
0
        genderKey.append("units", localStatus);
462
0
        genderKey.append(subKey, localStatus);
463
0
        genderKey.append("/gender", localStatus);
464
0
        StackUResourceBundle fillIn;
465
0
        ures_getByKeyWithFallback(unitsBundle.getAlias(), genderKey.data(), fillIn.getAlias(),
466
0
                                  &localStatus);
467
0
        outArray[GENDER_INDEX] = ures_getUnicodeString(fillIn.getAlias(), &localStatus);
468
0
    }
469
470
0
    CharString key;
471
0
    key.append("units", status);
472
0
    if (width == UNUM_UNIT_WIDTH_NARROW) {
473
0
        key.append("Narrow", status);
474
0
    } else if (width == UNUM_UNIT_WIDTH_SHORT) {
475
0
        key.append("Short", status);
476
0
    }
477
0
    key.append(subKey, status);
478
479
    // Grab desired case first, if available. Then grab no-case data to fill in
480
    // the gaps.
481
0
    if (width == UNUM_UNIT_WIDTH_FULL_NAME && unitDisplayCase[0] != 0) {
482
0
        CharString caseKey;
483
0
        caseKey.append(key, status);
484
0
        caseKey.append("/case/", status);
485
0
        caseKey.append(unitDisplayCase, status);
486
487
0
        UErrorCode localStatus = U_ZERO_ERROR;
488
        // TODO(icu-units#138): our fallback logic is not spec-compliant:
489
        // lateral fallback should happen before locale fallback. Switch to
490
        // getInflectedMeasureData after homogenizing data format? Find a unit
491
        // test case that demonstrates the incorrect fallback logic (via
492
        // regional variant of an inflected language?)
493
0
        ures_getAllItemsWithFallback(unitsBundle.getAlias(), caseKey.data(), sink, localStatus);
494
0
    }
495
496
    // TODO(icu-units#138): our fallback logic is not spec-compliant: we
497
    // check the given case, then go straight to the no-case data. The spec
498
    // states we should first look for case="nominative". As part of #138,
499
    // either get the spec changed, or add unit tests that warn us if
500
    // case="nominative" data differs from no-case data?
501
0
    UErrorCode localStatus = U_ZERO_ERROR;
502
0
    ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, localStatus);
503
0
    if (width == UNUM_UNIT_WIDTH_SHORT) {
504
0
        if (U_FAILURE(localStatus)) {
505
0
            status = localStatus;
506
0
        }
507
0
        return;
508
0
    }
509
510
    // TODO(ICU-13353): The fallback to short does not work in ICU4C.
511
    // Manually fall back to short (this is done automatically in Java).
512
0
    key.clear();
513
0
    key.append("unitsShort", status);
514
0
    key.append(subKey, status);
515
0
    ures_getAllItemsWithFallback(unitsBundle.getAlias(), key.data(), sink, status);
516
0
}
517
518
// NOTE: outArray MUST have a length of at least ARRAY_LENGTH.
519
void getCurrencyLongNameData(const Locale &locale, const CurrencyUnit &currency, UnicodeString *outArray,
520
0
                             UErrorCode &status) {
521
    // In ICU4J, this method gets a CurrencyData from CurrencyData.provider.
522
    // TODO(ICU4J): Implement this without going through CurrencyData, like in ICU4C?
523
0
    PluralTableSink sink(outArray);
524
0
    LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_CURR, locale.getName(), &status));
525
0
    if (U_FAILURE(status)) { return; }
526
0
    ures_getAllItemsWithFallback(unitsBundle.getAlias(), "CurrencyUnitPatterns", sink, status);
527
0
    if (U_FAILURE(status)) { return; }
528
0
    for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
529
0
        UnicodeString &pattern = outArray[i];
530
0
        if (pattern.isBogus()) {
531
0
            continue;
532
0
        }
533
0
        int32_t longNameLen = 0;
534
0
        const char16_t *longName = ucurr_getPluralName(
535
0
                currency.getISOCurrency(),
536
0
                locale.getName(),
537
0
                nullptr /* isChoiceFormat */,
538
0
                StandardPlural::getKeyword(static_cast<StandardPlural::Form>(i)),
539
0
                &longNameLen,
540
0
                &status);
541
        // Example pattern from data: "{0} {1}"
542
        // Example output after find-and-replace: "{0} US dollars"
543
0
        pattern.findAndReplace(UnicodeString(u"{1}"), UnicodeString(longName, longNameLen));
544
0
    }
545
0
}
546
547
// Returns the string stored at "<table>/compound/<compoundKey>" in the unit
// resource bundle of `locale`, where <table> is "units", "unitsNarrow" or
// "unitsShort" according to `width`. If that lookup fails and the width is
// not UNUM_UNIT_WIDTH_SHORT, "unitsShort/compound/<compoundKey>" is tried
// instead. Returns an empty string, with status set, on failure.
UnicodeString getCompoundValue(StringPiece compoundKey,
                               const Locale &locale,
                               const UNumberUnitWidth &width,
                               UErrorCode &status) {
    LocalUResourceBundlePointer unitsBundle(ures_open(U_ICUDATA_UNIT, locale.getName(), &status));
    if (U_FAILURE(status)) { return {}; }

    const char *widthSuffix = nullptr;
    if (width == UNUM_UNIT_WIDTH_NARROW) {
        widthSuffix = "Narrow";
    } else if (width == UNUM_UNIT_WIDTH_SHORT) {
        widthSuffix = "Short";
    }
    CharString path;
    path.append("units", status);
    if (widthSuffix != nullptr) {
        path.append(widthSuffix, status);
    }
    path.append("/compound/", status);
    path.append(compoundKey, status);

    UErrorCode primaryStatus = status;
    int32_t valueLen = 0;
    const UChar *valueChars =
        ures_getStringByKeyWithFallback(unitsBundle.getAlias(), path.data(), &valueLen, &primaryStatus);
    const bool retryWithShort = U_FAILURE(primaryStatus) && width != UNUM_UNIT_WIDTH_SHORT;
    if (!retryWithShort) {
        status = primaryStatus;
    } else {
        // Fall back to short, which contains more compound data
        path.clear();
        path.append("unitsShort/compound/", status);
        path.append(compoundKey, status);
        valueChars =
            ures_getStringByKeyWithFallback(unitsBundle.getAlias(), path.data(), &valueLen, &status);
    }
    if (U_FAILURE(status)) {
        return {};
    }
    return UnicodeString(valueChars, valueLen);
}
581
582
/**
583
 * Loads and applies deriveComponent rules from CLDR's grammaticalFeatures.xml.
584
 *
585
 * Consider a deriveComponent rule that looks like this:
586
 *
587
 *     <deriveComponent feature="case" structure="per" value0="compound" value1="nominative"/>
588
 *
589
 * Instantiating an instance as follows:
590
 *
591
 *     DerivedComponents d(loc, "case", "per");
592
 *
593
 * Applying the rule in the XML element above, `d.value0("foo")` will be "foo",
594
 * and `d.value1("foo")` will be "nominative".
595
 *
596
 * The values returned by value0(...) and value1(...) are valid only while the
597
 * instance exists. In case of any kind of failure, value0(...) and value1(...)
598
 * will return "".
599
 */
600
class DerivedComponents {
601
  public:
602
    /**
603
     * Constructor.
604
     *
605
     * The feature and structure parameters must be null-terminated. The string
606
     * referenced by compoundValue must exist for longer than the
607
     * DerivedComponents instance.
608
     */
609
0
    DerivedComponents(const Locale &locale, const char *feature, const char *structure) {
610
0
        StackUResourceBundle derivationsBundle, stackBundle;
611
0
        ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status);
612
0
        ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
613
0
                      &status);
614
0
        ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(),
615
0
                      &status);
616
0
        if (U_FAILURE(status)) {
617
0
            return;
618
0
        }
619
0
        UErrorCode localStatus = U_ZERO_ERROR;
620
        // TODO(icu-units#28): use standard normal locale resolution algorithms
621
        // rather than just grabbing language:
622
0
        ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(),
623
0
                      &localStatus);
624
        // TODO(icu-units#28):
625
        // - code currently assumes if the locale exists, the rules are there -
626
        //   instead of falling back to root when the requested rule is missing.
627
        // - investigate ures.h functions, see if one that uses res_findResource()
628
        //   might be better (or use res_findResource directly), or maybe help
629
        //   improve ures documentation to guide function selection?
630
0
        if (localStatus == U_MISSING_RESOURCE_ERROR) {
631
0
            ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
632
0
        } else {
633
0
            status = localStatus;
634
0
        }
635
0
        ures_getByKey(stackBundle.getAlias(), "component", stackBundle.getAlias(), &status);
636
0
        ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
637
0
        ures_getByKey(stackBundle.getAlias(), structure, stackBundle.getAlias(), &status);
638
0
        UnicodeString val0 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 0, &status);
639
0
        UnicodeString val1 = ures_getUnicodeStringByIndex(stackBundle.getAlias(), 1, &status);
640
0
        if (U_SUCCESS(status)) {
641
0
            if (val0.compare(UnicodeString(u"compound")) == 0) {
642
0
                compound0_ = true;
643
0
            } else {
644
0
                compound0_ = false;
645
0
                value0_.appendInvariantChars(val0, status);
646
0
            }
647
0
            if (val1.compare(UnicodeString(u"compound")) == 0) {
648
0
                compound1_ = true;
649
0
            } else {
650
0
                compound1_ = false;
651
0
                value1_.appendInvariantChars(val1, status);
652
0
            }
653
0
        }
654
0
    }
655
656
    // Returns a StringPiece that is only valid as long as the instance exists.
657
0
    StringPiece value0(const StringPiece compoundValue) const {
658
0
        return compound0_ ? compoundValue : value0_.toStringPiece();
659
0
    }
660
661
    // Returns a StringPiece that is only valid as long as the instance exists.
662
0
    StringPiece value1(const StringPiece compoundValue) const {
663
0
        return compound1_ ? compoundValue : value1_.toStringPiece();
664
0
    }
665
666
    // Returns a char* that is only valid as long as the instance exists.
667
0
    const char *value0(const char *compoundValue) const {
668
0
        return compound0_ ? compoundValue : value0_.data();
669
0
    }
670
671
    // Returns a char* that is only valid as long as the instance exists.
672
0
    const char *value1(const char *compoundValue) const {
673
0
        return compound1_ ? compoundValue : value1_.data();
674
0
    }
675
676
  private:
677
    UErrorCode status = U_ZERO_ERROR;
678
679
    // Holds strings referred to by value0 and value1;
680
    bool compound0_ = false, compound1_ = false;
681
    CharString value0_, value1_;
682
};
683
684
// TODO(icu-units#28): test somehow? Associate with an ICU ticket for adding
685
// testsuite support for testing with synthetic data?
686
/**
687
 * Loads and returns the value in rules that look like these:
688
 *
689
 * <deriveCompound feature="gender" structure="per" value="0"/>
690
 * <deriveCompound feature="gender" structure="times" value="1"/>
691
 *
692
 * Currently a fake example, but spec compliant:
693
 * <deriveCompound feature="gender" structure="power" value="feminine"/>
694
 *
695
 * NOTE: If U_FAILURE(status), returns an empty string.
696
 */ 
697
UnicodeString
698
0
getDeriveCompoundRule(Locale locale, const char *feature, const char *structure, UErrorCode &status) {
699
0
    StackUResourceBundle derivationsBundle, stackBundle;
700
0
    ures_openDirectFillIn(derivationsBundle.getAlias(), NULL, "grammaticalFeatures", &status);
701
0
    ures_getByKey(derivationsBundle.getAlias(), "grammaticalData", derivationsBundle.getAlias(),
702
0
                  &status);
703
0
    ures_getByKey(derivationsBundle.getAlias(), "derivations", derivationsBundle.getAlias(), &status);
704
    // TODO: use standard normal locale resolution algorithms rather than just grabbing language:
705
0
    ures_getByKey(derivationsBundle.getAlias(), locale.getLanguage(), stackBundle.getAlias(), &status);
706
    // TODO:
707
    // - code currently assumes if the locale exists, the rules are there -
708
    //   instead of falling back to root when the requested rule is missing.
709
    // - investigate ures.h functions, see if one that uses res_findResource()
710
    //   might be better (or use res_findResource directly), or maybe help
711
    //   improve ures documentation to guide function selection?
712
0
    if (status == U_MISSING_RESOURCE_ERROR) {
713
0
        status = U_ZERO_ERROR;
714
0
        ures_getByKey(derivationsBundle.getAlias(), "root", stackBundle.getAlias(), &status);
715
0
    }
716
0
    ures_getByKey(stackBundle.getAlias(), "compound", stackBundle.getAlias(), &status);
717
0
    ures_getByKey(stackBundle.getAlias(), feature, stackBundle.getAlias(), &status);
718
0
    UnicodeString uVal = ures_getUnicodeStringByKey(stackBundle.getAlias(), structure, &status);
719
0
    if (U_FAILURE(status)) {
720
0
        return {};
721
0
    }
722
0
    U_ASSERT(!uVal.isBogus());
723
0
    return uVal;
724
0
}
725
726
// Returns the gender string for structures following these rules:
727
//
728
// <deriveCompound feature="gender" structure="per" value="0"/>
729
// <deriveCompound feature="gender" structure="times" value="1"/>
730
//
731
// Fake example:
732
// <deriveCompound feature="gender" structure="power" value="feminine"/>
733
//
734
// data0 and data1 should be pattern arrays (UnicodeString[ARRAY_SIZE]) that
735
// correspond to value="0" and value="1".
736
//
737
// Pass a nullptr to data1 if the structure has no concept of value="1" (e.g.
738
// "prefix" doesn't).
739
UnicodeString getDerivedGender(Locale locale,
740
                               const char *structure,
741
                               UnicodeString *data0,
742
                               UnicodeString *data1,
743
0
                               UErrorCode &status) {
744
0
    UnicodeString val = getDeriveCompoundRule(locale, "gender", structure, status);
745
0
    if (val.length() == 1) {
746
0
        switch (val[0]) {
747
0
        case u'0':
748
0
            return data0[GENDER_INDEX];
749
0
        case u'1':
750
0
            if (data1 == nullptr) {
751
0
                return {};
752
0
            }
753
0
            return data1[GENDER_INDEX];
754
0
        }
755
0
    }
756
0
    return val;
757
0
}
758
759
////////////////////////
760
/// END DATA LOADING ///
761
////////////////////////
762
763
// TODO: promote this somewhere? It's based on patternprops.cpp' trimWhitespace
764
0
const UChar *trimSpaceChars(const UChar *s, int32_t &length) {
765
0
    if (length <= 0 || (!u_isJavaSpaceChar(s[0]) && !u_isJavaSpaceChar(s[length - 1]))) {
766
0
        return s;
767
0
    }
768
0
    int32_t start = 0;
769
0
    int32_t limit = length;
770
0
    while (start < limit && u_isJavaSpaceChar(s[start])) {
771
0
        ++start;
772
0
    }
773
0
    if (start < limit) {
774
        // There is non-white space at start; we will not move limit below that,
775
        // so we need not test start<limit in the loop.
776
0
        while (u_isJavaSpaceChar(s[limit - 1])) {
777
0
            --limit;
778
0
        }
779
0
    }
780
0
    length = limit - start;
781
0
    return s + start;
782
0
}
783
784
/**
785
 * Calculates the gender of an arbitrary unit: this is the *second*
786
 * implementation of an algorithm to do this:
787
 *
788
 * Gender is also calculated in "processPatternTimes": that code path is "bottom
789
 * up", loading the gender for every component of a compound unit (at the same
790
 * time as loading the Long Names formatting patterns), even if the gender is
791
 * unneeded, then combining the single units' genders into the compound unit's
792
 * gender, according to the rules. This algorithm does a lazier "top-down"
793
 * evaluation, starting with the compound unit, calculating which single unit's
794
 * gender is needed by breaking it down according to the rules, and then loading
795
 * only the gender of the one single unit who's gender is needed.
796
 *
797
 * For future refactorings:
798
 * 1. we could drop processPatternTimes' gender calculation and just call this
799
 *    function: for UNUM_UNIT_WIDTH_FULL_NAME, the unit gender is in the very
800
 *    same table as the formatting patterns, so loading it then may be
801
 *    efficient. For other unit widths however, it needs to be explicitly looked
802
 *    up anyway.
803
 * 2. alternatively, if CLDR is providing all the genders we need such that we
804
 *    don't need to calculate them in ICU anymore, we could drop this function
805
 *    and keep only processPatternTimes' calculation. (And optimise it a bit?)
806
 *
807
 * @param locale The desired locale.
808
 * @param unit The measure unit to calculate the gender for.
809
 * @return The gender string for the unit, or an empty string if unknown or
810
 *     ungendered.
811
 */
812
0
UnicodeString calculateGenderForUnit(const Locale &locale, const MeasureUnit &unit, UErrorCode &status) {
813
0
    MeasureUnitImpl impl;
814
0
    const MeasureUnitImpl& mui = MeasureUnitImpl::forMeasureUnit(unit, impl, status);
815
0
    int32_t singleUnitIndex = 0;
816
0
    if (mui.complexity == UMEASURE_UNIT_COMPOUND) {
817
0
        int32_t startSlice = 0;
818
        // inclusive
819
0
        int32_t endSlice = mui.singleUnits.length()-1;
820
0
        U_ASSERT(endSlice > 0); // Else it would not be COMPOUND
821
0
        if (mui.singleUnits[endSlice]->dimensionality < 0) {
822
            // We have a -per- construct
823
0
            UnicodeString perRule = getDeriveCompoundRule(locale, "gender", "per", status);
824
0
            if (perRule.length() != 1) {
825
                // Fixed gender for -per- units
826
0
                return perRule;
827
0
            }
828
0
            if (perRule[0] == u'1') {
829
                // Find the start of the denominator. We already know there is one.
830
0
                while (mui.singleUnits[startSlice]->dimensionality >= 0) {
831
0
                    startSlice++;
832
0
                }
833
0
            } else {
834
                // Find the end of the numerator
835
0
                while (endSlice >= 0 && mui.singleUnits[endSlice]->dimensionality < 0) {
836
0
                    endSlice--;
837
0
                }
838
0
                if (endSlice < 0) {
839
                    // We have only a denominator, e.g. "per-second".
840
                    // TODO(icu-units#28): find out what gender to use in the
841
                    // absence of a first value - mentioned in CLDR-14253.
842
0
                    return {};
843
0
                }
844
0
            }
845
0
        }
846
0
        if (endSlice > startSlice) {
847
            // We have a -times- construct
848
0
            UnicodeString timesRule = getDeriveCompoundRule(locale, "gender", "times", status);
849
0
            if (timesRule.length() != 1) {
850
                // Fixed gender for -times- units
851
0
                return timesRule;
852
0
            }
853
0
            if (timesRule[0] == u'0') {
854
0
                endSlice = startSlice;
855
0
            } else {
856
                // We assume timesRule[0] == u'1'
857
0
                startSlice = endSlice;
858
0
            }
859
0
        }
860
0
        U_ASSERT(startSlice == endSlice);
861
0
        singleUnitIndex = startSlice;
862
0
    } else if (mui.complexity == UMEASURE_UNIT_MIXED) {
863
0
        status = U_INTERNAL_PROGRAM_ERROR;
864
0
        return {};
865
0
    } else {
866
0
        U_ASSERT(mui.complexity == UMEASURE_UNIT_SINGLE);
867
0
        U_ASSERT(mui.singleUnits.length() == 1);
868
0
    }
869
870
    // Now we know which singleUnit's gender we want
871
0
    const SingleUnitImpl *singleUnit = mui.singleUnits[singleUnitIndex];
872
    // Check for any power-prefix gender override:
873
0
    if (std::abs(singleUnit->dimensionality) != 1) {
874
0
        UnicodeString powerRule = getDeriveCompoundRule(locale, "gender", "power", status);
875
0
        if (powerRule.length() != 1) {
876
            // Fixed gender for -powN- units
877
0
            return powerRule;
878
0
        }
879
        // powerRule[0] == u'0'; u'1' not currently in spec.
880
0
    }
881
    // Check for any SI and binary prefix gender override:
882
0
    if (std::abs(singleUnit->dimensionality) != 1) {
883
0
        UnicodeString prefixRule = getDeriveCompoundRule(locale, "gender", "prefix", status);
884
0
        if (prefixRule.length() != 1) {
885
            // Fixed gender for -powN- units
886
0
            return prefixRule;
887
0
        }
888
        // prefixRule[0] == u'0'; u'1' not currently in spec.
889
0
    }
890
    // Now we've boiled it down to the gender of one simple unit identifier:
891
0
    return getGenderForBuiltin(locale, MeasureUnit::forIdentifier(singleUnit->getSimpleUnitID(), status),
892
0
                               status);
893
0
}
894
895
void maybeCalculateGender(const Locale &locale,
896
                          const MeasureUnit &unitRef,
897
                          UnicodeString *outArray,
898
0
                          UErrorCode &status) {
899
0
    if (outArray[GENDER_INDEX].isBogus()) {
900
0
        UnicodeString meterGender = getGenderForBuiltin(locale, MeasureUnit::getMeter(), status);
901
0
        if (meterGender.isEmpty()) {
902
            // No gender for meter: assume ungendered language
903
0
            return;
904
0
        }
905
        // We have a gendered language, but are lacking gender for unitRef.
906
0
        outArray[GENDER_INDEX] = calculateGenderForUnit(locale, unitRef, status);
907
0
    }
908
0
}
909
910
} // namespace
911
912
void LongNameHandler::forMeasureUnit(const Locale &loc,
913
                                     const MeasureUnit &unitRef,
914
                                     const UNumberUnitWidth &width,
915
                                     const char *unitDisplayCase,
916
                                     const PluralRules *rules,
917
                                     const MicroPropsGenerator *parent,
918
                                     LongNameHandler *fillIn,
919
0
                                     UErrorCode &status) {
920
    // From https://unicode.org/reports/tr35/tr35-general.html#compound-units -
921
    // Points 1 and 2 are mostly handled by MeasureUnit:
922
    //
923
    // 1. If the unitId is empty or invalid, fail
924
    // 2. Put the unitId into normalized order
925
0
    U_ASSERT(fillIn != nullptr);
926
927
0
    if (uprv_strcmp(unitRef.getType(), "") != 0) {
928
        // Handling built-in units:
929
        //
930
        // 3. Set result to be getValue(unitId with length, pluralCategory, caseVariant)
931
        //    - If result is not empty, return it
932
0
        UnicodeString simpleFormats[ARRAY_LENGTH];
933
0
        getMeasureData(loc, unitRef, width, unitDisplayCase, simpleFormats, status);
934
0
        maybeCalculateGender(loc, unitRef, simpleFormats, status);
935
0
        if (U_FAILURE(status)) {
936
0
            return;
937
0
        }
938
0
        fillIn->rules = rules;
939
0
        fillIn->parent = parent;
940
0
        fillIn->simpleFormatsToModifiers(simpleFormats,
941
0
                                         {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
942
0
        if (!simpleFormats[GENDER_INDEX].isBogus()) {
943
0
            fillIn->gender = getGenderString(simpleFormats[GENDER_INDEX], status);
944
0
        }
945
0
        return;
946
947
        // TODO(icu-units#145): figure out why this causes a failure in
948
        // format/MeasureFormatTest/TestIndividualPluralFallback and other
949
        // tests, when it should have been an alternative for the lines above:
950
951
        // forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
952
        // fillIn->rules = rules;
953
        // fillIn->parent = parent;
954
        // return;
955
0
    } else {
956
        // Check if it is a MeasureUnit this constructor handles: this
957
        // constructor does not handle mixed units
958
0
        U_ASSERT(unitRef.getComplexity(status) != UMEASURE_UNIT_MIXED);
959
0
        forArbitraryUnit(loc, unitRef, width, unitDisplayCase, fillIn, status);
960
0
        fillIn->rules = rules;
961
0
        fillIn->parent = parent;
962
0
        return;
963
0
    }
964
0
}
965
966
void LongNameHandler::forArbitraryUnit(const Locale &loc,
967
                                       const MeasureUnit &unitRef,
968
                                       const UNumberUnitWidth &width,
969
                                       const char *unitDisplayCase,
970
                                       LongNameHandler *fillIn,
971
0
                                       UErrorCode &status) {
972
0
    if (U_FAILURE(status)) {
973
0
        return;
974
0
    }
975
0
    if (fillIn == nullptr) {
976
0
        status = U_INTERNAL_PROGRAM_ERROR;
977
0
        return;
978
0
    }
979
980
    // Numbered list items are from the algorithms at
981
    // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
982
    //
983
    // 4. Divide the unitId into numerator (the part before the "-per-") and
984
    //    denominator (the part after the "-per-). If both are empty, fail
985
0
    MeasureUnitImpl unit;
986
0
    MeasureUnitImpl perUnit;
987
0
    {
988
0
        MeasureUnitImpl fullUnit = MeasureUnitImpl::forMeasureUnitMaybeCopy(unitRef, status);
989
0
        if (U_FAILURE(status)) {
990
0
            return;
991
0
        }
992
0
        for (int32_t i = 0; i < fullUnit.singleUnits.length(); i++) {
993
0
            SingleUnitImpl *subUnit = fullUnit.singleUnits[i];
994
0
            if (subUnit->dimensionality > 0) {
995
0
                unit.appendSingleUnit(*subUnit, status);
996
0
            } else {
997
0
                subUnit->dimensionality *= -1;
998
0
                perUnit.appendSingleUnit(*subUnit, status);
999
0
            }
1000
0
        }
1001
0
    }
1002
1003
    // TODO(icu-units#28): check placeholder logic, see if it needs to be
1004
    // present here instead of only in processPatternTimes:
1005
    //
1006
    // 5. Set both globalPlaceholder and globalPlaceholderPosition to be empty
1007
1008
0
    DerivedComponents derivedPerCases(loc, "case", "per");
1009
1010
    // 6. numeratorUnitString
1011
0
    UnicodeString numeratorUnitData[ARRAY_LENGTH];
1012
0
    processPatternTimes(std::move(unit), loc, width, derivedPerCases.value0(unitDisplayCase),
1013
0
                        numeratorUnitData, status);
1014
1015
    // 7. denominatorUnitString
1016
0
    UnicodeString denominatorUnitData[ARRAY_LENGTH];
1017
0
    processPatternTimes(std::move(perUnit), loc, width, derivedPerCases.value1(unitDisplayCase),
1018
0
                        denominatorUnitData, status);
1019
1020
    // TODO(icu-units#139):
1021
    // - implement DerivedComponents for "plural/times" and "plural/power":
1022
    //   French has different rules, we'll be producing the wrong results
1023
    //   currently. (Prove via tests!)
1024
    // - implement DerivedComponents for "plural/per", "plural/prefix",
1025
    //   "case/times", "case/power", and "case/prefix" - although they're
1026
    //   currently hardcoded. Languages with different rules are surely on the
1027
    //   way.
1028
    //
1029
    // Currently we only use "case/per", "plural/times", "case/times", and
1030
    // "case/power".
1031
    //
1032
    // This may have impact on multiSimpleFormatsToModifiers(...) below too?
1033
    // These rules are currently (ICU 69) all the same and hard-coded below.
1034
0
    UnicodeString perUnitPattern;
1035
0
    if (!denominatorUnitData[PER_INDEX].isBogus()) {
1036
        // If we have no denominator, we obtain the empty string:
1037
0
        perUnitPattern = denominatorUnitData[PER_INDEX];
1038
0
    } else {
1039
        // 8. Set perPattern to be getValue([per], locale, length)
1040
0
        UnicodeString rawPerUnitFormat = getCompoundValue("per", loc, width, status);
1041
        // rawPerUnitFormat is something like "{0} per {1}"; we need to substitute in the secondary unit.
1042
0
        SimpleFormatter perPatternFormatter(rawPerUnitFormat, 2, 2, status);
1043
0
        if (U_FAILURE(status)) {
1044
0
            return;
1045
0
        }
1046
        // Plural and placeholder handling for 7. denominatorUnitString:
1047
        // TODO(icu-units#139): hardcoded:
1048
        // <deriveComponent feature="plural" structure="per" value0="compound" value1="one"/>
1049
0
        UnicodeString denominatorFormat =
1050
0
            getWithPlural(denominatorUnitData, StandardPlural::Form::ONE, status);
1051
        // Some "one" pattern may not contain "{0}". For example in "ar" or "ne" locale.
1052
0
        SimpleFormatter denominatorFormatter(denominatorFormat, 0, 1, status);
1053
0
        if (U_FAILURE(status)) {
1054
0
            return;
1055
0
        }
1056
0
        UnicodeString denominatorPattern = denominatorFormatter.getTextWithNoArguments();
1057
0
        int32_t trimmedLen = denominatorPattern.length();
1058
0
        const UChar *trimmed = trimSpaceChars(denominatorPattern.getBuffer(), trimmedLen);
1059
0
        UnicodeString denominatorString(false, trimmed, trimmedLen);
1060
        // 9. If the denominatorString is empty, set result to
1061
        //    [numeratorString], otherwise set result to format(perPattern,
1062
        //    numeratorString, denominatorString)
1063
        //
1064
        // TODO(icu-units#28): Why does UnicodeString need to be explicit in the
1065
        // following line?
1066
0
        perPatternFormatter.format(UnicodeString(u"{0}"), denominatorString, perUnitPattern, status);
1067
0
        if (U_FAILURE(status)) {
1068
0
            return;
1069
0
        }
1070
0
    }
1071
0
    if (perUnitPattern.length() == 0) {
1072
0
        fillIn->simpleFormatsToModifiers(numeratorUnitData,
1073
0
                                         {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1074
0
    } else {
1075
0
        fillIn->multiSimpleFormatsToModifiers(numeratorUnitData, perUnitPattern,
1076
0
                                              {UFIELD_CATEGORY_NUMBER, UNUM_MEASURE_UNIT_FIELD}, status);
1077
0
    }
1078
1079
    // Gender
1080
    //
1081
    // TODO(icu-units#28): find out what gender to use in the absence of a first
1082
    // value - e.g. what's the gender of "per-second"? Mentioned in CLDR-14253.
1083
    //
1084
    // gender/per deriveCompound rules don't say:
1085
    // <deriveCompound feature="gender" structure="per" value="0"/> <!-- gender(gram-per-meter) ←  gender(gram) -->
1086
0
    fillIn->gender = getGenderString(
1087
0
        getDerivedGender(loc, "per", numeratorUnitData, denominatorUnitData, status), status);
1088
0
}
1089
1090
void LongNameHandler::processPatternTimes(MeasureUnitImpl &&productUnit,
1091
                                          Locale loc,
1092
                                          const UNumberUnitWidth &width,
1093
                                          const char *caseVariant,
1094
                                          UnicodeString *outArray,
1095
0
                                          UErrorCode &status) {
1096
0
    if (U_FAILURE(status)) {
1097
0
        return;
1098
0
    }
1099
0
    if (productUnit.complexity == UMEASURE_UNIT_MIXED) {
1100
        // These are handled by MixedUnitLongNameHandler
1101
0
        status = U_UNSUPPORTED_ERROR;
1102
0
        return;
1103
0
    }
1104
1105
#if U_DEBUG
1106
    for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1107
        U_ASSERT(outArray[pluralIndex].length() == 0);
1108
        U_ASSERT(!outArray[pluralIndex].isBogus());
1109
    }
1110
#endif
1111
1112
0
    if (productUnit.identifier.isEmpty()) {
1113
        // TODO(icu-units#28): consider when serialize should be called.
1114
        // identifier might also be empty for MeasureUnit().
1115
0
        productUnit.serialize(status);
1116
0
    }
1117
0
    if (U_FAILURE(status)) {
1118
0
        return;
1119
0
    }
1120
0
    if (productUnit.identifier.length() == 0) {
1121
        // MeasureUnit(): no units: return empty strings.
1122
0
        return;
1123
0
    }
1124
1125
0
    MeasureUnit builtinUnit;
1126
0
    if (MeasureUnit::findBySubType(productUnit.identifier.toStringPiece(), &builtinUnit)) {
1127
        // TODO(icu-units#145): spec doesn't cover builtin-per-builtin, it
1128
        // breaks them all down. Do we want to drop this?
1129
        // - findBySubType isn't super efficient, if we skip it and go to basic
1130
        //   singles, we don't have to construct MeasureUnit's anymore.
1131
        // - Check all the existing unit tests that fail without this: is it due
1132
        //   to incorrect fallback via getMeasureData?
1133
        // - Do those unit tests cover this code path representatively?
1134
0
        if (builtinUnit != MeasureUnit()) {
1135
0
            getMeasureData(loc, builtinUnit, width, caseVariant, outArray, status);
1136
0
            maybeCalculateGender(loc, builtinUnit, outArray, status);
1137
0
        }
1138
0
        return;
1139
0
    }
1140
1141
    // 2. Set timesPattern to be getValue(times, locale, length)
1142
0
    UnicodeString timesPattern = getCompoundValue("times", loc, width, status);
1143
0
    SimpleFormatter timesPatternFormatter(timesPattern, 2, 2, status);
1144
0
    if (U_FAILURE(status)) {
1145
0
        return;
1146
0
    }
1147
1148
0
    PlaceholderPosition globalPlaceholder[ARRAY_LENGTH];
1149
0
    UChar globalJoinerChar = 0;
1150
    // Numbered list items are from the algorithms at
1151
    // https://unicode.org/reports/tr35/tr35-general.html#compound-units:
1152
    //
1153
    // pattern(...) point 5:
1154
    // - Set both globalPlaceholder and globalPlaceholderPosition to be empty
1155
    //
1156
    // 3. Set result to be empty
1157
0
    for (int32_t pluralIndex = 0; pluralIndex < ARRAY_LENGTH; pluralIndex++) {
1158
        // Initial state: empty string pattern, via all falling back to OTHER:
1159
0
        if (pluralIndex == StandardPlural::Form::OTHER) {
1160
0
            outArray[pluralIndex].remove();
1161
0
        } else {
1162
0
            outArray[pluralIndex].setToBogus();
1163
0
        }
1164
0
        globalPlaceholder[pluralIndex] = PH_EMPTY;
1165
0
    }
1166
1167
    // Empty string represents "compound" (propagate the plural form).
1168
0
    const char *pluralCategory = "";
1169
0
    DerivedComponents derivedTimesPlurals(loc, "plural", "times");
1170
0
    DerivedComponents derivedTimesCases(loc, "case", "times");
1171
0
    DerivedComponents derivedPowerCases(loc, "case", "power");
1172
1173
    // 4. For each single_unit in product_unit
1174
0
    for (int32_t singleUnitIndex = 0; singleUnitIndex < productUnit.singleUnits.length();
1175
0
         singleUnitIndex++) {
1176
0
        SingleUnitImpl *singleUnit = productUnit.singleUnits[singleUnitIndex];
1177
0
        const char *singlePluralCategory;
1178
0
        const char *singleCaseVariant;
1179
        // TODO(icu-units#28): ensure we have unit tests that change/fail if we
1180
        // assign incorrect case variants here:
1181
0
        if (singleUnitIndex < productUnit.singleUnits.length() - 1) {
1182
            // 4.1. If hasMultiple
1183
0
            singlePluralCategory = derivedTimesPlurals.value0(pluralCategory);
1184
0
            singleCaseVariant = derivedTimesCases.value0(caseVariant);
1185
0
            pluralCategory = derivedTimesPlurals.value1(pluralCategory);
1186
0
            caseVariant = derivedTimesCases.value1(caseVariant);
1187
0
        } else {
1188
0
            singlePluralCategory = derivedTimesPlurals.value1(pluralCategory);
1189
0
            singleCaseVariant = derivedTimesCases.value1(caseVariant);
1190
0
        }
1191
1192
        // 4.2. Get the gender of that single_unit
1193
0
        MeasureUnit simpleUnit;
1194
0
        if (!MeasureUnit::findBySubType(singleUnit->getSimpleUnitID(), &simpleUnit)) {
1195
            // Ideally all simple units should be known, but they're not:
1196
            // 100-kilometer is internally treated as a simple unit, but it is
1197
            // not a built-in unit and does not have formatting data in CLDR 39.
1198
            //
1199
            // TODO(icu-units#28): test (desirable) invariants in unit tests.
1200
0
            status = U_UNSUPPORTED_ERROR;
1201
0
            return;
1202
0
        }
1203
0
        const char *gender = getGenderString(getGenderForBuiltin(loc, simpleUnit, status), status);
1204
1205
        // 4.3. If singleUnit starts with a dimensionality_prefix, such as 'square-'
1206
0
        U_ASSERT(singleUnit->dimensionality > 0);
1207
0
        int32_t dimensionality = singleUnit->dimensionality;
1208
0
        UnicodeString dimensionalityPrefixPatterns[ARRAY_LENGTH];
1209
0
        if (dimensionality != 1) {
1210
            // 4.3.1. set dimensionalityPrefixPattern to be
1211
            //   getValue(that dimensionality_prefix, locale, length, singlePluralCategory, singleCaseVariant, gender),
1212
            //   such as "{0} kwadratowym"
1213
0
            CharString dimensionalityKey("compound/power", status);
1214
0
            dimensionalityKey.appendNumber(dimensionality, status);
1215
0
            getInflectedMeasureData(dimensionalityKey.toStringPiece(), loc, width, gender,
1216
0
                                    singleCaseVariant, dimensionalityPrefixPatterns, status);
1217
0
            if (U_FAILURE(status)) {
1218
                // At the time of writing, only pow2 and pow3 are supported.
1219
                // Attempting to format other powers results in a
1220
                // U_RESOURCE_TYPE_MISMATCH. We convert the error if we
1221
                // understand it:
1222
0
                if (status == U_RESOURCE_TYPE_MISMATCH && dimensionality > 3) {
1223
0
                    status = U_UNSUPPORTED_ERROR;
1224
0
                }
1225
0
                return;
1226
0
            }
1227
1228
            // TODO(icu-units#139):
1229
            // 4.3.2. set singlePluralCategory to be power0(singlePluralCategory)
1230
1231
            // 4.3.3. set singleCaseVariant to be power0(singleCaseVariant)
1232
0
            singleCaseVariant = derivedPowerCases.value0(singleCaseVariant);
1233
            // 4.3.4. remove the dimensionality_prefix from singleUnit
1234
0
            singleUnit->dimensionality = 1;
1235
0
        }
1236
1237
        // 4.4. if singleUnit starts with an si_prefix, such as 'centi'
1238
0
        UMeasurePrefix prefix = singleUnit->unitPrefix;
1239
0
        UnicodeString prefixPattern;
1240
0
        if (prefix != UMEASURE_PREFIX_ONE) {
1241
            // 4.4.1. set siPrefixPattern to be getValue(that si_prefix, locale,
1242
            //        length), such as "centy{0}"
1243
0
            CharString prefixKey;
1244
            // prefixKey looks like "1024p3" or "10p-2":
1245
0
            prefixKey.appendNumber(umeas_getPrefixBase(prefix), status);
1246
0
            prefixKey.append('p', status);
1247
0
            prefixKey.appendNumber(umeas_getPrefixPower(prefix), status);
1248
            // Contains a pattern like "centy{0}".
1249
0
            prefixPattern = getCompoundValue(prefixKey.toStringPiece(), loc, width, status);
1250
1251
            // 4.4.2. set singlePluralCategory to be prefix0(singlePluralCategory)
1252
            //
1253
            // TODO(icu-units#139): that refers to these rules:
1254
            // <deriveComponent feature="plural" structure="prefix" value0="one" value1="compound"/>
1255
            // though I'm not sure what other value they might end up having.
1256
            //
1257
            // 4.4.3. set singleCaseVariant to be prefix0(singleCaseVariant)
1258
            //
1259
            // TODO(icu-units#139): that refers to:
1260
            // <deriveComponent feature="case" structure="prefix" value0="nominative"
1261
            // value1="compound"/> but the prefix (value0) doesn't have case, the rest simply
1262
            // propagates.
1263
1264
            // 4.4.4. remove the si_prefix from singleUnit
1265
0
            singleUnit->unitPrefix = UMEASURE_PREFIX_ONE;
1266
0
        }
1267
1268
        // 4.5. Set corePattern to be the getValue(singleUnit, locale, length,
1269
        //      singlePluralCategory, singleCaseVariant), such as "{0} metrem"
1270
0
        UnicodeString singleUnitArray[ARRAY_LENGTH];
1271
        // At this point we are left with a Simple Unit:
1272
0
        U_ASSERT(uprv_strcmp(singleUnit->build(status).getIdentifier(), singleUnit->getSimpleUnitID()) ==
1273
0
                 0);
1274
0
        getMeasureData(loc, singleUnit->build(status), width, singleCaseVariant, singleUnitArray,
1275
0
                       status);
1276
0
        if (U_FAILURE(status)) {
1277
            // Shouldn't happen if we have data for all single units
1278
0
            return;
1279
0
        }
1280
1281
        // Calculate output gender
1282
0
        if (!singleUnitArray[GENDER_INDEX].isBogus()) {
1283
0
            U_ASSERT(!singleUnitArray[GENDER_INDEX].isEmpty());
1284
0
            UnicodeString uVal;
1285
1286
0
            if (prefix != UMEASURE_PREFIX_ONE) {
1287
0
                singleUnitArray[GENDER_INDEX] =
1288
0
                    getDerivedGender(loc, "prefix", singleUnitArray, nullptr, status);
1289
0
            }
1290
1291
0
            if (dimensionality != 1) {
1292
0
                singleUnitArray[GENDER_INDEX] =
1293
0
                    getDerivedGender(loc, "power", singleUnitArray, nullptr, status);
1294
0
            }
1295
1296
0
            UnicodeString timesGenderRule = getDeriveCompoundRule(loc, "gender", "times", status);
1297
0
            if (timesGenderRule.length() == 1) {
1298
0
                switch (timesGenderRule[0]) {
1299
0
                case u'0':
1300
0
                    if (singleUnitIndex == 0) {
1301
0
                        U_ASSERT(outArray[GENDER_INDEX].isBogus());
1302
0
                        outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1303
0
                    }
1304
0
                    break;
1305
0
                case u'1':
1306
0
                    if (singleUnitIndex == productUnit.singleUnits.length() - 1) {
1307
0
                        U_ASSERT(outArray[GENDER_INDEX].isBogus());
1308
0
                        outArray[GENDER_INDEX] = singleUnitArray[GENDER_INDEX];
1309
0
                    }
1310
0
                }
1311
0
            } else {
1312
0
                if (outArray[GENDER_INDEX].isBogus()) {
1313
0
                    outArray[GENDER_INDEX] = timesGenderRule;
1314
0
                }
1315
0
            }
1316
0
        }
1317
1318
        // Calculate resulting patterns for each plural form
1319
0
        for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1320
0
            StandardPlural::Form plural = static_cast<StandardPlural::Form>(pluralIndex);
1321
1322
            // singleUnitArray[pluralIndex] looks something like "{0} Meter"
1323
0
            if (outArray[pluralIndex].isBogus()) {
1324
0
                if (singleUnitArray[pluralIndex].isBogus()) {
1325
                    // Let the usual plural fallback mechanism take care of this
1326
                    // plural form
1327
0
                    continue;
1328
0
                } else {
1329
                    // Since our singleUnit can have a plural form that outArray
1330
                    // doesn't yet have (relying on fallback to OTHER), we start
1331
                    // by grabbing it with the normal plural fallback mechanism
1332
0
                    outArray[pluralIndex] = getWithPlural(outArray, plural, status);
1333
0
                    if (U_FAILURE(status)) {
1334
0
                        return;
1335
0
                    }
1336
0
                }
1337
0
            }
1338
1339
0
            if (uprv_strcmp(singlePluralCategory, "") != 0) {
1340
0
                plural = static_cast<StandardPlural::Form>(getIndex(singlePluralCategory, status));
1341
0
            }
1342
1343
            // 4.6. Extract(corePattern, coreUnit, placeholder, placeholderPosition) from that pattern.
1344
0
            UnicodeString coreUnit;
1345
0
            PlaceholderPosition placeholderPosition;
1346
0
            UChar joinerChar;
1347
0
            extractCorePattern(getWithPlural(singleUnitArray, plural, status), coreUnit,
1348
0
                               placeholderPosition, joinerChar);
1349
1350
            // 4.7 If the position is middle, then fail
1351
0
            if (placeholderPosition == PH_MIDDLE) {
1352
0
                status = U_UNSUPPORTED_ERROR;
1353
0
                return;
1354
0
            }
1355
1356
            // 4.8. If globalPlaceholder is empty
1357
0
            if (globalPlaceholder[pluralIndex] == PH_EMPTY) {
1358
0
                globalPlaceholder[pluralIndex] = placeholderPosition;
1359
0
                globalJoinerChar = joinerChar;
1360
0
            } else {
1361
                // Expect all units involved to have the same placeholder position
1362
0
                U_ASSERT(globalPlaceholder[pluralIndex] == placeholderPosition);
1363
                // TODO(icu-units#28): Do we want to add a unit test that checks
1364
                // for consistent joiner chars? Probably not, given how
1365
                // inconsistent they are. File a CLDR ticket with examples?
1366
0
            }
1367
            // Now coreUnit would be just "Meter"
1368
1369
            // 4.9. If siPrefixPattern is not empty
1370
0
            if (prefix != UMEASURE_PREFIX_ONE) {
1371
0
                SimpleFormatter prefixCompiled(prefixPattern, 1, 1, status);
1372
0
                if (U_FAILURE(status)) {
1373
0
                    return;
1374
0
                }
1375
1376
                // 4.9.1. Set coreUnit to be the combineLowercasing(locale, length, siPrefixPattern,
1377
                //        coreUnit)
1378
0
                UnicodeString tmp;
1379
                // combineLowercasing(locale, length, prefixPattern, coreUnit)
1380
                //
1381
                // TODO(icu-units#28): run this only if prefixPattern does not
1382
                // contain space characters - do languages "as", "bn", "hi",
1383
                // "kk", etc have concepts of upper and lower case?:
1384
0
                if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1385
0
                    coreUnit.toLower(loc);
1386
0
                }
1387
0
                prefixCompiled.format(coreUnit, tmp, status);
1388
0
                if (U_FAILURE(status)) {
1389
0
                    return;
1390
0
                }
1391
0
                coreUnit = tmp;
1392
0
            }
1393
1394
            // 4.10. If dimensionalityPrefixPattern is not empty
1395
0
            if (dimensionality != 1) {
1396
0
                SimpleFormatter dimensionalityCompiled(
1397
0
                    getWithPlural(dimensionalityPrefixPatterns, plural, status), 1, 1, status);
1398
0
                if (U_FAILURE(status)) {
1399
0
                    return;
1400
0
                }
1401
1402
                // 4.10.1. Set coreUnit to be the combineLowercasing(locale, length,
1403
                //         dimensionalityPrefixPattern, coreUnit)
1404
0
                UnicodeString tmp;
1405
                // combineLowercasing(locale, length, prefixPattern, coreUnit)
1406
                //
1407
                // TODO(icu-units#28): run this only if prefixPattern does not
1408
                // contain space characters - do languages "as", "bn", "hi",
1409
                // "kk", etc have concepts of upper and lower case?:
1410
0
                if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1411
0
                    coreUnit.toLower(loc);
1412
0
                }
1413
0
                dimensionalityCompiled.format(coreUnit, tmp, status);
1414
0
                if (U_FAILURE(status)) {
1415
0
                    return;
1416
0
                }
1417
0
                coreUnit = tmp;
1418
0
            }
1419
1420
0
            if (outArray[pluralIndex].length() == 0) {
1421
                // 4.11. If the result is empty, set result to be coreUnit
1422
0
                outArray[pluralIndex] = coreUnit;
1423
0
            } else {
1424
                // 4.12. Otherwise set result to be format(timesPattern, result, coreUnit)
1425
0
                UnicodeString tmp;
1426
0
                timesPatternFormatter.format(outArray[pluralIndex], coreUnit, tmp, status);
1427
0
                outArray[pluralIndex] = tmp;
1428
0
            }
1429
0
        }
1430
0
    }
1431
0
    for (int32_t pluralIndex = 0; pluralIndex < StandardPlural::Form::COUNT; pluralIndex++) {
1432
0
        if (globalPlaceholder[pluralIndex] == PH_BEGINNING) {
1433
0
            UnicodeString tmp;
1434
0
            tmp.append(u"{0}", 3);
1435
0
            if (globalJoinerChar != 0) {
1436
0
                tmp.append(globalJoinerChar);
1437
0
            }
1438
0
            tmp.append(outArray[pluralIndex]);
1439
0
            outArray[pluralIndex] = tmp;
1440
0
        } else if (globalPlaceholder[pluralIndex] == PH_END) {
1441
0
            if (globalJoinerChar != 0) {
1442
0
                outArray[pluralIndex].append(globalJoinerChar);
1443
0
            }
1444
0
            outArray[pluralIndex].append(u"{0}", 3);
1445
0
        }
1446
0
    }
1447
0
}
1448
1449
/**
 * Returns the display-name entry of a unit for the given locale and width.
 *
 * The unit's strings are loaded with getMeasureData() (an empty string is
 * passed as its fourth argument) and the entry stored at DNAM_INDEX is
 * returned as-is.
 *
 * @param loc    Locale to load the unit strings for.
 * @param unit   Unit whose display name is requested.
 * @param width  Requested unit width.
 * @param status ICU error code. If it already signals failure on entry, a
 *               bogus string is returned and no data is loaded.
 * @return The DNAM_INDEX entry of the loaded data, or a bogus string when
 *         status was failing on entry.
 */
UnicodeString LongNameHandler::getUnitDisplayName(
        const Locale& loc,
        const MeasureUnit& unit,
        UNumberUnitWidth width,
        UErrorCode& status) {
    if (U_SUCCESS(status)) {
        UnicodeString unitStrings[ARRAY_LENGTH];
        getMeasureData(loc, unit, width, "", unitStrings, status);
        return unitStrings[DNAM_INDEX];
    }
    return ICU_Utility::makeBogusString();
}
1461
1462
/**
 * Returns the pattern of a unit for one plural form.
 *
 * @param loc        Locale to load the unit strings for.
 * @param unit       Unit whose pattern is requested.
 * @param width      Requested unit width.
 * @param pluralForm Plural form to look up; used as an index into the data
 *                   loaded by getMeasureData().
 * @param status     ICU error code. A bogus string is returned if it signals
 *                   failure on entry or after loading the data.
 * @return The pattern for pluralForm, or the StandardPlural::Form::OTHER
 *         pattern when the requested entry is bogus.
 */
UnicodeString LongNameHandler::getUnitPattern(
        const Locale& loc,
        const MeasureUnit& unit,
        UNumberUnitWidth width,
        StandardPlural::Form pluralForm,
        UErrorCode& status) {
    if (U_FAILURE(status)) {
        return ICU_Utility::makeBogusString();
    }

    UnicodeString patterns[ARRAY_LENGTH];
    // getMeasureData() already takes care of falling back from other widths
    // to short.
    getMeasureData(loc, unit, width, "", patterns, status);
    if (U_FAILURE(status)) {
        return ICU_Utility::makeBogusString();
    }

    // Plural fallback: a missing (bogus) entry resolves to the OTHER form.
    const UnicodeString &requested = patterns[pluralForm];
    if (requested.isBogus()) {
        return patterns[StandardPlural::Form::OTHER];
    }
    return requested;
}
1481
1482
/**
 * Creates a heap-allocated LongNameHandler set up with currency long names.
 *
 * @param loc      Locale passed to getCurrencyLongNameData().
 * @param currency Currency passed to getCurrencyLongNameData().
 * @param rules    Plural rules handed to the LongNameHandler constructor.
 * @param parent   Parent generator handed to the LongNameHandler constructor.
 * @param status   ICU error code. Set to U_MEMORY_ALLOCATION_ERROR if the
 *                 handler cannot be allocated.
 * @return A new handler owned by the caller, or nullptr if allocation or
 *         getCurrencyLongNameData() failed. As before, a failure reported by
 *         simpleFormatsToModifiers() is only signalled through status; the
 *         handler is still returned in that case.
 */
LongNameHandler* LongNameHandler::forCurrencyLongNames(const Locale &loc, const CurrencyUnit &currency,
                                                      const PluralRules *rules,
                                                      const MicroPropsGenerator *parent,
                                                      UErrorCode &status) {
    // The handler is held by a LocalPointer so that the early returns below
    // release it instead of leaking it. The LocalPointer constructor also sets
    // U_MEMORY_ALLOCATION_ERROR when the allocation returned nullptr.
    LocalPointer<LongNameHandler> result(new LongNameHandler(rules, parent), status);
    if (U_FAILURE(status)) {
        return nullptr;
    }
    UnicodeString simpleFormats[ARRAY_LENGTH];
    getCurrencyLongNameData(loc, currency, simpleFormats, status);
    if (U_FAILURE(status)) { return nullptr; }
    result->simpleFormatsToModifiers(simpleFormats, {UFIELD_CATEGORY_NUMBER, UNUM_CURRENCY_FIELD}, status);
    // TODO(icu-units#28): currency gender?
    return result.orphan();
}
1498
1499
void LongNameHandler::simpleFormatsToModifiers(const UnicodeString *simpleFormats, Field field,
1500
0
                                               UErrorCode &status) {
1501
0
    for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1502
0
        StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1503
0
        UnicodeString simpleFormat = getWithPlural(simpleFormats, plural, status);
1504
0
        if (U_FAILURE(status)) { return; }
1505
0
        SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
1506
0
        if (U_FAILURE(status)) { return; }
1507
0
        fModifiers[i] = SimpleModifier(compiledFormatter, field, false, {this, SIGNUM_POS_ZERO, plural});
1508
0
    }
1509
0
}
1510
1511
void LongNameHandler::multiSimpleFormatsToModifiers(const UnicodeString *leadFormats, UnicodeString trailFormat,
1512
0
                                                    Field field, UErrorCode &status) {
1513
0
    SimpleFormatter trailCompiled(trailFormat, 1, 1, status);
1514
0
    if (U_FAILURE(status)) { return; }
1515
0
    for (int32_t i = 0; i < StandardPlural::Form::COUNT; i++) {
1516
0
        StandardPlural::Form plural = static_cast<StandardPlural::Form>(i);
1517
0
        UnicodeString leadFormat = getWithPlural(leadFormats, plural, status);
1518
0
        if (U_FAILURE(status)) { return; }
1519
0
        UnicodeString compoundFormat;
1520
0
        if (leadFormat.length() == 0) {
1521
0
            compoundFormat = trailFormat;
1522
0
        } else {
1523
0
            trailCompiled.format(leadFormat, compoundFormat, status);
1524
0
            if (U_FAILURE(status)) { return; }
1525
0
        }
1526
0
        SimpleFormatter compoundCompiled(compoundFormat, 0, 1, status);
1527
0
        if (U_FAILURE(status)) { return; }
1528
0
        fModifiers[i] = SimpleModifier(compoundCompiled, field, false, {this, SIGNUM_POS_ZERO, plural});
1529
0
    }
1530
0
}
1531
1532
void LongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1533
0
                                      UErrorCode &status) const {
1534
0
    if (parent != NULL) {
1535
0
        parent->processQuantity(quantity, micros, status);
1536
0
    }
1537
0
    StandardPlural::Form pluralForm = utils::getPluralSafe(micros.rounder, rules, quantity, status);
1538
0
    micros.modOuter = &fModifiers[pluralForm];
1539
0
    micros.gender = gender;
1540
0
}
1541
1542
0
/**
 * Returns the modifier stored for the given plural form. The signum argument
 * is ignored.
 */
const Modifier* LongNameHandler::getModifier(Signum /*signum*/, StandardPlural::Form plural) const {
    const auto &selected = fModifiers[plural];
    return &selected;
}
1545
1546
void MixedUnitLongNameHandler::forMeasureUnit(const Locale &loc,
1547
                                              const MeasureUnit &mixedUnit,
1548
                                              const UNumberUnitWidth &width,
1549
                                              const char *unitDisplayCase,
1550
                                              const PluralRules *rules,
1551
                                              const MicroPropsGenerator *parent,
1552
                                              MixedUnitLongNameHandler *fillIn,
1553
0
                                              UErrorCode &status) {
1554
0
    U_ASSERT(mixedUnit.getComplexity(status) == UMEASURE_UNIT_MIXED);
1555
0
    U_ASSERT(fillIn != nullptr);
1556
0
    if (U_FAILURE(status)) {
1557
0
        return;
1558
0
    }
1559
1560
0
    MeasureUnitImpl temp;
1561
0
    const MeasureUnitImpl &impl = MeasureUnitImpl::forMeasureUnit(mixedUnit, temp, status);
1562
    // Defensive, for production code:
1563
0
    if (impl.complexity != UMEASURE_UNIT_MIXED) {
1564
        // Should be using the normal LongNameHandler
1565
0
        status = U_UNSUPPORTED_ERROR;
1566
0
        return;
1567
0
    }
1568
1569
0
    fillIn->fMixedUnitCount = impl.singleUnits.length();
1570
0
    fillIn->fMixedUnitData.adoptInstead(new UnicodeString[fillIn->fMixedUnitCount * ARRAY_LENGTH]);
1571
0
    for (int32_t i = 0; i < fillIn->fMixedUnitCount; i++) {
1572
        // Grab data for each of the components.
1573
0
        UnicodeString *unitData = &fillIn->fMixedUnitData[i * ARRAY_LENGTH];
1574
        // TODO(CLDR-14502): check from the CLDR-14502 ticket whether this
1575
        // propagation of unitDisplayCase is correct:
1576
0
        getMeasureData(loc, impl.singleUnits[i]->build(status), width, unitDisplayCase, unitData,
1577
0
                       status);
1578
        // TODO(ICU-21494): if we add support for gender for mixed units, we may
1579
        // need maybeCalculateGender() here.
1580
0
    }
1581
1582
    // TODO(icu-units#120): Make sure ICU doesn't output zero-valued
1583
    // high-magnitude fields
1584
    // * for mixed units count N, produce N listFormatters, one for each subset
1585
    //   that might be formatted.
1586
0
    UListFormatterWidth listWidth = ULISTFMT_WIDTH_SHORT;
1587
0
    if (width == UNUM_UNIT_WIDTH_NARROW) {
1588
0
        listWidth = ULISTFMT_WIDTH_NARROW;
1589
0
    } else if (width == UNUM_UNIT_WIDTH_FULL_NAME) {
1590
        // This might be the same as SHORT in most languages:
1591
0
        listWidth = ULISTFMT_WIDTH_WIDE;
1592
0
    }
1593
0
    fillIn->fListFormatter.adoptInsteadAndCheckErrorCode(
1594
0
        ListFormatter::createInstance(loc, ULISTFMT_TYPE_UNITS, listWidth, status), status);
1595
    // TODO(ICU-21494): grab gender of each unit, calculate the gender
1596
    // associated with this list formatter, save it for later.
1597
0
    fillIn->rules = rules;
1598
0
    fillIn->parent = parent;
1599
1600
    // We need a localised NumberFormatter for the numbers of the bigger units
1601
    // (providing Arabic numerals, for example).
1602
0
    fillIn->fNumberFormatter = NumberFormatter::withLocale(loc);
1603
0
}
1604
1605
void MixedUnitLongNameHandler::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1606
0
                                               UErrorCode &status) const {
1607
0
    U_ASSERT(fMixedUnitCount > 1);
1608
0
    if (parent != nullptr) {
1609
0
        parent->processQuantity(quantity, micros, status);
1610
0
    }
1611
0
    micros.modOuter = getMixedUnitModifier(quantity, micros, status);
1612
0
}
1613
1614
/**
 * Builds the modifier that wraps the formatted quantity in the full
 * mixed-unit pattern.
 *
 * @param quantity The quantity formatted by the rest of the pipeline. It is
 *                 negated here when it is negative and is not the first value
 *                 of the mixed unit.
 * @param micros   Supplies mixedMeasures, mixedMeasuresCount, indexOfQuantity
 *                 and rounder; receives the result in
 *                 helpers.mixedUnitModifier.
 * @param status   ICU error code. Set to U_UNSUPPORTED_ERROR when
 *                 micros.mixedMeasuresCount is zero.
 * @return Pointer to micros.helpers.mixedUnitModifier on success, or to
 *         micros.helpers.emptyWeakModifier on failure.
 */
const Modifier *MixedUnitLongNameHandler::getMixedUnitModifier(DecimalQuantity &quantity,
                                                               MicroProps &micros,
                                                               UErrorCode &status) const {
    if (micros.mixedMeasuresCount == 0) {
        U_ASSERT(micros.mixedMeasuresCount > 0); // Mixed unit: we must have more than one unit value
        status = U_UNSUPPORTED_ERROR;
        return &micros.helpers.emptyWeakModifier;
    }

    // Algorithm:
    //
    // For the mixed-units measurement of: "3 yard, 1 foot, 2.6 inch", we should
    // find "3 yard" and "1 foot" in micros.mixedMeasures.
    //
    // Obtain long-names with plural forms corresponding to measure values:
    //   * {0} yards, {0} foot, {0} inches
    //
    // Format the integer values appropriately and modify with the format
    // strings:
    //   - 3 yards, 1 foot
    //
    // Use ListFormatter to combine, with one placeholder:
    //   - 3 yards, 1 foot and {0} inches
    //
    // Return a SimpleModifier for this pattern, letting the rest of the
    // pipeline take care of the remaining inches.

    LocalArray<UnicodeString> outputMeasuresList(new UnicodeString[fMixedUnitCount], status);
    if (U_FAILURE(status)) {
        return &micros.helpers.emptyWeakModifier;
    }

    // Plural form of `quantity`; recorded in the Parameters of the resulting
    // modifier. Stays OTHER if no entry matches micros.indexOfQuantity.
    StandardPlural::Form quantityPlural = StandardPlural::Form::OTHER;
    for (int32_t i = 0; i < micros.mixedMeasuresCount; i++) {
        if (micros.indexOfQuantity == i) { // Insert placeholder for `quantity`
            // If quantity is not the first value and quantity is negative
            if (micros.indexOfQuantity > 0 && quantity.isNegative()) {
                quantity.negate();
            }

            // Assign to the variable declared before the loop. Declaring a
            // new local of the same name here would shadow it and leave the
            // modifier parameters at OTHER.
            quantityPlural = utils::getPluralSafe(micros.rounder, rules, quantity, status);
            UnicodeString quantityFormatWithPlural =
                getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], quantityPlural, status);
            SimpleFormatter quantityFormatter(quantityFormatWithPlural, 0, 1, status);
            // Keep "{0}" in the output so the final pattern still has a
            // placeholder for `quantity`.
            quantityFormatter.format(UnicodeString(u"{0}"), outputMeasuresList[i], status);
        } else {
            // If numbers are negative, only the first number needs to have its
            // negative sign formatted.
            int64_t number = i > 0 ? std::abs(micros.mixedMeasures[i]) : micros.mixedMeasures[i];

            DecimalQuantity fdec;
            fdec.setToLong(number);
            StandardPlural::Form pluralForm = utils::getStandardPlural(rules, fdec);
            UnicodeString simpleFormat =
                getWithPlural(&fMixedUnitData[i * ARRAY_LENGTH], pluralForm, status);
            SimpleFormatter compiledFormatter(simpleFormat, 0, 1, status);
            UnicodeString num;
            auto appendable = UnicodeStringAppendable(num);

            fNumberFormatter.formatDecimalQuantity(fdec, status).appendTo(appendable, status);
            compiledFormatter.format(num, outputMeasuresList[i], status);
        }
    }

    // TODO(ICU-21494): implement gender for lists of mixed units. Presumably we
    // can set micros.gender to the gender associated with the list formatter in
    // use below (once we have correct support for that). And then document this
    // appropriately? "getMixedUnitModifier" doesn't sound like it would do
    // something like this.

    // Combine list into a "premixed" pattern
    UnicodeString premixedFormatPattern;
    fListFormatter->format(outputMeasuresList.getAlias(), fMixedUnitCount, premixedFormatPattern,
                           status);
    SimpleFormatter premixedCompiled(premixedFormatPattern, 0, 1, status);
    if (U_FAILURE(status)) {
        return &micros.helpers.emptyWeakModifier;
    }

    micros.helpers.mixedUnitModifier =
        SimpleModifier(premixedCompiled, kUndefinedField, false, {this, SIGNUM_POS_ZERO, quantityPlural});
    return &micros.helpers.mixedUnitModifier;
}
1699
1700
/**
 * Not expected to be called for mixed units: both arguments are ignored and
 * the body is marked unreachable.
 */
const Modifier *MixedUnitLongNameHandler::getModifier(Signum /*signum*/,
                                                      StandardPlural::Form /*plural*/) const {
    // TODO(icu-units#28): revisit this method when looking into the callers
    // of ModifierStore::getModifier(), to confirm that it really stays
    // unreachable:
    UPRV_UNREACHABLE;
    return nullptr;
}
1708
1709
/**
 * Creates a LongNameMultiplexer holding one long-name handler per unit.
 *
 * Units with complexity UMEASURE_UNIT_MIXED get a MixedUnitLongNameHandler;
 * all other units get a LongNameHandler. The handlers themselves are created
 * without a parent; `parent` is handed to the multiplexer's constructor.
 *
 * @param loc             Locale passed to every handler.
 * @param units           Units to create handlers for; expected to be
 *                        non-empty.
 * @param width           Requested unit width.
 * @param unitDisplayCase Passed through to every handler.
 * @param rules           Plural rules passed to every handler.
 * @param parent          Parent generator of the multiplexer.
 * @param status          ICU error code. Set to U_MEMORY_ALLOCATION_ERROR
 *                        when an allocation fails.
 * @return A new multiplexer owned by the caller, or nullptr on failure.
 */
LongNameMultiplexer *LongNameMultiplexer::forMeasureUnits(const Locale &loc,
                                                          const MaybeStackVector<MeasureUnit> &units,
                                                          const UNumberUnitWidth &width,
                                                          const char *unitDisplayCase,
                                                          const PluralRules *rules,
                                                          const MicroPropsGenerator *parent,
                                                          UErrorCode &status) {
    LocalPointer<LongNameMultiplexer> result(new LongNameMultiplexer(parent), status);
    if (U_FAILURE(status)) {
        return nullptr;
    }
    U_ASSERT(units.length() > 0);
    if (result->fHandlers.resize(units.length()) == nullptr) {
        status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    // The array is indexed in the loop below, so a failed allocation must be
    // detected here rather than dereferenced.
    result->fMeasureUnits.adoptInsteadAndCheckErrorCode(new MeasureUnit[units.length()], status);
    if (U_FAILURE(status)) {
        return nullptr;
    }
    for (int32_t i = 0, length = units.length(); i < length; i++) {
        const MeasureUnit &unit = *units[i];
        result->fMeasureUnits[i] = unit;
        if (unit.getComplexity(status) == UMEASURE_UNIT_MIXED) {
            MixedUnitLongNameHandler *mlnh = result->fMixedUnitHandlers.createAndCheckErrorCode(status);
            MixedUnitLongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, nullptr,
                                                     mlnh, status);
            result->fHandlers[i] = mlnh;
        } else {
            LongNameHandler *lnh = result->fLongNameHandlers.createAndCheckErrorCode(status);
            LongNameHandler::forMeasureUnit(loc, unit, width, unitDisplayCase, rules, nullptr, lnh, status);
            result->fHandlers[i] = lnh;
        }
        if (U_FAILURE(status)) {
            return nullptr;
        }
    }
    return result.orphan();
}
1745
1746
void LongNameMultiplexer::processQuantity(DecimalQuantity &quantity, MicroProps &micros,
1747
0
                                          UErrorCode &status) const {
1748
    // We call parent->processQuantity() from the Multiplexer, instead of
1749
    // letting LongNameHandler handle it: we don't know which LongNameHandler to
1750
    // call until we've called the parent!
1751
0
    fParent->processQuantity(quantity, micros, status);
1752
1753
    // Call the correct LongNameHandler based on outputUnit
1754
0
    for (int i = 0; i < fHandlers.getCapacity(); i++) {
1755
0
        if (fMeasureUnits[i] == micros.outputUnit) {
1756
0
            fHandlers[i]->processQuantity(quantity, micros, status);
1757
0
            return;
1758
0
        }
1759
0
    }
1760
0
    if (U_FAILURE(status)) {
1761
0
        return;
1762
0
    }
1763
    // We shouldn't receive any outputUnit for which we haven't already got a
1764
    // LongNameHandler:
1765
0
    status = U_INTERNAL_PROGRAM_ERROR;
1766
0
}
1767
1768
#endif /* #if !UCONFIG_NO_FORMATTING */