Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/i18n/units_data.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2020 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include "unicode/utypes.h"
5
6
#if !UCONFIG_NO_FORMATTING
7
8
#include "cstring.h"
9
#include "number_decimalquantity.h"
10
#include "resource.h"
11
#include "uassert.h"
12
#include "unicode/unistr.h"
13
#include "unicode/ures.h"
14
#include "units_data.h"
15
#include "uresimp.h"
16
#include "util.h"
17
#include <utility>
18
19
U_NAMESPACE_BEGIN
20
namespace units {
21
22
namespace {
23
24
using icu::number::impl::DecimalQuantity;
25
26
0
/**
 * Removes every ' ' character from `factor`, in place.
 *
 * Despite the name, spaces are removed everywhere in the string, not only at
 * its beginning and end.
 *
 * @param factor The string to process; it is replaced by its space-free copy.
 * @param status Passed to CharString::append for every character that is kept.
 */
void trimSpaces(CharString& factor, UErrorCode& status){
    CharString withoutSpaces;
    const int32_t len = factor.length();
    for (int32_t pos = 0; pos < len; ++pos) {
        const char ch = factor[pos];
        if (ch != ' ') {
            withoutSpaces.append(ch, status);
        }
    }

    factor = std::move(withoutSpaces);
}
36
37
/**
38
 * A ResourceSink that collects conversion rate information.
39
 *
40
 * This class is for use by ures_getAllItemsWithFallback.
41
 */
42
class ConversionRateDataSink : public ResourceSink {
43
  public:
44
    /**
45
     * Constructor.
46
     * @param out The vector to which ConversionRateInfo instances are to be
47
     * added. This vector must outlive the use of the ResourceSink.
48
     */
49
0
    explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {}
50
51
    /**
52
     * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
53
     * conversion rates that are found in `value` to the output vector.
54
     *
55
     * @param source This string must be "convertUnits": the resource that this
56
     * class supports reading.
57
     * @param value The "convertUnits" resource, containing unit conversion rate
58
     * information.
59
     * @param noFallback Ignored.
60
     * @param status The standard ICU error code output parameter.
61
     */
62
0
    void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
63
0
        if (U_FAILURE(status)) { return; }
64
0
        if (uprv_strcmp(source, "convertUnits") != 0) {
65
            // This is very strict, however it is the cheapest way to be sure
66
            // that with `value`, we're looking at the convertUnits table.
67
0
            status = U_ILLEGAL_ARGUMENT_ERROR;
68
0
            return;
69
0
        }
70
0
        ResourceTable conversionRateTable = value.getTable(status);
71
0
        const char *srcUnit;
72
        // We're reusing `value`, which seems to be a common pattern:
73
0
        for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) {
74
0
            ResourceTable unitTable = value.getTable(status);
75
0
            const char *key;
76
0
            UnicodeString baseUnit = ICU_Utility::makeBogusString();
77
0
            UnicodeString factor = ICU_Utility::makeBogusString();
78
0
            UnicodeString offset = ICU_Utility::makeBogusString();
79
0
            for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) {
80
0
                if (uprv_strcmp(key, "target") == 0) {
81
0
                    baseUnit = value.getUnicodeString(status);
82
0
                } else if (uprv_strcmp(key, "factor") == 0) {
83
0
                    factor = value.getUnicodeString(status);
84
0
                } else if (uprv_strcmp(key, "offset") == 0) {
85
0
                    offset = value.getUnicodeString(status);
86
0
                }
87
0
            }
88
0
            if (U_FAILURE(status)) { return; }
89
0
            if (baseUnit.isBogus() || factor.isBogus()) {
90
                // We could not find a usable conversion rate: bad resource.
91
0
                status = U_MISSING_RESOURCE_ERROR;
92
0
                return;
93
0
            }
94
95
            // We don't have this ConversionRateInfo yet: add it.
96
0
            ConversionRateInfo *cr = outVector->emplaceBack();
97
0
            if (!cr) {
98
0
                status = U_MEMORY_ALLOCATION_ERROR;
99
0
                return;
100
0
            } else {
101
0
                cr->sourceUnit.append(srcUnit, status);
102
0
                cr->baseUnit.appendInvariantChars(baseUnit, status);
103
0
                cr->factor.appendInvariantChars(factor, status);
104
0
                trimSpaces(cr->factor, status);
105
0
                if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status);
106
0
            }
107
0
        }
108
0
        return;
109
0
    }
110
111
  private:
112
    MaybeStackVector<ConversionRateInfo> *outVector;
113
};
114
115
0
/**
 * Orders UnitPreferenceMetadata instances according to
 * UnitPreferenceMetadata::compareTo().
 */
bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) {
    const int32_t order = a.compareTo(b);
    return order < 0;
}
118
119
/**
120
 * A ResourceSink that collects unit preferences information.
121
 *
122
 * This class is for use by ures_getAllItemsWithFallback.
123
 */
124
class UnitPreferencesSink : public ResourceSink {
125
  public:
126
    /**
127
     * Constructor.
128
     * @param outPrefs The vector to which UnitPreference instances are to be
129
     * added. This vector must outlive the use of the ResourceSink.
130
     * @param outMetadata  The vector to which UnitPreferenceMetadata instances
131
     * are to be added. This vector must outlive the use of the ResourceSink.
132
     */
133
    explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs,
134
                                 MaybeStackVector<UnitPreferenceMetadata> *outMetadata)
135
0
        : preferences(outPrefs), metadata(outMetadata) {}
136
137
    /**
138
     * Method for use by `ures_getAllItemsWithFallback`. Adds the unit
139
     * preferences info that are found in `value` to the output vector.
140
     *
141
     * @param source This string must be "unitPreferenceData": the resource that
142
     * this class supports reading.
143
     * @param value The "unitPreferenceData" resource, containing unit
144
     * preferences data.
145
     * @param noFallback Ignored.
146
     * @param status The standard ICU error code output parameter. Note: if an
147
     * error is returned, outPrefs and outMetadata may be inconsistent.
148
     */
149
0
    void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) {
150
0
        if (U_FAILURE(status)) { return; }
151
0
        if (uprv_strcmp(key, "unitPreferenceData") != 0) {
152
            // This is very strict, however it is the cheapest way to be sure
153
            // that with `value`, we're looking at the convertUnits table.
154
0
            status = U_ILLEGAL_ARGUMENT_ERROR;
155
0
            return;
156
0
        }
157
        // The unitPreferenceData structure (see data/misc/units.txt) contains a
158
        // hierarchy of category/usage/region, within which are a set of
159
        // preferences. Hence three for-loops and another loop for the
160
        // preferences themselves:
161
0
        ResourceTable unitPreferenceDataTable = value.getTable(status);
162
0
        const char *category;
163
0
        for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) {
164
0
            ResourceTable categoryTable = value.getTable(status);
165
0
            const char *usage;
166
0
            for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) {
167
0
                ResourceTable regionTable = value.getTable(status);
168
0
                const char *region;
169
0
                for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) {
170
                    // `value` now contains the set of preferences for
171
                    // category/usage/region.
172
0
                    ResourceArray unitPrefs = value.getArray(status);
173
0
                    if (U_FAILURE(status)) { return; }
174
0
                    int32_t prefLen = unitPrefs.getSize();
175
176
                    // Update metadata for this set of preferences.
177
0
                    UnitPreferenceMetadata *meta = metadata->emplaceBack(
178
0
                        category, usage, region, preferences->length(), prefLen, status);
179
0
                    if (!meta) {
180
0
                        status = U_MEMORY_ALLOCATION_ERROR;
181
0
                        return;
182
0
                    }
183
0
                    if (U_FAILURE(status)) { return; }
184
0
                    if (metadata->length() > 1) {
185
                        // Verify that unit preferences are sorted and
186
                        // without duplicates.
187
0
                        if (!(*(*metadata)[metadata->length() - 2] <
188
0
                              *(*metadata)[metadata->length() - 1])) {
189
0
                            status = U_INVALID_FORMAT_ERROR;
190
0
                            return;
191
0
                        }
192
0
                    }
193
194
                    // Collect the individual preferences.
195
0
                    for (int32_t i = 0; unitPrefs.getValue(i, value); i++) {
196
0
                        UnitPreference *up = preferences->emplaceBack();
197
0
                        if (!up) {
198
0
                            status = U_MEMORY_ALLOCATION_ERROR;
199
0
                            return;
200
0
                        }
201
0
                        ResourceTable unitPref = value.getTable(status);
202
0
                        if (U_FAILURE(status)) { return; }
203
0
                        for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) {
204
0
                            if (uprv_strcmp(key, "unit") == 0) {
205
0
                                int32_t length;
206
0
                                const UChar *u = value.getString(length, status);
207
0
                                up->unit.appendInvariantChars(u, length, status);
208
0
                            } else if (uprv_strcmp(key, "geq") == 0) {
209
0
                                int32_t length;
210
0
                                const UChar *g = value.getString(length, status);
211
0
                                CharString geq;
212
0
                                geq.appendInvariantChars(g, length, status);
213
0
                                DecimalQuantity dq;
214
0
                                dq.setToDecNumber(geq.data(), status);
215
0
                                up->geq = dq.toDouble();
216
0
                            } else if (uprv_strcmp(key, "skeleton") == 0) {
217
0
                                up->skeleton = value.getUnicodeString(status);
218
0
                            }
219
0
                        }
220
0
                    }
221
0
                }
222
0
            }
223
0
        }
224
0
    }
225
226
  private:
227
    MaybeStackVector<UnitPreference> *preferences;
228
    MaybeStackVector<UnitPreferenceMetadata> *metadata;
229
};
230
231
int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
232
                     const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage,
233
0
                     bool *foundRegion, UErrorCode &status) {
234
0
    if (U_FAILURE(status)) { return -1; }
235
0
    int32_t start = 0;
236
0
    int32_t end = metadata->length();
237
0
    *foundCategory = false;
238
0
    *foundUsage = false;
239
0
    *foundRegion = false;
240
0
    while (start < end) {
241
0
        int32_t mid = (start + end) / 2;
242
0
        int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion);
243
0
        if (cmp < 0) {
244
0
            start = mid + 1;
245
0
        } else if (cmp > 0) {
246
0
            end = mid;
247
0
        } else {
248
0
            return mid;
249
0
        }
250
0
    }
251
0
    return -1;
252
0
}
253
254
/**
255
 * Finds the UnitPreferenceMetadata instance that matches the given category,
256
 * usage and region: if missing, region falls back to "001", and usage
257
 * repeatedly drops tailing components, eventually trying "default"
258
 * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default").
259
 *
260
 * @param metadata The full list of UnitPreferenceMetadata instances.
261
 * @param category The category to search for. See getUnitCategory().
262
 * @param usage The usage for which formatting preferences is needed. If the
263
 * given usage is not known, automatic fallback occurs, see function description
264
 * above.
265
 * @param region The region for which preferences are needed. If there are no
266
 * region-specific preferences, this function automatically falls back to the
267
 * "001" region (global).
268
 * @param status The standard ICU error code output parameter.
269
 *   * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR.
270
 *   * If fallback to "default" or "001" didn't resolve, status will be
271
 *     U_MISSING_RESOURCE.
272
 * @return The index into the metadata vector which represents the appropriate
273
 * preferences. If appropriate preferences are not found, -1 is returned.
274
 */
275
int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata,
276
                                   StringPiece category, StringPiece usage, StringPiece region,
277
0
                                   UErrorCode &status) {
278
0
    if (U_FAILURE(status)) { return -1; }
279
0
    bool foundCategory, foundUsage, foundRegion;
280
0
    UnitPreferenceMetadata desired(category, usage, region, -1, -1, status);
281
0
    int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
282
0
    if (U_FAILURE(status)) { return -1; }
283
0
    if (idx >= 0) { return idx; }
284
0
    if (!foundCategory) {
285
        // TODO: failures can happen if units::getUnitCategory returns a category
286
        // that does not appear in unitPreferenceData. Do we want a unit test that
287
        // checks unitPreferenceData has full coverage of categories? Or just trust
288
        // CLDR?
289
0
        status = U_ILLEGAL_ARGUMENT_ERROR;
290
0
        return -1;
291
0
    }
292
0
    U_ASSERT(foundCategory);
293
0
    while (!foundUsage) {
294
0
        int32_t lastDashIdx = desired.usage.lastIndexOf('-');
295
0
        if (lastDashIdx > 0) {
296
0
            desired.usage.truncate(lastDashIdx);
297
0
        } else if (uprv_strcmp(desired.usage.data(), "default") != 0) {
298
0
            desired.usage.truncate(0).append("default", status);
299
0
        } else {
300
            // "default" is not supposed to be missing for any valid category.
301
0
            status = U_MISSING_RESOURCE_ERROR;
302
0
            return -1;
303
0
        }
304
0
        idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
305
0
        if (U_FAILURE(status)) { return -1; }
306
0
    }
307
0
    U_ASSERT(foundCategory);
308
0
    U_ASSERT(foundUsage);
309
0
    if (!foundRegion) {
310
0
        if (uprv_strcmp(desired.region.data(), "001") != 0) {
311
0
            desired.region.truncate(0).append("001", status);
312
0
            idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status);
313
0
        }
314
0
        if (!foundRegion) {
315
            // "001" is not supposed to be missing for any valid usage.
316
0
            status = U_MISSING_RESOURCE_ERROR;
317
0
            return -1;
318
0
        }
319
0
    }
320
0
    U_ASSERT(foundCategory);
321
0
    U_ASSERT(foundUsage);
322
0
    U_ASSERT(foundRegion);
323
0
    U_ASSERT(idx >= 0);
324
0
    return idx;
325
0
}
326
327
} // namespace
328
329
/**
 * Builds a metadata record by copying the three identifying strings and
 * storing the offset/count pair as given.
 *
 * @param category Copied into this->category.
 * @param usage Copied into this->usage.
 * @param region Copied into this->region.
 * @param prefsOffset Stored as-is in this->prefsOffset.
 * @param prefsCount Stored as-is in this->prefsCount.
 * @param status Passed to each of the three string appends.
 */
UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage,
                                               StringPiece region, int32_t prefsOffset,
                                               int32_t prefsCount, UErrorCode &status) {
    // The parameters share their names with the members, hence `this->`.
    this->prefsOffset = prefsOffset;
    this->prefsCount = prefsCount;

    this->category.append(category, status);
    this->usage.append(usage, status);
    this->region.append(region, status);
}
338
339
0
int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const {
340
0
    int32_t cmp = uprv_strcmp(category.data(), other.category.data());
341
0
    if (cmp == 0) {
342
0
        cmp = uprv_strcmp(usage.data(), other.usage.data());
343
0
    }
344
0
    if (cmp == 0) {
345
0
        cmp = uprv_strcmp(region.data(), other.region.data());
346
0
    }
347
0
    return cmp;
348
0
}
349
350
int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory,
351
0
                                          bool *foundUsage, bool *foundRegion) const {
352
0
    int32_t cmp = uprv_strcmp(category.data(), other.category.data());
353
0
    if (cmp == 0) {
354
0
        *foundCategory = true;
355
0
        cmp = uprv_strcmp(usage.data(), other.usage.data());
356
0
    }
357
0
    if (cmp == 0) {
358
0
        *foundUsage = true;
359
0
        cmp = uprv_strcmp(region.data(), other.region.data());
360
0
    }
361
0
    if (cmp == 0) {
362
0
        *foundRegion = true;
363
0
    }
364
0
    return cmp;
365
0
}
366
367
// TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace?
368
0
void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) {
369
0
    LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
370
0
    ConversionRateDataSink sink(&result);
371
0
    ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status);
372
0
}
373
374
/**
 * Linearly searches conversionInfo_ for the entry whose sourceUnit equals
 * `source`.
 *
 * @param source The unit identifier to look up.
 * @param status Set to U_INTERNAL_PROGRAM_ERROR if no entry matches.
 * @return The matching entry, or nullptr if there is none.
 */
const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source,
                                                                 UErrorCode &status) const {
    // A signed index avoids mixing signed and unsigned values in the loop
    // (assumes conversionInfo_.length() is an int32_t, as it is for the
    // MaybeStackVector instances used elsewhere in this file).
    for (int32_t i = 0, n = conversionInfo_.length(); i < n; ++i) {
        if (conversionInfo_[i]->sourceUnit.toStringPiece() == source) return conversionInfo_[i];
    }

    status = U_INTERNAL_PROGRAM_ERROR;
    return nullptr;
}
383
384
0
/**
 * Opens the "units" resource bundle and feeds its "unitPreferenceData"
 * resource through a UnitPreferencesSink, which fills unitPrefs_ and
 * metadata_.
 *
 * @param status The standard ICU error code output parameter.
 */
U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) {
    LocalUResourceBundlePointer unitsBundle(ures_openDirect(nullptr, "units", &status));
    UnitPreferencesSink sink(&unitPrefs_, &metadata_);
    ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status);
}
389
390
// TODO: make outPreferences const?
391
//
392
// TODO: consider replacing `UnitPreference **&outPreferences` with slice class
393
// of some kind.
394
void U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage,
395
                                                   StringPiece region,
396
                                                   const UnitPreference *const *&outPreferences,
397
0
                                                   int32_t &preferenceCount, UErrorCode &status) const {
398
0
    int32_t idx = getPreferenceMetadataIndex(&metadata_, category, usage, region, status);
399
0
    if (U_FAILURE(status)) {
400
0
        outPreferences = nullptr;
401
0
        preferenceCount = 0;
402
0
        return;
403
0
    }
404
0
    U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`.
405
0
    const UnitPreferenceMetadata *m = metadata_[idx];
406
0
    outPreferences = unitPrefs_.getAlias() + m->prefsOffset;
407
0
    preferenceCount = m->prefsCount;
408
0
}
409
410
} // namespace units
411
U_NAMESPACE_END
412
413
#endif /* #if !UCONFIG_NO_FORMATTING */