/src/icu/source/i18n/units_data.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2020 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | #include "unicode/utypes.h" |
5 | | |
6 | | #if !UCONFIG_NO_FORMATTING |
7 | | |
8 | | #include "cstring.h" |
9 | | #include "number_decimalquantity.h" |
10 | | #include "resource.h" |
11 | | #include "uassert.h" |
12 | | #include "unicode/unistr.h" |
13 | | #include "unicode/ures.h" |
14 | | #include "units_data.h" |
15 | | #include "uresimp.h" |
16 | | #include "util.h" |
17 | | #include <utility> |
18 | | |
19 | | U_NAMESPACE_BEGIN |
20 | | namespace units { |
21 | | |
22 | | namespace { |
23 | | |
24 | | using icu::number::impl::DecimalQuantity; |
25 | | |
26 | 0 | void trimSpaces(CharString& factor, UErrorCode& status){ |
27 | 0 | CharString trimmed; |
28 | 0 | for (int i = 0 ; i < factor.length(); i++) { |
29 | 0 | if (factor[i] == ' ') continue; |
30 | | |
31 | 0 | trimmed.append(factor[i], status); |
32 | 0 | } |
33 | |
|
34 | 0 | factor = std::move(trimmed); |
35 | 0 | } |
36 | | |
37 | | /** |
38 | | * A ResourceSink that collects conversion rate information. |
39 | | * |
40 | | * This class is for use by ures_getAllItemsWithFallback. |
41 | | */ |
42 | | class ConversionRateDataSink : public ResourceSink { |
43 | | public: |
44 | | /** |
45 | | * Constructor. |
46 | | * @param out The vector to which ConversionRateInfo instances are to be |
47 | | * added. This vector must outlive the use of the ResourceSink. |
48 | | */ |
49 | 0 | explicit ConversionRateDataSink(MaybeStackVector<ConversionRateInfo> *out) : outVector(out) {} |
50 | | |
51 | | /** |
52 | | * Method for use by `ures_getAllItemsWithFallback`. Adds the unit |
53 | | * conversion rates that are found in `value` to the output vector. |
54 | | * |
55 | | * @param source This string must be "convertUnits": the resource that this |
56 | | * class supports reading. |
57 | | * @param value The "convertUnits" resource, containing unit conversion rate |
58 | | * information. |
59 | | * @param noFallback Ignored. |
60 | | * @param status The standard ICU error code output parameter. |
61 | | */ |
62 | 0 | void put(const char *source, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) { |
63 | 0 | if (U_FAILURE(status)) { return; } |
64 | 0 | if (uprv_strcmp(source, "convertUnits") != 0) { |
65 | | // This is very strict, however it is the cheapest way to be sure |
66 | | // that with `value`, we're looking at the convertUnits table. |
67 | 0 | status = U_ILLEGAL_ARGUMENT_ERROR; |
68 | 0 | return; |
69 | 0 | } |
70 | 0 | ResourceTable conversionRateTable = value.getTable(status); |
71 | 0 | const char *srcUnit; |
72 | | // We're reusing `value`, which seems to be a common pattern: |
73 | 0 | for (int32_t unit = 0; conversionRateTable.getKeyAndValue(unit, srcUnit, value); unit++) { |
74 | 0 | ResourceTable unitTable = value.getTable(status); |
75 | 0 | const char *key; |
76 | 0 | UnicodeString baseUnit = ICU_Utility::makeBogusString(); |
77 | 0 | UnicodeString factor = ICU_Utility::makeBogusString(); |
78 | 0 | UnicodeString offset = ICU_Utility::makeBogusString(); |
79 | 0 | for (int32_t i = 0; unitTable.getKeyAndValue(i, key, value); i++) { |
80 | 0 | if (uprv_strcmp(key, "target") == 0) { |
81 | 0 | baseUnit = value.getUnicodeString(status); |
82 | 0 | } else if (uprv_strcmp(key, "factor") == 0) { |
83 | 0 | factor = value.getUnicodeString(status); |
84 | 0 | } else if (uprv_strcmp(key, "offset") == 0) { |
85 | 0 | offset = value.getUnicodeString(status); |
86 | 0 | } |
87 | 0 | } |
88 | 0 | if (U_FAILURE(status)) { return; } |
89 | 0 | if (baseUnit.isBogus() || factor.isBogus()) { |
90 | | // We could not find a usable conversion rate: bad resource. |
91 | 0 | status = U_MISSING_RESOURCE_ERROR; |
92 | 0 | return; |
93 | 0 | } |
94 | | |
95 | | // We don't have this ConversionRateInfo yet: add it. |
96 | 0 | ConversionRateInfo *cr = outVector->emplaceBack(); |
97 | 0 | if (!cr) { |
98 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
99 | 0 | return; |
100 | 0 | } else { |
101 | 0 | cr->sourceUnit.append(srcUnit, status); |
102 | 0 | cr->baseUnit.appendInvariantChars(baseUnit, status); |
103 | 0 | cr->factor.appendInvariantChars(factor, status); |
104 | 0 | trimSpaces(cr->factor, status); |
105 | 0 | if (!offset.isBogus()) cr->offset.appendInvariantChars(offset, status); |
106 | 0 | } |
107 | 0 | } |
108 | 0 | return; |
109 | 0 | } |
110 | | |
111 | | private: |
112 | | MaybeStackVector<ConversionRateInfo> *outVector; |
113 | | }; |
114 | | |
115 | 0 | bool operator<(const UnitPreferenceMetadata &a, const UnitPreferenceMetadata &b) { |
116 | 0 | return a.compareTo(b) < 0; |
117 | 0 | } |
118 | | |
119 | | /** |
120 | | * A ResourceSink that collects unit preferences information. |
121 | | * |
122 | | * This class is for use by ures_getAllItemsWithFallback. |
123 | | */ |
124 | | class UnitPreferencesSink : public ResourceSink { |
125 | | public: |
126 | | /** |
127 | | * Constructor. |
128 | | * @param outPrefs The vector to which UnitPreference instances are to be |
129 | | * added. This vector must outlive the use of the ResourceSink. |
130 | | * @param outMetadata The vector to which UnitPreferenceMetadata instances |
131 | | * are to be added. This vector must outlive the use of the ResourceSink. |
132 | | */ |
133 | | explicit UnitPreferencesSink(MaybeStackVector<UnitPreference> *outPrefs, |
134 | | MaybeStackVector<UnitPreferenceMetadata> *outMetadata) |
135 | 0 | : preferences(outPrefs), metadata(outMetadata) {} |
136 | | |
137 | | /** |
138 | | * Method for use by `ures_getAllItemsWithFallback`. Adds the unit |
139 | | * preferences info that are found in `value` to the output vector. |
140 | | * |
141 | | * @param source This string must be "unitPreferenceData": the resource that |
142 | | * this class supports reading. |
143 | | * @param value The "unitPreferenceData" resource, containing unit |
144 | | * preferences data. |
145 | | * @param noFallback Ignored. |
146 | | * @param status The standard ICU error code output parameter. Note: if an |
147 | | * error is returned, outPrefs and outMetadata may be inconsistent. |
148 | | */ |
149 | 0 | void put(const char *key, ResourceValue &value, UBool /*noFallback*/, UErrorCode &status) { |
150 | 0 | if (U_FAILURE(status)) { return; } |
151 | 0 | if (uprv_strcmp(key, "unitPreferenceData") != 0) { |
152 | | // This is very strict, however it is the cheapest way to be sure |
153 | | // that with `value`, we're looking at the convertUnits table. |
154 | 0 | status = U_ILLEGAL_ARGUMENT_ERROR; |
155 | 0 | return; |
156 | 0 | } |
157 | | // The unitPreferenceData structure (see data/misc/units.txt) contains a |
158 | | // hierarchy of category/usage/region, within which are a set of |
159 | | // preferences. Hence three for-loops and another loop for the |
160 | | // preferences themselves: |
161 | 0 | ResourceTable unitPreferenceDataTable = value.getTable(status); |
162 | 0 | const char *category; |
163 | 0 | for (int32_t i = 0; unitPreferenceDataTable.getKeyAndValue(i, category, value); i++) { |
164 | 0 | ResourceTable categoryTable = value.getTable(status); |
165 | 0 | const char *usage; |
166 | 0 | for (int32_t j = 0; categoryTable.getKeyAndValue(j, usage, value); j++) { |
167 | 0 | ResourceTable regionTable = value.getTable(status); |
168 | 0 | const char *region; |
169 | 0 | for (int32_t k = 0; regionTable.getKeyAndValue(k, region, value); k++) { |
170 | | // `value` now contains the set of preferences for |
171 | | // category/usage/region. |
172 | 0 | ResourceArray unitPrefs = value.getArray(status); |
173 | 0 | if (U_FAILURE(status)) { return; } |
174 | 0 | int32_t prefLen = unitPrefs.getSize(); |
175 | | |
176 | | // Update metadata for this set of preferences. |
177 | 0 | UnitPreferenceMetadata *meta = metadata->emplaceBack( |
178 | 0 | category, usage, region, preferences->length(), prefLen, status); |
179 | 0 | if (!meta) { |
180 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
181 | 0 | return; |
182 | 0 | } |
183 | 0 | if (U_FAILURE(status)) { return; } |
184 | 0 | if (metadata->length() > 1) { |
185 | | // Verify that unit preferences are sorted and |
186 | | // without duplicates. |
187 | 0 | if (!(*(*metadata)[metadata->length() - 2] < |
188 | 0 | *(*metadata)[metadata->length() - 1])) { |
189 | 0 | status = U_INVALID_FORMAT_ERROR; |
190 | 0 | return; |
191 | 0 | } |
192 | 0 | } |
193 | | |
194 | | // Collect the individual preferences. |
195 | 0 | for (int32_t i = 0; unitPrefs.getValue(i, value); i++) { |
196 | 0 | UnitPreference *up = preferences->emplaceBack(); |
197 | 0 | if (!up) { |
198 | 0 | status = U_MEMORY_ALLOCATION_ERROR; |
199 | 0 | return; |
200 | 0 | } |
201 | 0 | ResourceTable unitPref = value.getTable(status); |
202 | 0 | if (U_FAILURE(status)) { return; } |
203 | 0 | for (int32_t i = 0; unitPref.getKeyAndValue(i, key, value); ++i) { |
204 | 0 | if (uprv_strcmp(key, "unit") == 0) { |
205 | 0 | int32_t length; |
206 | 0 | const UChar *u = value.getString(length, status); |
207 | 0 | up->unit.appendInvariantChars(u, length, status); |
208 | 0 | } else if (uprv_strcmp(key, "geq") == 0) { |
209 | 0 | int32_t length; |
210 | 0 | const UChar *g = value.getString(length, status); |
211 | 0 | CharString geq; |
212 | 0 | geq.appendInvariantChars(g, length, status); |
213 | 0 | DecimalQuantity dq; |
214 | 0 | dq.setToDecNumber(geq.data(), status); |
215 | 0 | up->geq = dq.toDouble(); |
216 | 0 | } else if (uprv_strcmp(key, "skeleton") == 0) { |
217 | 0 | up->skeleton = value.getUnicodeString(status); |
218 | 0 | } |
219 | 0 | } |
220 | 0 | } |
221 | 0 | } |
222 | 0 | } |
223 | 0 | } |
224 | 0 | } |
225 | | |
226 | | private: |
227 | | MaybeStackVector<UnitPreference> *preferences; |
228 | | MaybeStackVector<UnitPreferenceMetadata> *metadata; |
229 | | }; |
230 | | |
231 | | int32_t binarySearch(const MaybeStackVector<UnitPreferenceMetadata> *metadata, |
232 | | const UnitPreferenceMetadata &desired, bool *foundCategory, bool *foundUsage, |
233 | 0 | bool *foundRegion, UErrorCode &status) { |
234 | 0 | if (U_FAILURE(status)) { return -1; } |
235 | 0 | int32_t start = 0; |
236 | 0 | int32_t end = metadata->length(); |
237 | 0 | *foundCategory = false; |
238 | 0 | *foundUsage = false; |
239 | 0 | *foundRegion = false; |
240 | 0 | while (start < end) { |
241 | 0 | int32_t mid = (start + end) / 2; |
242 | 0 | int32_t cmp = (*metadata)[mid]->compareTo(desired, foundCategory, foundUsage, foundRegion); |
243 | 0 | if (cmp < 0) { |
244 | 0 | start = mid + 1; |
245 | 0 | } else if (cmp > 0) { |
246 | 0 | end = mid; |
247 | 0 | } else { |
248 | 0 | return mid; |
249 | 0 | } |
250 | 0 | } |
251 | 0 | return -1; |
252 | 0 | } |
253 | | |
254 | | /** |
255 | | * Finds the UnitPreferenceMetadata instance that matches the given category, |
256 | | * usage and region: if missing, region falls back to "001", and usage |
257 | | * repeatedly drops tailing components, eventually trying "default" |
258 | | * ("land-agriculture-grain" -> "land-agriculture" -> "land" -> "default"). |
259 | | * |
260 | | * @param metadata The full list of UnitPreferenceMetadata instances. |
261 | | * @param category The category to search for. See getUnitCategory(). |
262 | | * @param usage The usage for which formatting preferences is needed. If the |
263 | | * given usage is not known, automatic fallback occurs, see function description |
264 | | * above. |
265 | | * @param region The region for which preferences are needed. If there are no |
266 | | * region-specific preferences, this function automatically falls back to the |
267 | | * "001" region (global). |
268 | | * @param status The standard ICU error code output parameter. |
269 | | * * If an invalid category is given, status will be U_ILLEGAL_ARGUMENT_ERROR. |
270 | | * * If fallback to "default" or "001" didn't resolve, status will be |
271 | | * U_MISSING_RESOURCE. |
272 | | * @return The index into the metadata vector which represents the appropriate |
273 | | * preferences. If appropriate preferences are not found, -1 is returned. |
274 | | */ |
275 | | int32_t getPreferenceMetadataIndex(const MaybeStackVector<UnitPreferenceMetadata> *metadata, |
276 | | StringPiece category, StringPiece usage, StringPiece region, |
277 | 0 | UErrorCode &status) { |
278 | 0 | if (U_FAILURE(status)) { return -1; } |
279 | 0 | bool foundCategory, foundUsage, foundRegion; |
280 | 0 | UnitPreferenceMetadata desired(category, usage, region, -1, -1, status); |
281 | 0 | int32_t idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status); |
282 | 0 | if (U_FAILURE(status)) { return -1; } |
283 | 0 | if (idx >= 0) { return idx; } |
284 | 0 | if (!foundCategory) { |
285 | | // TODO: failures can happen if units::getUnitCategory returns a category |
286 | | // that does not appear in unitPreferenceData. Do we want a unit test that |
287 | | // checks unitPreferenceData has full coverage of categories? Or just trust |
288 | | // CLDR? |
289 | 0 | status = U_ILLEGAL_ARGUMENT_ERROR; |
290 | 0 | return -1; |
291 | 0 | } |
292 | 0 | U_ASSERT(foundCategory); |
293 | 0 | while (!foundUsage) { |
294 | 0 | int32_t lastDashIdx = desired.usage.lastIndexOf('-'); |
295 | 0 | if (lastDashIdx > 0) { |
296 | 0 | desired.usage.truncate(lastDashIdx); |
297 | 0 | } else if (uprv_strcmp(desired.usage.data(), "default") != 0) { |
298 | 0 | desired.usage.truncate(0).append("default", status); |
299 | 0 | } else { |
300 | | // "default" is not supposed to be missing for any valid category. |
301 | 0 | status = U_MISSING_RESOURCE_ERROR; |
302 | 0 | return -1; |
303 | 0 | } |
304 | 0 | idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status); |
305 | 0 | if (U_FAILURE(status)) { return -1; } |
306 | 0 | } |
307 | 0 | U_ASSERT(foundCategory); |
308 | 0 | U_ASSERT(foundUsage); |
309 | 0 | if (!foundRegion) { |
310 | 0 | if (uprv_strcmp(desired.region.data(), "001") != 0) { |
311 | 0 | desired.region.truncate(0).append("001", status); |
312 | 0 | idx = binarySearch(metadata, desired, &foundCategory, &foundUsage, &foundRegion, status); |
313 | 0 | } |
314 | 0 | if (!foundRegion) { |
315 | | // "001" is not supposed to be missing for any valid usage. |
316 | 0 | status = U_MISSING_RESOURCE_ERROR; |
317 | 0 | return -1; |
318 | 0 | } |
319 | 0 | } |
320 | 0 | U_ASSERT(foundCategory); |
321 | 0 | U_ASSERT(foundUsage); |
322 | 0 | U_ASSERT(foundRegion); |
323 | 0 | U_ASSERT(idx >= 0); |
324 | 0 | return idx; |
325 | 0 | } |
326 | | |
327 | | } // namespace |
328 | | |
329 | | UnitPreferenceMetadata::UnitPreferenceMetadata(StringPiece category, StringPiece usage, |
330 | | StringPiece region, int32_t prefsOffset, |
331 | 0 | int32_t prefsCount, UErrorCode &status) { |
332 | 0 | this->category.append(category, status); |
333 | 0 | this->usage.append(usage, status); |
334 | 0 | this->region.append(region, status); |
335 | 0 | this->prefsOffset = prefsOffset; |
336 | 0 | this->prefsCount = prefsCount; |
337 | 0 | } |
338 | | |
339 | 0 | int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other) const { |
340 | 0 | int32_t cmp = uprv_strcmp(category.data(), other.category.data()); |
341 | 0 | if (cmp == 0) { |
342 | 0 | cmp = uprv_strcmp(usage.data(), other.usage.data()); |
343 | 0 | } |
344 | 0 | if (cmp == 0) { |
345 | 0 | cmp = uprv_strcmp(region.data(), other.region.data()); |
346 | 0 | } |
347 | 0 | return cmp; |
348 | 0 | } |
349 | | |
350 | | int32_t UnitPreferenceMetadata::compareTo(const UnitPreferenceMetadata &other, bool *foundCategory, |
351 | 0 | bool *foundUsage, bool *foundRegion) const { |
352 | 0 | int32_t cmp = uprv_strcmp(category.data(), other.category.data()); |
353 | 0 | if (cmp == 0) { |
354 | 0 | *foundCategory = true; |
355 | 0 | cmp = uprv_strcmp(usage.data(), other.usage.data()); |
356 | 0 | } |
357 | 0 | if (cmp == 0) { |
358 | 0 | *foundUsage = true; |
359 | 0 | cmp = uprv_strcmp(region.data(), other.region.data()); |
360 | 0 | } |
361 | 0 | if (cmp == 0) { |
362 | 0 | *foundRegion = true; |
363 | 0 | } |
364 | 0 | return cmp; |
365 | 0 | } |
366 | | |
367 | | // TODO: this may be unnecessary. Fold into ConversionRates class? Or move to anonymous namespace? |
368 | 0 | void U_I18N_API getAllConversionRates(MaybeStackVector<ConversionRateInfo> &result, UErrorCode &status) { |
369 | 0 | LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status)); |
370 | 0 | ConversionRateDataSink sink(&result); |
371 | 0 | ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", sink, status); |
372 | 0 | } |
373 | | |
374 | | const ConversionRateInfo *ConversionRates::extractConversionInfo(StringPiece source, |
375 | 0 | UErrorCode &status) const { |
376 | 0 | for (size_t i = 0, n = conversionInfo_.length(); i < n; ++i) { |
377 | 0 | if (conversionInfo_[i]->sourceUnit.toStringPiece() == source) return conversionInfo_[i]; |
378 | 0 | } |
379 | | |
380 | 0 | status = U_INTERNAL_PROGRAM_ERROR; |
381 | 0 | return nullptr; |
382 | 0 | } |
383 | | |
384 | 0 | U_I18N_API UnitPreferences::UnitPreferences(UErrorCode &status) { |
385 | 0 | LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status)); |
386 | 0 | UnitPreferencesSink sink(&unitPrefs_, &metadata_); |
387 | 0 | ures_getAllItemsWithFallback(unitsBundle.getAlias(), "unitPreferenceData", sink, status); |
388 | 0 | } |
389 | | |
390 | | // TODO: make outPreferences const? |
391 | | // |
392 | | // TODO: consider replacing `UnitPreference **&outPreferences` with slice class |
393 | | // of some kind. |
394 | | void U_I18N_API UnitPreferences::getPreferencesFor(StringPiece category, StringPiece usage, |
395 | | StringPiece region, |
396 | | const UnitPreference *const *&outPreferences, |
397 | 0 | int32_t &preferenceCount, UErrorCode &status) const { |
398 | 0 | int32_t idx = getPreferenceMetadataIndex(&metadata_, category, usage, region, status); |
399 | 0 | if (U_FAILURE(status)) { |
400 | 0 | outPreferences = nullptr; |
401 | 0 | preferenceCount = 0; |
402 | 0 | return; |
403 | 0 | } |
404 | 0 | U_ASSERT(idx >= 0); // Failures should have been taken care of by `status`. |
405 | 0 | const UnitPreferenceMetadata *m = metadata_[idx]; |
406 | 0 | outPreferences = unitPrefs_.getAlias() + m->prefsOffset; |
407 | 0 | preferenceCount = m->prefsCount; |
408 | 0 | } |
409 | | |
410 | | } // namespace units |
411 | | U_NAMESPACE_END |
412 | | |
413 | | #endif /* #if !UCONFIG_NO_FORMATTING */ |