/src/icu/icu4c/source/common/localebuilder.cpp
Line | Count | Source |
1 | | // © 2019 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | |
4 | | #include <optional> |
5 | | #include <string_view> |
6 | | #include <utility> |
7 | | |
8 | | #include "bytesinkutil.h" // StringByteSink<CharString> |
9 | | #include "charstr.h" |
10 | | #include "cstring.h" |
11 | | #include "fixedstring.h" |
12 | | #include "ulocimp.h" |
13 | | #include "unicode/localebuilder.h" |
14 | | #include "unicode/locid.h" |
15 | | |
16 | | namespace { |
17 | | |
// Returns true when `c` is one of the ASCII decimal digits '0' through '9'.
inline bool UPRV_ISDIGIT(char c) {
    return '0' <= c && c <= '9';
}
19 | 558 | inline bool UPRV_ISALPHANUM(char c) { return uprv_isASCIILetter(c) || UPRV_ISDIGIT(c); } |
20 | | |
21 | | constexpr const char* kAttributeKey = "attribute"; |
22 | | |
23 | 558 | bool _isExtensionSubtags(char key, const char* s, int32_t len) { |
24 | 558 | switch (uprv_tolower(key)) { |
25 | 0 | case 'u': |
26 | 0 | return ultag_isUnicodeExtensionSubtags(s, len); |
27 | 258 | case 't': |
28 | 258 | return ultag_isTransformedExtensionSubtags(s, len); |
29 | 38 | case 'x': |
30 | 38 | return ultag_isPrivateuseValueSubtags(s, len); |
31 | 262 | default: |
32 | 262 | return ultag_isExtensionSubtags(s, len); |
33 | 558 | } |
34 | 558 | } |
35 | | |
36 | | } // namespace |
37 | | |
38 | | U_NAMESPACE_BEGIN |
39 | | |
40 | 5.80k | LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(), |
41 | 5.80k | script_(), region_(), variant_(nullptr), extensions_(nullptr) |
42 | 5.80k | { |
43 | 5.80k | language_[0] = 0; |
44 | 5.80k | script_[0] = 0; |
45 | 5.80k | region_[0] = 0; |
46 | 5.80k | } |
47 | | |
48 | | LocaleBuilder::~LocaleBuilder() |
49 | 5.80k | { |
50 | 5.80k | delete variant_; |
51 | 5.80k | delete extensions_; |
52 | 5.80k | } |
53 | | |
54 | | LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale) |
55 | 5.39k | { |
56 | 5.39k | clear(); |
57 | 5.39k | setLanguage(locale.getLanguage()); |
58 | 5.39k | setScript(locale.getScript()); |
59 | 5.39k | setRegion(locale.getCountry()); |
60 | 5.39k | setVariant(locale.getVariant()); |
61 | 5.39k | extensions_ = locale.clone(); |
62 | 5.39k | if (extensions_ == nullptr) { |
63 | 0 | status_ = U_MEMORY_ALLOCATION_ERROR; |
64 | 0 | } |
65 | 5.39k | return *this; |
66 | 5.39k | } |
67 | | |
68 | | LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag) |
69 | 1.23k | { |
70 | 1.23k | Locale l = Locale::forLanguageTag(tag, status_); |
71 | 1.23k | if (U_FAILURE(status_)) { return *this; } |
72 | | // Because setLocale will reset status_ we need to return |
73 | | // first if we have error in forLanguageTag. |
74 | 1.14k | setLocale(l); |
75 | 1.14k | return *this; |
76 | 1.23k | } |
77 | | |
78 | | namespace { |
79 | | |
80 | | void setField(StringPiece input, char* dest, UErrorCode& errorCode, |
81 | 16.8k | bool (*test)(const char*, int32_t)) { |
82 | 16.8k | if (U_FAILURE(errorCode)) { return; } |
83 | 16.7k | if (input.empty()) { |
84 | 7.30k | dest[0] = '\0'; |
85 | 9.49k | } else if (test(input.data(), input.length())) { |
86 | 9.48k | uprv_memcpy(dest, input.data(), input.length()); |
87 | 9.48k | dest[input.length()] = '\0'; |
88 | 9.48k | } else { |
89 | 9 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
90 | 9 | } |
91 | 16.7k | } |
92 | | |
93 | | } // namespace |
94 | | |
95 | | LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language) |
96 | 5.70k | { |
97 | 5.70k | setField(language, language_, status_, &ultag_isLanguageSubtag); |
98 | 5.70k | return *this; |
99 | 5.70k | } |
100 | | |
101 | | LocaleBuilder& LocaleBuilder::setScript(StringPiece script) |
102 | 5.70k | { |
103 | 5.70k | setField(script, script_, status_, &ultag_isScriptSubtag); |
104 | 5.70k | return *this; |
105 | 5.70k | } |
106 | | |
107 | | LocaleBuilder& LocaleBuilder::setRegion(StringPiece region) |
108 | 5.39k | { |
109 | 5.39k | setField(region, region_, status_, &ultag_isRegionSubtag); |
110 | 5.39k | return *this; |
111 | 5.39k | } |
112 | | |
113 | | namespace { |
114 | | |
115 | 377 | void transform(char* data, int32_t len) { |
116 | 10.4k | for (int32_t i = 0; i < len; i++, data++) { |
117 | 10.0k | if (*data == '_') { |
118 | 773 | *data = '-'; |
119 | 9.26k | } else { |
120 | 9.26k | *data = uprv_tolower(*data); |
121 | 9.26k | } |
122 | 10.0k | } |
123 | 377 | } |
124 | | |
125 | | } // namespace |
126 | | |
127 | | LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant) |
128 | 5.39k | { |
129 | 5.39k | if (U_FAILURE(status_)) { return *this; } |
130 | 5.39k | if (variant.empty()) { |
131 | 5.17k | delete variant_; |
132 | 5.17k | variant_ = nullptr; |
133 | 5.17k | return *this; |
134 | 5.17k | } |
135 | 225 | FixedString* new_variant = new FixedString(variant); |
136 | 225 | if (new_variant == nullptr || new_variant->isEmpty()) { |
137 | 0 | status_ = U_MEMORY_ALLOCATION_ERROR; |
138 | 0 | return *this; |
139 | 0 | } |
140 | 225 | transform(new_variant->getAlias(), variant.length()); |
141 | 225 | if (!ultag_isVariantSubtags(new_variant->data(), variant.length())) { |
142 | 10 | delete new_variant; |
143 | 10 | status_ = U_ILLEGAL_ARGUMENT_ERROR; |
144 | 10 | return *this; |
145 | 10 | } |
146 | 215 | delete variant_; |
147 | 215 | variant_ = new_variant; |
148 | 215 | return *this; |
149 | 225 | } |
150 | | |
151 | | namespace { |
152 | | |
153 | | bool |
154 | | _isKeywordValue(const char* key, const char* value, int32_t value_len) |
155 | 7.61k | { |
156 | 7.61k | if (key[1] == '\0') { |
157 | | // one char key |
158 | 558 | return (UPRV_ISALPHANUM(uprv_tolower(key[0])) && |
159 | 558 | _isExtensionSubtags(key[0], value, value_len)); |
160 | 7.05k | } else if (uprv_strcmp(key, kAttributeKey) == 0) { |
161 | | // unicode attributes |
162 | 152 | return ultag_isUnicodeLocaleAttributes(value, value_len); |
163 | 152 | } |
164 | | // otherwise: unicode extension value |
165 | | // We need to convert from legacy key/value to unicode |
166 | | // key/value |
167 | 6.90k | std::optional<std::string_view> unicode_locale_key = ulocimp_toBcpKeyWithFallback(key); |
168 | 6.90k | std::optional<std::string_view> unicode_locale_type = ulocimp_toBcpTypeWithFallback(key, value); |
169 | | |
170 | 6.90k | return unicode_locale_key.has_value() && |
171 | 6.90k | unicode_locale_type.has_value() && |
172 | 6.90k | ultag_isUnicodeLocaleKey(unicode_locale_key->data(), |
173 | 6.90k | static_cast<int32_t>(unicode_locale_key->size())) && |
174 | 6.90k | ultag_isUnicodeLocaleType(unicode_locale_type->data(), |
175 | 6.90k | static_cast<int32_t>(unicode_locale_type->size())); |
176 | 7.61k | } |
177 | | |
178 | | void |
179 | | _copyExtensions(const Locale& from, icu::StringEnumeration *keywords, |
180 | | Locale& to, bool validate, UErrorCode& errorCode) |
181 | 5.38k | { |
182 | 5.38k | if (U_FAILURE(errorCode)) { return; } |
183 | 5.38k | LocalPointer<icu::StringEnumeration> ownedKeywords; |
184 | 5.38k | if (keywords == nullptr) { |
185 | 5.38k | ownedKeywords.adoptInstead(from.createKeywords(errorCode)); |
186 | 5.38k | if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; } |
187 | 4.93k | keywords = ownedKeywords.getAlias(); |
188 | 4.93k | } |
189 | 4.93k | const char* key; |
190 | 12.4k | while ((key = keywords->next(nullptr, errorCode)) != nullptr) { |
191 | 7.61k | auto value = from.getKeywordValue<CharString>(key, errorCode); |
192 | 7.61k | if (U_FAILURE(errorCode)) { return; } |
193 | 7.61k | if (uprv_strcmp(key, kAttributeKey) == 0) { |
194 | 152 | transform(value.data(), value.length()); |
195 | 152 | } |
196 | 7.61k | if (validate && |
197 | 7.61k | !_isKeywordValue(key, value.data(), value.length())) { |
198 | 75 | errorCode = U_ILLEGAL_ARGUMENT_ERROR; |
199 | 75 | return; |
200 | 75 | } |
201 | 7.53k | to.setKeywordValue(key, value.data(), errorCode); |
202 | 7.53k | if (U_FAILURE(errorCode)) { return; } |
203 | 7.53k | } |
204 | 4.93k | } |
205 | | |
206 | | void |
207 | | _clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode) |
208 | 0 | { |
209 | 0 | if (U_FAILURE(errorCode)) { return; } |
210 | | // Clear Unicode attributes |
211 | 0 | locale.setKeywordValue(kAttributeKey, "", errorCode); |
212 | | |
213 | | // Clear all Unicode keyword values |
214 | 0 | LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode)); |
215 | 0 | if (U_FAILURE(errorCode) || iter.isNull()) { return; } |
216 | 0 | const char* key; |
217 | 0 | while ((key = iter->next(nullptr, errorCode)) != nullptr) { |
218 | 0 | locale.setUnicodeKeywordValue(key, nullptr, errorCode); |
219 | 0 | } |
220 | 0 | } |
221 | | |
222 | | void |
223 | | _setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode) |
224 | 0 | { |
225 | 0 | if (U_FAILURE(errorCode)) { return; } |
226 | | // Add the unicode extensions to extensions_ |
227 | 0 | CharString locale_str("und-u-", errorCode); |
228 | 0 | locale_str.append(value, errorCode); |
229 | 0 | _copyExtensions( |
230 | 0 | Locale::forLanguageTag(locale_str.data(), errorCode), nullptr, |
231 | 0 | locale, false, errorCode); |
232 | 0 | } |
233 | | |
234 | | } // namespace |
235 | | |
236 | | LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value) |
237 | 0 | { |
238 | 0 | if (U_FAILURE(status_)) { return *this; } |
239 | 0 | if (!UPRV_ISALPHANUM(key)) { |
240 | 0 | status_ = U_ILLEGAL_ARGUMENT_ERROR; |
241 | 0 | return *this; |
242 | 0 | } |
243 | 0 | CharString value_str(value, status_); |
244 | 0 | if (U_FAILURE(status_)) { return *this; } |
245 | 0 | transform(value_str.data(), value_str.length()); |
246 | 0 | if (!value_str.isEmpty() && |
247 | 0 | !_isExtensionSubtags(key, value_str.data(), value_str.length())) { |
248 | 0 | status_ = U_ILLEGAL_ARGUMENT_ERROR; |
249 | 0 | return *this; |
250 | 0 | } |
251 | 0 | if (extensions_ == nullptr) { |
252 | 0 | extensions_ = Locale::getRoot().clone(); |
253 | 0 | if (extensions_ == nullptr) { |
254 | 0 | status_ = U_MEMORY_ALLOCATION_ERROR; |
255 | 0 | return *this; |
256 | 0 | } |
257 | 0 | } |
258 | 0 | if (uprv_tolower(key) != 'u') { |
259 | | // for t, x and others extension. |
260 | 0 | extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(), |
261 | 0 | status_); |
262 | 0 | return *this; |
263 | 0 | } |
264 | 0 | _clearUAttributesAndKeyType(*extensions_, status_); |
265 | 0 | if (U_FAILURE(status_)) { return *this; } |
266 | 0 | if (!value.empty()) { |
267 | 0 | _setUnicodeExtensions(*extensions_, value_str, status_); |
268 | 0 | } |
269 | 0 | return *this; |
270 | 0 | } |
271 | | |
272 | | LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword( |
273 | | StringPiece key, StringPiece type) |
274 | 4.25k | { |
275 | 4.25k | if (U_FAILURE(status_)) { return *this; } |
276 | 4.25k | if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) || |
277 | 4.25k | (!type.empty() && |
278 | 4.25k | !ultag_isUnicodeLocaleType(type.data(), type.length()))) { |
279 | 0 | status_ = U_ILLEGAL_ARGUMENT_ERROR; |
280 | 0 | return *this; |
281 | 0 | } |
282 | 4.25k | if (extensions_ == nullptr) { |
283 | 0 | extensions_ = Locale::getRoot().clone(); |
284 | 0 | if (extensions_ == nullptr) { |
285 | 0 | status_ = U_MEMORY_ALLOCATION_ERROR; |
286 | 0 | return *this; |
287 | 0 | } |
288 | 0 | } |
289 | 4.25k | extensions_->setUnicodeKeywordValue(key, type, status_); |
290 | 4.25k | return *this; |
291 | 4.25k | } |
292 | | |
293 | | LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute( |
294 | | StringPiece value) |
295 | 0 | { |
296 | 0 | CharString value_str(value, status_); |
297 | 0 | if (U_FAILURE(status_)) { return *this; } |
298 | 0 | transform(value_str.data(), value_str.length()); |
299 | 0 | if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) { |
300 | 0 | status_ = U_ILLEGAL_ARGUMENT_ERROR; |
301 | 0 | return *this; |
302 | 0 | } |
303 | 0 | if (extensions_ == nullptr) { |
304 | 0 | extensions_ = Locale::getRoot().clone(); |
305 | 0 | if (extensions_ == nullptr) { |
306 | 0 | status_ = U_MEMORY_ALLOCATION_ERROR; |
307 | 0 | return *this; |
308 | 0 | } |
309 | 0 | extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_); |
310 | 0 | return *this; |
311 | 0 | } |
312 | | |
313 | 0 | UErrorCode localErrorCode = U_ZERO_ERROR; |
314 | 0 | auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode); |
315 | 0 | if (U_FAILURE(localErrorCode)) { |
316 | 0 | CharString new_attributes(value_str.data(), status_); |
317 | | // No attributes, set the attribute. |
318 | 0 | extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); |
319 | 0 | return *this; |
320 | 0 | } |
321 | | |
322 | 0 | transform(attributes.data(),attributes.length()); |
323 | 0 | const char* start = attributes.data(); |
324 | 0 | const char* limit = attributes.data() + attributes.length(); |
325 | 0 | CharString new_attributes; |
326 | 0 | bool inserted = false; |
327 | 0 | while (start < limit) { |
328 | 0 | if (!inserted) { |
329 | 0 | int cmp = uprv_strcmp(start, value_str.data()); |
330 | 0 | if (cmp == 0) { return *this; } // Found it in attributes: Just return |
331 | 0 | if (cmp > 0) { |
332 | 0 | if (!new_attributes.isEmpty()) new_attributes.append('_', status_); |
333 | 0 | new_attributes.append(value_str.data(), status_); |
334 | 0 | inserted = true; |
335 | 0 | } |
336 | 0 | } |
337 | 0 | if (!new_attributes.isEmpty()) { |
338 | 0 | new_attributes.append('_', status_); |
339 | 0 | } |
340 | 0 | new_attributes.append(start, status_); |
341 | 0 | start += uprv_strlen(start) + 1; |
342 | 0 | } |
343 | 0 | if (!inserted) { |
344 | 0 | if (!new_attributes.isEmpty()) { |
345 | 0 | new_attributes.append('_', status_); |
346 | 0 | } |
347 | 0 | new_attributes.append(value_str.data(), status_); |
348 | 0 | } |
349 | | // Not yet in the attributes, set the attribute. |
350 | 0 | extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); |
351 | 0 | return *this; |
352 | 0 | } |
353 | | |
354 | | LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute( |
355 | | StringPiece value) |
356 | 0 | { |
357 | 0 | CharString value_str(value, status_); |
358 | 0 | if (U_FAILURE(status_)) { return *this; } |
359 | 0 | transform(value_str.data(), value_str.length()); |
360 | 0 | if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) { |
361 | 0 | status_ = U_ILLEGAL_ARGUMENT_ERROR; |
362 | 0 | return *this; |
363 | 0 | } |
364 | 0 | if (extensions_ == nullptr) { return *this; } |
365 | 0 | UErrorCode localErrorCode = U_ZERO_ERROR; |
366 | 0 | auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode); |
367 | | // get failure, just return |
368 | 0 | if (U_FAILURE(localErrorCode)) { return *this; } |
369 | | // Do not have any attributes, just return. |
370 | 0 | if (attributes.isEmpty()) { return *this; } |
371 | | |
372 | 0 | char* p = attributes.data(); |
373 | | // Replace null terminiator in place for _ and - so later |
374 | | // we can use uprv_strcmp to compare. |
375 | 0 | for (int32_t i = 0; i < attributes.length(); i++, p++) { |
376 | 0 | *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p); |
377 | 0 | } |
378 | |
|
379 | 0 | const char* start = attributes.data(); |
380 | 0 | const char* limit = attributes.data() + attributes.length(); |
381 | 0 | CharString new_attributes; |
382 | 0 | bool found = false; |
383 | 0 | while (start < limit) { |
384 | 0 | if (uprv_strcmp(start, value_str.data()) == 0) { |
385 | 0 | found = true; |
386 | 0 | } else { |
387 | 0 | if (!new_attributes.isEmpty()) { |
388 | 0 | new_attributes.append('_', status_); |
389 | 0 | } |
390 | 0 | new_attributes.append(start, status_); |
391 | 0 | } |
392 | 0 | start += uprv_strlen(start) + 1; |
393 | 0 | } |
394 | | // Found the value in attributes, set the attribute. |
395 | 0 | if (found) { |
396 | 0 | extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); |
397 | 0 | } |
398 | 0 | return *this; |
399 | 0 | } |
400 | | |
401 | | LocaleBuilder& LocaleBuilder::clear() |
402 | 5.39k | { |
403 | 5.39k | status_ = U_ZERO_ERROR; |
404 | 5.39k | language_[0] = 0; |
405 | 5.39k | script_[0] = 0; |
406 | 5.39k | region_[0] = 0; |
407 | 5.39k | delete variant_; |
408 | 5.39k | variant_ = nullptr; |
409 | 5.39k | clearExtensions(); |
410 | 5.39k | return *this; |
411 | 5.39k | } |
412 | | |
413 | | LocaleBuilder& LocaleBuilder::clearExtensions() |
414 | 5.39k | { |
415 | 5.39k | delete extensions_; |
416 | 5.39k | extensions_ = nullptr; |
417 | 5.39k | return *this; |
418 | 5.39k | } |
419 | | |
420 | 189 | Locale makeBogusLocale() { |
421 | 189 | Locale bogus; |
422 | 189 | bogus.setToBogus(); |
423 | 189 | return bogus; |
424 | 189 | } |
425 | | |
426 | | void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode) |
427 | 0 | { |
428 | 0 | if (U_FAILURE(errorCode)) { return; } |
429 | 0 | LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode)); |
430 | 0 | if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) { |
431 | | // Error, or no extensions to copy. |
432 | 0 | return; |
433 | 0 | } |
434 | 0 | if (extensions_ == nullptr) { |
435 | 0 | extensions_ = Locale::getRoot().clone(); |
436 | 0 | if (extensions_ == nullptr) { |
437 | 0 | status_ = U_MEMORY_ALLOCATION_ERROR; |
438 | 0 | return; |
439 | 0 | } |
440 | 0 | } |
441 | 0 | _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode); |
442 | 0 | } |
443 | | |
444 | | Locale LocaleBuilder::build(UErrorCode& errorCode) |
445 | 5.80k | { |
446 | 5.80k | if (U_FAILURE(errorCode)) { |
447 | 0 | return makeBogusLocale(); |
448 | 0 | } |
449 | 5.80k | if (U_FAILURE(status_)) { |
450 | 114 | errorCode = status_; |
451 | 114 | return makeBogusLocale(); |
452 | 114 | } |
453 | 5.68k | CharString locale_str(language_, errorCode); |
454 | 5.68k | if (uprv_strlen(script_) > 0) { |
455 | 914 | locale_str.append('-', errorCode).append(StringPiece(script_), errorCode); |
456 | 914 | } |
457 | 5.68k | if (uprv_strlen(region_) > 0) { |
458 | 2.94k | locale_str.append('-', errorCode).append(StringPiece(region_), errorCode); |
459 | 2.94k | } |
460 | 5.68k | if (variant_ != nullptr) { |
461 | 215 | locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode); |
462 | 215 | } |
463 | 5.68k | if (U_FAILURE(errorCode)) { |
464 | 0 | return makeBogusLocale(); |
465 | 0 | } |
466 | 5.68k | Locale product(locale_str.data()); |
467 | 5.68k | if (extensions_ != nullptr) { |
468 | 5.38k | _copyExtensions(*extensions_, nullptr, product, true, errorCode); |
469 | 5.38k | } |
470 | 5.68k | if (U_FAILURE(errorCode)) { |
471 | 75 | return makeBogusLocale(); |
472 | 75 | } |
473 | 5.61k | return product; |
474 | 5.68k | } |
475 | | |
476 | 0 | UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const { |
477 | 0 | if (U_FAILURE(outErrorCode)) { |
478 | | // Do not overwrite the older error code |
479 | 0 | return true; |
480 | 0 | } |
481 | 0 | outErrorCode = status_; |
482 | 0 | return U_FAILURE(outErrorCode); |
483 | 0 | } |
484 | | |
485 | | U_NAMESPACE_END |