Coverage Report

Created: 2026-06-13 06:44

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/source/common/localebuilder.cpp
Line
Count
Source
1
// © 2019 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include <optional>
5
#include <string_view>
6
#include <utility>
7
8
#include "bytesinkutil.h"  // StringByteSink<CharString>
9
#include "charstr.h"
10
#include "cstring.h"
11
#include "fixedstring.h"
12
#include "ulocimp.h"
13
#include "unicode/localebuilder.h"
14
#include "unicode/locid.h"
15
16
namespace {
17
18
0
// Returns true when c is one of the ASCII decimal digits '0' through '9'.
inline bool UPRV_ISDIGIT(char c) { return !(c < '0' || c > '9'); }
19
0
// Returns true when c is accepted by uprv_isASCIILetter() or is an ASCII
// decimal digit.
inline bool UPRV_ISALPHANUM(char c) {
    if (uprv_isASCIILetter(c)) { return true; }
    return UPRV_ISDIGIT(c);
}
20
21
// Keyword name under which this file stores and reads Unicode locale
// attributes on a Locale (via setKeywordValue / getKeywordValue).
constexpr const char* kAttributeKey = "attribute";
22
23
0
bool _isExtensionSubtags(char key, const char* s, int32_t len) {
24
0
    switch (uprv_tolower(key)) {
25
0
        case 'u':
26
0
            return ultag_isUnicodeExtensionSubtags(s, len);
27
0
        case 't':
28
0
            return ultag_isTransformedExtensionSubtags(s, len);
29
0
        case 'x':
30
0
            return ultag_isPrivateuseValueSubtags(s, len);
31
0
        default:
32
0
            return ultag_isExtensionSubtags(s, len);
33
0
    }
34
0
}
35
36
}  // namespace
37
38
U_NAMESPACE_BEGIN
39
40
0
// Creates an empty builder: no error recorded, empty language/script/region
// fields, and no variant or extensions allocated yet.
LocaleBuilder::LocaleBuilder()
    : UObject(),
      status_(U_ZERO_ERROR),
      language_(),
      script_(),
      region_(),
      variant_(nullptr),
      extensions_(nullptr)
{
    // Explicitly terminate each fixed-size field so it reads as "".
    language_[0] = '\0';
    script_[0] = '\0';
    region_[0] = '\0';
}
47
48
// Releases the heap objects owned by the builder. Either pointer may be
// nullptr, which delete accepts.
LocaleBuilder::~LocaleBuilder()
{
    delete variant_;      // owned variant string, if any
    delete extensions_;   // owned Locale holding the extension keywords, if any
}
53
54
// Replaces the entire builder state with the fields of `locale`.
// clear() resets status_, so any earlier error is discarded. A clone of
// `locale` is kept in extensions_; if cloning fails, status_ becomes
// U_MEMORY_ALLOCATION_ERROR.
LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
{
    clear();
    setLanguage(locale.getLanguage())
        .setScript(locale.getScript())
        .setRegion(locale.getCountry())
        .setVariant(locale.getVariant());

    Locale* copy = locale.clone();
    extensions_ = copy;
    if (copy == nullptr) {
        status_ = U_MEMORY_ALLOCATION_ERROR;
    }
    return *this;
}
67
68
// Parses `tag` as a language tag and, on success, loads the resulting Locale
// into the builder. A parse failure is recorded in status_.
LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
{
    Locale parsed = Locale::forLanguageTag(tag, status_);
    // setLocale() resets status_, so it must only run when parsing succeeded;
    // otherwise the parse error would be lost.
    if (!U_FAILURE(status_)) {
        setLocale(parsed);
    }
    return *this;
}
77
78
namespace {
79
80
void setField(StringPiece input, char* dest, UErrorCode& errorCode,
81
0
              bool (*test)(const char*, int32_t)) {
82
0
    if (U_FAILURE(errorCode)) { return; }
83
0
    if (input.empty()) {
84
0
        dest[0] = '\0';
85
0
    } else if (test(input.data(), input.length())) {
86
0
        uprv_memcpy(dest, input.data(), input.length());
87
0
        dest[input.length()] = '\0';
88
0
    } else {
89
0
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
90
0
    }
91
0
}
92
93
}  // namespace
94
95
// Sets the language field. An empty string clears it; a value rejected by
// ultag_isLanguageSubtag records U_ILLEGAL_ARGUMENT_ERROR in status_.
LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
{
    // Validation, copying and error bookkeeping all live in setField().
    setField(language, language_, status_, ultag_isLanguageSubtag);
    return *this;
}
100
101
// Sets the script field. An empty string clears it; a value rejected by
// ultag_isScriptSubtag records U_ILLEGAL_ARGUMENT_ERROR in status_.
LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
{
    // Validation, copying and error bookkeeping all live in setField().
    setField(script, script_, status_, ultag_isScriptSubtag);
    return *this;
}
106
107
// Sets the region field. An empty string clears it; a value rejected by
// ultag_isRegionSubtag records U_ILLEGAL_ARGUMENT_ERROR in status_.
LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
{
    // Validation, copying and error bookkeeping all live in setField().
    setField(region, region_, status_, ultag_isRegionSubtag);
    return *this;
}
112
113
namespace {
114
115
0
void transform(char* data, int32_t len) {
116
0
    for (int32_t i = 0; i < len; i++, data++) {
117
0
        if (*data == '_') {
118
0
            *data = '-';
119
0
        } else {
120
0
            *data = uprv_tolower(*data);
121
0
        }
122
0
    }
123
0
}
124
125
}  // namespace
126
127
// Sets the variant subtags.
//   - No-op if the builder is already in an error state.
//   - An empty `variant` removes any existing variant.
//   - Otherwise the input is copied, normalized ('_' -> '-', lowercased) and
//     validated with ultag_isVariantSubtags. An ill-formed value records
//     U_ILLEGAL_ARGUMENT_ERROR and leaves the previous variant untouched.
//   - A failed allocation records U_MEMORY_ALLOCATION_ERROR.
LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
{
    if (U_FAILURE(status_)) { return *this; }
    if (variant.empty()) {
        delete variant_;
        variant_ = nullptr;
        return *this;
    }
    FixedString* new_variant = new FixedString(variant);
    if (new_variant == nullptr || new_variant->isEmpty()) {
        // The input is non-empty here, so an empty copy is treated as an
        // allocation failure. Free the wrapper object itself so it is not
        // leaked (delete on nullptr is a no-op).
        delete new_variant;
        status_ = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }
    transform(new_variant->getAlias(), variant.length());
    if (!ultag_isVariantSubtags(new_variant->data(), variant.length())) {
        delete new_variant;
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }
    // Only replace the old value once the new one is known to be valid.
    delete variant_;
    variant_ = new_variant;
    return *this;
}
150
151
namespace {
152
153
bool
154
_isKeywordValue(const char* key, const char* value, int32_t value_len)
155
0
{
156
0
    if (key[1] == '\0') {
157
        // one char key
158
0
        return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
159
0
                _isExtensionSubtags(key[0], value, value_len));
160
0
    } else if (uprv_strcmp(key, kAttributeKey) == 0) {
161
        // unicode attributes
162
0
        return ultag_isUnicodeLocaleAttributes(value, value_len);
163
0
    }
164
    // otherwise: unicode extension value
165
    // We need to convert from legacy key/value to unicode
166
    // key/value
167
0
    std::optional<std::string_view> unicode_locale_key = ulocimp_toBcpKeyWithFallback(key);
168
0
    std::optional<std::string_view> unicode_locale_type = ulocimp_toBcpTypeWithFallback(key, value);
169
170
0
    return unicode_locale_key.has_value() &&
171
0
           unicode_locale_type.has_value() &&
172
0
           ultag_isUnicodeLocaleKey(unicode_locale_key->data(),
173
0
                                    static_cast<int32_t>(unicode_locale_key->size())) &&
174
0
           ultag_isUnicodeLocaleType(unicode_locale_type->data(),
175
0
                                     static_cast<int32_t>(unicode_locale_type->size()));
176
0
}
177
178
void
179
_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
180
                Locale& to, bool validate, UErrorCode& errorCode)
181
0
{
182
0
    if (U_FAILURE(errorCode)) { return; }
183
0
    LocalPointer<icu::StringEnumeration> ownedKeywords;
184
0
    if (keywords == nullptr) {
185
0
        ownedKeywords.adoptInstead(from.createKeywords(errorCode));
186
0
        if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
187
0
        keywords = ownedKeywords.getAlias();
188
0
    }
189
0
    const char* key;
190
0
    while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
191
0
        auto value = from.getKeywordValue<CharString>(key, errorCode);
192
0
        if (U_FAILURE(errorCode)) { return; }
193
0
        if (uprv_strcmp(key, kAttributeKey) == 0) {
194
0
            transform(value.data(), value.length());
195
0
        }
196
0
        if (validate &&
197
0
            !_isKeywordValue(key, value.data(), value.length())) {
198
0
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
199
0
            return;
200
0
        }
201
0
        to.setKeywordValue(key, value.data(), errorCode);
202
0
        if (U_FAILURE(errorCode)) { return; }
203
0
    }
204
0
}
205
206
void
207
_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
208
0
{
209
0
    if (U_FAILURE(errorCode)) { return; }
210
    // Clear Unicode attributes
211
0
    locale.setKeywordValue(kAttributeKey, "", errorCode);
212
213
    // Clear all Unicode keyword values
214
0
    LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
215
0
    if (U_FAILURE(errorCode) || iter.isNull()) { return; }
216
0
    const char* key;
217
0
    while ((key = iter->next(nullptr, errorCode)) != nullptr) {
218
0
        locale.setUnicodeKeywordValue(key, nullptr, errorCode);
219
0
    }
220
0
}
221
222
void
223
_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
224
0
{
225
0
    if (U_FAILURE(errorCode)) { return; }
226
    // Add the unicode extensions to extensions_
227
0
    CharString locale_str("und-u-", errorCode);
228
0
    locale_str.append(value, errorCode);
229
0
    _copyExtensions(
230
0
        Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
231
0
        locale, false, errorCode);
232
0
}
233
234
}  // namespace
235
236
// Sets (or, with an empty value, clears) the extension identified by the
// singleton `key`.
//   - No-op if the builder is already in an error state.
//   - `key` must be an ASCII letter or digit, and a non-empty `value` must be
//     well-formed for that key after normalization ('_' -> '-', lowercase);
//     otherwise U_ILLEGAL_ARGUMENT_ERROR is recorded.
//   - For the 'u' key (either case) all existing Unicode attributes and
//     keywords are removed first, then replaced by those in `value`.
LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
{
    if (U_FAILURE(status_)) { return *this; }

    if (!UPRV_ISALPHANUM(key)) {
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }

    CharString value_str(value, status_);
    if (U_FAILURE(status_)) { return *this; }
    transform(value_str.data(), value_str.length());

    const bool rejected =
        !value_str.isEmpty() &&
        !_isExtensionSubtags(key, value_str.data(), value_str.length());
    if (rejected) {
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }

    // Lazily create the Locale that stores extension keywords.
    if (extensions_ == nullptr) {
        extensions_ = Locale::getRoot().clone();
        if (extensions_ == nullptr) {
            status_ = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
    }

    if (uprv_tolower(key) == 'u') {
        // The Unicode extension is replaced wholesale.
        _clearUAttributesAndKeyType(*extensions_, status_);
        if (U_FAILURE(status_)) { return *this; }
        if (!value.empty()) {
            _setUnicodeExtensions(*extensions_, value_str, status_);
        }
    } else {
        // t, x and every other singleton are stored as a plain keyword.
        extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
                                     status_);
    }
    return *this;
}
271
272
// Sets the Unicode locale keyword `key` to `type`.
//   - No-op if the builder is already in an error state.
//   - `key` must pass ultag_isUnicodeLocaleKey; a non-empty `type` must pass
//     ultag_isUnicodeLocaleType. Otherwise U_ILLEGAL_ARGUMENT_ERROR is
//     recorded. An empty `type` skips the type check.
LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
      StringPiece key, StringPiece type)
{
    if (U_FAILURE(status_)) { return *this; }

    const bool well_formed =
        ultag_isUnicodeLocaleKey(key.data(), key.length()) &&
        (type.empty() ||
         ultag_isUnicodeLocaleType(type.data(), type.length()));
    if (!well_formed) {
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }

    // Lazily create the Locale that stores extension keywords.
    if (extensions_ == nullptr) {
        extensions_ = Locale::getRoot().clone();
        if (extensions_ == nullptr) {
            status_ = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
    }

    extensions_->setUnicodeKeywordValue(key, type, status_);
    return *this;
}
292
293
// Adds one Unicode locale attribute to the builder.
//   - `value` is normalized ('_' -> '-', lowercase) and must pass
//     ultag_isUnicodeLocaleAttribute, otherwise U_ILLEGAL_ARGUMENT_ERROR is
//     recorded.
//   - If the attribute is already present, nothing changes.
//   - Otherwise it is inserted before the first existing attribute that
//     compares greater (strcmp order), or appended at the end. The list is
//     stored under the attribute keyword with '_' between attributes.
LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
    StringPiece value)
{
    CharString value_str(value, status_);
    if (U_FAILURE(status_)) { return *this; }
    transform(value_str.data(), value_str.length());
    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }
    if (extensions_ == nullptr) {
        extensions_ = Locale::getRoot().clone();
        if (extensions_ == nullptr) {
            status_ = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
        // Freshly created storage cannot hold attributes yet: just set it.
        extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
        return *this;
    }

    UErrorCode localErrorCode = U_ZERO_ERROR;
    auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
    if (U_FAILURE(localErrorCode)) {
        CharString new_attributes(value_str.data(), status_);
        // No attributes, set the attribute.
        extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
        return *this;
    }

    // Split the existing list in place: replace every separator ('_' or '-')
    // with a NUL terminator and lowercase everything else. The loop below
    // walks the buffer token by token with uprv_strcmp/uprv_strlen, which
    // only works on NUL-separated tokens. Without this split the whole list
    // would be compared as a single string, so duplicates would go undetected
    // and the insertion point would be wrong.
    char* p = attributes.data();
    for (int32_t i = 0; i < attributes.length(); i++, p++) {
        *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
    }

    const char* start = attributes.data();
    const char* limit = attributes.data() + attributes.length();
    CharString new_attributes;
    bool inserted = false;
    while (start < limit) {
        if (!inserted) {
            int cmp = uprv_strcmp(start, value_str.data());
            if (cmp == 0) { return *this; }  // Found it in attributes: Just return
            if (cmp > 0) {
                // First existing attribute that compares greater: the new
                // attribute goes right before it.
                if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
                new_attributes.append(value_str.data(), status_);
                inserted = true;
            }
        }
        if (!new_attributes.isEmpty()) {
            new_attributes.append('_', status_);
        }
        new_attributes.append(start, status_);
        // Step over this token and its NUL terminator.
        start += uprv_strlen(start) + 1;
    }
    if (!inserted) {
        // No existing attribute compared greater: append at the end.
        if (!new_attributes.isEmpty()) {
            new_attributes.append('_', status_);
        }
        new_attributes.append(value_str.data(), status_);
    }
    // Not yet in the attributes, set the attribute.
    extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
    return *this;
}
353
354
// Removes one Unicode locale attribute from the builder.
//   - `value` is normalized ('_' -> '-', lowercase) and must pass
//     ultag_isUnicodeLocaleAttribute, otherwise U_ILLEGAL_ARGUMENT_ERROR is
//     recorded.
//   - If there are no extensions, no attributes, or the attribute is not
//     present, nothing changes.
//   - Otherwise the remaining attributes are written back joined by '_'.
LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
    StringPiece value)
{
    CharString value_str(value, status_);
    if (U_FAILURE(status_)) { return *this; }
    transform(value_str.data(), value_str.length());
    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }
    if (extensions_ == nullptr) { return *this; }

    UErrorCode localErrorCode = U_ZERO_ERROR;
    auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
    // Nothing to remove when the lookup fails or the list is empty.
    if (U_FAILURE(localErrorCode)) { return *this; }
    if (attributes.isEmpty()) { return *this; }

    // Split the list in place: separators ('_' or '-') become NUL terminators
    // and everything else is lowercased, so each attribute can be compared
    // with uprv_strcmp.
    char* chars = attributes.data();
    for (int32_t idx = 0; idx < attributes.length(); ++idx) {
        char& ch = chars[idx];
        ch = (ch == '_' || ch == '-') ? '\0' : uprv_tolower(ch);
    }

    const char* const limit = attributes.data() + attributes.length();
    CharString remaining;
    bool found = false;
    for (const char* token = attributes.data();
         token < limit;
         token += uprv_strlen(token) + 1) {
        if (uprv_strcmp(token, value_str.data()) == 0) {
            // Drop the matching attribute from the rebuilt list.
            found = true;
            continue;
        }
        if (!remaining.isEmpty()) {
            remaining.append('_', status_);
        }
        remaining.append(token, status_);
    }

    // Only touch the stored keyword when something was actually removed.
    if (!found) { return *this; }
    extensions_->setKeywordValue(kAttributeKey, remaining.data(), status_);
    return *this;
}
400
401
// Resets the builder to its initial state: clears the recorded error, empties
// language/script/region, and frees the variant and the extensions.
LocaleBuilder& LocaleBuilder::clear()
{
    status_ = U_ZERO_ERROR;

    language_[0] = '\0';
    script_[0] = '\0';
    region_[0] = '\0';

    delete variant_;
    variant_ = nullptr;

    // clearExtensions() returns *this, so its result can be forwarded.
    return clearExtensions();
}
412
413
// Discards all extensions by freeing the Locale that stores them. Other
// fields and status_ are left untouched.
LocaleBuilder& LocaleBuilder::clearExtensions()
{
    Locale* old_extensions = extensions_;
    extensions_ = nullptr;
    delete old_extensions;  // deleting nullptr is a no-op
    return *this;
}
419
420
0
// Returns a Locale on which setToBogus() has been called; used as the result
// of build() on every failure path.
Locale makeBogusLocale() {
  Locale result;
  result.setToBogus();
  return result;
}
425
426
void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
427
0
{
428
0
    if (U_FAILURE(errorCode)) { return; }
429
0
    LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
430
0
    if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
431
        // Error, or no extensions to copy.
432
0
        return;
433
0
    }
434
0
    if (extensions_ == nullptr) {
435
0
        extensions_ = Locale::getRoot().clone();
436
0
        if (extensions_ == nullptr) {
437
0
            status_ = U_MEMORY_ALLOCATION_ERROR;
438
0
            return;
439
0
        }
440
0
    }
441
0
    _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
442
0
}
443
444
// Builds a Locale from the current builder state.
//   errorCode - if already a failure, a bogus Locale is returned at once.
//               If the builder recorded an error earlier (status_), that
//               error is copied into errorCode and a bogus Locale returned.
// The language, script, region and variant are joined with '-' to form the
// locale id; extension keywords are then copied onto the result with
// validation. Any failure along the way yields a bogus Locale.
Locale LocaleBuilder::build(UErrorCode& errorCode)
{
    if (U_FAILURE(errorCode)) {
        return makeBogusLocale();
    }
    if (U_FAILURE(status_)) {
        errorCode = status_;
        return makeBogusLocale();
    }

    CharString locale_str(language_, errorCode);
    const bool has_script = (script_[0] != '\0');
    const bool has_region = (region_[0] != '\0');
    if (has_script) {
        locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);
    }
    if (has_region) {
        locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);
    }
    if (variant_ != nullptr) {
        locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
    }
    if (U_FAILURE(errorCode)) {
        return makeBogusLocale();
    }

    Locale product(locale_str.data());
    if (extensions_ != nullptr) {
        // validate == true: ill-formed keyword values make the build fail.
        _copyExtensions(*extensions_, nullptr, product, true, errorCode);
    }
    if (U_FAILURE(errorCode)) {
        return makeBogusLocale();
    }
    return product;
}
475
476
0
// Copies the builder's recorded status into outErrorCode unless outErrorCode
// already holds a failure (an older error is never overwritten).
// Returns true when outErrorCode is a failure after the call.
UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
    if (!U_FAILURE(outErrorCode)) {
        outErrorCode = status_;
        return U_FAILURE(outErrorCode);
    }
    // Keep the caller's pre-existing error.
    return true;
}
484
485
U_NAMESPACE_END