Coverage Report

Created: 2026-01-25 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/localebuilder.cpp
Line
Count
Source
1
// © 2019 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include <optional>
5
#include <string_view>
6
#include <utility>
7
8
#include "bytesinkutil.h"  // StringByteSink<CharString>
9
#include "charstr.h"
10
#include "cstring.h"
11
#include "fixedstring.h"
12
#include "ulocimp.h"
13
#include "unicode/localebuilder.h"
14
#include "unicode/locid.h"
15
16
namespace {
17
18
75
/** Returns true iff c lies in the range '0'..'9'. */
inline bool UPRV_ISDIGIT(char c) { return '0' <= c && c <= '9'; }
19
612
/** Returns true iff c is an ASCII letter (per uprv_isASCIILetter) or a digit (per UPRV_ISDIGIT). */
inline bool UPRV_ISALPHANUM(char c) {
    if (uprv_isASCIILetter(c)) { return true; }
    return UPRV_ISDIGIT(c);
}
20
21
// Keyword name under which Unicode locale attributes are read from and
// written to a Locale via getKeywordValue()/setKeywordValue() in this file.
constexpr const char* kAttributeKey = "attribute";
22
23
612
bool _isExtensionSubtags(char key, const char* s, int32_t len) {
24
612
    switch (uprv_tolower(key)) {
25
0
        case 'u':
26
0
            return ultag_isUnicodeExtensionSubtags(s, len);
27
265
        case 't':
28
265
            return ultag_isTransformedExtensionSubtags(s, len);
29
41
        case 'x':
30
41
            return ultag_isPrivateuseValueSubtags(s, len);
31
306
        default:
32
306
            return ultag_isExtensionSubtags(s, len);
33
612
    }
34
612
}
35
36
}  // namespace
37
38
U_NAMESPACE_BEGIN
39
40
6.06k
/**
 * Creates an empty builder: no error recorded, empty language/script/region
 * buffers, and no variant or extensions allocated.
 */
LocaleBuilder::LocaleBuilder()
    : UObject(),
      status_(U_ZERO_ERROR),
      language_(),
      script_(),
      region_(),
      variant_(nullptr),
      extensions_(nullptr)
{
    // Explicitly terminate each fixed-size field buffer as an empty C string.
    language_[0] = '\0';
    script_[0] = '\0';
    region_[0] = '\0';
}
47
48
/** Releases the heap-allocated variant and extensions (either may be null). */
LocaleBuilder::~LocaleBuilder()
{
    delete variant_;      // owned FixedString, or nullptr
    delete extensions_;   // owned Locale, or nullptr
}
53
54
/**
 * Resets the builder and re-populates it from `locale`.
 *
 * Language, script, region and variant go through the normal setters (so
 * they are validated); a clone of `locale` is kept in extensions_ as the
 * source of keywords. A failed clone records U_MEMORY_ALLOCATION_ERROR.
 *
 * @return *this, for chaining.
 */
LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
{
    clear()
        .setLanguage(locale.getLanguage())
        .setScript(locale.getScript())
        .setRegion(locale.getCountry())
        .setVariant(locale.getVariant());

    // clear() left extensions_ null, so nothing is overwritten here.
    extensions_ = locale.clone();
    if (extensions_ == nullptr) {
        status_ = U_MEMORY_ALLOCATION_ERROR;
    }
    return *this;
}
67
68
/**
 * Parses `tag` with Locale::forLanguageTag() and, on success, loads the
 * resulting locale into this builder via setLocale().
 *
 * @return *this, for chaining.
 */
LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
{
    const Locale parsed = Locale::forLanguageTag(tag, status_);
    // setLocale() resets status_, so a parse failure must short-circuit
    // here or the error would be lost.
    return U_FAILURE(status_) ? *this : setLocale(parsed);
}
77
78
namespace {
79
80
void setField(StringPiece input, char* dest, UErrorCode& errorCode,
81
17.6k
              bool (*test)(const char*, int32_t)) {
82
17.6k
    if (U_FAILURE(errorCode)) { return; }
83
17.5k
    if (input.empty()) {
84
7.61k
        dest[0] = '\0';
85
9.97k
    } else if (test(input.data(), input.length())) {
86
9.96k
        uprv_memcpy(dest, input.data(), input.length());
87
9.96k
        dest[input.length()] = '\0';
88
9.96k
    } else {
89
12
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
90
12
    }
91
17.5k
}
92
93
}  // namespace
94
95
/**
 * Sets (or, for an empty argument, clears) the language field. The value is
 * validated with ultag_isLanguageSubtag; failures are recorded in status_.
 *
 * @return *this, for chaining.
 */
LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
{
    setField(language, language_, status_, ultag_isLanguageSubtag);
    return *this;
}
100
101
/**
 * Sets (or, for an empty argument, clears) the script field. The value is
 * validated with ultag_isScriptSubtag; failures are recorded in status_.
 *
 * @return *this, for chaining.
 */
LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
{
    setField(script, script_, status_, ultag_isScriptSubtag);
    return *this;
}
106
107
/**
 * Sets (or, for an empty argument, clears) the region field. The value is
 * validated with ultag_isRegionSubtag; failures are recorded in status_.
 *
 * @return *this, for chaining.
 */
LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
{
    setField(region, region_, status_, ultag_isRegionSubtag);
    return *this;
}
112
113
namespace {
114
115
397
void transform(char* data, int32_t len) {
116
11.8k
    for (int32_t i = 0; i < len; i++, data++) {
117
11.4k
        if (*data == '_') {
118
952
            *data = '-';
119
10.4k
        } else {
120
10.4k
            *data = uprv_tolower(*data);
121
10.4k
        }
122
11.4k
    }
123
397
}
124
125
}  // namespace
126
127
/**
 * Sets (or, for an empty argument, clears) the variant.
 *
 * The input is copied, normalized with transform() ('_' -> '-', lower-cased)
 * and validated with ultag_isVariantSubtags. The previously stored variant
 * is only replaced once the new one has passed validation.
 *
 * Errors recorded in status_:
 * - U_MEMORY_ALLOCATION_ERROR if the copy could not be allocated;
 * - U_ILLEGAL_ARGUMENT_ERROR if the normalized value is not well-formed.
 *
 * @return *this, for chaining.
 */
LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
{
    if (U_FAILURE(status_)) { return *this; }
    if (variant.empty()) {
        delete variant_;
        variant_ = nullptr;
        return *this;
    }
    FixedString* new_variant = new FixedString(variant);
    // `variant` is non-empty here, so an empty FixedString means its internal
    // allocation failed.
    if (new_variant == nullptr || new_variant->isEmpty()) {
        // Free the wrapper object too (no-op for nullptr); previously it was
        // leaked on this path.
        delete new_variant;
        status_ = U_MEMORY_ALLOCATION_ERROR;
        return *this;
    }
    transform(new_variant->getAlias(), variant.length());
    if (!ultag_isVariantSubtags(new_variant->data(), variant.length())) {
        delete new_variant;
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }
    delete variant_;
    variant_ = new_variant;
    return *this;
}
150
151
namespace {
152
153
bool
154
_isKeywordValue(const char* key, const char* value, int32_t value_len)
155
8.05k
{
156
8.05k
    if (key[1] == '\0') {
157
        // one char key
158
612
        return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
159
612
                _isExtensionSubtags(key[0], value, value_len));
160
7.44k
    } else if (uprv_strcmp(key, kAttributeKey) == 0) {
161
        // unicode attributes
162
164
        return ultag_isUnicodeLocaleAttributes(value, value_len);
163
164
    }
164
    // otherwise: unicode extension value
165
    // We need to convert from legacy key/value to unicode
166
    // key/value
167
7.28k
    std::optional<std::string_view> unicode_locale_key = ulocimp_toBcpKeyWithFallback(key);
168
7.28k
    std::optional<std::string_view> unicode_locale_type = ulocimp_toBcpTypeWithFallback(key, value);
169
170
7.28k
    return unicode_locale_key.has_value() &&
171
7.28k
           unicode_locale_type.has_value() &&
172
7.27k
           ultag_isUnicodeLocaleKey(unicode_locale_key->data(),
173
7.27k
                                    static_cast<int32_t>(unicode_locale_key->size())) &&
174
7.27k
           ultag_isUnicodeLocaleType(unicode_locale_type->data(),
175
7.27k
                                     static_cast<int32_t>(unicode_locale_type->size()));
176
8.05k
}
177
178
void
179
_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
180
                Locale& to, bool validate, UErrorCode& errorCode)
181
5.64k
{
182
5.64k
    if (U_FAILURE(errorCode)) { return; }
183
5.64k
    LocalPointer<icu::StringEnumeration> ownedKeywords;
184
5.64k
    if (keywords == nullptr) {
185
5.64k
        ownedKeywords.adoptInstead(from.createKeywords(errorCode));
186
5.64k
        if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
187
5.17k
        keywords = ownedKeywords.getAlias();
188
5.17k
    }
189
5.17k
    const char* key;
190
13.1k
    while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
191
8.05k
        auto value = from.getKeywordValue<CharString>(key, errorCode);
192
8.05k
        if (U_FAILURE(errorCode)) { return; }
193
8.05k
        if (uprv_strcmp(key, kAttributeKey) == 0) {
194
164
            transform(value.data(), value.length());
195
164
        }
196
8.05k
        if (validate &&
197
8.05k
            !_isKeywordValue(key, value.data(), value.length())) {
198
68
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
199
68
            return;
200
68
        }
201
7.99k
        to.setKeywordValue(key, value.data(), errorCode);
202
7.99k
        if (U_FAILURE(errorCode)) { return; }
203
7.99k
    }
204
5.17k
}
205
206
void
207
_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
208
0
{
209
0
    if (U_FAILURE(errorCode)) { return; }
210
    // Clear Unicode attributes
211
0
    locale.setKeywordValue(kAttributeKey, "", errorCode);
212
213
    // Clear all Unicode keyword values
214
0
    LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
215
0
    if (U_FAILURE(errorCode) || iter.isNull()) { return; }
216
0
    const char* key;
217
0
    while ((key = iter->next(nullptr, errorCode)) != nullptr) {
218
0
        locale.setUnicodeKeywordValue(key, nullptr, errorCode);
219
0
    }
220
0
}
221
222
void
223
_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
224
0
{
225
0
    if (U_FAILURE(errorCode)) { return; }
226
    // Add the unicode extensions to extensions_
227
0
    CharString locale_str("und-u-", errorCode);
228
0
    locale_str.append(value, errorCode);
229
0
    _copyExtensions(
230
0
        Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
231
0
        locale, false, errorCode);
232
0
}
233
234
}  // namespace
235
236
/**
 * Sets the extension identified by the singleton `key` to `value`.
 *
 * The key must be ASCII alphanumeric. The value is normalized with
 * transform() and, if non-empty, must pass _isExtensionSubtags() for the
 * key; otherwise U_ILLEGAL_ARGUMENT_ERROR is recorded in status_.
 *
 * For 'u' (case-insensitive) all existing Unicode attributes and keywords
 * are cleared first, then the new subtags (if any) are added. Any other key
 * is stored directly as a one-character keyword.
 *
 * @return *this, for chaining.
 */
LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
{
    if (U_FAILURE(status_)) { return *this; }
    if (!UPRV_ISALPHANUM(key)) {
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }

    CharString normalized(value, status_);
    if (U_FAILURE(status_)) { return *this; }
    transform(normalized.data(), normalized.length());

    const bool wellFormed =
        normalized.isEmpty() ||
        _isExtensionSubtags(key, normalized.data(), normalized.length());
    if (!wellFormed) {
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }

    // Lazily create the locale that carries the extensions.
    if (extensions_ == nullptr) {
        extensions_ = Locale::getRoot().clone();
        if (extensions_ == nullptr) {
            status_ = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
    }

    if (uprv_tolower(key) == 'u') {
        // Unicode extension: replace everything previously set.
        _clearUAttributesAndKeyType(*extensions_, status_);
        if (!U_FAILURE(status_) && !value.empty()) {
            _setUnicodeExtensions(*extensions_, normalized, status_);
        }
    } else {
        // t, x and all other extensions.
        extensions_->setKeywordValue(StringPiece(&key, 1), normalized.data(),
                                     status_);
    }
    return *this;
}
271
272
/**
 * Sets the Unicode locale keyword `key` to `type`.
 *
 * `key` must be a valid Unicode locale key and `type`, unless empty, a valid
 * Unicode locale type; otherwise U_ILLEGAL_ARGUMENT_ERROR is recorded in
 * status_. The extensions locale is created on first use.
 *
 * @return *this, for chaining.
 */
LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
      StringPiece key, StringPiece type)
{
    if (U_FAILURE(status_)) { return *this; }

    const bool wellFormed =
        ultag_isUnicodeLocaleKey(key.data(), key.length()) &&
        (type.empty() ||
         ultag_isUnicodeLocaleType(type.data(), type.length()));
    if (!wellFormed) {
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }

    if (extensions_ == nullptr) {
        extensions_ = Locale::getRoot().clone();
        if (extensions_ == nullptr) {
            status_ = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
    }
    extensions_->setUnicodeKeywordValue(key, type, status_);
    return *this;
}
292
293
/**
 * Adds a Unicode locale attribute to the builder.
 *
 * The attribute is normalized with transform() and must pass
 * ultag_isUnicodeLocaleAttribute(), otherwise U_ILLEGAL_ARGUMENT_ERROR is
 * recorded in status_. If the attribute is already present nothing changes;
 * otherwise it is inserted in front of the first existing attribute that
 * compares greater (or appended at the end), and the resulting list is
 * stored under the "attribute" keyword joined with '_'.
 *
 * @return *this, for chaining.
 */
LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
    StringPiece value)
{
    CharString value_str(value, status_);
    if (U_FAILURE(status_)) { return *this; }
    transform(value_str.data(), value_str.length());
    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }
    if (extensions_ == nullptr) {
        extensions_ = Locale::getRoot().clone();
        if (extensions_ == nullptr) {
            status_ = U_MEMORY_ALLOCATION_ERROR;
            return *this;
        }
        // Fresh locale: the new attribute is the whole list.
        extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
        return *this;
    }

    UErrorCode localErrorCode = U_ZERO_ERROR;
    auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
    if (U_FAILURE(localErrorCode)) {
        // No attributes, set the attribute.
        extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
        return *this;
    }

    // Split the existing list in place: each '_' or '-' separator becomes a
    // NUL terminator so that every attribute can be handled as its own C
    // string by uprv_strcmp/uprv_strlen below. (Without this split the whole
    // list would be compared and copied as a single token.)
    char* p = attributes.data();
    for (int32_t i = 0; i < attributes.length(); i++, p++) {
        *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
    }
    const char* const limit = attributes.data() + attributes.length();

    // Already present anywhere in the list: nothing to do.
    for (const char* token = attributes.data(); token < limit;
         token += uprv_strlen(token) + 1) {
        if (uprv_strcmp(token, value_str.data()) == 0) { return *this; }
    }

    // Rebuild the list, inserting the new attribute before the first
    // existing attribute that compares greater.
    CharString new_attributes;
    bool inserted = false;
    for (const char* token = attributes.data(); token < limit;
         token += uprv_strlen(token) + 1) {
        if (!inserted && uprv_strcmp(token, value_str.data()) > 0) {
            if (!new_attributes.isEmpty()) {
                new_attributes.append('_', status_);
            }
            new_attributes.append(value_str.data(), status_);
            inserted = true;
        }
        if (!new_attributes.isEmpty()) {
            new_attributes.append('_', status_);
        }
        new_attributes.append(token, status_);
    }
    if (!inserted) {
        if (!new_attributes.isEmpty()) {
            new_attributes.append('_', status_);
        }
        new_attributes.append(value_str.data(), status_);
    }
    // Not yet in the attributes, set the attribute.
    extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
    return *this;
}
353
354
/**
 * Removes a Unicode locale attribute from the builder, if present.
 *
 * The argument is normalized with transform() and must pass
 * ultag_isUnicodeLocaleAttribute(), otherwise U_ILLEGAL_ARGUMENT_ERROR is
 * recorded in status_. If there are no extensions, the attribute list
 * cannot be read or is empty, or the attribute is not in the list, the
 * builder is left unchanged.
 *
 * @return *this, for chaining.
 */
LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
    StringPiece value)
{
    CharString target(value, status_);
    if (U_FAILURE(status_)) { return *this; }
    transform(target.data(), target.length());
    if (!ultag_isUnicodeLocaleAttribute(target.data(), target.length())) {
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
        return *this;
    }
    if (extensions_ == nullptr) { return *this; }

    UErrorCode localErrorCode = U_ZERO_ERROR;
    auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
    // Lookup failed, or there are no attributes at all: nothing to remove.
    if (U_FAILURE(localErrorCode)) { return *this; }
    if (attributes.isEmpty()) { return *this; }

    // Replace each '_' / '-' separator with a null terminator (lower-casing
    // everything else) so each attribute can be compared with uprv_strcmp.
    char* const buffer = attributes.data();
    for (int32_t i = 0; i < attributes.length(); i++) {
        const char ch = buffer[i];
        buffer[i] = (ch == '_' || ch == '-') ? '\0' : uprv_tolower(ch);
    }

    const char* const limit = attributes.data() + attributes.length();
    CharString remaining;
    bool found = false;
    for (const char* token = attributes.data(); token < limit;
         token += uprv_strlen(token) + 1) {
        if (uprv_strcmp(token, target.data()) == 0) {
            found = true;
            continue;
        }
        if (!remaining.isEmpty()) {
            remaining.append('_', status_);
        }
        remaining.append(token, status_);
    }

    // Only rewrite the keyword when something was actually removed.
    if (found) {
        extensions_->setKeywordValue(kAttributeKey, remaining.data(), status_);
    }
    return *this;
}
400
401
/**
 * Resets the builder to its initial state: clears the recorded error,
 * empties language/script/region, and frees the variant and extensions.
 *
 * @return *this, for chaining.
 */
LocaleBuilder& LocaleBuilder::clear()
{
    status_ = U_ZERO_ERROR;
    language_[0] = '\0';
    script_[0] = '\0';
    region_[0] = '\0';
    delete variant_;
    variant_ = nullptr;
    return clearExtensions();
}
412
413
/**
 * Discards all extensions by freeing the locale that holds them.
 *
 * @return *this, for chaining.
 */
LocaleBuilder& LocaleBuilder::clearExtensions()
{
    delete extensions_;   // safe when already null
    extensions_ = nullptr;
    return *this;
}
419
420
185
/** Returns a default-constructed Locale that has been marked bogus with setToBogus(). */
Locale makeBogusLocale() {
  Locale result;
  result.setToBogus();
  return result;
}
425
426
void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
427
0
{
428
0
    if (U_FAILURE(errorCode)) { return; }
429
0
    LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
430
0
    if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
431
        // Error, or no extensions to copy.
432
0
        return;
433
0
    }
434
0
    if (extensions_ == nullptr) {
435
0
        extensions_ = Locale::getRoot().clone();
436
0
        if (extensions_ == nullptr) {
437
0
            status_ = U_MEMORY_ALLOCATION_ERROR;
438
0
            return;
439
0
        }
440
0
    }
441
0
    _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
442
0
}
443
444
/**
 * Builds a Locale from the fields collected so far.
 *
 * Returns a bogus locale if errorCode is already a failure on entry, if the
 * builder has a recorded error (which is then copied to errorCode), or if
 * assembling the locale string or copying/validating the extensions fails.
 *
 * @param errorCode in/out ICU error code.
 * @return the constructed Locale, or a bogus Locale on failure.
 */
Locale LocaleBuilder::build(UErrorCode& errorCode)
{
    if (U_FAILURE(errorCode)) {
        return makeBogusLocale();
    }
    if (U_FAILURE(status_)) {
        // Surface the error recorded by an earlier setter.
        errorCode = status_;
        return makeBogusLocale();
    }

    // language[-script][-region][-variant]
    CharString tag(language_, errorCode);
    if (script_[0] != '\0') {
        tag.append('-', errorCode).append(StringPiece(script_), errorCode);
    }
    if (region_[0] != '\0') {
        tag.append('-', errorCode).append(StringPiece(region_), errorCode);
    }
    if (variant_ != nullptr) {
        tag.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
    }
    if (U_FAILURE(errorCode)) {
        return makeBogusLocale();
    }

    Locale product(tag.data());
    if (extensions_ != nullptr) {
        // Keywords are validated while being copied into the result.
        _copyExtensions(*extensions_, nullptr, product, true, errorCode);
    }
    if (U_FAILURE(errorCode)) {
        return makeBogusLocale();
    }
    return product;
}
475
476
0
/**
 * Copies the builder's recorded status into outErrorCode unless
 * outErrorCode already holds a failure (an older error is never
 * overwritten).
 *
 * @return true if outErrorCode indicates a failure on return.
 */
UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
    if (!U_FAILURE(outErrorCode)) {
        outErrorCode = status_;
        return U_FAILURE(outErrorCode);
    }
    // Keep the pre-existing error.
    return true;
}
484
485
U_NAMESPACE_END