Coverage Report

Created: 2025-12-07 06:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/icu4c/source/common/localebuilder.cpp
Line
Count
Source
1
// © 2019 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
4
#include <optional>
5
#include <string_view>
6
#include <utility>
7
8
#include "bytesinkutil.h"  // StringByteSink<CharString>
9
#include "charstr.h"
10
#include "cstring.h"
11
#include "fixedstring.h"
12
#include "ulocimp.h"
13
#include "unicode/localebuilder.h"
14
#include "unicode/locid.h"
15
16
namespace {
17
18
62
inline bool UPRV_ISDIGIT(char c) { return c >= '0' && c <= '9'; }
19
558
inline bool UPRV_ISALPHANUM(char c) { return uprv_isASCIILetter(c) || UPRV_ISDIGIT(c); }
20
21
constexpr const char* kAttributeKey = "attribute";
22
23
558
bool _isExtensionSubtags(char key, const char* s, int32_t len) {
24
558
    switch (uprv_tolower(key)) {
25
0
        case 'u':
26
0
            return ultag_isUnicodeExtensionSubtags(s, len);
27
258
        case 't':
28
258
            return ultag_isTransformedExtensionSubtags(s, len);
29
38
        case 'x':
30
38
            return ultag_isPrivateuseValueSubtags(s, len);
31
262
        default:
32
262
            return ultag_isExtensionSubtags(s, len);
33
558
    }
34
558
}
35
36
}  // namespace
37
38
U_NAMESPACE_BEGIN
39
40
5.80k
LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(),
41
5.80k
    script_(), region_(), variant_(nullptr), extensions_(nullptr)
42
5.80k
{
43
5.80k
    language_[0] = 0;
44
5.80k
    script_[0] = 0;
45
5.80k
    region_[0] = 0;
46
5.80k
}
47
48
LocaleBuilder::~LocaleBuilder()
49
5.80k
{
50
5.80k
    delete variant_;
51
5.80k
    delete extensions_;
52
5.80k
}
53
54
LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale)
55
5.39k
{
56
5.39k
    clear();
57
5.39k
    setLanguage(locale.getLanguage());
58
5.39k
    setScript(locale.getScript());
59
5.39k
    setRegion(locale.getCountry());
60
5.39k
    setVariant(locale.getVariant());
61
5.39k
    extensions_ = locale.clone();
62
5.39k
    if (extensions_ == nullptr) {
63
0
        status_ = U_MEMORY_ALLOCATION_ERROR;
64
0
    }
65
5.39k
    return *this;
66
5.39k
}
67
68
LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag)
69
1.23k
{
70
1.23k
    Locale l = Locale::forLanguageTag(tag, status_);
71
1.23k
    if (U_FAILURE(status_)) { return *this; }
72
    // Because setLocale will reset status_ we need to return
73
    // first if we have error in forLanguageTag.
74
1.14k
    setLocale(l);
75
1.14k
    return *this;
76
1.23k
}
77
78
namespace {
79
80
void setField(StringPiece input, char* dest, UErrorCode& errorCode,
81
16.8k
              bool (*test)(const char*, int32_t)) {
82
16.8k
    if (U_FAILURE(errorCode)) { return; }
83
16.7k
    if (input.empty()) {
84
7.30k
        dest[0] = '\0';
85
9.49k
    } else if (test(input.data(), input.length())) {
86
9.48k
        uprv_memcpy(dest, input.data(), input.length());
87
9.48k
        dest[input.length()] = '\0';
88
9.48k
    } else {
89
9
        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
90
9
    }
91
16.7k
}
92
93
}  // namespace
94
95
LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language)
96
5.70k
{
97
5.70k
    setField(language, language_, status_, &ultag_isLanguageSubtag);
98
5.70k
    return *this;
99
5.70k
}
100
101
LocaleBuilder& LocaleBuilder::setScript(StringPiece script)
102
5.70k
{
103
5.70k
    setField(script, script_, status_, &ultag_isScriptSubtag);
104
5.70k
    return *this;
105
5.70k
}
106
107
LocaleBuilder& LocaleBuilder::setRegion(StringPiece region)
108
5.39k
{
109
5.39k
    setField(region, region_, status_, &ultag_isRegionSubtag);
110
5.39k
    return *this;
111
5.39k
}
112
113
namespace {
114
115
377
void transform(char* data, int32_t len) {
116
10.4k
    for (int32_t i = 0; i < len; i++, data++) {
117
10.0k
        if (*data == '_') {
118
773
            *data = '-';
119
9.26k
        } else {
120
9.26k
            *data = uprv_tolower(*data);
121
9.26k
        }
122
10.0k
    }
123
377
}
124
125
}  // namespace
126
127
LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant)
128
5.39k
{
129
5.39k
    if (U_FAILURE(status_)) { return *this; }
130
5.39k
    if (variant.empty()) {
131
5.17k
        delete variant_;
132
5.17k
        variant_ = nullptr;
133
5.17k
        return *this;
134
5.17k
    }
135
225
    FixedString* new_variant = new FixedString(variant);
136
225
    if (new_variant == nullptr || new_variant->isEmpty()) {
137
0
        status_ = U_MEMORY_ALLOCATION_ERROR;
138
0
        return *this;
139
0
    }
140
225
    transform(new_variant->getAlias(), variant.length());
141
225
    if (!ultag_isVariantSubtags(new_variant->data(), variant.length())) {
142
10
        delete new_variant;
143
10
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
144
10
        return *this;
145
10
    }
146
215
    delete variant_;
147
215
    variant_ = new_variant;
148
215
    return *this;
149
225
}
150
151
namespace {
152
153
bool
154
_isKeywordValue(const char* key, const char* value, int32_t value_len)
155
7.61k
{
156
7.61k
    if (key[1] == '\0') {
157
        // one char key
158
558
        return (UPRV_ISALPHANUM(uprv_tolower(key[0])) &&
159
558
                _isExtensionSubtags(key[0], value, value_len));
160
7.05k
    } else if (uprv_strcmp(key, kAttributeKey) == 0) {
161
        // unicode attributes
162
152
        return ultag_isUnicodeLocaleAttributes(value, value_len);
163
152
    }
164
    // otherwise: unicode extension value
165
    // We need to convert from legacy key/value to unicode
166
    // key/value
167
6.90k
    std::optional<std::string_view> unicode_locale_key = ulocimp_toBcpKeyWithFallback(key);
168
6.90k
    std::optional<std::string_view> unicode_locale_type = ulocimp_toBcpTypeWithFallback(key, value);
169
170
6.90k
    return unicode_locale_key.has_value() &&
171
6.90k
           unicode_locale_type.has_value() &&
172
6.90k
           ultag_isUnicodeLocaleKey(unicode_locale_key->data(),
173
6.90k
                                    static_cast<int32_t>(unicode_locale_key->size())) &&
174
6.90k
           ultag_isUnicodeLocaleType(unicode_locale_type->data(),
175
6.90k
                                     static_cast<int32_t>(unicode_locale_type->size()));
176
7.61k
}
177
178
void
179
_copyExtensions(const Locale& from, icu::StringEnumeration *keywords,
180
                Locale& to, bool validate, UErrorCode& errorCode)
181
5.38k
{
182
5.38k
    if (U_FAILURE(errorCode)) { return; }
183
5.38k
    LocalPointer<icu::StringEnumeration> ownedKeywords;
184
5.38k
    if (keywords == nullptr) {
185
5.38k
        ownedKeywords.adoptInstead(from.createKeywords(errorCode));
186
5.38k
        if (U_FAILURE(errorCode) || ownedKeywords.isNull()) { return; }
187
4.93k
        keywords = ownedKeywords.getAlias();
188
4.93k
    }
189
4.93k
    const char* key;
190
12.4k
    while ((key = keywords->next(nullptr, errorCode)) != nullptr) {
191
7.61k
        auto value = from.getKeywordValue<CharString>(key, errorCode);
192
7.61k
        if (U_FAILURE(errorCode)) { return; }
193
7.61k
        if (uprv_strcmp(key, kAttributeKey) == 0) {
194
152
            transform(value.data(), value.length());
195
152
        }
196
7.61k
        if (validate &&
197
7.61k
            !_isKeywordValue(key, value.data(), value.length())) {
198
75
            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
199
75
            return;
200
75
        }
201
7.53k
        to.setKeywordValue(key, value.data(), errorCode);
202
7.53k
        if (U_FAILURE(errorCode)) { return; }
203
7.53k
    }
204
4.93k
}
205
206
void
207
_clearUAttributesAndKeyType(Locale& locale, UErrorCode& errorCode)
208
0
{
209
0
    if (U_FAILURE(errorCode)) { return; }
210
    // Clear Unicode attributes
211
0
    locale.setKeywordValue(kAttributeKey, "", errorCode);
212
213
    // Clear all Unicode keyword values
214
0
    LocalPointer<icu::StringEnumeration> iter(locale.createUnicodeKeywords(errorCode));
215
0
    if (U_FAILURE(errorCode) || iter.isNull()) { return; }
216
0
    const char* key;
217
0
    while ((key = iter->next(nullptr, errorCode)) != nullptr) {
218
0
        locale.setUnicodeKeywordValue(key, nullptr, errorCode);
219
0
    }
220
0
}
221
222
void
223
_setUnicodeExtensions(Locale& locale, const CharString& value, UErrorCode& errorCode)
224
0
{
225
0
    if (U_FAILURE(errorCode)) { return; }
226
    // Add the unicode extensions to extensions_
227
0
    CharString locale_str("und-u-", errorCode);
228
0
    locale_str.append(value, errorCode);
229
0
    _copyExtensions(
230
0
        Locale::forLanguageTag(locale_str.data(), errorCode), nullptr,
231
0
        locale, false, errorCode);
232
0
}
233
234
}  // namespace
235
236
LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value)
237
0
{
238
0
    if (U_FAILURE(status_)) { return *this; }
239
0
    if (!UPRV_ISALPHANUM(key)) {
240
0
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
241
0
        return *this;
242
0
    }
243
0
    CharString value_str(value, status_);
244
0
    if (U_FAILURE(status_)) { return *this; }
245
0
    transform(value_str.data(), value_str.length());
246
0
    if (!value_str.isEmpty() &&
247
0
            !_isExtensionSubtags(key, value_str.data(), value_str.length())) {
248
0
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
249
0
        return *this;
250
0
    }
251
0
    if (extensions_ == nullptr) {
252
0
        extensions_ = Locale::getRoot().clone();
253
0
        if (extensions_ == nullptr) {
254
0
            status_ = U_MEMORY_ALLOCATION_ERROR;
255
0
            return *this;
256
0
        }
257
0
    }
258
0
    if (uprv_tolower(key) != 'u') {
259
        // for t, x and others extension.
260
0
        extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(),
261
0
                                     status_);
262
0
        return *this;
263
0
    }
264
0
    _clearUAttributesAndKeyType(*extensions_, status_);
265
0
    if (U_FAILURE(status_)) { return *this; }
266
0
    if (!value.empty()) {
267
0
        _setUnicodeExtensions(*extensions_, value_str, status_);
268
0
    }
269
0
    return *this;
270
0
}
271
272
LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword(
273
      StringPiece key, StringPiece type)
274
4.25k
{
275
4.25k
    if (U_FAILURE(status_)) { return *this; }
276
4.25k
    if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) ||
277
4.25k
            (!type.empty() &&
278
4.25k
                 !ultag_isUnicodeLocaleType(type.data(), type.length()))) {
279
0
      status_ = U_ILLEGAL_ARGUMENT_ERROR;
280
0
      return *this;
281
0
    }
282
4.25k
    if (extensions_ == nullptr) {
283
0
        extensions_ = Locale::getRoot().clone();
284
0
        if (extensions_ == nullptr) {
285
0
            status_ = U_MEMORY_ALLOCATION_ERROR;
286
0
            return *this;
287
0
        }
288
0
    }
289
4.25k
    extensions_->setUnicodeKeywordValue(key, type, status_);
290
4.25k
    return *this;
291
4.25k
}
292
293
LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute(
294
    StringPiece value)
295
0
{
296
0
    CharString value_str(value, status_);
297
0
    if (U_FAILURE(status_)) { return *this; }
298
0
    transform(value_str.data(), value_str.length());
299
0
    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
300
0
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
301
0
        return *this;
302
0
    }
303
0
    if (extensions_ == nullptr) {
304
0
        extensions_ = Locale::getRoot().clone();
305
0
        if (extensions_ == nullptr) {
306
0
            status_ = U_MEMORY_ALLOCATION_ERROR;
307
0
            return *this;
308
0
        }
309
0
        extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_);
310
0
        return *this;
311
0
    }
312
313
0
    UErrorCode localErrorCode = U_ZERO_ERROR;
314
0
    auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
315
0
    if (U_FAILURE(localErrorCode)) {
316
0
        CharString new_attributes(value_str.data(), status_);
317
        // No attributes, set the attribute.
318
0
        extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
319
0
        return *this;
320
0
    }
321
322
0
    transform(attributes.data(),attributes.length());
323
0
    const char* start = attributes.data();
324
0
    const char* limit = attributes.data() + attributes.length();
325
0
    CharString new_attributes;
326
0
    bool inserted = false;
327
0
    while (start < limit) {
328
0
        if (!inserted) {
329
0
            int cmp = uprv_strcmp(start, value_str.data());
330
0
            if (cmp == 0) { return *this; }  // Found it in attributes: Just return
331
0
            if (cmp > 0) {
332
0
                if (!new_attributes.isEmpty()) new_attributes.append('_', status_);
333
0
                new_attributes.append(value_str.data(), status_);
334
0
                inserted = true;
335
0
            }
336
0
        }
337
0
        if (!new_attributes.isEmpty()) {
338
0
            new_attributes.append('_', status_);
339
0
        }
340
0
        new_attributes.append(start, status_);
341
0
        start += uprv_strlen(start) + 1;
342
0
    }
343
0
    if (!inserted) {
344
0
        if (!new_attributes.isEmpty()) {
345
0
            new_attributes.append('_', status_);
346
0
        }
347
0
        new_attributes.append(value_str.data(), status_);
348
0
    }
349
    // Not yet in the attributes, set the attribute.
350
0
    extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
351
0
    return *this;
352
0
}
353
354
LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute(
355
    StringPiece value)
356
0
{
357
0
    CharString value_str(value, status_);
358
0
    if (U_FAILURE(status_)) { return *this; }
359
0
    transform(value_str.data(), value_str.length());
360
0
    if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) {
361
0
        status_ = U_ILLEGAL_ARGUMENT_ERROR;
362
0
        return *this;
363
0
    }
364
0
    if (extensions_ == nullptr) { return *this; }
365
0
    UErrorCode localErrorCode = U_ZERO_ERROR;
366
0
    auto attributes = extensions_->getKeywordValue<CharString>(kAttributeKey, localErrorCode);
367
    // get failure, just return
368
0
    if (U_FAILURE(localErrorCode)) { return *this; }
369
    // Do not have any attributes, just return.
370
0
    if (attributes.isEmpty()) { return *this; }
371
372
0
    char* p = attributes.data();
373
    // Replace null terminiator in place for _ and - so later
374
    // we can use uprv_strcmp to compare.
375
0
    for (int32_t i = 0; i < attributes.length(); i++, p++) {
376
0
        *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p);
377
0
    }
378
379
0
    const char* start = attributes.data();
380
0
    const char* limit = attributes.data() + attributes.length();
381
0
    CharString new_attributes;
382
0
    bool found = false;
383
0
    while (start < limit) {
384
0
        if (uprv_strcmp(start, value_str.data()) == 0) {
385
0
            found = true;
386
0
        } else {
387
0
            if (!new_attributes.isEmpty()) {
388
0
                new_attributes.append('_', status_);
389
0
            }
390
0
            new_attributes.append(start, status_);
391
0
        }
392
0
        start += uprv_strlen(start) + 1;
393
0
    }
394
    // Found the value in attributes, set the attribute.
395
0
    if (found) {
396
0
        extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_);
397
0
    }
398
0
    return *this;
399
0
}
400
401
LocaleBuilder& LocaleBuilder::clear()
402
5.39k
{
403
5.39k
    status_ = U_ZERO_ERROR;
404
5.39k
    language_[0] = 0;
405
5.39k
    script_[0] = 0;
406
5.39k
    region_[0] = 0;
407
5.39k
    delete variant_;
408
5.39k
    variant_ = nullptr;
409
5.39k
    clearExtensions();
410
5.39k
    return *this;
411
5.39k
}
412
413
LocaleBuilder& LocaleBuilder::clearExtensions()
414
5.39k
{
415
5.39k
    delete extensions_;
416
5.39k
    extensions_ = nullptr;
417
5.39k
    return *this;
418
5.39k
}
419
420
189
Locale makeBogusLocale() {
421
189
  Locale bogus;
422
189
  bogus.setToBogus();
423
189
  return bogus;
424
189
}
425
426
void LocaleBuilder::copyExtensionsFrom(const Locale& src, UErrorCode& errorCode)
427
0
{
428
0
    if (U_FAILURE(errorCode)) { return; }
429
0
    LocalPointer<icu::StringEnumeration> keywords(src.createKeywords(errorCode));
430
0
    if (U_FAILURE(errorCode) || keywords.isNull() || keywords->count(errorCode) == 0) {
431
        // Error, or no extensions to copy.
432
0
        return;
433
0
    }
434
0
    if (extensions_ == nullptr) {
435
0
        extensions_ = Locale::getRoot().clone();
436
0
        if (extensions_ == nullptr) {
437
0
            status_ = U_MEMORY_ALLOCATION_ERROR;
438
0
            return;
439
0
        }
440
0
    }
441
0
    _copyExtensions(src, keywords.getAlias(), *extensions_, false, errorCode);
442
0
}
443
444
Locale LocaleBuilder::build(UErrorCode& errorCode)
445
5.80k
{
446
5.80k
    if (U_FAILURE(errorCode)) {
447
0
        return makeBogusLocale();
448
0
    }
449
5.80k
    if (U_FAILURE(status_)) {
450
114
        errorCode = status_;
451
114
        return makeBogusLocale();
452
114
    }
453
5.68k
    CharString locale_str(language_, errorCode);
454
5.68k
    if (uprv_strlen(script_) > 0) {
455
914
        locale_str.append('-', errorCode).append(StringPiece(script_), errorCode);
456
914
    }
457
5.68k
    if (uprv_strlen(region_) > 0) {
458
2.94k
        locale_str.append('-', errorCode).append(StringPiece(region_), errorCode);
459
2.94k
    }
460
5.68k
    if (variant_ != nullptr) {
461
215
        locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode);
462
215
    }
463
5.68k
    if (U_FAILURE(errorCode)) {
464
0
        return makeBogusLocale();
465
0
    }
466
5.68k
    Locale product(locale_str.data());
467
5.68k
    if (extensions_ != nullptr) {
468
5.38k
        _copyExtensions(*extensions_, nullptr, product, true, errorCode);
469
5.38k
    }
470
5.68k
    if (U_FAILURE(errorCode)) {
471
75
        return makeBogusLocale();
472
75
    }
473
5.61k
    return product;
474
5.68k
}
475
476
0
UBool LocaleBuilder::copyErrorTo(UErrorCode &outErrorCode) const {
477
0
    if (U_FAILURE(outErrorCode)) {
478
        // Do not overwrite the older error code
479
0
        return true;
480
0
    }
481
0
    outErrorCode = status_;
482
0
    return U_FAILURE(outErrorCode);
483
0
}
484
485
U_NAMESPACE_END