Coverage Report

Created: 2025-06-13 06:34

/src/icu/icu4c/source/common/ulocimp.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (C) 2004-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*/
9
10
#ifndef ULOCIMP_H
11
#define ULOCIMP_H
12
13
#include <cstddef>
14
#include <optional>
15
#include <string_view>
16
17
#include "unicode/bytestream.h"
18
#include "unicode/uloc.h"
19
20
#include "charstr.h"
21
22
/**
23
 * Create an iterator over the specified keywords list
24
 * @param keywordList double-null terminated list. Will be copied.
25
 * @param keywordListSize size in bytes of keywordList
26
 * @param status err code
27
 * @return enumeration (owned by caller) of the keyword list.
28
 * @internal ICU 3.0
29
 */
30
U_CAPI UEnumeration* U_EXPORT2
31
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
32
33
/**
34
 * Look up a resource bundle table item with fallback on the table level.
35
 * This is accessible so it can be called by C++ code.
36
 */
37
U_CAPI const UChar * U_EXPORT2
38
uloc_getTableStringWithFallback(
39
    const char *path,
40
    const char *locale,
41
    const char *tableKey,
42
    const char *subTableKey,
43
    const char *itemKey,
44
    int32_t *pLength,
45
    UErrorCode *pErrorCode);
46
47
namespace {
48
/* Returns true if a is an ID separator ('_' or '-'), false otherwise. */
49
28.0k
inline bool _isIDSeparator(char a) {
    // Exactly two characters count as separators: underscore and hyphen.
    switch (a) {
        case '_':
        case '-':
            return true;
        default:
            return false;
    }
}
Unexecuted instantiation: tzfmt.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: tzgnames.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: tznames_impl.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: calendar.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: ucal.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: locdspnm.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: locid.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: loclikely.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: locresdata.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: ucurr.cpp:(anonymous namespace)::_isIDSeparator(char)
uloc.cpp:(anonymous namespace)::_isIDSeparator(char)
Line
Count
Source
49
28.0k
inline bool _isIDSeparator(char a) {
    // Exactly two characters count as separators: underscore and hyphen.
    switch (a) {
        case '_':
        case '-':
            return true;
        default:
            return false;
    }
}
Unexecuted instantiation: uloc_keytype.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: uloc_tag.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: uresbund.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: localebuilder.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: locdispnames.cpp:(anonymous namespace)::_isIDSeparator(char)
Unexecuted instantiation: locmap.cpp:(anonymous namespace)::_isIDSeparator(char)
50
}  // namespace
51
52
U_CFUNC const char* 
53
uloc_getCurrentCountryID(const char* oldID);
54
55
U_CFUNC const char* 
56
uloc_getCurrentLanguageID(const char* oldID);
57
58
U_EXPORT std::optional<std::string_view>
59
ulocimp_toBcpKeyWithFallback(std::string_view keyword);
60
61
U_EXPORT std::optional<std::string_view>
62
ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value);
63
64
U_EXPORT std::optional<std::string_view>
65
ulocimp_toLegacyKeyWithFallback(std::string_view keyword);
66
67
U_EXPORT std::optional<std::string_view>
68
ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value);
69
70
U_EXPORT icu::CharString
71
ulocimp_getKeywords(std::string_view localeID,
72
                    char prev,
73
                    bool valuesToo,
74
                    UErrorCode& status);
75
76
U_EXPORT void
77
ulocimp_getKeywords(std::string_view localeID,
78
                    char prev,
79
                    icu::ByteSink& sink,
80
                    bool valuesToo,
81
                    UErrorCode& status);
82
83
U_EXPORT icu::CharString
84
ulocimp_getName(std::string_view localeID,
85
                UErrorCode& err);
86
87
U_EXPORT void
88
ulocimp_getName(std::string_view localeID,
89
                icu::ByteSink& sink,
90
                UErrorCode& err);
91
92
U_EXPORT icu::CharString
93
ulocimp_getBaseName(std::string_view localeID,
94
                    UErrorCode& err);
95
96
U_EXPORT void
97
ulocimp_getBaseName(std::string_view localeID,
98
                    icu::ByteSink& sink,
99
                    UErrorCode& err);
100
101
U_EXPORT icu::CharString
102
ulocimp_canonicalize(std::string_view localeID,
103
                     UErrorCode& err);
104
105
U_EXPORT void
106
ulocimp_canonicalize(std::string_view localeID,
107
                     icu::ByteSink& sink,
108
                     UErrorCode& err);
109
110
U_EXPORT icu::CharString
111
ulocimp_getKeywordValue(const char* localeID,
112
                        std::string_view keywordName,
113
                        UErrorCode& status);
114
115
U_EXPORT void
116
ulocimp_getKeywordValue(const char* localeID,
117
                        std::string_view keywordName,
118
                        icu::ByteSink& sink,
119
                        UErrorCode& status);
120
121
U_EXPORT icu::CharString
122
ulocimp_getLanguage(std::string_view localeID, UErrorCode& status);
123
124
U_EXPORT icu::CharString
125
ulocimp_getScript(std::string_view localeID, UErrorCode& status);
126
127
U_EXPORT icu::CharString
128
ulocimp_getRegion(std::string_view localeID, UErrorCode& status);
129
130
U_EXPORT icu::CharString
131
ulocimp_getVariant(std::string_view localeID, UErrorCode& status);
132
133
U_EXPORT void
134
ulocimp_setKeywordValue(std::string_view keywordName,
135
                        std::string_view keywordValue,
136
                        icu::CharString& localeID,
137
                        UErrorCode& status);
138
139
U_EXPORT int32_t
140
ulocimp_setKeywordValue(std::string_view keywords,
141
                        std::string_view keywordName,
142
                        std::string_view keywordValue,
143
                        icu::ByteSink& sink,
144
                        UErrorCode& status);
145
146
U_EXPORT void
147
ulocimp_getSubtags(
148
        std::string_view localeID,
149
        icu::CharString* language,
150
        icu::CharString* script,
151
        icu::CharString* region,
152
        icu::CharString* variant,
153
        const char** pEnd,
154
        UErrorCode& status);
155
156
U_EXPORT void
157
ulocimp_getSubtags(
158
        std::string_view localeID,
159
        icu::ByteSink* language,
160
        icu::ByteSink* script,
161
        icu::ByteSink* region,
162
        icu::ByteSink* variant,
163
        const char** pEnd,
164
        UErrorCode& status);
165
166
/**
 * Overload of ulocimp_getSubtags() for callers that pass a literal nullptr
 * for all four subtag outputs (language, script, region, variant).
 *
 * A bare nullptr converts equally well to icu::CharString* and to
 * icu::ByteSink*, so such a call would be ambiguous between the two
 * overloads declared above. This overload accepts the four std::nullptr_t
 * arguments and forwards to the icu::ByteSink* overload with explicitly
 * typed null sinks; localeID, pEnd and status are passed through unchanged.
 */
inline void
167
ulocimp_getSubtags(
168
        std::string_view localeID,
169
        std::nullptr_t,
170
        std::nullptr_t,
171
        std::nullptr_t,
172
        std::nullptr_t,
173
        const char** pEnd,
174
0
        UErrorCode& status) {
175
0
    ulocimp_getSubtags(
176
0
            localeID,
177
0
            static_cast<icu::ByteSink*>(nullptr),
178
0
            static_cast<icu::ByteSink*>(nullptr),
179
0
            static_cast<icu::ByteSink*>(nullptr),
180
0
            static_cast<icu::ByteSink*>(nullptr),
181
0
            pEnd,
182
0
            status);
183
0
}
184
185
U_EXPORT icu::CharString
186
ulocimp_getParent(const char* localeID,
187
                  UErrorCode& err);
188
189
U_EXPORT void
190
ulocimp_getParent(const char* localeID,
191
                  icu::ByteSink& sink,
192
                  UErrorCode& err);
193
194
U_EXPORT icu::CharString
195
ulocimp_toLanguageTag(const char* localeID,
196
                      bool strict,
197
                      UErrorCode& status);
198
199
/**
200
 * Writes a well-formed language tag for this locale ID.
201
 *
202
 * **Note**: When `strict` is false, any locale fields which do not satisfy the
203
 * BCP47 syntax requirement will be omitted from the result.  When `strict` is
204
 * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale
205
 * fields do not satisfy the BCP47 syntax requirement.
206
 *
207
 * @param localeID  the input locale ID
208
 * @param sink      the output sink receiving the BCP47 language
209
 *                  tag for this Locale.
210
 * @param strict    boolean value indicating if the function returns
211
 *                  an error for an ill-formed input locale ID.
212
 * @param err       error information if receiving the language
213
 *                  tag failed.
214
 * The resulting BCP47 language tag is written to `sink`; nothing is returned.
215
 *
216
 * @internal ICU 64
217
 */
218
U_EXPORT void
219
ulocimp_toLanguageTag(const char* localeID,
220
                      icu::ByteSink& sink,
221
                      bool strict,
222
                      UErrorCode& err);
223
224
U_EXPORT icu::CharString
225
ulocimp_forLanguageTag(const char* langtag,
226
                       int32_t tagLen,
227
                       int32_t* parsedLength,
228
                       UErrorCode& status);
229
230
/**
231
 * Returns a locale ID for the specified BCP47 language tag string.
232
 * If the specified language tag contains any ill-formed subtags,
233
 * the first such subtag and all following subtags are ignored.
234
 * <p>
235
 * This implements the 'Language-Tag' production of BCP 47, and so
236
 * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
237
 * (regular and irregular) as well as private use language tags.
238
 *
239
 * Private use tags are represented as 'x-whatever',
240
 * and legacy tags are converted to their canonical replacements where they exist.
241
 *
242
 * Note that a few legacy tags have no modern replacement;
243
 * these will be converted using the fallback described in
244
 * the first paragraph, so some information might be lost.
245
 *
246
 * @param langtag   the input BCP47 language tag.
247
 * @param tagLen    the length of langtag, or -1 to call uprv_strlen().
248
 * @param sink      the output sink receiving a locale ID for the
249
 *                  specified BCP47 language tag.
250
 * @param parsedLength  if not NULL, successfully parsed length
251
 *                      for the input language tag is set.
252
 * @param err       error information if receiving the locale ID
253
 *                  failed.
254
 * @internal ICU 63
255
 */
256
U_EXPORT void
257
ulocimp_forLanguageTag(const char* langtag,
258
                       int32_t tagLen,
259
                       icu::ByteSink& sink,
260
                       int32_t* parsedLength,
261
                       UErrorCode& err);
262
263
/**
264
 * Get the region to use for supplemental data lookup. Uses
265
 * (1) any region specified by locale tag "rg"; if none then
266
 * (2) any unicode_region_tag in the locale ID; if none then
267
 * (3) if inferRegion is true, the region suggested by
268
 * getLikelySubtags on the localeID.
269
 * If no region is found, returns an empty string.
270
 *
271
 * @param localeID
272
 *     The complete locale ID (with keywords) from which
273
 *     to get the region to use for supplemental data.
274
 * @param inferRegion
275
 *     If true, will try to infer region from localeID if
276
 *     no other region is found.
277
 * @param status
278
 *     Reference to the in/out UErrorCode value for the latest status.
279
 * @return
280
 *     The region code found, empty if none found.
281
 * @internal ICU 57
282
 */
283
U_EXPORT icu::CharString
284
ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
285
                                     UErrorCode& status);
286
287
U_EXPORT icu::CharString
288
ulocimp_addLikelySubtags(const char* localeID,
289
                         UErrorCode& status);
290
291
/**
292
 * Add the likely subtags for a provided locale ID, per the algorithm described
293
 * in the following CLDR technical report:
294
 *
295
 *   http://www.unicode.org/reports/tr35/#Likely_Subtags
296
 *
297
 * If localeID is already in the maximal form, or there is no data available
298
 * for maximization, it will be copied to the output sink.  For example,
299
 * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
300
 *
301
 * Examples:
302
 *
303
 * "en" maximizes to "en_Latn_US"
304
 *
305
 * "de" maximizes to "de_Latn_DE"
306
 *
307
 * "sr" maximizes to "sr_Cyrl_RS"
308
 *
309
 * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
310
 *
311
 * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
312
 *
313
 * @param localeID The locale to maximize
314
 * @param sink The output sink receiving the maximized locale
315
 * @param err Error information if maximizing the locale failed.  If the length
316
 * of the localeID and the null-terminator is greater than the maximum allowed size,
317
 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
318
 * @internal ICU 64
319
 */
320
U_EXPORT void
321
ulocimp_addLikelySubtags(const char* localeID,
322
                         icu::ByteSink& sink,
323
                         UErrorCode& err);
324
325
U_EXPORT icu::CharString
326
ulocimp_minimizeSubtags(const char* localeID,
327
                        bool favorScript,
328
                        UErrorCode& status);
329
330
/**
331
 * Minimize the subtags for a provided locale ID, per the algorithm described
332
 * in the following CLDR technical report:
333
 *
334
 *   http://www.unicode.org/reports/tr35/#Likely_Subtags
335
 *
336
 * If localeID is already in the minimal form, or there is no data available
337
 * for minimization, it will be copied to the output sink.  Since the
338
 * minimization algorithm relies on proper maximization, see the comments
339
 * for ulocimp_addLikelySubtags for reasons why there might not be any data.
340
 *
341
 * Examples:
342
 *
343
 * "en_Latn_US" minimizes to "en"
344
 *
345
 * "de_Latn_DE" minimizes to "de"
346
 *
347
 * "sr_Cyrl_RS" minimizes to "sr"
348
 *
349
 * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
350
 * script, and minimizing to "zh" would imply "zh_Hans_CN".)
351
 *
352
 * @param localeID The locale to minimize
353
 * @param sink The output sink receiving the minimized locale
354
 * @param favorScript favor to keep script if true, region if false.
355
 * @param err Error information if minimizing the locale failed.  If the length
356
 * of the localeID and the null-terminator is greater than the maximum allowed size,
357
 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
358
 * @internal ICU 64
359
 */
360
U_EXPORT void
361
ulocimp_minimizeSubtags(const char* localeID,
362
                        icu::ByteSink& sink,
363
                        bool favorScript,
364
                        UErrorCode& err);
365
366
U_CAPI const char * U_EXPORT2
367
locale_getKeywordsStart(std::string_view localeID);
368
369
bool
370
ultag_isExtensionSubtags(const char* s, int32_t len);
371
372
bool
373
ultag_isLanguageSubtag(const char* s, int32_t len);
374
375
bool
376
ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
377
378
bool
379
ultag_isRegionSubtag(const char* s, int32_t len);
380
381
bool
382
ultag_isScriptSubtag(const char* s, int32_t len);
383
384
bool
385
ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
386
387
bool
388
ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
389
390
bool
391
ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
392
393
bool
394
ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
395
396
bool
397
ultag_isUnicodeLocaleKey(const char* s, int32_t len);
398
399
bool
400
ultag_isUnicodeLocaleType(const char* s, int32_t len);
401
402
bool
403
ultag_isVariantSubtags(const char* s, int32_t len);
404
405
const char*
406
ultag_getTKeyStart(const char* localeID);
407
408
U_EXPORT std::optional<std::string_view>
409
ulocimp_toBcpKey(std::string_view key);
410
411
U_EXPORT std::optional<std::string_view>
412
ulocimp_toLegacyKey(std::string_view key);
413
414
U_EXPORT std::optional<std::string_view>
415
ulocimp_toBcpType(std::string_view key, std::string_view type);
416
417
U_EXPORT std::optional<std::string_view>
418
ulocimp_toLegacyType(std::string_view key, std::string_view type);
419
420
/* Function for testing purpose */
421
U_EXPORT const char* const*
422
ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length);
423
424
// Return true if the value is already canonicalized.
425
U_EXPORT bool
426
ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
427
428
#ifdef __cplusplus
429
U_NAMESPACE_BEGIN
430
/**
 * Fixed-size table of 22 uint32_t words (704 bits) that is queried by
 * region code string.
 *
 * isSet() and equals() are the public queries; value() and the raw `map`
 * storage are protected, so only this class and its subclasses can read or
 * fill them.
 *
 * NOTE(review): the sizing comment on `map` (26x26) suggests one bit per
 * two-letter A-Z region code -- confirm against the implementation, which
 * is not visible in this header.
 */
class U_COMMON_API RegionValidateMap : public UObject {
431
 public:
432
  RegionValidateMap();
433
  virtual ~RegionValidateMap();
434
  // Queries the table for `region`; const, so it does not modify the map.
  bool isSet(const char* region) const;
435
  // Compares this table with `that`.
  bool equals(const RegionValidateMap& that) const;
436
 protected:
437
  // Maps a region string to an int32_t; presumably the bit index used by
  // isSet() -- TODO confirm in the implementation.
  int32_t value(const char* region) const;
438
  uint32_t map[22]; // 26 * 26 = 676 bits; ceil(676 / 32) = 22 words (704 bits)
439
};
440
U_NAMESPACE_END
441
#endif /* __cplusplus */
442
443
#endif