Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/ulocimp.h
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (C) 2004-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*/
9
10
#ifndef ULOCIMP_H
11
#define ULOCIMP_H
12
13
#include "unicode/bytestream.h"
14
#include "unicode/uloc.h"
15
16
#include "charstr.h"
17
18
/**
19
 * Create an iterator over the specified keywords list
20
 * @param keywordList double-null terminated list. Will be copied.
21
 * @param keywordListSize size in bytes of keywordList
22
 * @param status err code
23
 * @return enumeration (owned by caller) of the keyword list.
24
 * @internal ICU 3.0
25
 */
26
U_CAPI UEnumeration* U_EXPORT2
27
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
28
29
/**
30
 * Look up a resource bundle table item with fallback on the table level.
31
 * This is accessible so it can be called by C++ code.
32
 */
33
U_CAPI const UChar * U_EXPORT2
34
uloc_getTableStringWithFallback(
35
    const char *path,
36
    const char *locale,
37
    const char *tableKey,
38
    const char *subTableKey,
39
    const char *itemKey,
40
    int32_t *pLength,
41
    UErrorCode *pErrorCode);
42
43
/* Returns true if a is an ID separator ('_' or '-'), false otherwise. */
44
0
/*
 * Evaluates to true when a is '_' or '-', false otherwise.
 * The argument is parenthesized so that expressions containing
 * lower-precedence operators (e.g. a conditional expression) expand correctly.
 * Note: a may be evaluated twice, so avoid arguments with side effects.
 */
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')
45
46
U_CFUNC const char* 
47
uloc_getCurrentCountryID(const char* oldID);
48
49
U_CFUNC const char* 
50
uloc_getCurrentLanguageID(const char* oldID);
51
52
U_CFUNC void
53
ulocimp_getKeywords(const char *localeID,
54
             char prev,
55
             icu::ByteSink& sink,
56
             UBool valuesToo,
57
             UErrorCode *status);
58
59
icu::CharString U_EXPORT2
60
ulocimp_getLanguage(const char *localeID,
61
                    const char **pEnd,
62
                    UErrorCode &status);
63
64
icu::CharString U_EXPORT2
65
ulocimp_getScript(const char *localeID,
66
                  const char **pEnd,
67
                  UErrorCode &status);
68
69
icu::CharString U_EXPORT2
70
ulocimp_getCountry(const char *localeID,
71
                   const char **pEnd,
72
                   UErrorCode &status);
73
74
U_CAPI void U_EXPORT2
75
ulocimp_getName(const char* localeID,
76
                icu::ByteSink& sink,
77
                UErrorCode* err);
78
79
U_CAPI void U_EXPORT2
80
ulocimp_getBaseName(const char* localeID,
81
                    icu::ByteSink& sink,
82
                    UErrorCode* err);
83
84
U_CAPI void U_EXPORT2
85
ulocimp_canonicalize(const char* localeID,
86
                     icu::ByteSink& sink,
87
                     UErrorCode* err);
88
89
U_CAPI void U_EXPORT2
90
ulocimp_getKeywordValue(const char* localeID,
91
                        const char* keywordName,
92
                        icu::ByteSink& sink,
93
                        UErrorCode* status);
94
95
/**
96
 * Writes a well-formed language tag for this locale ID.
97
 *
98
 * **Note**: When `strict` is false, any locale fields which do not satisfy the
99
 * BCP47 syntax requirement will be omitted from the result.  When `strict` is
100
 * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale
101
 * fields do not satisfy the BCP47 syntax requirement.
102
 *
103
 * @param localeID  the input locale ID
104
 * @param sink      the output sink receiving the BCP47 language
105
 *                  tag for this Locale.
106
 * @param strict    boolean value indicating if the function returns
107
 *                  an error for an ill-formed input locale ID.
108
 * @param err       error information if receiving the language
109
 *                  tag failed.
110
 * @return          nothing; the BCP47 language tag is written to sink.
111
 *
112
 * @internal ICU 64
113
 */
114
U_CAPI void U_EXPORT2
115
ulocimp_toLanguageTag(const char* localeID,
116
                      icu::ByteSink& sink,
117
                      UBool strict,
118
                      UErrorCode* err);
119
120
/**
121
 * Returns a locale ID for the specified BCP47 language tag string.
122
 * If the specified language tag contains any ill-formed subtags,
123
 * the first such subtag and all following subtags are ignored.
124
 * <p>
125
 * This implements the 'Language-Tag' production of BCP 47, and so
126
 * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
127
 * (regular and irregular) as well as private use language tags.
128
 *
129
 * Private use tags are represented as 'x-whatever',
130
 * and legacy tags are converted to their canonical replacements where they exist.
131
 *
132
 * Note that a few legacy tags have no modern replacement;
133
 * these will be converted using the fallback described in
134
 * the first paragraph, so some information might be lost.
135
 *
136
 * @param langtag   the input BCP47 language tag.
137
 * @param tagLen    the length of langtag, or -1 to call uprv_strlen().
138
 * @param sink      the output sink receiving a locale ID for the
139
 *                  specified BCP47 language tag.
140
 * @param parsedLength  if not NULL, successfully parsed length
141
 *                      for the input language tag is set.
142
 * @param err       error information if receiving the locale ID
143
 *                  failed.
144
 * @internal ICU 63
145
 */
146
U_CAPI void U_EXPORT2
147
ulocimp_forLanguageTag(const char* langtag,
148
                       int32_t tagLen,
149
                       icu::ByteSink& sink,
150
                       int32_t* parsedLength,
151
                       UErrorCode* err);
152
153
/**
154
 * Get the region to use for supplemental data lookup. Uses
155
 * (1) any region specified by locale tag "rg"; if none then
156
 * (2) any unicode_region_tag in the locale ID; if none then
157
 * (3) if inferRegion is true, the region suggested by
158
 * getLikelySubtags on the localeID.
159
 * If no region is found, returns length 0.
160
 * 
161
 * @param localeID
162
 *     The complete locale ID (with keywords) from which
163
 *     to get the region to use for supplemental data.
164
 * @param inferRegion
165
 *     If true, will try to infer region from localeID if
166
 *     no other region is found.
167
 * @param region
168
 *     Buffer in which to put the region ID found; should
169
 *     have a capacity at least ULOC_COUNTRY_CAPACITY. 
170
 * @param regionCapacity
171
 *     The actual capacity of the region buffer.
172
 * @param status
173
 *     Pointer to in/out UErrorCode value for latest status.
174
 * @return
175
 *     The length of any region code found, or 0 if none.
176
 * @internal ICU 57
177
 */
178
U_CAPI int32_t U_EXPORT2
179
ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion,
180
                                     char *region, int32_t regionCapacity, UErrorCode* status);
181
182
/**
183
 * Add the likely subtags for a provided locale ID, per the algorithm described
184
 * in the following CLDR technical report:
185
 *
186
 *   http://www.unicode.org/reports/tr35/#Likely_Subtags
187
 *
188
 * If localeID is already in the maximal form, or there is no data available
189
 * for maximization, it will be copied to the output buffer.  For example,
190
 * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
191
 *
192
 * Examples:
193
 *
194
 * "en" maximizes to "en_Latn_US"
195
 *
196
 * "de" maximizes to "de_Latn_DE"
197
 *
198
 * "sr" maximizes to "sr_Cyrl_RS"
199
 *
200
 * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
201
 *
202
 * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
203
 *
204
 * @param localeID The locale to maximize
205
 * @param sink The output sink receiving the maximized locale
206
 * @param err Error information if maximizing the locale failed.  If the length
207
 * of the localeID and the null-terminator is greater than the maximum allowed size,
208
 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
209
 * @internal ICU 64
210
 */
211
U_CAPI void U_EXPORT2
212
ulocimp_addLikelySubtags(const char* localeID,
213
                         icu::ByteSink& sink,
214
                         UErrorCode* err);
215
216
/**
217
 * Minimize the subtags for a provided locale ID, per the algorithm described
218
 * in the following CLDR technical report:
219
 *
220
 *   http://www.unicode.org/reports/tr35/#Likely_Subtags
221
 *
222
 * If localeID is already in the minimal form, or there is no data available
223
 * for minimization, it will be copied to the output buffer.  Since the
224
 * minimization algorithm relies on proper maximization, see the comments
225
 * for ulocimp_addLikelySubtags for reasons why there might not be any data.
226
 *
227
 * Examples:
228
 *
229
 * "en_Latn_US" minimizes to "en"
230
 *
231
 * "de_Latn_DE" minimizes to "de"
232
 *
233
 * "sr_Cyrl_RS" minimizes to "sr"
234
 *
235
 * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
236
 * script, and minimizing to "zh" would imply "zh_Hans_CN".)
237
 *
238
 * @param localeID The locale to minimize
239
 * @param sink The output sink receiving the minimized locale
240
 * @param err Error information if minimizing the locale failed.  If the length
241
 * of the localeID and the null-terminator is greater than the maximum allowed size,
242
 * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
243
 * @internal ICU 64
244
 */
245
U_CAPI void U_EXPORT2
246
ulocimp_minimizeSubtags(const char* localeID,
247
                        icu::ByteSink& sink,
248
                        UErrorCode* err);
249
250
U_CAPI const char * U_EXPORT2
251
locale_getKeywordsStart(const char *localeID);
252
253
U_CFUNC UBool
254
ultag_isExtensionSubtags(const char* s, int32_t len);
255
256
U_CFUNC UBool
257
ultag_isLanguageSubtag(const char* s, int32_t len);
258
259
U_CFUNC UBool
260
ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
261
262
U_CFUNC UBool
263
ultag_isRegionSubtag(const char* s, int32_t len);
264
265
U_CFUNC UBool
266
ultag_isScriptSubtag(const char* s, int32_t len);
267
268
U_CFUNC UBool
269
ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
270
271
U_CFUNC UBool
272
ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
273
274
U_CFUNC UBool
275
ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
276
277
U_CFUNC UBool
278
ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
279
280
U_CFUNC UBool
281
ultag_isUnicodeLocaleKey(const char* s, int32_t len);
282
283
U_CFUNC UBool
284
ultag_isUnicodeLocaleType(const char* s, int32_t len);
285
286
U_CFUNC UBool
287
ultag_isVariantSubtags(const char* s, int32_t len);
288
289
U_CAPI const char * U_EXPORT2
290
ultag_getTKeyStart(const char *localeID);
291
292
U_CFUNC const char*
293
ulocimp_toBcpKey(const char* key);
294
295
U_CFUNC const char*
296
ulocimp_toLegacyKey(const char* key);
297
298
U_CFUNC const char*
299
ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
300
301
U_CFUNC const char*
302
ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType);
303
304
/* Function for testing purpose */
305
U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length);
306
307
// Return true if the value is already canonicalized.
308
U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
309
310
/**
311
 * A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY.
312
 * This encompasses all of the logic to allocate a temporary locale ID buffer on the stack,
313
 * and then, if it's not big enough, reallocate it on the heap and try again.
314
 *
315
 * You use it like this:
316
 * UErrorCode err = U_ZERO_ERROR;
317
 *
318
 * PreflightingLocaleIDBuffer tempBuffer;
319
 * do {
320
 *     tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err);
321
 * } while (tempBuffer.needToTryAgain(&err));
322
 * if (U_SUCCESS(err)) {
323
 *     uloc_doSomethingWithTheResult(tempBuffer.getBuffer());
324
 * }
325
 */
326
class PreflightingLocaleIDBuffer {
327
private:
328
    char stackBuffer[ULOC_FULLNAME_CAPACITY];
329
    char* heapBuffer = nullptr;
330
    int32_t capacity = ULOC_FULLNAME_CAPACITY;
331
    
332
public:
333
    int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY;
334
335
    // No heap allocation. Use only on the stack.
336
    static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete;
337
    static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete;
338
#if U_HAVE_PLACEMENT_NEW
339
    static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete;
340
#endif
341
342
0
    PreflightingLocaleIDBuffer() {}
343
    
344
0
    ~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); }
345
    
346
0
    char* getBuffer() {
347
0
        if (heapBuffer == nullptr) {
348
0
            return stackBuffer;
349
0
        } else {
350
0
            return heapBuffer;
351
0
        }
352
0
    }
353
    
354
0
    int32_t getCapacity() {
355
0
        return capacity;
356
0
    }
357
    
358
0
    bool needToTryAgain(UErrorCode* err) {
359
0
        if (heapBuffer != nullptr) {
360
0
            return false;
361
0
        }
362
    
363
0
        if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) {
364
0
            int32_t newCapacity = requestedCapacity + 2;    // one for the terminating null, one just for paranoia
365
0
            heapBuffer = static_cast<char*>(uprv_malloc(newCapacity));
366
0
            if (heapBuffer == nullptr) {
367
0
                *err = U_MEMORY_ALLOCATION_ERROR;
368
0
            } else {
369
0
                *err = U_ZERO_ERROR;
370
0
                capacity = newCapacity;
371
0
            }
372
0
            return U_SUCCESS(*err);
373
0
        }
374
0
        return false;
375
0
    }
376
};
377
378
#endif