/src/icu/source/common/ulocimp.h
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ********************************************************************** |
5 | | * Copyright (C) 2004-2016, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ********************************************************************** |
8 | | */ |
9 | | |
10 | | #ifndef ULOCIMP_H |
11 | | #define ULOCIMP_H |
12 | | |
13 | | #include "unicode/bytestream.h" |
14 | | #include "unicode/uloc.h" |
15 | | |
16 | | #include "charstr.h" |
17 | | |
18 | | /** |
19 | | * Creates an enumeration (iterator) over the specified keyword list. |
20 | | * @param keywordList double-null terminated list. Will be copied. |
21 | | * @param keywordListSize size in bytes of keywordList |
22 | | * @param status error code |
23 | | * @return enumeration (owned by caller) of the keyword list. |
24 | | * @internal ICU 3.0 |
25 | | */ |
26 | | U_CAPI UEnumeration* U_EXPORT2 |
27 | | uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status); |
28 | | |
29 | | /** |
30 | | * Looks up a resource bundle table item, with fallback on the table level. |
31 | | * This is accessible so it can be called by C++ code. NOTE(review): presumably the length of the returned string is stored in pLength and failures are reported through pErrorCode; confirm against the implementation. |
32 | | */ |
33 | | U_CAPI const UChar * U_EXPORT2 |
34 | | uloc_getTableStringWithFallback( |
35 | | const char *path, |
36 | | const char *locale, |
37 | | const char *tableKey, |
38 | | const char *subTableKey, |
39 | | const char *itemKey, |
40 | | int32_t *pLength, |
41 | | UErrorCode *pErrorCode); |
42 | | |
/* Evaluates to true if a is a locale ID separator ('_' or '-'), false otherwise.
 * The argument is parenthesized in the expansion so that compound expressions
 * (for example a conditional expression) are compared as a whole.
 * Note: a may be evaluated twice, so it must not have side effects. */
#define _isIDSeparator(a) ((a) == '_' || (a) == '-')
45 | | /* NOTE(review): judging by the names, maps an old (presumably deprecated) country ID to its current form; the definition is not visible in this header -- confirm. */ |
46 | | U_CFUNC const char* |
47 | | uloc_getCurrentCountryID(const char* oldID); |
48 | | /* NOTE(review): language-ID counterpart of uloc_getCurrentCountryID, judging by the name; same caveat applies. */ |
49 | | U_CFUNC const char* |
50 | | uloc_getCurrentLanguageID(const char* oldID); |
51 | | /* Takes a locale ID, a ByteSink and an error-code pointer and returns nothing. NOTE(review): presumably writes the keywords of localeID to sink, with valuesToo selecting whether values accompany the keyword names; the meaning of prev is not evident from this declaration -- confirm. */ |
52 | | U_CFUNC void |
53 | | ulocimp_getKeywords(const char *localeID, |
54 | | char prev, |
55 | | icu::ByteSink& sink, |
56 | | UBool valuesToo, |
57 | | UErrorCode *status); |
58 | | /* Each of the next three functions returns an icu::CharString by value for the given localeID. NOTE(review): judging by the names these are the language, script and country subtags, and pEnd presumably receives the position where parsing stopped -- confirm against the implementation. */ |
59 | | icu::CharString U_EXPORT2 |
60 | | ulocimp_getLanguage(const char *localeID, |
61 | | const char **pEnd, |
62 | | UErrorCode &status); |
63 | | |
64 | | icu::CharString U_EXPORT2 |
65 | | ulocimp_getScript(const char *localeID, |
66 | | const char **pEnd, |
67 | | UErrorCode &status); |
68 | | |
69 | | icu::CharString U_EXPORT2 |
70 | | ulocimp_getCountry(const char *localeID, |
71 | | const char **pEnd, |
72 | | UErrorCode &status); |
73 | | /* The next four functions take an icu::ByteSink output parameter and return void, rather than filling a caller-supplied char buffer. NOTE(review): presumably they are the sink-based counterparts of uloc_getName, uloc_getBaseName, uloc_canonicalize and uloc_getKeywordValue -- confirm. */ |
74 | | U_CAPI void U_EXPORT2 |
75 | | ulocimp_getName(const char* localeID, |
76 | | icu::ByteSink& sink, |
77 | | UErrorCode* err); |
78 | | |
79 | | U_CAPI void U_EXPORT2 |
80 | | ulocimp_getBaseName(const char* localeID, |
81 | | icu::ByteSink& sink, |
82 | | UErrorCode* err); |
83 | | |
84 | | U_CAPI void U_EXPORT2 |
85 | | ulocimp_canonicalize(const char* localeID, |
86 | | icu::ByteSink& sink, |
87 | | UErrorCode* err); |
88 | | |
89 | | U_CAPI void U_EXPORT2 |
90 | | ulocimp_getKeywordValue(const char* localeID, |
91 | | const char* keywordName, |
92 | | icu::ByteSink& sink, |
93 | | UErrorCode* status); |
94 | | |
95 | | /** |
96 | | * Writes a well-formed language tag for this locale ID. |
97 | | * |
98 | | * **Note**: When `strict` is false, any locale fields which do not satisfy the |
99 | | * BCP47 syntax requirement will be omitted from the result. When `strict` is |
100 | | * true, this function sets `err` to U_ILLEGAL_ARGUMENT_ERROR if any locale |
101 | | * fields do not satisfy the BCP47 syntax requirement. |
102 | | * |
103 | | * @param localeID the input locale ID |
104 | | * @param sink the output sink receiving the BCP47 language |
105 | | * tag for this Locale. |
106 | | * @param strict boolean value indicating if the function reports |
107 | | * an error for an ill-formed input locale ID. |
108 | | * @param err error information if receiving the language |
109 | | * tag failed. |
110 | | * The function has no return value; the language tag is delivered through sink. |
111 | | * |
112 | | * @internal ICU 64 |
113 | | */ |
114 | | U_CAPI void U_EXPORT2 |
115 | | ulocimp_toLanguageTag(const char* localeID, |
116 | | icu::ByteSink& sink, |
117 | | UBool strict, |
118 | | UErrorCode* err); |
119 | | |
120 | | /** |
121 | | * Returns a locale ID for the specified BCP47 language tag string. |
122 | | * If the specified language tag contains any ill-formed subtags, |
123 | | * the first such subtag and all following subtags are ignored. |
124 | | * <p> |
125 | | * This implements the 'Language-Tag' production of BCP 47, and so |
126 | | * supports legacy language tags (marked as “Type: grandfathered” in BCP 47) |
127 | | * (regular and irregular) as well as private use language tags. |
128 | | * |
129 | | * Private use tags are represented as 'x-whatever', |
130 | | * and legacy tags are converted to their canonical replacements where they exist. |
131 | | * |
132 | | * Note that a few legacy tags have no modern replacement; |
133 | | * these will be converted using the fallback described in |
134 | | * the first paragraph, so some information might be lost. |
135 | | * |
136 | | * @param langtag the input BCP47 language tag. |
137 | | * @param tagLen the length of langtag, or -1 to call uprv_strlen(). |
138 | | * @param sink the output sink receiving a locale ID for the |
139 | | * specified BCP47 language tag. |
140 | | * @param parsedLength if not NULL, successfully parsed length |
141 | | * for the input language tag is set. |
142 | | * @param err error information if receiving the locale ID |
143 | | * failed. |
144 | | * @internal ICU 63 |
145 | | */ |
146 | | U_CAPI void U_EXPORT2 |
147 | | ulocimp_forLanguageTag(const char* langtag, |
148 | | int32_t tagLen, |
149 | | icu::ByteSink& sink, |
150 | | int32_t* parsedLength, |
151 | | UErrorCode* err); |
152 | | |
153 | | /** |
154 | | * Gets the region to use for supplemental data lookup. Uses, in this order: |
155 | | * (1) any region specified by locale tag "rg"; if none then |
156 | | * (2) any unicode_region_tag in the locale ID; if none then |
157 | | * (3) if inferRegion is true, the region suggested by |
158 | | * getLikelySubtags on the localeID. |
159 | | * If no region is found, returns length 0. |
160 | | * |
161 | | * @param localeID |
162 | | * The complete locale ID (with keywords) from which |
163 | | * to get the region to use for supplemental data. |
164 | | * @param inferRegion |
165 | | * If true, will try to infer region from localeID if |
166 | | * no other region is found. |
167 | | * @param region |
168 | | * Buffer in which to put the region ID found; should |
169 | | * have a capacity of at least ULOC_COUNTRY_CAPACITY. |
170 | | * @param regionCapacity |
171 | | * The actual capacity of the region buffer. |
172 | | * @param status |
173 | | * Pointer to in/out UErrorCode value for latest status. |
174 | | * @return |
175 | | * The length of any region code found, or 0 if none. |
176 | | * @internal ICU 57 |
177 | | */ |
178 | | U_CAPI int32_t U_EXPORT2 |
179 | | ulocimp_getRegionForSupplementalData(const char *localeID, UBool inferRegion, |
180 | | char *region, int32_t regionCapacity, UErrorCode* status); |
181 | | |
182 | | /** |
183 | | * Add the likely subtags for a provided locale ID, per the algorithm described |
184 | | * in the following CLDR technical report: |
185 | | * |
186 | | * http://www.unicode.org/reports/tr35/#Likely_Subtags |
187 | | * |
188 | | * If localeID is already in the maximal form, or there is no data available |
189 | | * for maximization, it will be copied to the output sink. For example, |
190 | | * "und-Zzzz" cannot be maximized, since there is no reasonable maximization. |
191 | | * |
192 | | * Examples: |
193 | | * |
194 | | * "en" maximizes to "en_Latn_US" |
195 | | * |
196 | | * "de" maximizes to "de_Latn_DE" |
197 | | * |
198 | | * "sr" maximizes to "sr_Cyrl_RS" |
199 | | * |
200 | | * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) |
201 | | * |
202 | | * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) |
203 | | * |
204 | | * @param localeID The locale to maximize |
205 | | * @param sink The output sink receiving the maximized locale |
206 | | * @param err Error information if maximizing the locale failed. If the length |
207 | | * of the localeID and the null-terminator is greater than the maximum allowed size, |
208 | | * or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. |
209 | | * @internal ICU 64 |
210 | | */ |
211 | | U_CAPI void U_EXPORT2 |
212 | | ulocimp_addLikelySubtags(const char* localeID, |
213 | | icu::ByteSink& sink, |
214 | | UErrorCode* err); |
215 | | |
216 | | /** |
217 | | * Minimize the subtags for a provided locale ID, per the algorithm described |
218 | | * in the following CLDR technical report: |
219 | | * |
220 | | * http://www.unicode.org/reports/tr35/#Likely_Subtags |
221 | | * |
222 | | * If localeID is already in the minimal form, or there is no data available |
223 | | * for minimization, it will be copied to the output sink. Since the |
224 | | * minimization algorithm relies on proper maximization, see the comments |
225 | | * for ulocimp_addLikelySubtags for reasons why there might not be any data. |
226 | | * |
227 | | * Examples: |
228 | | * |
229 | | * "en_Latn_US" minimizes to "en" |
230 | | * |
231 | | * "de_Latn_DE" minimizes to "de" |
232 | | * |
233 | | * "sr_Cyrl_RS" minimizes to "sr" |
234 | | * |
235 | | * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the |
236 | | * script, and minimizing to "zh" would imply "zh_Hans_CN".) |
237 | | * |
238 | | * @param localeID The locale to minimize |
239 | | * @param sink The output sink receiving the minimized locale |
240 | | * @param err Error information if minimizing the locale failed. If the length |
241 | | * of the localeID and the null-terminator is greater than the maximum allowed size, |
242 | | * or the localeID is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. |
243 | | * @internal ICU 64 |
244 | | */ |
245 | | U_CAPI void U_EXPORT2 |
246 | | ulocimp_minimizeSubtags(const char* localeID, |
247 | | icu::ByteSink& sink, |
248 | | UErrorCode* err); |
249 | | /* NOTE(review): judging by the name, returns a pointer to where the keyword section of localeID starts; the not-found result is not evident from this declaration -- confirm. */ |
250 | | U_CAPI const char * U_EXPORT2 |
251 | | locale_getKeywordsStart(const char *localeID); |
252 | | /* The ultag_is* predicates below each take a string s with an explicit length len and return a UBool. NOTE(review): judging by their names, each tests s against the syntax of the named BCP 47 / Unicode-extension subtag(s) -- confirm details in the implementation. */ |
253 | | U_CFUNC UBool |
254 | | ultag_isExtensionSubtags(const char* s, int32_t len); |
255 | | |
256 | | U_CFUNC UBool |
257 | | ultag_isLanguageSubtag(const char* s, int32_t len); |
258 | | |
259 | | U_CFUNC UBool |
260 | | ultag_isPrivateuseValueSubtags(const char* s, int32_t len); |
261 | | |
262 | | U_CFUNC UBool |
263 | | ultag_isRegionSubtag(const char* s, int32_t len); |
264 | | |
265 | | U_CFUNC UBool |
266 | | ultag_isScriptSubtag(const char* s, int32_t len); |
267 | | |
268 | | U_CFUNC UBool |
269 | | ultag_isTransformedExtensionSubtags(const char* s, int32_t len); |
270 | | |
271 | | U_CFUNC UBool |
272 | | ultag_isUnicodeExtensionSubtags(const char* s, int32_t len); |
273 | | |
274 | | U_CFUNC UBool |
275 | | ultag_isUnicodeLocaleAttribute(const char* s, int32_t len); |
276 | | |
277 | | U_CFUNC UBool |
278 | | ultag_isUnicodeLocaleAttributes(const char* s, int32_t len); |
279 | | |
280 | | U_CFUNC UBool |
281 | | ultag_isUnicodeLocaleKey(const char* s, int32_t len); |
282 | | |
283 | | U_CFUNC UBool |
284 | | ultag_isUnicodeLocaleType(const char* s, int32_t len); |
285 | | |
286 | | U_CFUNC UBool |
287 | | ultag_isVariantSubtags(const char* s, int32_t len); |
288 | | /* NOTE(review): judging by the name, returns a pointer into localeID where a transformed-extension key (tkey) begins -- confirm, including the result when none is present. */ |
289 | | U_CAPI const char * U_EXPORT2 |
290 | | ultag_getTKeyStart(const char *localeID); |
291 | | /* The four ulocimp_to* functions below each return a const char* for a keyword key or for a (key, type) pair. NOTE(review): presumably they convert between legacy and BCP 47 spellings, and isKnownKey / isSpecialType look like out-flags -- confirm whether those pointers may be null. */ |
292 | | U_CFUNC const char* |
293 | | ulocimp_toBcpKey(const char* key); |
294 | | |
295 | | U_CFUNC const char* |
296 | | ulocimp_toLegacyKey(const char* key); |
297 | | |
298 | | U_CFUNC const char* |
299 | | ulocimp_toBcpType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType); |
300 | | |
301 | | U_CFUNC const char* |
302 | | ulocimp_toLegacyType(const char* key, const char* type, UBool* isKnownKey, UBool* isSpecialType); |
303 | | |
304 | | /* Function for testing purposes. NOTE(review): presumably returns an array of known canonicalized locale names and stores its element count through the length pointer -- confirm. */ |
305 | | U_CAPI const char* const* ulocimp_getKnownCanonicalizedLocaleForTest(int32_t* length); |
306 | | |
307 | | // Returns true if the value (localeName) is already canonicalized. Intended for tests, as the ForTest suffix indicates. |
308 | | U_CAPI bool ulocimp_isCanonicalizedLocaleForTest(const char* localeName); |
309 | | |
310 | | /** |
311 | | * A utility class for handling locale IDs that may be longer than ULOC_FULLNAME_CAPACITY. |
312 | | * This encompasses all of the logic to allocate a temporary locale ID buffer on the stack, |
313 | | * and then, if it's not big enough, reallocate it on the heap and try again. |
314 | | * |
315 | | * You use it like this: |
316 | | * UErrorCode err = U_ZERO_ERROR; |
317 | | * |
318 | | * PreflightingLocaleIDBuffer tempBuffer; |
319 | | * do { |
320 | | * tempBuffer.requestedCapacity = uloc_doSomething(localeID, tempBuffer.getBuffer(), tempBuffer.getCapacity(), &err); |
321 | | * } while (tempBuffer.needToTryAgain(&err)); |
322 | | * if (U_SUCCESS(err)) { |
323 | | * uloc_doSomethingWithTheResult(tempBuffer.getBuffer()); |
324 | | * } |
325 | | */ |
326 | | class PreflightingLocaleIDBuffer { |
327 | | private: |
328 | | char stackBuffer[ULOC_FULLNAME_CAPACITY]; |
329 | | char* heapBuffer = nullptr; |
330 | | int32_t capacity = ULOC_FULLNAME_CAPACITY; |
331 | | |
332 | | public: |
333 | | int32_t requestedCapacity = ULOC_FULLNAME_CAPACITY; |
334 | | |
335 | | // No heap allocation. Use only on the stack. |
336 | | static void* U_EXPORT2 operator new(size_t) U_NOEXCEPT = delete; |
337 | | static void* U_EXPORT2 operator new[](size_t) U_NOEXCEPT = delete; |
338 | | #if U_HAVE_PLACEMENT_NEW |
339 | | static void* U_EXPORT2 operator new(size_t, void*) U_NOEXCEPT = delete; |
340 | | #endif |
341 | | |
342 | 0 | PreflightingLocaleIDBuffer() {} |
343 | | |
344 | 0 | ~PreflightingLocaleIDBuffer() { uprv_free(heapBuffer); } |
345 | | |
346 | 0 | char* getBuffer() { |
347 | 0 | if (heapBuffer == nullptr) { |
348 | 0 | return stackBuffer; |
349 | 0 | } else { |
350 | 0 | return heapBuffer; |
351 | 0 | } |
352 | 0 | } |
353 | | |
354 | 0 | int32_t getCapacity() { |
355 | 0 | return capacity; |
356 | 0 | } |
357 | | |
358 | 0 | bool needToTryAgain(UErrorCode* err) { |
359 | 0 | if (heapBuffer != nullptr) { |
360 | 0 | return false; |
361 | 0 | } |
362 | | |
363 | 0 | if (*err == U_BUFFER_OVERFLOW_ERROR || *err == U_STRING_NOT_TERMINATED_WARNING) { |
364 | 0 | int32_t newCapacity = requestedCapacity + 2; // one for the terminating null, one just for paranoia |
365 | 0 | heapBuffer = static_cast<char*>(uprv_malloc(newCapacity)); |
366 | 0 | if (heapBuffer == nullptr) { |
367 | 0 | *err = U_MEMORY_ALLOCATION_ERROR; |
368 | 0 | } else { |
369 | 0 | *err = U_ZERO_ERROR; |
370 | 0 | capacity = newCapacity; |
371 | 0 | } |
372 | 0 | return U_SUCCESS(*err); |
373 | 0 | } |
374 | 0 | return false; |
375 | 0 | } |
376 | | }; |
377 | | |
378 | | #endif |