/src/icu/icu4c/source/common/ulocimp.h
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ********************************************************************** |
5 | | * Copyright (C) 2004-2016, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ********************************************************************** |
8 | | */ |
9 | | |
10 | | #ifndef ULOCIMP_H |
11 | | #define ULOCIMP_H |
12 | | |
13 | | #include <cstddef> |
14 | | #include <optional> |
15 | | #include <string_view> |
16 | | |
17 | | #include "unicode/bytestream.h" |
18 | | #include "unicode/uloc.h" |
19 | | |
20 | | #include "charstr.h" |
21 | | |
22 | | /** |
23 | | * Create an iterator over the specified keywords list |
24 | | * @param keywordList double-null terminated list. Will be copied. |
25 | | * @param keywordListSize size in bytes of keywordList |
26 | | * @param status err code |
27 | | * @return enumeration (owned by caller) of the keyword list. |
28 | | * @internal ICU 3.0 |
29 | | */ |
30 | | U_CAPI UEnumeration* U_EXPORT2 |
31 | | uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status); |
32 | | |
33 | | /** |
34 | | * Look up a resource bundle table item with fallback on the table level. |
35 | | * This is accessible so it can be called by C++ code. |
36 | | */ |
37 | | U_CAPI const UChar * U_EXPORT2 |
38 | | uloc_getTableStringWithFallback( |
39 | | const char *path, |
40 | | const char *locale, |
41 | | const char *tableKey, |
42 | | const char *subTableKey, |
43 | | const char *itemKey, |
44 | | int32_t *pLength, |
45 | | UErrorCode *pErrorCode); |
46 | | |
namespace {
/**
 * Tells whether a character is a locale ID separator.
 *
 * @param a the character to test
 * @return true when a is an underscore ('_') or a hyphen-minus ('-'),
 *         false for every other character
 */
inline bool _isIDSeparator(char a) {
    switch (a) {
    case '_':
    case '-':
        return true;
    default:
        return false;
    }
}
}  // namespace
51 | | |
52 | | U_CFUNC const char* |
53 | | uloc_getCurrentCountryID(const char* oldID); |
54 | | |
55 | | U_CFUNC const char* |
56 | | uloc_getCurrentLanguageID(const char* oldID); |
57 | | |
58 | | U_EXPORT std::optional<std::string_view> |
59 | | ulocimp_toBcpKeyWithFallback(std::string_view keyword); |
60 | | |
61 | | U_EXPORT std::optional<std::string_view> |
62 | | ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value); |
63 | | |
64 | | U_EXPORT std::optional<std::string_view> |
65 | | ulocimp_toLegacyKeyWithFallback(std::string_view keyword); |
66 | | |
67 | | U_EXPORT std::optional<std::string_view> |
68 | | ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value); |
69 | | |
70 | | U_EXPORT icu::CharString |
71 | | ulocimp_getKeywords(std::string_view localeID, |
72 | | char prev, |
73 | | bool valuesToo, |
74 | | UErrorCode& status); |
75 | | |
76 | | U_EXPORT void |
77 | | ulocimp_getKeywords(std::string_view localeID, |
78 | | char prev, |
79 | | icu::ByteSink& sink, |
80 | | bool valuesToo, |
81 | | UErrorCode& status); |
82 | | |
83 | | U_EXPORT icu::CharString |
84 | | ulocimp_getName(std::string_view localeID, |
85 | | UErrorCode& err); |
86 | | |
87 | | U_EXPORT void |
88 | | ulocimp_getName(std::string_view localeID, |
89 | | icu::ByteSink& sink, |
90 | | UErrorCode& err); |
91 | | |
92 | | U_EXPORT icu::CharString |
93 | | ulocimp_getBaseName(std::string_view localeID, |
94 | | UErrorCode& err); |
95 | | |
96 | | U_EXPORT void |
97 | | ulocimp_getBaseName(std::string_view localeID, |
98 | | icu::ByteSink& sink, |
99 | | UErrorCode& err); |
100 | | |
101 | | U_EXPORT icu::CharString |
102 | | ulocimp_canonicalize(std::string_view localeID, |
103 | | UErrorCode& err); |
104 | | |
105 | | U_EXPORT void |
106 | | ulocimp_canonicalize(std::string_view localeID, |
107 | | icu::ByteSink& sink, |
108 | | UErrorCode& err); |
109 | | |
110 | | U_EXPORT icu::CharString |
111 | | ulocimp_getKeywordValue(const char* localeID, |
112 | | std::string_view keywordName, |
113 | | UErrorCode& status); |
114 | | |
115 | | U_EXPORT void |
116 | | ulocimp_getKeywordValue(const char* localeID, |
117 | | std::string_view keywordName, |
118 | | icu::ByteSink& sink, |
119 | | UErrorCode& status); |
120 | | |
121 | | U_EXPORT icu::CharString |
122 | | ulocimp_getLanguage(std::string_view localeID, UErrorCode& status); |
123 | | |
124 | | U_EXPORT icu::CharString |
125 | | ulocimp_getScript(std::string_view localeID, UErrorCode& status); |
126 | | |
127 | | U_EXPORT icu::CharString |
128 | | ulocimp_getRegion(std::string_view localeID, UErrorCode& status); |
129 | | |
130 | | U_EXPORT icu::CharString |
131 | | ulocimp_getVariant(std::string_view localeID, UErrorCode& status); |
132 | | |
133 | | U_EXPORT void |
134 | | ulocimp_setKeywordValue(std::string_view keywordName, |
135 | | std::string_view keywordValue, |
136 | | icu::CharString& localeID, |
137 | | UErrorCode& status); |
138 | | |
139 | | U_EXPORT int32_t |
140 | | ulocimp_setKeywordValue(std::string_view keywords, |
141 | | std::string_view keywordName, |
142 | | std::string_view keywordValue, |
143 | | icu::ByteSink& sink, |
144 | | UErrorCode& status); |
145 | | |
146 | | U_EXPORT void |
147 | | ulocimp_getSubtags( |
148 | | std::string_view localeID, |
149 | | icu::CharString* language, |
150 | | icu::CharString* script, |
151 | | icu::CharString* region, |
152 | | icu::CharString* variant, |
153 | | const char** pEnd, |
154 | | UErrorCode& status); |
155 | | |
156 | | U_EXPORT void |
157 | | ulocimp_getSubtags( |
158 | | std::string_view localeID, |
159 | | icu::ByteSink* language, |
160 | | icu::ByteSink* script, |
161 | | icu::ByteSink* region, |
162 | | icu::ByteSink* variant, |
163 | | const char** pEnd, |
164 | | UErrorCode& status); |
165 | | |
166 | | inline void |
167 | | ulocimp_getSubtags( |
168 | | std::string_view localeID, |
169 | | std::nullptr_t, |
170 | | std::nullptr_t, |
171 | | std::nullptr_t, |
172 | | std::nullptr_t, |
173 | | const char** pEnd, |
174 | 0 | UErrorCode& status) { |
175 | 0 | ulocimp_getSubtags( |
176 | 0 | localeID, |
177 | 0 | static_cast<icu::ByteSink*>(nullptr), |
178 | 0 | static_cast<icu::ByteSink*>(nullptr), |
179 | 0 | static_cast<icu::ByteSink*>(nullptr), |
180 | 0 | static_cast<icu::ByteSink*>(nullptr), |
181 | 0 | pEnd, |
182 | 0 | status); |
183 | 0 | } |
184 | | |
185 | | U_EXPORT icu::CharString |
186 | | ulocimp_getParent(const char* localeID, |
187 | | UErrorCode& err); |
188 | | |
189 | | U_EXPORT void |
190 | | ulocimp_getParent(const char* localeID, |
191 | | icu::ByteSink& sink, |
192 | | UErrorCode& err); |
193 | | |
194 | | U_EXPORT icu::CharString |
195 | | ulocimp_toLanguageTag(const char* localeID, |
196 | | bool strict, |
197 | | UErrorCode& status); |
198 | | |
199 | | /** |
200 | | * Writes a well-formed language tag for this locale ID. |
201 | | * |
202 | | * **Note**: When `strict` is false, any locale fields which do not satisfy the |
203 | | * BCP47 syntax requirement will be omitted from the result. When `strict` is |
204 | | * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the `err` if any locale |
205 | | * fields do not satisfy the BCP47 syntax requirement. |
206 | | * |
207 | | * @param localeID the input locale ID |
208 | | * @param sink the output sink receiving the BCP47 language |
209 | | * tag for this Locale. |
210 | | * @param strict boolean value indicating if the function returns |
211 | | * an error for an ill-formed input locale ID. |
212 | | * @param err error information if receiving the language |
213 | | * tag failed. |
214 | | * (No value is returned; the BCP47 language tag is written to `sink`.) |
215 | | * |
216 | | * @internal ICU 64 |
217 | | */ |
218 | | U_EXPORT void |
219 | | ulocimp_toLanguageTag(const char* localeID, |
220 | | icu::ByteSink& sink, |
221 | | bool strict, |
222 | | UErrorCode& err); |
223 | | |
224 | | U_EXPORT icu::CharString |
225 | | ulocimp_forLanguageTag(const char* langtag, |
226 | | int32_t tagLen, |
227 | | int32_t* parsedLength, |
228 | | UErrorCode& status); |
229 | | |
230 | | /** |
231 | | * Returns a locale ID for the specified BCP47 language tag string. |
232 | | * If the specified language tag contains any ill-formed subtags, |
233 | | * the first such subtag and all following subtags are ignored. |
234 | | * <p> |
235 | | * This implements the 'Language-Tag' production of BCP 47, and so |
236 | | * supports legacy language tags (marked as “Type: grandfathered” in BCP 47) |
237 | | * (regular and irregular) as well as private use language tags. |
238 | | * |
239 | | * Private use tags are represented as 'x-whatever', |
240 | | * and legacy tags are converted to their canonical replacements where they exist. |
241 | | * |
242 | | * Note that a few legacy tags have no modern replacement; |
243 | | * these will be converted using the fallback described in |
244 | | * the first paragraph, so some information might be lost. |
245 | | * |
246 | | * @param langtag the input BCP47 language tag. |
247 | | * @param tagLen the length of langtag, or -1 to call uprv_strlen(). |
248 | | * @param sink the output sink receiving a locale ID for the |
249 | | * specified BCP47 language tag. |
250 | | * @param parsedLength if not NULL, successfully parsed length |
251 | | * for the input language tag is set. |
252 | | * @param err error information if receiving the locale ID |
253 | | * failed. |
254 | | * @internal ICU 63 |
255 | | */ |
256 | | U_EXPORT void |
257 | | ulocimp_forLanguageTag(const char* langtag, |
258 | | int32_t tagLen, |
259 | | icu::ByteSink& sink, |
260 | | int32_t* parsedLength, |
261 | | UErrorCode& err); |
262 | | |
263 | | /** |
264 | | * Get the region to use for supplemental data lookup. Uses |
265 | | * (1) any region specified by locale tag "rg"; if none then |
266 | | * (2) any unicode_region_tag in the locale ID; if none then |
267 | | * (3) if inferRegion is true, the region suggested by |
268 | | * getLikelySubtags on the localeID. |
269 | | * If no region is found, returns an empty string. |
270 | | * |
271 | | * @param localeID |
272 | | * The complete locale ID (with keywords) from which |
273 | | * to get the region to use for supplemental data. |
274 | | * @param inferRegion |
275 | | * If true, will try to infer region from localeID if |
276 | | * no other region is found. |
277 | | * @param status |
278 | | * Reference to in/out UErrorCode value for latest status. |
279 | | * @return |
280 | | * The region code found, empty if none found. |
281 | | * @internal ICU 57 |
282 | | */ |
283 | | U_EXPORT icu::CharString |
284 | | ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion, |
285 | | UErrorCode& status); |
286 | | |
287 | | U_EXPORT icu::CharString |
288 | | ulocimp_addLikelySubtags(const char* localeID, |
289 | | UErrorCode& status); |
290 | | |
291 | | /** |
292 | | * Add the likely subtags for a provided locale ID, per the algorithm described |
293 | | * in the following CLDR technical report: |
294 | | * |
295 | | * http://www.unicode.org/reports/tr35/#Likely_Subtags |
296 | | * |
297 | | * If localeID is already in the maximal form, or there is no data available |
298 | | * for maximization, it will be copied to the output buffer. For example, |
299 | | * "und-Zzzz" cannot be maximized, since there is no reasonable maximization. |
300 | | * |
301 | | * Examples: |
302 | | * |
303 | | * "en" maximizes to "en_Latn_US" |
304 | | * |
305 | | * "de" maximizes to "de_Latn_DE" |
306 | | * |
307 | | * "sr" maximizes to "sr_Cyrl_RS" |
308 | | * |
309 | | * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.) |
310 | | * |
311 | | * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.) |
312 | | * |
313 | | * @param localeID The locale to maximize |
314 | | * @param sink The output sink receiving the maximized locale |
315 | | * @param err Error information if maximizing the locale failed. If the length |
316 | | * of the localeID and the null-terminator is greater than the maximum allowed size, |
317 | | * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. |
318 | | * @internal ICU 64 |
319 | | */ |
320 | | U_EXPORT void |
321 | | ulocimp_addLikelySubtags(const char* localeID, |
322 | | icu::ByteSink& sink, |
323 | | UErrorCode& err); |
324 | | |
325 | | U_EXPORT icu::CharString |
326 | | ulocimp_minimizeSubtags(const char* localeID, |
327 | | bool favorScript, |
328 | | UErrorCode& status); |
329 | | |
330 | | /** |
331 | | * Minimize the subtags for a provided locale ID, per the algorithm described |
332 | | * in the following CLDR technical report: |
333 | | * |
334 | | * http://www.unicode.org/reports/tr35/#Likely_Subtags |
335 | | * |
336 | | * If localeID is already in the minimal form, or there is no data available |
337 | | * for minimization, it will be copied to the output buffer. Since the |
338 | | * minimization algorithm relies on proper maximization, see the comments |
339 | | * for ulocimp_addLikelySubtags for reasons why there might not be any data. |
340 | | * |
341 | | * Examples: |
342 | | * |
343 | | * "en_Latn_US" minimizes to "en" |
344 | | * |
345 | | * "de_Latn_DE" minimizes to "de" |
346 | | * |
347 | | * "sr_Cyrl_RS" minimizes to "sr" |
348 | | * |
349 | | * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the |
350 | | * script, and minimizing to "zh" would imply "zh_Hans_CN".) |
351 | | * |
352 | | * @param localeID The locale to minimize |
353 | | * @param sink The output sink receiving the minimized locale |
354 | | * @param favorScript favor to keep script if true, region if false. |
355 | | * @param err Error information if minimizing the locale failed. If the length |
356 | | * of the localeID and the null-terminator is greater than the maximum allowed size, |
357 | | * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR. |
358 | | * @internal ICU 64 |
359 | | */ |
360 | | U_EXPORT void |
361 | | ulocimp_minimizeSubtags(const char* localeID, |
362 | | icu::ByteSink& sink, |
363 | | bool favorScript, |
364 | | UErrorCode& err); |
365 | | |
366 | | U_CAPI const char * U_EXPORT2 |
367 | | locale_getKeywordsStart(std::string_view localeID); |
368 | | |
369 | | bool |
370 | | ultag_isExtensionSubtags(const char* s, int32_t len); |
371 | | |
372 | | bool |
373 | | ultag_isLanguageSubtag(const char* s, int32_t len); |
374 | | |
375 | | bool |
376 | | ultag_isPrivateuseValueSubtags(const char* s, int32_t len); |
377 | | |
378 | | bool |
379 | | ultag_isRegionSubtag(const char* s, int32_t len); |
380 | | |
381 | | bool |
382 | | ultag_isScriptSubtag(const char* s, int32_t len); |
383 | | |
384 | | bool |
385 | | ultag_isTransformedExtensionSubtags(const char* s, int32_t len); |
386 | | |
387 | | bool |
388 | | ultag_isUnicodeExtensionSubtags(const char* s, int32_t len); |
389 | | |
390 | | bool |
391 | | ultag_isUnicodeLocaleAttribute(const char* s, int32_t len); |
392 | | |
393 | | bool |
394 | | ultag_isUnicodeLocaleAttributes(const char* s, int32_t len); |
395 | | |
396 | | bool |
397 | | ultag_isUnicodeLocaleKey(const char* s, int32_t len); |
398 | | |
399 | | bool |
400 | | ultag_isUnicodeLocaleType(const char* s, int32_t len); |
401 | | |
402 | | bool |
403 | | ultag_isVariantSubtags(const char* s, int32_t len); |
404 | | |
405 | | const char* |
406 | | ultag_getTKeyStart(const char* localeID); |
407 | | |
408 | | U_EXPORT std::optional<std::string_view> |
409 | | ulocimp_toBcpKey(std::string_view key); |
410 | | |
411 | | U_EXPORT std::optional<std::string_view> |
412 | | ulocimp_toLegacyKey(std::string_view key); |
413 | | |
414 | | U_EXPORT std::optional<std::string_view> |
415 | | ulocimp_toBcpType(std::string_view key, std::string_view type); |
416 | | |
417 | | U_EXPORT std::optional<std::string_view> |
418 | | ulocimp_toLegacyType(std::string_view key, std::string_view type); |
419 | | |
420 | | /* Function for testing purpose */ |
421 | | U_EXPORT const char* const* |
422 | | ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length); |
423 | | |
424 | | // Return true if the value is already canonicalized. |
425 | | U_EXPORT bool |
426 | | ulocimp_isCanonicalizedLocaleForTest(const char* localeName); |
427 | | |
428 | | #ifdef __cplusplus |
429 | | U_NAMESPACE_BEGIN |
430 | | class U_COMMON_API RegionValidateMap : public UObject { /* Exported UObject subclass holding a fixed-size bit table of regions; all member functions are only declared here. NOTE(review): presumably one bit per two-letter A-Z x A-Z region code, judging by the 26x26 sizing of `map` -- confirm against the implementation. */ |
431 | | public: |
432 | | RegionValidateMap(); |
433 | | virtual ~RegionValidateMap(); |
434 | | bool isSet(const char* region) const; /* NOTE(review): presumably reports whether `region` is marked in `map` -- confirm. */ |
435 | | bool equals(const RegionValidateMap& that) const; /* NOTE(review): presumably compares the contents of the two maps -- confirm. */ |
436 | | protected: |
437 | | int32_t value(const char* region) const; /* NOTE(review): presumably converts `region` to its bit index in `map`; behavior for invalid input is not visible here -- confirm. */ |
438 | | uint32_t map[22]; // 26x26/32 = 22 (rounded up): 676 bits packed into 32-bit words. |
439 | | }; |
440 | | U_NAMESPACE_END |
441 | | #endif /* __cplusplus */ |
442 | | |
443 | | #endif |