/src/icu/source/common/uloc_tag.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | // © 2016 and later: Unicode, Inc. and others. |
2 | | // License & terms of use: http://www.unicode.org/copyright.html |
3 | | /* |
4 | | ********************************************************************** |
5 | | * Copyright (C) 2009-2015, International Business Machines |
6 | | * Corporation and others. All Rights Reserved. |
7 | | ********************************************************************** |
8 | | */ |
9 | | |
10 | | #include "unicode/utypes.h" |
11 | | #include "unicode/ures.h" |
12 | | #include "unicode/putil.h" |
13 | | #include "unicode/uloc.h" |
14 | | #include "ustr_imp.h" |
15 | | #include "cmemory.h" |
16 | | #include "cstring.h" |
17 | | #include "putilimp.h" |
18 | | #include "uinvchar.h" |
19 | | #include "ulocimp.h" |
20 | | #include "uassert.h" |
21 | | |
22 | | |
/* Node of a singly linked list; each node refers to one variant subtag. */
struct VariantListEntry {
    const char              *variant;  /* text of the variant subtag */
    struct VariantListEntry *next;     /* following node; NULL on the last node */
};
typedef struct VariantListEntry VariantListEntry;
28 | | |
/* Node of a singly linked list; each node refers to one attribute value. */
struct AttributeListEntry {
    const char                *attribute;  /* text of the attribute value */
    struct AttributeListEntry *next;       /* following node; NULL on the last node */
};
typedef struct AttributeListEntry AttributeListEntry;
34 | | |
/* Node of a singly linked list; each node refers to one extension (key plus value). */
struct ExtensionListEntry {
    const char                *key;    /* extension key */
    const char                *value;  /* extension value */
    struct ExtensionListEntry *next;   /* following node; NULL on the last node */
};
typedef struct ExtensionListEntry ExtensionListEntry;
41 | | |
42 | 0 | #define MAXEXTLANG 3 |
43 | | typedef struct ULanguageTag { |
44 | | char *buf; /* holding parsed subtags */ |
45 | | const char *language; |
46 | | const char *extlang[MAXEXTLANG]; |
47 | | const char *script; |
48 | | const char *region; |
49 | | VariantListEntry *variants; |
50 | | ExtensionListEntry *extensions; |
51 | | const char *privateuse; |
52 | | const char *grandfathered; |
53 | | } ULanguageTag; |
54 | | |
#define MINLEN 2

/* Characters with a fixed role inside a BCP 47 language tag. */
#define SEP '-'          /* subtag separator */
#define PRIVATEUSE 'x'   /* private use singleton */
#define LDMLEXT 'u'      /* singleton of the 'u' extension */

/* Delimiter characters of ICU locale IDs. */
#define LOCALE_SEP '_'
#define LOCALE_EXT_SEP '@'
#define LOCALE_KEYWORD_SEP ';'
#define LOCALE_KEY_TYPE_SEP '='

/* Character classification; note that ISNUMERIC evaluates its argument twice. */
#define ISALPHA(c) uprv_isASCIILetter(c)
#define ISNUMERIC(c) ((c)>='0' && (c)<='9')

/* Fixed strings referenced by the conversion code. */
static const char EMPTY[] = "";
static const char LANG_UND[] = "und";
static const char PRIVATEUSE_KEY[] = "x";
static const char _POSIX[] = "_POSIX";
static const char POSIX_KEY[] = "va";
static const char POSIX_VALUE[] = "posix";
static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";
static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
static const char LOCALE_TYPE_YES[] = "yes";

/* Length of LANG_UND without its terminating NUL. */
#define LANG_UND_LEN 3
79 | | |
/*
 * Flat list of string pairs: a grandfathered tag at each even index and its
 * preferred replacement at the following odd index. A NULL, NULL pair marks
 * the end of the list.
 */
static const char* const GRANDFATHERED[] = {
    /* grandfathered    preferred */
    "art-lojban",       "jbo",
    "cel-gaulish",      "xtg-x-cel-gaulish",
    "en-GB-oed",        "en-GB-x-oed",
    "i-ami",            "ami",
    "i-bnn",            "bnn",
    "i-default",        "en-x-i-default",
    "i-enochian",       "und-x-i-enochian",
    "i-hak",            "hak",
    "i-klingon",        "tlh",
    "i-lux",            "lb",
    "i-mingo",          "see-x-i-mingo",
    "i-navajo",         "nv",
    "i-pwn",            "pwn",
    "i-tao",            "tao",
    "i-tay",            "tay",
    "i-tsu",            "tsu",
    "no-bok",           "nb",
    "no-nyn",           "nn",
    "sgn-be-fr",        "sfb",
    "sgn-be-nl",        "vgt",
    "sgn-ch-de",        "sgg",
    "zh-guoyu",         "cmn",
    "zh-hakka",         "hak",
    "zh-min",           "nan-x-zh-min",
    "zh-min-nan",       "nan",
    "zh-xiang",         "hsn",
    NULL,               NULL
};
110 | | |
/*
 * Flat list of pairs: a deprecated language code at each even index and its
 * replacement at the following odd index. There is no terminator entry; the
 * code that scans this table bounds the walk by the array length and steps
 * two entries at a time.
 */
static const char DEPRECATEDLANGS[][4] = {
    /* deprecated, new */
    "iw", "he",
    "ji", "yi",
    "in", "id"
};
117 | | |
118 | | /* |
119 | | * ------------------------------------------------- |
120 | | * |
121 | | * These ultag_ functions may be exposed as APIs later |
122 | | * |
123 | | * ------------------------------------------------- |
124 | | */ |
125 | | |
126 | | static ULanguageTag* |
127 | | ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status); |
128 | | |
129 | | static void |
130 | | ultag_close(ULanguageTag* langtag); |
131 | | |
132 | | static const char* |
133 | | ultag_getLanguage(const ULanguageTag* langtag); |
134 | | |
135 | | #if 0 |
136 | | static const char* |
137 | | ultag_getJDKLanguage(const ULanguageTag* langtag); |
138 | | #endif |
139 | | |
140 | | static const char* |
141 | | ultag_getExtlang(const ULanguageTag* langtag, int32_t idx); |
142 | | |
143 | | static int32_t |
144 | | ultag_getExtlangSize(const ULanguageTag* langtag); |
145 | | |
146 | | static const char* |
147 | | ultag_getScript(const ULanguageTag* langtag); |
148 | | |
149 | | static const char* |
150 | | ultag_getRegion(const ULanguageTag* langtag); |
151 | | |
152 | | static const char* |
153 | | ultag_getVariant(const ULanguageTag* langtag, int32_t idx); |
154 | | |
155 | | static int32_t |
156 | | ultag_getVariantsSize(const ULanguageTag* langtag); |
157 | | |
158 | | static const char* |
159 | | ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); |
160 | | |
161 | | static const char* |
162 | | ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); |
163 | | |
164 | | static int32_t |
165 | | ultag_getExtensionsSize(const ULanguageTag* langtag); |
166 | | |
167 | | static const char* |
168 | | ultag_getPrivateUse(const ULanguageTag* langtag); |
169 | | |
170 | | #if 0 |
171 | | static const char* |
172 | | ultag_getGrandfathered(const ULanguageTag* langtag); |
173 | | #endif |
174 | | |
175 | | /* |
176 | | * ------------------------------------------------- |
177 | | * |
178 | | * Language subtag syntax validation functions |
179 | | * |
180 | | * ------------------------------------------------- |
181 | | */ |
182 | | |
183 | | static UBool |
184 | 0 | _isAlphaString(const char* s, int32_t len) { |
185 | 0 | int32_t i; |
186 | 0 | for (i = 0; i < len; i++) { |
187 | 0 | if (!ISALPHA(*(s + i))) { |
188 | 0 | return FALSE; |
189 | 0 | } |
190 | 0 | } |
191 | 0 | return TRUE; |
192 | 0 | } |
193 | | |
194 | | static UBool |
195 | 0 | _isNumericString(const char* s, int32_t len) { |
196 | 0 | int32_t i; |
197 | 0 | for (i = 0; i < len; i++) { |
198 | 0 | if (!ISNUMERIC(*(s + i))) { |
199 | 0 | return FALSE; |
200 | 0 | } |
201 | 0 | } |
202 | 0 | return TRUE; |
203 | 0 | } |
204 | | |
205 | | static UBool |
206 | 0 | _isAlphaNumericString(const char* s, int32_t len) { |
207 | 0 | int32_t i; |
208 | 0 | for (i = 0; i < len; i++) { |
209 | 0 | if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { |
210 | 0 | return FALSE; |
211 | 0 | } |
212 | 0 | } |
213 | 0 | return TRUE; |
214 | 0 | } |
215 | | |
216 | | static UBool |
217 | 0 | _isLanguageSubtag(const char* s, int32_t len) { |
218 | | /* |
219 | | * language = 2*3ALPHA ; shortest ISO 639 code |
220 | | * ["-" extlang] ; sometimes followed by |
221 | | * ; extended language subtags |
222 | | * / 4ALPHA ; or reserved for future use |
223 | | * / 5*8ALPHA ; or registered language subtag |
224 | | */ |
225 | 0 | if (len < 0) { |
226 | 0 | len = (int32_t)uprv_strlen(s); |
227 | 0 | } |
228 | 0 | if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { |
229 | 0 | return TRUE; |
230 | 0 | } |
231 | 0 | return FALSE; |
232 | 0 | } |
233 | | |
234 | | static UBool |
235 | 0 | _isExtlangSubtag(const char* s, int32_t len) { |
236 | | /* |
237 | | * extlang = 3ALPHA ; selected ISO 639 codes |
238 | | * *2("-" 3ALPHA) ; permanently reserved |
239 | | */ |
240 | 0 | if (len < 0) { |
241 | 0 | len = (int32_t)uprv_strlen(s); |
242 | 0 | } |
243 | 0 | if (len == 3 && _isAlphaString(s, len)) { |
244 | 0 | return TRUE; |
245 | 0 | } |
246 | 0 | return FALSE; |
247 | 0 | } |
248 | | |
249 | | static UBool |
250 | 0 | _isScriptSubtag(const char* s, int32_t len) { |
251 | | /* |
252 | | * script = 4ALPHA ; ISO 15924 code |
253 | | */ |
254 | 0 | if (len < 0) { |
255 | 0 | len = (int32_t)uprv_strlen(s); |
256 | 0 | } |
257 | 0 | if (len == 4 && _isAlphaString(s, len)) { |
258 | 0 | return TRUE; |
259 | 0 | } |
260 | 0 | return FALSE; |
261 | 0 | } |
262 | | |
263 | | static UBool |
264 | 0 | _isRegionSubtag(const char* s, int32_t len) { |
265 | | /* |
266 | | * region = 2ALPHA ; ISO 3166-1 code |
267 | | * / 3DIGIT ; UN M.49 code |
268 | | */ |
269 | 0 | if (len < 0) { |
270 | 0 | len = (int32_t)uprv_strlen(s); |
271 | 0 | } |
272 | 0 | if (len == 2 && _isAlphaString(s, len)) { |
273 | 0 | return TRUE; |
274 | 0 | } |
275 | 0 | if (len == 3 && _isNumericString(s, len)) { |
276 | 0 | return TRUE; |
277 | 0 | } |
278 | 0 | return FALSE; |
279 | 0 | } |
280 | | |
281 | | static UBool |
282 | 0 | _isVariantSubtag(const char* s, int32_t len) { |
283 | | /* |
284 | | * variant = 5*8alphanum ; registered variants |
285 | | * / (DIGIT 3alphanum) |
286 | | */ |
287 | 0 | if (len < 0) { |
288 | 0 | len = (int32_t)uprv_strlen(s); |
289 | 0 | } |
290 | 0 | if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) { |
291 | 0 | return TRUE; |
292 | 0 | } |
293 | 0 | if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { |
294 | 0 | return TRUE; |
295 | 0 | } |
296 | 0 | return FALSE; |
297 | 0 | } |
298 | | |
299 | | static UBool |
300 | 0 | _isPrivateuseVariantSubtag(const char* s, int32_t len) { |
301 | | /* |
302 | | * variant = 1*8alphanum ; registered variants |
303 | | * / (DIGIT 3alphanum) |
304 | | */ |
305 | 0 | if (len < 0) { |
306 | 0 | len = (int32_t)uprv_strlen(s); |
307 | 0 | } |
308 | 0 | if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { |
309 | 0 | return TRUE; |
310 | 0 | } |
311 | 0 | return FALSE; |
312 | 0 | } |
313 | | |
314 | | static UBool |
315 | 0 | _isExtensionSingleton(const char* s, int32_t len) { |
316 | | /* |
317 | | * extension = singleton 1*("-" (2*8alphanum)) |
318 | | */ |
319 | 0 | if (len < 0) { |
320 | 0 | len = (int32_t)uprv_strlen(s); |
321 | 0 | } |
322 | 0 | if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) { |
323 | 0 | return TRUE; |
324 | 0 | } |
325 | 0 | return FALSE; |
326 | 0 | } |
327 | | |
328 | | static UBool |
329 | 0 | _isExtensionSubtag(const char* s, int32_t len) { |
330 | | /* |
331 | | * extension = singleton 1*("-" (2*8alphanum)) |
332 | | */ |
333 | 0 | if (len < 0) { |
334 | 0 | len = (int32_t)uprv_strlen(s); |
335 | 0 | } |
336 | 0 | if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { |
337 | 0 | return TRUE; |
338 | 0 | } |
339 | 0 | return FALSE; |
340 | 0 | } |
341 | | |
342 | | static UBool |
343 | 0 | _isExtensionSubtags(const char* s, int32_t len) { |
344 | 0 | const char *p = s; |
345 | 0 | const char *pSubtag = NULL; |
346 | |
|
347 | 0 | if (len < 0) { |
348 | 0 | len = (int32_t)uprv_strlen(s); |
349 | 0 | } |
350 | |
|
351 | 0 | while ((p - s) < len) { |
352 | 0 | if (*p == SEP) { |
353 | 0 | if (pSubtag == NULL) { |
354 | 0 | return FALSE; |
355 | 0 | } |
356 | 0 | if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { |
357 | 0 | return FALSE; |
358 | 0 | } |
359 | 0 | pSubtag = NULL; |
360 | 0 | } else if (pSubtag == NULL) { |
361 | 0 | pSubtag = p; |
362 | 0 | } |
363 | 0 | p++; |
364 | 0 | } |
365 | 0 | if (pSubtag == NULL) { |
366 | 0 | return FALSE; |
367 | 0 | } |
368 | 0 | return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); |
369 | 0 | } |
370 | | |
371 | | static UBool |
372 | 0 | _isPrivateuseValueSubtag(const char* s, int32_t len) { |
373 | | /* |
374 | | * privateuse = "x" 1*("-" (1*8alphanum)) |
375 | | */ |
376 | 0 | if (len < 0) { |
377 | 0 | len = (int32_t)uprv_strlen(s); |
378 | 0 | } |
379 | 0 | if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { |
380 | 0 | return TRUE; |
381 | 0 | } |
382 | 0 | return FALSE; |
383 | 0 | } |
384 | | |
385 | | static UBool |
386 | 0 | _isPrivateuseValueSubtags(const char* s, int32_t len) { |
387 | 0 | const char *p = s; |
388 | 0 | const char *pSubtag = NULL; |
389 | |
|
390 | 0 | if (len < 0) { |
391 | 0 | len = (int32_t)uprv_strlen(s); |
392 | 0 | } |
393 | |
|
394 | 0 | while ((p - s) < len) { |
395 | 0 | if (*p == SEP) { |
396 | 0 | if (pSubtag == NULL) { |
397 | 0 | return FALSE; |
398 | 0 | } |
399 | 0 | if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { |
400 | 0 | return FALSE; |
401 | 0 | } |
402 | 0 | pSubtag = NULL; |
403 | 0 | } else if (pSubtag == NULL) { |
404 | 0 | pSubtag = p; |
405 | 0 | } |
406 | 0 | p++; |
407 | 0 | } |
408 | 0 | if (pSubtag == NULL) { |
409 | 0 | return FALSE; |
410 | 0 | } |
411 | 0 | return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); |
412 | 0 | } |
413 | | |
414 | | U_CFUNC UBool |
415 | 0 | ultag_isUnicodeLocaleKey(const char* s, int32_t len) { |
416 | 0 | if (len < 0) { |
417 | 0 | len = (int32_t)uprv_strlen(s); |
418 | 0 | } |
419 | 0 | if (len == 2 && _isAlphaNumericString(s, len)) { |
420 | 0 | return TRUE; |
421 | 0 | } |
422 | 0 | return FALSE; |
423 | 0 | } |
424 | | |
425 | | U_CFUNC UBool |
426 | 0 | ultag_isUnicodeLocaleType(const char*s, int32_t len) { |
427 | 0 | const char* p; |
428 | 0 | int32_t subtagLen = 0; |
429 | |
|
430 | 0 | if (len < 0) { |
431 | 0 | len = (int32_t)uprv_strlen(s); |
432 | 0 | } |
433 | |
|
434 | 0 | for (p = s; len > 0; p++, len--) { |
435 | 0 | if (*p == SEP) { |
436 | 0 | if (subtagLen < 3) { |
437 | 0 | return FALSE; |
438 | 0 | } |
439 | 0 | subtagLen = 0; |
440 | 0 | } else if (ISALPHA(*p) || ISNUMERIC(*p)) { |
441 | 0 | subtagLen++; |
442 | 0 | if (subtagLen > 8) { |
443 | 0 | return FALSE; |
444 | 0 | } |
445 | 0 | } else { |
446 | 0 | return FALSE; |
447 | 0 | } |
448 | 0 | } |
449 | | |
450 | 0 | return (subtagLen >= 3); |
451 | 0 | } |
452 | | /* |
453 | | * ------------------------------------------------- |
454 | | * |
455 | | * Helper functions |
456 | | * |
457 | | * ------------------------------------------------- |
458 | | */ |
459 | | |
460 | | static UBool |
461 | 0 | _addVariantToList(VariantListEntry **first, VariantListEntry *var) { |
462 | 0 | UBool bAdded = TRUE; |
463 | |
|
464 | 0 | if (*first == NULL) { |
465 | 0 | var->next = NULL; |
466 | 0 | *first = var; |
467 | 0 | } else { |
468 | 0 | VariantListEntry *prev, *cur; |
469 | 0 | int32_t cmp; |
470 | | |
471 | | /* variants order should be preserved */ |
472 | 0 | prev = NULL; |
473 | 0 | cur = *first; |
474 | 0 | while (TRUE) { |
475 | 0 | if (cur == NULL) { |
476 | 0 | prev->next = var; |
477 | 0 | var->next = NULL; |
478 | 0 | break; |
479 | 0 | } |
480 | | |
481 | | /* Checking for duplicate variant */ |
482 | 0 | cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); |
483 | 0 | if (cmp == 0) { |
484 | | /* duplicated variant */ |
485 | 0 | bAdded = FALSE; |
486 | 0 | break; |
487 | 0 | } |
488 | 0 | prev = cur; |
489 | 0 | cur = cur->next; |
490 | 0 | } |
491 | 0 | } |
492 | |
|
493 | 0 | return bAdded; |
494 | 0 | } |
495 | | |
496 | | static UBool |
497 | 0 | _addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) { |
498 | 0 | UBool bAdded = TRUE; |
499 | |
|
500 | 0 | if (*first == NULL) { |
501 | 0 | attr->next = NULL; |
502 | 0 | *first = attr; |
503 | 0 | } else { |
504 | 0 | AttributeListEntry *prev, *cur; |
505 | 0 | int32_t cmp; |
506 | | |
507 | | /* reorder variants in alphabetical order */ |
508 | 0 | prev = NULL; |
509 | 0 | cur = *first; |
510 | 0 | while (TRUE) { |
511 | 0 | if (cur == NULL) { |
512 | 0 | prev->next = attr; |
513 | 0 | attr->next = NULL; |
514 | 0 | break; |
515 | 0 | } |
516 | 0 | cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute); |
517 | 0 | if (cmp < 0) { |
518 | 0 | if (prev == NULL) { |
519 | 0 | *first = attr; |
520 | 0 | } else { |
521 | 0 | prev->next = attr; |
522 | 0 | } |
523 | 0 | attr->next = cur; |
524 | 0 | break; |
525 | 0 | } |
526 | 0 | if (cmp == 0) { |
527 | | /* duplicated variant */ |
528 | 0 | bAdded = FALSE; |
529 | 0 | break; |
530 | 0 | } |
531 | 0 | prev = cur; |
532 | 0 | cur = cur->next; |
533 | 0 | } |
534 | 0 | } |
535 | |
|
536 | 0 | return bAdded; |
537 | 0 | } |
538 | | |
539 | | |
540 | | static UBool |
541 | 0 | _addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { |
542 | 0 | UBool bAdded = TRUE; |
543 | |
|
544 | 0 | if (*first == NULL) { |
545 | 0 | ext->next = NULL; |
546 | 0 | *first = ext; |
547 | 0 | } else { |
548 | 0 | ExtensionListEntry *prev, *cur; |
549 | 0 | int32_t cmp; |
550 | | |
551 | | /* reorder variants in alphabetical order */ |
552 | 0 | prev = NULL; |
553 | 0 | cur = *first; |
554 | 0 | while (TRUE) { |
555 | 0 | if (cur == NULL) { |
556 | 0 | prev->next = ext; |
557 | 0 | ext->next = NULL; |
558 | 0 | break; |
559 | 0 | } |
560 | 0 | if (localeToBCP) { |
561 | | /* special handling for locale to bcp conversion */ |
562 | 0 | int32_t len, curlen; |
563 | |
|
564 | 0 | len = (int32_t)uprv_strlen(ext->key); |
565 | 0 | curlen = (int32_t)uprv_strlen(cur->key); |
566 | |
|
567 | 0 | if (len == 1 && curlen == 1) { |
568 | 0 | if (*(ext->key) == *(cur->key)) { |
569 | 0 | cmp = 0; |
570 | 0 | } else if (*(ext->key) == PRIVATEUSE) { |
571 | 0 | cmp = 1; |
572 | 0 | } else if (*(cur->key) == PRIVATEUSE) { |
573 | 0 | cmp = -1; |
574 | 0 | } else { |
575 | 0 | cmp = *(ext->key) - *(cur->key); |
576 | 0 | } |
577 | 0 | } else if (len == 1) { |
578 | 0 | cmp = *(ext->key) - LDMLEXT; |
579 | 0 | } else if (curlen == 1) { |
580 | 0 | cmp = LDMLEXT - *(cur->key); |
581 | 0 | } else { |
582 | 0 | cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); |
583 | | /* Both are u extension keys - we need special handling for 'attribute' */ |
584 | 0 | if (cmp != 0) { |
585 | 0 | if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) { |
586 | 0 | cmp = 1; |
587 | 0 | } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { |
588 | 0 | cmp = -1; |
589 | 0 | } |
590 | 0 | } |
591 | 0 | } |
592 | 0 | } else { |
593 | 0 | cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); |
594 | 0 | } |
595 | 0 | if (cmp < 0) { |
596 | 0 | if (prev == NULL) { |
597 | 0 | *first = ext; |
598 | 0 | } else { |
599 | 0 | prev->next = ext; |
600 | 0 | } |
601 | 0 | ext->next = cur; |
602 | 0 | break; |
603 | 0 | } |
604 | 0 | if (cmp == 0) { |
605 | | /* duplicated extension key */ |
606 | 0 | bAdded = FALSE; |
607 | 0 | break; |
608 | 0 | } |
609 | 0 | prev = cur; |
610 | 0 | cur = cur->next; |
611 | 0 | } |
612 | 0 | } |
613 | |
|
614 | 0 | return bAdded; |
615 | 0 | } |
616 | | |
617 | | static void |
618 | 0 | _initializeULanguageTag(ULanguageTag* langtag) { |
619 | 0 | int32_t i; |
620 | |
|
621 | 0 | langtag->buf = NULL; |
622 | |
|
623 | 0 | langtag->language = EMPTY; |
624 | 0 | for (i = 0; i < MAXEXTLANG; i++) { |
625 | 0 | langtag->extlang[i] = NULL; |
626 | 0 | } |
627 | |
|
628 | 0 | langtag->script = EMPTY; |
629 | 0 | langtag->region = EMPTY; |
630 | |
|
631 | 0 | langtag->variants = NULL; |
632 | 0 | langtag->extensions = NULL; |
633 | |
|
634 | 0 | langtag->grandfathered = EMPTY; |
635 | 0 | langtag->privateuse = EMPTY; |
636 | 0 | } |
637 | | |
638 | | static int32_t |
639 | 0 | _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { |
640 | 0 | char buf[ULOC_LANG_CAPACITY]; |
641 | 0 | UErrorCode tmpStatus = U_ZERO_ERROR; |
642 | 0 | int32_t len, i; |
643 | 0 | int32_t reslen = 0; |
644 | |
|
645 | 0 | if (U_FAILURE(*status)) { |
646 | 0 | return 0; |
647 | 0 | } |
648 | | |
649 | 0 | len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); |
650 | 0 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
651 | 0 | if (strict) { |
652 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
653 | 0 | return 0; |
654 | 0 | } |
655 | 0 | len = 0; |
656 | 0 | } |
657 | | |
658 | | /* Note: returned language code is in lower case letters */ |
659 | | |
660 | 0 | if (len == 0) { |
661 | 0 | if (reslen < capacity) { |
662 | 0 | uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); |
663 | 0 | } |
664 | 0 | reslen += LANG_UND_LEN; |
665 | 0 | } else if (!_isLanguageSubtag(buf, len)) { |
666 | | /* invalid language code */ |
667 | 0 | if (strict) { |
668 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
669 | 0 | return 0; |
670 | 0 | } |
671 | 0 | if (reslen < capacity) { |
672 | 0 | uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); |
673 | 0 | } |
674 | 0 | reslen += LANG_UND_LEN; |
675 | 0 | } else { |
676 | | /* resolve deprecated */ |
677 | 0 | for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) { |
678 | 0 | if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { |
679 | 0 | uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); |
680 | 0 | len = (int32_t)uprv_strlen(buf); |
681 | 0 | break; |
682 | 0 | } |
683 | 0 | } |
684 | 0 | if (reslen < capacity) { |
685 | 0 | uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); |
686 | 0 | } |
687 | 0 | reslen += len; |
688 | 0 | } |
689 | 0 | u_terminateChars(appendAt, capacity, reslen, status); |
690 | 0 | return reslen; |
691 | 0 | } |
692 | | |
693 | | static int32_t |
694 | 0 | _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { |
695 | 0 | char buf[ULOC_SCRIPT_CAPACITY]; |
696 | 0 | UErrorCode tmpStatus = U_ZERO_ERROR; |
697 | 0 | int32_t len; |
698 | 0 | int32_t reslen = 0; |
699 | |
|
700 | 0 | if (U_FAILURE(*status)) { |
701 | 0 | return 0; |
702 | 0 | } |
703 | | |
704 | 0 | len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); |
705 | 0 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
706 | 0 | if (strict) { |
707 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
708 | 0 | } |
709 | 0 | return 0; |
710 | 0 | } |
711 | | |
712 | 0 | if (len > 0) { |
713 | 0 | if (!_isScriptSubtag(buf, len)) { |
714 | | /* invalid script code */ |
715 | 0 | if (strict) { |
716 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
717 | 0 | } |
718 | 0 | return 0; |
719 | 0 | } else { |
720 | 0 | if (reslen < capacity) { |
721 | 0 | *(appendAt + reslen) = SEP; |
722 | 0 | } |
723 | 0 | reslen++; |
724 | |
|
725 | 0 | if (reslen < capacity) { |
726 | 0 | uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); |
727 | 0 | } |
728 | 0 | reslen += len; |
729 | 0 | } |
730 | 0 | } |
731 | 0 | u_terminateChars(appendAt, capacity, reslen, status); |
732 | 0 | return reslen; |
733 | 0 | } |
734 | | |
735 | | static int32_t |
736 | 0 | _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { |
737 | 0 | char buf[ULOC_COUNTRY_CAPACITY]; |
738 | 0 | UErrorCode tmpStatus = U_ZERO_ERROR; |
739 | 0 | int32_t len; |
740 | 0 | int32_t reslen = 0; |
741 | |
|
742 | 0 | if (U_FAILURE(*status)) { |
743 | 0 | return 0; |
744 | 0 | } |
745 | | |
746 | 0 | len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); |
747 | 0 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
748 | 0 | if (strict) { |
749 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
750 | 0 | } |
751 | 0 | return 0; |
752 | 0 | } |
753 | | |
754 | 0 | if (len > 0) { |
755 | 0 | if (!_isRegionSubtag(buf, len)) { |
756 | | /* invalid region code */ |
757 | 0 | if (strict) { |
758 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
759 | 0 | } |
760 | 0 | return 0; |
761 | 0 | } else { |
762 | 0 | if (reslen < capacity) { |
763 | 0 | *(appendAt + reslen) = SEP; |
764 | 0 | } |
765 | 0 | reslen++; |
766 | |
|
767 | 0 | if (reslen < capacity) { |
768 | 0 | uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); |
769 | 0 | } |
770 | 0 | reslen += len; |
771 | 0 | } |
772 | 0 | } |
773 | 0 | u_terminateChars(appendAt, capacity, reslen, status); |
774 | 0 | return reslen; |
775 | 0 | } |
776 | | |
777 | | static int32_t |
778 | 0 | _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) { |
779 | 0 | char buf[ULOC_FULLNAME_CAPACITY]; |
780 | 0 | UErrorCode tmpStatus = U_ZERO_ERROR; |
781 | 0 | int32_t len, i; |
782 | 0 | int32_t reslen = 0; |
783 | |
|
784 | 0 | if (U_FAILURE(*status)) { |
785 | 0 | return 0; |
786 | 0 | } |
787 | | |
788 | 0 | len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); |
789 | 0 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
790 | 0 | if (strict) { |
791 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
792 | 0 | } |
793 | 0 | return 0; |
794 | 0 | } |
795 | | |
796 | 0 | if (len > 0) { |
797 | 0 | char *p, *pVar; |
798 | 0 | UBool bNext = TRUE; |
799 | 0 | VariantListEntry *var; |
800 | 0 | VariantListEntry *varFirst = NULL; |
801 | |
|
802 | 0 | pVar = NULL; |
803 | 0 | p = buf; |
804 | 0 | while (bNext) { |
805 | 0 | if (*p == SEP || *p == LOCALE_SEP || *p == 0) { |
806 | 0 | if (*p == 0) { |
807 | 0 | bNext = FALSE; |
808 | 0 | } else { |
809 | 0 | *p = 0; /* terminate */ |
810 | 0 | } |
811 | 0 | if (pVar == NULL) { |
812 | 0 | if (strict) { |
813 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
814 | 0 | break; |
815 | 0 | } |
816 | | /* ignore empty variant */ |
817 | 0 | } else { |
818 | | /* ICU uses upper case letters for variants, but |
819 | | the canonical format is lowercase in BCP47 */ |
820 | 0 | for (i = 0; *(pVar + i) != 0; i++) { |
821 | 0 | *(pVar + i) = uprv_tolower(*(pVar + i)); |
822 | 0 | } |
823 | | |
824 | | /* validate */ |
825 | 0 | if (_isVariantSubtag(pVar, -1)) { |
826 | 0 | if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) { |
827 | | /* emit the variant to the list */ |
828 | 0 | var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); |
829 | 0 | if (var == NULL) { |
830 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
831 | 0 | break; |
832 | 0 | } |
833 | 0 | var->variant = pVar; |
834 | 0 | if (!_addVariantToList(&varFirst, var)) { |
835 | | /* duplicated variant */ |
836 | 0 | uprv_free(var); |
837 | 0 | if (strict) { |
838 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
839 | 0 | break; |
840 | 0 | } |
841 | 0 | } |
842 | 0 | } else { |
843 | | /* Special handling for POSIX variant, need to remember that we had it and then */ |
844 | | /* treat it like an extension later. */ |
845 | 0 | *hadPosix = TRUE; |
846 | 0 | } |
847 | 0 | } else if (strict) { |
848 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
849 | 0 | break; |
850 | 0 | } else if (_isPrivateuseValueSubtag(pVar, -1)) { |
851 | | /* Handle private use subtags separately */ |
852 | 0 | break; |
853 | 0 | } |
854 | 0 | } |
855 | | /* reset variant starting position */ |
856 | 0 | pVar = NULL; |
857 | 0 | } else if (pVar == NULL) { |
858 | 0 | pVar = p; |
859 | 0 | } |
860 | 0 | p++; |
861 | 0 | } |
862 | |
|
863 | 0 | if (U_SUCCESS(*status)) { |
864 | 0 | if (varFirst != NULL) { |
865 | 0 | int32_t varLen; |
866 | | |
867 | | /* write out validated/normalized variants to the target */ |
868 | 0 | var = varFirst; |
869 | 0 | while (var != NULL) { |
870 | 0 | if (reslen < capacity) { |
871 | 0 | *(appendAt + reslen) = SEP; |
872 | 0 | } |
873 | 0 | reslen++; |
874 | 0 | varLen = (int32_t)uprv_strlen(var->variant); |
875 | 0 | if (reslen < capacity) { |
876 | 0 | uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen)); |
877 | 0 | } |
878 | 0 | reslen += varLen; |
879 | 0 | var = var->next; |
880 | 0 | } |
881 | 0 | } |
882 | 0 | } |
883 | | |
884 | | /* clean up */ |
885 | 0 | var = varFirst; |
886 | 0 | while (var != NULL) { |
887 | 0 | VariantListEntry *tmpVar = var->next; |
888 | 0 | uprv_free(var); |
889 | 0 | var = tmpVar; |
890 | 0 | } |
891 | |
|
892 | 0 | if (U_FAILURE(*status)) { |
893 | 0 | return 0; |
894 | 0 | } |
895 | 0 | } |
896 | | |
897 | 0 | u_terminateChars(appendAt, capacity, reslen, status); |
898 | 0 | return reslen; |
899 | 0 | } |
900 | | |
901 | | static int32_t |
902 | 0 | _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { |
903 | 0 | char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
904 | 0 | char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; |
905 | 0 | int32_t attrBufLength = 0; |
906 | 0 | UEnumeration *keywordEnum = NULL; |
907 | 0 | int32_t reslen = 0; |
908 | |
|
909 | 0 | keywordEnum = uloc_openKeywords(localeID, status); |
910 | 0 | if (U_FAILURE(*status) && !hadPosix) { |
911 | 0 | uenum_close(keywordEnum); |
912 | 0 | return 0; |
913 | 0 | } |
914 | 0 | if (keywordEnum != NULL || hadPosix) { |
915 | | /* reorder extensions */ |
916 | 0 | int32_t len; |
917 | 0 | const char *key; |
918 | 0 | ExtensionListEntry *firstExt = NULL; |
919 | 0 | ExtensionListEntry *ext; |
920 | 0 | AttributeListEntry *firstAttr = NULL; |
921 | 0 | AttributeListEntry *attr; |
922 | 0 | char *attrValue; |
923 | 0 | char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
924 | 0 | char *pExtBuf = extBuf; |
925 | 0 | int32_t extBufCapacity = sizeof(extBuf); |
926 | 0 | const char *bcpKey=nullptr, *bcpValue=nullptr; |
927 | 0 | UErrorCode tmpStatus = U_ZERO_ERROR; |
928 | 0 | int32_t keylen; |
929 | 0 | UBool isBcpUExt; |
930 | |
|
931 | 0 | while (TRUE) { |
932 | 0 | key = uenum_next(keywordEnum, NULL, status); |
933 | 0 | if (key == NULL) { |
934 | 0 | break; |
935 | 0 | } |
936 | 0 | len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus); |
937 | | /* buf must be null-terminated */ |
938 | 0 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
939 | 0 | if (strict) { |
940 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
941 | 0 | break; |
942 | 0 | } |
943 | | /* ignore this keyword */ |
944 | 0 | tmpStatus = U_ZERO_ERROR; |
945 | 0 | continue; |
946 | 0 | } |
947 | | |
948 | 0 | keylen = (int32_t)uprv_strlen(key); |
949 | 0 | isBcpUExt = (keylen > 1); |
950 | | |
951 | | /* special keyword used for representing Unicode locale attributes */ |
952 | 0 | if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) { |
953 | 0 | if (len > 0) { |
954 | 0 | int32_t i = 0; |
955 | 0 | while (TRUE) { |
956 | 0 | attrBufLength = 0; |
957 | 0 | for (; i < len; i++) { |
958 | 0 | if (buf[i] != '-') { |
959 | 0 | attrBuf[attrBufLength++] = buf[i]; |
960 | 0 | } else { |
961 | 0 | i++; |
962 | 0 | break; |
963 | 0 | } |
964 | 0 | } |
965 | 0 | if (attrBufLength > 0) { |
966 | 0 | attrBuf[attrBufLength] = 0; |
967 | |
|
968 | 0 | } else if (i >= len){ |
969 | 0 | break; |
970 | 0 | } |
971 | | |
972 | | /* create AttributeListEntry */ |
973 | 0 | attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); |
974 | 0 | if (attr == NULL) { |
975 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
976 | 0 | break; |
977 | 0 | } |
978 | 0 | attrValue = (char*)uprv_malloc(attrBufLength + 1); |
979 | 0 | if (attrValue == NULL) { |
980 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
981 | 0 | break; |
982 | 0 | } |
983 | 0 | uprv_strcpy(attrValue, attrBuf); |
984 | 0 | attr->attribute = attrValue; |
985 | |
|
986 | 0 | if (!_addAttributeToList(&firstAttr, attr)) { |
987 | 0 | uprv_free(attr); |
988 | 0 | uprv_free(attrValue); |
989 | 0 | if (strict) { |
990 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
991 | 0 | break; |
992 | 0 | } |
993 | 0 | } |
994 | 0 | } |
995 | | /* for a place holder ExtensionListEntry */ |
996 | 0 | bcpKey = LOCALE_ATTRIBUTE_KEY; |
997 | 0 | bcpValue = NULL; |
998 | 0 | } |
999 | 0 | } else if (isBcpUExt) { |
1000 | 0 | bcpKey = uloc_toUnicodeLocaleKey(key); |
1001 | 0 | if (bcpKey == NULL) { |
1002 | 0 | if (strict) { |
1003 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1004 | 0 | break; |
1005 | 0 | } |
1006 | 0 | continue; |
1007 | 0 | } |
1008 | | |
1009 | | /* we've checked buf is null-terminated above */ |
1010 | 0 | bcpValue = uloc_toUnicodeLocaleType(key, buf); |
1011 | 0 | if (bcpValue == NULL) { |
1012 | 0 | if (strict) { |
1013 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1014 | 0 | break; |
1015 | 0 | } |
1016 | 0 | continue; |
1017 | 0 | } |
1018 | 0 | if (bcpValue == buf) { |
1019 | | /* |
1020 | | When uloc_toUnicodeLocaleType(key, buf) returns the |
1021 | | input value as is, the value is well-formed, but has |
1022 | | no known mapping. This implementation normalizes the |
1023 | | value to lower case. |
1024 | | */ |
1025 | 0 | int32_t bcpValueLen = uprv_strlen(bcpValue); |
1026 | 0 | if (bcpValueLen < extBufCapacity) { |
1027 | 0 | uprv_strcpy(pExtBuf, bcpValue); |
1028 | 0 | T_CString_toLowerCase(pExtBuf); |
1029 | |
|
1030 | 0 | bcpValue = pExtBuf; |
1031 | |
|
1032 | 0 | pExtBuf += (bcpValueLen + 1); |
1033 | 0 | extBufCapacity -= (bcpValueLen + 1); |
1034 | 0 | } else { |
1035 | 0 | if (strict) { |
1036 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1037 | 0 | break; |
1038 | 0 | } |
1039 | 0 | continue; |
1040 | 0 | } |
1041 | 0 | } |
1042 | 0 | } else { |
1043 | 0 | if (*key == PRIVATEUSE) { |
1044 | 0 | if (!_isPrivateuseValueSubtags(buf, len)) { |
1045 | 0 | if (strict) { |
1046 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1047 | 0 | break; |
1048 | 0 | } |
1049 | 0 | continue; |
1050 | 0 | } |
1051 | 0 | } else { |
1052 | 0 | if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) { |
1053 | 0 | if (strict) { |
1054 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1055 | 0 | break; |
1056 | 0 | } |
1057 | 0 | continue; |
1058 | 0 | } |
1059 | 0 | } |
1060 | 0 | bcpKey = key; |
1061 | 0 | if ((len + 1) < extBufCapacity) { |
1062 | 0 | uprv_memcpy(pExtBuf, buf, len); |
1063 | 0 | bcpValue = pExtBuf; |
1064 | |
|
1065 | 0 | pExtBuf += len; |
1066 | |
|
1067 | 0 | *pExtBuf = 0; |
1068 | 0 | pExtBuf++; |
1069 | |
|
1070 | 0 | extBufCapacity -= (len + 1); |
1071 | 0 | } else { |
1072 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1073 | 0 | break; |
1074 | 0 | } |
1075 | 0 | } |
1076 | | |
1077 | | /* create ExtensionListEntry */ |
1078 | 0 | ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
1079 | 0 | if (ext == NULL) { |
1080 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
1081 | 0 | break; |
1082 | 0 | } |
1083 | 0 | ext->key = bcpKey; |
1084 | 0 | ext->value = bcpValue; |
1085 | |
|
1086 | 0 | if (!_addExtensionToList(&firstExt, ext, TRUE)) { |
1087 | 0 | uprv_free(ext); |
1088 | 0 | if (strict) { |
1089 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1090 | 0 | break; |
1091 | 0 | } |
1092 | 0 | } |
1093 | 0 | } |
1094 | | |
1095 | | /* Special handling for POSIX variant - add the keywords for POSIX */ |
1096 | 0 | if (hadPosix) { |
1097 | | /* create ExtensionListEntry for POSIX */ |
1098 | 0 | ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
1099 | 0 | if (ext == NULL) { |
1100 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
1101 | 0 | goto cleanup; |
1102 | 0 | } |
1103 | 0 | ext->key = POSIX_KEY; |
1104 | 0 | ext->value = POSIX_VALUE; |
1105 | |
|
1106 | 0 | if (!_addExtensionToList(&firstExt, ext, TRUE)) { |
1107 | 0 | uprv_free(ext); |
1108 | 0 | } |
1109 | 0 | } |
1110 | | |
1111 | 0 | if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) { |
1112 | 0 | UBool startLDMLExtension = FALSE; |
1113 | 0 | for (ext = firstExt; ext; ext = ext->next) { |
1114 | 0 | if (!startLDMLExtension && uprv_strlen(ext->key) > 1) { |
1115 | | /* first LDML u singleton extension */ |
1116 | 0 | if (reslen < capacity) { |
1117 | 0 | *(appendAt + reslen) = SEP; |
1118 | 0 | } |
1119 | 0 | reslen++; |
1120 | 0 | if (reslen < capacity) { |
1121 | 0 | *(appendAt + reslen) = LDMLEXT; |
1122 | 0 | } |
1123 | 0 | reslen++; |
1124 | |
|
1125 | 0 | startLDMLExtension = TRUE; |
1126 | 0 | } |
1127 | | |
1128 | | /* write out the sorted BCP47 attributes, extensions and private use */ |
1129 | 0 | if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { |
1130 | | /* write the value for the attributes */ |
1131 | 0 | for (attr = firstAttr; attr; attr = attr->next) { |
1132 | 0 | if (reslen < capacity) { |
1133 | 0 | *(appendAt + reslen) = SEP; |
1134 | 0 | } |
1135 | 0 | reslen++; |
1136 | 0 | len = (int32_t)uprv_strlen(attr->attribute); |
1137 | 0 | if (reslen < capacity) { |
1138 | 0 | uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); |
1139 | 0 | } |
1140 | 0 | reslen += len; |
1141 | 0 | } |
1142 | 0 | } else { |
1143 | 0 | if (reslen < capacity) { |
1144 | 0 | *(appendAt + reslen) = SEP; |
1145 | 0 | } |
1146 | 0 | reslen++; |
1147 | 0 | len = (int32_t)uprv_strlen(ext->key); |
1148 | 0 | if (reslen < capacity) { |
1149 | 0 | uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); |
1150 | 0 | } |
1151 | 0 | reslen += len; |
1152 | 0 | if (reslen < capacity) { |
1153 | 0 | *(appendAt + reslen) = SEP; |
1154 | 0 | } |
1155 | 0 | reslen++; |
1156 | 0 | len = (int32_t)uprv_strlen(ext->value); |
1157 | 0 | if (reslen < capacity) { |
1158 | 0 | uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); |
1159 | 0 | } |
1160 | 0 | reslen += len; |
1161 | 0 | } |
1162 | 0 | } |
1163 | 0 | } |
1164 | 0 | cleanup: |
1165 | | /* clean up */ |
1166 | 0 | ext = firstExt; |
1167 | 0 | while (ext != NULL) { |
1168 | 0 | ExtensionListEntry *tmpExt = ext->next; |
1169 | 0 | uprv_free(ext); |
1170 | 0 | ext = tmpExt; |
1171 | 0 | } |
1172 | |
|
1173 | 0 | attr = firstAttr; |
1174 | 0 | while (attr != NULL) { |
1175 | 0 | AttributeListEntry *tmpAttr = attr->next; |
1176 | 0 | char *pValue = (char *)attr->attribute; |
1177 | 0 | uprv_free(pValue); |
1178 | 0 | uprv_free(attr); |
1179 | 0 | attr = tmpAttr; |
1180 | 0 | } |
1181 | |
|
1182 | 0 | uenum_close(keywordEnum); |
1183 | |
|
1184 | 0 | if (U_FAILURE(*status)) { |
1185 | 0 | return 0; |
1186 | 0 | } |
1187 | 0 | } |
1188 | | |
1189 | 0 | return u_terminateChars(appendAt, capacity, reslen, status); |
1190 | 0 | } |
1191 | | |
1192 | | /** |
1193 | | * Append keywords parsed from LDML extension value |
1194 | | * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} |
1195 | | * Note: char* buf is used for storing keywords |
1196 | | */ |
1197 | | static void |
1198 | 0 | _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { |
1199 | 0 | const char *pTag; /* beginning of current subtag */ |
1200 | 0 | const char *pKwds; /* beginning of key-type pairs */ |
1201 | 0 | UBool variantExists = *posixVariant; |
1202 | |
|
1203 | 0 | ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */ |
1204 | 0 | ExtensionListEntry *kwd, *nextKwd; |
1205 | |
|
1206 | 0 | AttributeListEntry *attrFirst = NULL; /* first attribute */ |
1207 | 0 | AttributeListEntry *attr, *nextAttr; |
1208 | |
|
1209 | 0 | int32_t len; |
1210 | 0 | int32_t bufIdx = 0; |
1211 | |
|
1212 | 0 | char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
1213 | 0 | int32_t attrBufIdx = 0; |
1214 | | |
1215 | | /* Reset the posixVariant value */ |
1216 | 0 | *posixVariant = FALSE; |
1217 | |
|
1218 | 0 | pTag = ldmlext; |
1219 | 0 | pKwds = NULL; |
1220 | | |
1221 | | /* Iterate through u extension attributes */ |
1222 | 0 | while (*pTag) { |
1223 | | /* locate next separator char */ |
1224 | 0 | for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); |
1225 | |
|
1226 | 0 | if (ultag_isUnicodeLocaleKey(pTag, len)) { |
1227 | 0 | pKwds = pTag; |
1228 | 0 | break; |
1229 | 0 | } |
1230 | | |
1231 | | /* add this attribute to the list */ |
1232 | 0 | attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); |
1233 | 0 | if (attr == NULL) { |
1234 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
1235 | 0 | goto cleanup; |
1236 | 0 | } |
1237 | | |
1238 | 0 | if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { |
1239 | 0 | uprv_memcpy(&attrBuf[attrBufIdx], pTag, len); |
1240 | 0 | attrBuf[attrBufIdx + len] = 0; |
1241 | 0 | attr->attribute = &attrBuf[attrBufIdx]; |
1242 | 0 | attrBufIdx += (len + 1); |
1243 | 0 | } else { |
1244 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1245 | 0 | goto cleanup; |
1246 | 0 | } |
1247 | | |
1248 | 0 | if (!_addAttributeToList(&attrFirst, attr)) { |
1249 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1250 | 0 | uprv_free(attr); |
1251 | 0 | goto cleanup; |
1252 | 0 | } |
1253 | | |
1254 | | /* next tag */ |
1255 | 0 | pTag += len; |
1256 | 0 | if (*pTag) { |
1257 | | /* next to the separator */ |
1258 | 0 | pTag++; |
1259 | 0 | } |
1260 | 0 | } |
1261 | | |
1262 | 0 | if (attrFirst) { |
1263 | | /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */ |
1264 | |
|
1265 | 0 | if (attrBufIdx > bufSize) { |
1266 | | /* attrBufIdx == <total length of attribute subtag> + 1 */ |
1267 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1268 | 0 | goto cleanup; |
1269 | 0 | } |
1270 | | |
1271 | 0 | kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
1272 | 0 | if (kwd == NULL) { |
1273 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
1274 | 0 | goto cleanup; |
1275 | 0 | } |
1276 | | |
1277 | 0 | kwd->key = LOCALE_ATTRIBUTE_KEY; |
1278 | 0 | kwd->value = buf; |
1279 | | |
1280 | | /* attribute subtags sorted in alphabetical order as type */ |
1281 | 0 | attr = attrFirst; |
1282 | 0 | while (attr != NULL) { |
1283 | 0 | nextAttr = attr->next; |
1284 | | |
1285 | | /* buffer size check is done above */ |
1286 | 0 | if (attr != attrFirst) { |
1287 | 0 | *(buf + bufIdx) = SEP; |
1288 | 0 | bufIdx++; |
1289 | 0 | } |
1290 | |
|
1291 | 0 | len = uprv_strlen(attr->attribute); |
1292 | 0 | uprv_memcpy(buf + bufIdx, attr->attribute, len); |
1293 | 0 | bufIdx += len; |
1294 | |
|
1295 | 0 | attr = nextAttr; |
1296 | 0 | } |
1297 | 0 | *(buf + bufIdx) = 0; |
1298 | 0 | bufIdx++; |
1299 | |
|
1300 | 0 | if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { |
1301 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1302 | 0 | uprv_free(kwd); |
1303 | 0 | goto cleanup; |
1304 | 0 | } |
1305 | | |
1306 | | /* once keyword entry is created, delete the attribute list */ |
1307 | 0 | attr = attrFirst; |
1308 | 0 | while (attr != NULL) { |
1309 | 0 | nextAttr = attr->next; |
1310 | 0 | uprv_free(attr); |
1311 | 0 | attr = nextAttr; |
1312 | 0 | } |
1313 | 0 | attrFirst = NULL; |
1314 | 0 | } |
1315 | | |
1316 | 0 | if (pKwds) { |
1317 | 0 | const char *pBcpKey = NULL; /* u extenstion key subtag */ |
1318 | 0 | const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */ |
1319 | 0 | int32_t bcpKeyLen = 0; |
1320 | 0 | int32_t bcpTypeLen = 0; |
1321 | 0 | UBool isDone = FALSE; |
1322 | |
|
1323 | 0 | pTag = pKwds; |
1324 | | /* BCP47 representation of LDML key/type pairs */ |
1325 | 0 | while (!isDone) { |
1326 | 0 | const char *pNextBcpKey = NULL; |
1327 | 0 | int32_t nextBcpKeyLen = 0; |
1328 | 0 | UBool emitKeyword = FALSE; |
1329 | |
|
1330 | 0 | if (*pTag) { |
1331 | | /* locate next separator char */ |
1332 | 0 | for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); |
1333 | |
|
1334 | 0 | if (ultag_isUnicodeLocaleKey(pTag, len)) { |
1335 | 0 | if (pBcpKey) { |
1336 | 0 | emitKeyword = TRUE; |
1337 | 0 | pNextBcpKey = pTag; |
1338 | 0 | nextBcpKeyLen = len; |
1339 | 0 | } else { |
1340 | 0 | pBcpKey = pTag; |
1341 | 0 | bcpKeyLen = len; |
1342 | 0 | } |
1343 | 0 | } else { |
1344 | 0 | U_ASSERT(pBcpKey != NULL); |
1345 | | /* within LDML type subtags */ |
1346 | 0 | if (pBcpType) { |
1347 | 0 | bcpTypeLen += (len + 1); |
1348 | 0 | } else { |
1349 | 0 | pBcpType = pTag; |
1350 | 0 | bcpTypeLen = len; |
1351 | 0 | } |
1352 | 0 | } |
1353 | | |
1354 | | /* next tag */ |
1355 | 0 | pTag += len; |
1356 | 0 | if (*pTag) { |
1357 | | /* next to the separator */ |
1358 | 0 | pTag++; |
1359 | 0 | } |
1360 | 0 | } else { |
1361 | | /* processing last one */ |
1362 | 0 | emitKeyword = TRUE; |
1363 | 0 | isDone = TRUE; |
1364 | 0 | } |
1365 | |
|
1366 | 0 | if (emitKeyword) { |
1367 | 0 | const char *pKey = NULL; /* LDML key */ |
1368 | 0 | const char *pType = NULL; /* LDML type */ |
1369 | |
|
1370 | 0 | char bcpKeyBuf[9]; /* BCP key length is always 2 for now */ |
1371 | |
|
1372 | 0 | U_ASSERT(pBcpKey != NULL); |
1373 | |
|
1374 | 0 | if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) { |
1375 | | /* the BCP key is invalid */ |
1376 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1377 | 0 | goto cleanup; |
1378 | 0 | } |
1379 | | |
1380 | 0 | uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen); |
1381 | 0 | bcpKeyBuf[bcpKeyLen] = 0; |
1382 | | |
1383 | | /* u extension key to LDML key */ |
1384 | 0 | pKey = uloc_toLegacyKey(bcpKeyBuf); |
1385 | 0 | if (pKey == NULL) { |
1386 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1387 | 0 | goto cleanup; |
1388 | 0 | } |
1389 | 0 | if (pKey == bcpKeyBuf) { |
1390 | | /* |
1391 | | The key returned by toLegacyKey points to the input buffer. |
1392 | | We normalize the result key to lower case. |
1393 | | */ |
1394 | 0 | T_CString_toLowerCase(bcpKeyBuf); |
1395 | 0 | if (bufSize - bufIdx - 1 >= bcpKeyLen) { |
1396 | 0 | uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen); |
1397 | 0 | pKey = buf + bufIdx; |
1398 | 0 | bufIdx += bcpKeyLen; |
1399 | 0 | *(buf + bufIdx) = 0; |
1400 | 0 | bufIdx++; |
1401 | 0 | } else { |
1402 | 0 | *status = U_BUFFER_OVERFLOW_ERROR; |
1403 | 0 | goto cleanup; |
1404 | 0 | } |
1405 | 0 | } |
1406 | | |
1407 | 0 | if (pBcpType) { |
1408 | 0 | char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */ |
1409 | 0 | if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) { |
1410 | | /* the BCP type is too long */ |
1411 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1412 | 0 | goto cleanup; |
1413 | 0 | } |
1414 | | |
1415 | 0 | uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen); |
1416 | 0 | bcpTypeBuf[bcpTypeLen] = 0; |
1417 | | |
1418 | | /* BCP type to locale type */ |
1419 | 0 | pType = uloc_toLegacyType(pKey, bcpTypeBuf); |
1420 | 0 | if (pType == NULL) { |
1421 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1422 | 0 | goto cleanup; |
1423 | 0 | } |
1424 | 0 | if (pType == bcpTypeBuf) { |
1425 | | /* |
1426 | | The type returned by toLegacyType points to the input buffer. |
1427 | | We normalize the result type to lower case. |
1428 | | */ |
1429 | | /* normalize to lower case */ |
1430 | 0 | T_CString_toLowerCase(bcpTypeBuf); |
1431 | 0 | if (bufSize - bufIdx - 1 >= bcpTypeLen) { |
1432 | 0 | uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen); |
1433 | 0 | pType = buf + bufIdx; |
1434 | 0 | bufIdx += bcpTypeLen; |
1435 | 0 | *(buf + bufIdx) = 0; |
1436 | 0 | bufIdx++; |
1437 | 0 | } else { |
1438 | 0 | *status = U_BUFFER_OVERFLOW_ERROR; |
1439 | 0 | goto cleanup; |
1440 | 0 | } |
1441 | 0 | } |
1442 | 0 | } else { |
1443 | | /* typeless - default type value is "yes" */ |
1444 | 0 | pType = LOCALE_TYPE_YES; |
1445 | 0 | } |
1446 | | |
1447 | | /* Special handling for u-va-posix, since we want to treat this as a variant, |
1448 | | not as a keyword */ |
1449 | 0 | if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) { |
1450 | 0 | *posixVariant = TRUE; |
1451 | 0 | } else { |
1452 | | /* create an ExtensionListEntry for this keyword */ |
1453 | 0 | kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
1454 | 0 | if (kwd == NULL) { |
1455 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
1456 | 0 | goto cleanup; |
1457 | 0 | } |
1458 | | |
1459 | 0 | kwd->key = pKey; |
1460 | 0 | kwd->value = pType; |
1461 | |
|
1462 | 0 | if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { |
1463 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1464 | 0 | uprv_free(kwd); |
1465 | 0 | goto cleanup; |
1466 | 0 | } |
1467 | 0 | } |
1468 | | |
1469 | 0 | pBcpKey = pNextBcpKey; |
1470 | 0 | bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0; |
1471 | 0 | pBcpType = NULL; |
1472 | 0 | bcpTypeLen = 0; |
1473 | 0 | } |
1474 | 0 | } |
1475 | 0 | } |
1476 | | |
1477 | 0 | kwd = kwdFirst; |
1478 | 0 | while (kwd != NULL) { |
1479 | 0 | nextKwd = kwd->next; |
1480 | 0 | _addExtensionToList(appendTo, kwd, FALSE); |
1481 | 0 | kwd = nextKwd; |
1482 | 0 | } |
1483 | |
|
1484 | 0 | return; |
1485 | | |
1486 | 0 | cleanup: |
1487 | 0 | attr = attrFirst; |
1488 | 0 | while (attr != NULL) { |
1489 | 0 | nextAttr = attr->next; |
1490 | 0 | uprv_free(attr); |
1491 | 0 | attr = nextAttr; |
1492 | 0 | } |
1493 | |
|
1494 | 0 | kwd = kwdFirst; |
1495 | 0 | while (kwd != NULL) { |
1496 | 0 | nextKwd = kwd->next; |
1497 | 0 | uprv_free(kwd); |
1498 | 0 | kwd = nextKwd; |
1499 | 0 | } |
1500 | 0 | } |
1501 | | |
1502 | | |
1503 | | static int32_t |
1504 | 0 | _appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { |
1505 | 0 | int32_t reslen = 0; |
1506 | 0 | int32_t i, n; |
1507 | 0 | int32_t len; |
1508 | 0 | ExtensionListEntry *kwdFirst = NULL; |
1509 | 0 | ExtensionListEntry *kwd; |
1510 | 0 | const char *key, *type; |
1511 | 0 | char *kwdBuf = NULL; |
1512 | 0 | int32_t kwdBufLength = capacity; |
1513 | 0 | UBool posixVariant = FALSE; |
1514 | |
|
1515 | 0 | if (U_FAILURE(*status)) { |
1516 | 0 | return 0; |
1517 | 0 | } |
1518 | | |
1519 | 0 | kwdBuf = (char*)uprv_malloc(kwdBufLength); |
1520 | 0 | if (kwdBuf == NULL) { |
1521 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
1522 | 0 | return 0; |
1523 | 0 | } |
1524 | | |
1525 | | /* Determine if variants already exists */ |
1526 | 0 | if (ultag_getVariantsSize(langtag)) { |
1527 | 0 | posixVariant = TRUE; |
1528 | 0 | } |
1529 | |
|
1530 | 0 | n = ultag_getExtensionsSize(langtag); |
1531 | | |
1532 | | /* resolve locale keywords and reordering keys */ |
1533 | 0 | for (i = 0; i < n; i++) { |
1534 | 0 | key = ultag_getExtensionKey(langtag, i); |
1535 | 0 | type = ultag_getExtensionValue(langtag, i); |
1536 | 0 | if (*key == LDMLEXT) { |
1537 | 0 | _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status); |
1538 | 0 | if (U_FAILURE(*status)) { |
1539 | 0 | break; |
1540 | 0 | } |
1541 | 0 | } else { |
1542 | 0 | kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
1543 | 0 | if (kwd == NULL) { |
1544 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
1545 | 0 | break; |
1546 | 0 | } |
1547 | 0 | kwd->key = key; |
1548 | 0 | kwd->value = type; |
1549 | 0 | if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { |
1550 | 0 | uprv_free(kwd); |
1551 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1552 | 0 | break; |
1553 | 0 | } |
1554 | 0 | } |
1555 | 0 | } |
1556 | |
|
1557 | 0 | if (U_SUCCESS(*status)) { |
1558 | 0 | type = ultag_getPrivateUse(langtag); |
1559 | 0 | if ((int32_t)uprv_strlen(type) > 0) { |
1560 | | /* add private use as a keyword */ |
1561 | 0 | kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
1562 | 0 | if (kwd == NULL) { |
1563 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
1564 | 0 | } else { |
1565 | 0 | kwd->key = PRIVATEUSE_KEY; |
1566 | 0 | kwd->value = type; |
1567 | 0 | if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { |
1568 | 0 | uprv_free(kwd); |
1569 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1570 | 0 | } |
1571 | 0 | } |
1572 | 0 | } |
1573 | 0 | } |
1574 | | |
1575 | | /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ |
1576 | |
|
1577 | 0 | if (U_SUCCESS(*status) && posixVariant) { |
1578 | 0 | len = (int32_t) uprv_strlen(_POSIX); |
1579 | 0 | if (reslen < capacity) { |
1580 | 0 | uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen)); |
1581 | 0 | } |
1582 | 0 | reslen += len; |
1583 | 0 | } |
1584 | |
|
1585 | 0 | if (U_SUCCESS(*status) && kwdFirst != NULL) { |
1586 | | /* write out the sorted keywords */ |
1587 | 0 | UBool firstValue = TRUE; |
1588 | 0 | kwd = kwdFirst; |
1589 | 0 | do { |
1590 | 0 | if (reslen < capacity) { |
1591 | 0 | if (firstValue) { |
1592 | | /* '@' */ |
1593 | 0 | *(appendAt + reslen) = LOCALE_EXT_SEP; |
1594 | 0 | firstValue = FALSE; |
1595 | 0 | } else { |
1596 | | /* ';' */ |
1597 | 0 | *(appendAt + reslen) = LOCALE_KEYWORD_SEP; |
1598 | 0 | } |
1599 | 0 | } |
1600 | 0 | reslen++; |
1601 | | |
1602 | | /* key */ |
1603 | 0 | len = (int32_t)uprv_strlen(kwd->key); |
1604 | 0 | if (reslen < capacity) { |
1605 | 0 | uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); |
1606 | 0 | } |
1607 | 0 | reslen += len; |
1608 | | |
1609 | | /* '=' */ |
1610 | 0 | if (reslen < capacity) { |
1611 | 0 | *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; |
1612 | 0 | } |
1613 | 0 | reslen++; |
1614 | | |
1615 | | /* type */ |
1616 | 0 | len = (int32_t)uprv_strlen(kwd->value); |
1617 | 0 | if (reslen < capacity) { |
1618 | 0 | uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); |
1619 | 0 | } |
1620 | 0 | reslen += len; |
1621 | |
|
1622 | 0 | kwd = kwd->next; |
1623 | 0 | } while (kwd); |
1624 | 0 | } |
1625 | | |
1626 | | /* clean up */ |
1627 | 0 | kwd = kwdFirst; |
1628 | 0 | while (kwd != NULL) { |
1629 | 0 | ExtensionListEntry *tmpKwd = kwd->next; |
1630 | 0 | uprv_free(kwd); |
1631 | 0 | kwd = tmpKwd; |
1632 | 0 | } |
1633 | |
|
1634 | 0 | uprv_free(kwdBuf); |
1635 | |
|
1636 | 0 | if (U_FAILURE(*status)) { |
1637 | 0 | return 0; |
1638 | 0 | } |
1639 | | |
1640 | 0 | return u_terminateChars(appendAt, capacity, reslen, status); |
1641 | 0 | } |
1642 | | |
1643 | | static int32_t |
1644 | 0 | _appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { |
1645 | 0 | (void)hadPosix; |
1646 | 0 | char buf[ULOC_FULLNAME_CAPACITY]; |
1647 | 0 | char tmpAppend[ULOC_FULLNAME_CAPACITY]; |
1648 | 0 | UErrorCode tmpStatus = U_ZERO_ERROR; |
1649 | 0 | int32_t len, i; |
1650 | 0 | int32_t reslen = 0; |
1651 | |
|
1652 | 0 | if (U_FAILURE(*status)) { |
1653 | 0 | return 0; |
1654 | 0 | } |
1655 | | |
1656 | 0 | len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); |
1657 | 0 | if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { |
1658 | 0 | if (strict) { |
1659 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1660 | 0 | } |
1661 | 0 | return 0; |
1662 | 0 | } |
1663 | | |
1664 | 0 | if (len > 0) { |
1665 | 0 | char *p, *pPriv; |
1666 | 0 | UBool bNext = TRUE; |
1667 | 0 | UBool firstValue = TRUE; |
1668 | 0 | UBool writeValue; |
1669 | |
|
1670 | 0 | pPriv = NULL; |
1671 | 0 | p = buf; |
1672 | 0 | while (bNext) { |
1673 | 0 | writeValue = FALSE; |
1674 | 0 | if (*p == SEP || *p == LOCALE_SEP || *p == 0) { |
1675 | 0 | if (*p == 0) { |
1676 | 0 | bNext = FALSE; |
1677 | 0 | } else { |
1678 | 0 | *p = 0; /* terminate */ |
1679 | 0 | } |
1680 | 0 | if (pPriv != NULL) { |
1681 | | /* Private use in the canonical format is lowercase in BCP47 */ |
1682 | 0 | for (i = 0; *(pPriv + i) != 0; i++) { |
1683 | 0 | *(pPriv + i) = uprv_tolower(*(pPriv + i)); |
1684 | 0 | } |
1685 | | |
1686 | | /* validate */ |
1687 | 0 | if (_isPrivateuseValueSubtag(pPriv, -1)) { |
1688 | 0 | if (firstValue) { |
1689 | 0 | if (!_isVariantSubtag(pPriv, -1)) { |
1690 | 0 | writeValue = TRUE; |
1691 | 0 | } |
1692 | 0 | } else { |
1693 | 0 | writeValue = TRUE; |
1694 | 0 | } |
1695 | 0 | } else if (strict) { |
1696 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
1697 | 0 | break; |
1698 | 0 | } else { |
1699 | 0 | break; |
1700 | 0 | } |
1701 | | |
1702 | 0 | if (writeValue) { |
1703 | 0 | if (reslen < capacity) { |
1704 | 0 | tmpAppend[reslen++] = SEP; |
1705 | 0 | } |
1706 | |
|
1707 | 0 | if (firstValue) { |
1708 | 0 | if (reslen < capacity) { |
1709 | 0 | tmpAppend[reslen++] = *PRIVATEUSE_KEY; |
1710 | 0 | } |
1711 | |
|
1712 | 0 | if (reslen < capacity) { |
1713 | 0 | tmpAppend[reslen++] = SEP; |
1714 | 0 | } |
1715 | |
|
1716 | 0 | len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); |
1717 | 0 | if (reslen < capacity) { |
1718 | 0 | uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); |
1719 | 0 | } |
1720 | 0 | reslen += len; |
1721 | |
|
1722 | 0 | if (reslen < capacity) { |
1723 | 0 | tmpAppend[reslen++] = SEP; |
1724 | 0 | } |
1725 | |
|
1726 | 0 | firstValue = FALSE; |
1727 | 0 | } |
1728 | |
|
1729 | 0 | len = (int32_t)uprv_strlen(pPriv); |
1730 | 0 | if (reslen < capacity) { |
1731 | 0 | uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); |
1732 | 0 | } |
1733 | 0 | reslen += len; |
1734 | 0 | } |
1735 | 0 | } |
1736 | | /* reset private use starting position */ |
1737 | 0 | pPriv = NULL; |
1738 | 0 | } else if (pPriv == NULL) { |
1739 | 0 | pPriv = p; |
1740 | 0 | } |
1741 | 0 | p++; |
1742 | 0 | } |
1743 | |
|
1744 | 0 | if (U_FAILURE(*status)) { |
1745 | 0 | return 0; |
1746 | 0 | } |
1747 | 0 | } |
1748 | | |
1749 | 0 | if (U_SUCCESS(*status)) { |
1750 | 0 | len = reslen; |
1751 | 0 | if (reslen < capacity) { |
1752 | 0 | uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen)); |
1753 | 0 | } |
1754 | 0 | } |
1755 | |
|
1756 | 0 | u_terminateChars(appendAt, capacity, reslen, status); |
1757 | |
|
1758 | 0 | return reslen; |
1759 | 0 | } |
1760 | | |
1761 | | /* |
1762 | | * ------------------------------------------------- |
1763 | | * |
1764 | | * ultag_ functions |
1765 | | * |
1766 | | * ------------------------------------------------- |
1767 | | */ |
1768 | | |
1769 | | /* Bit flags used by the parser: ultag_parse keeps a mask ("next") of the subtag kinds it accepts at the current position */ |
1770 | 0 | #define LANG 0x0001 /* language subtag (tested with _isLanguageSubtag) */ |
1771 | 0 | #define EXTL 0x0002 /* extlang subtag (tested with _isExtlangSubtag) */ |
1772 | 0 | #define SCRT 0x0004 /* script subtag (tested with _isScriptSubtag) */ |
1773 | 0 | #define REGN 0x0008 /* region subtag (tested with _isRegionSubtag) */ |
1774 | 0 | #define VART 0x0010 /* variant subtag (tested with _isVariantSubtag) */ |
1775 | 0 | #define EXTS 0x0020 /* extension singleton (tested with _isExtensionSingleton) */ |
1776 | 0 | #define EXTV 0x0040 /* extension value subtag (tested with _isExtensionSubtag) */ |
1777 | 0 | #define PRIV 0x0080 /* presumably the private use section; its handling is not visible here - confirm */ |
1778 | | |
1778 | | |
1779 | | /** |
1780 | | * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing |
1781 | | * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ ) |
1782 | | * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above. |
1783 | | */ |
1784 | | #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) |
1785 | | #pragma optimize( "", off ) |
1786 | | #endif |
1787 | | |
1788 | | static ULanguageTag* |
1789 | 0 | ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { |
1790 | 0 | ULanguageTag *t; |
1791 | 0 | char *tagBuf; |
1792 | 0 | int16_t next; |
1793 | 0 | char *pSubtag, *pNext, *pLastGoodPosition; |
1794 | 0 | int32_t subtagLen; |
1795 | 0 | int32_t extlangIdx; |
1796 | 0 | ExtensionListEntry *pExtension; |
1797 | 0 | char *pExtValueSubtag, *pExtValueSubtagEnd; |
1798 | 0 | int32_t i; |
1799 | 0 | UBool privateuseVar = FALSE; |
1800 | 0 | int32_t grandfatheredLen = 0; |
1801 | |
|
1802 | 0 | if (parsedLen != NULL) { |
1803 | 0 | *parsedLen = 0; |
1804 | 0 | } |
1805 | |
|
1806 | 0 | if (U_FAILURE(*status)) { |
1807 | 0 | return NULL; |
1808 | 0 | } |
1809 | | |
1810 | 0 | if (tagLen < 0) { |
1811 | 0 | tagLen = (int32_t)uprv_strlen(tag); |
1812 | 0 | } |
1813 | | |
1814 | | /* copy the entire string */ |
1815 | 0 | tagBuf = (char*)uprv_malloc(tagLen + 1); |
1816 | 0 | if (tagBuf == NULL) { |
1817 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
1818 | 0 | return NULL; |
1819 | 0 | } |
1820 | 0 | uprv_memcpy(tagBuf, tag, tagLen); |
1821 | 0 | *(tagBuf + tagLen) = 0; |
1822 | | |
1823 | | /* create a ULanguageTag */ |
1824 | 0 | t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); |
1825 | 0 | if (t == NULL) { |
1826 | 0 | uprv_free(tagBuf); |
1827 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
1828 | 0 | return NULL; |
1829 | 0 | } |
1830 | 0 | _initializeULanguageTag(t); |
1831 | 0 | t->buf = tagBuf; |
1832 | |
|
1833 | 0 | if (tagLen < MINLEN) { |
1834 | | /* the input tag is too short - return empty ULanguageTag */ |
1835 | 0 | return t; |
1836 | 0 | } |
1837 | | |
1838 | | /* check if the tag is grandfathered */ |
1839 | 0 | for (i = 0; GRANDFATHERED[i] != NULL; i += 2) { |
1840 | 0 | if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) { |
1841 | 0 | int32_t newTagLength; |
1842 | |
|
1843 | 0 | grandfatheredLen = tagLen; /* back up for output parsedLen */ |
1844 | 0 | newTagLength = uprv_strlen(GRANDFATHERED[i+1]); |
1845 | 0 | if (tagLen < newTagLength) { |
1846 | 0 | uprv_free(tagBuf); |
1847 | 0 | tagBuf = (char*)uprv_malloc(newTagLength + 1); |
1848 | 0 | if (tagBuf == NULL) { |
1849 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
1850 | 0 | ultag_close(t); |
1851 | 0 | return NULL; |
1852 | 0 | } |
1853 | 0 | t->buf = tagBuf; |
1854 | 0 | tagLen = newTagLength; |
1855 | 0 | } |
1856 | 0 | uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); |
1857 | 0 | break; |
1858 | 0 | } |
1859 | 0 | } |
1860 | | |
1861 | | /* |
1862 | | * langtag = language |
1863 | | * ["-" script] |
1864 | | * ["-" region] |
1865 | | * *("-" variant) |
1866 | | * *("-" extension) |
1867 | | * ["-" privateuse] |
1868 | | */ |
1869 | | |
1870 | 0 | next = LANG | PRIV; |
1871 | 0 | pNext = pLastGoodPosition = tagBuf; |
1872 | 0 | extlangIdx = 0; |
1873 | 0 | pExtension = NULL; |
1874 | 0 | pExtValueSubtag = NULL; |
1875 | 0 | pExtValueSubtagEnd = NULL; |
1876 | |
|
1877 | 0 | while (pNext) { |
1878 | 0 | char *pSep; |
1879 | |
|
1880 | 0 | pSubtag = pNext; |
1881 | | |
1882 | | /* locate next separator char */ |
1883 | 0 | pSep = pSubtag; |
1884 | 0 | while (*pSep) { |
1885 | 0 | if (*pSep == SEP) { |
1886 | 0 | break; |
1887 | 0 | } |
1888 | 0 | pSep++; |
1889 | 0 | } |
1890 | 0 | if (*pSep == 0) { |
1891 | | /* last subtag */ |
1892 | 0 | pNext = NULL; |
1893 | 0 | } else { |
1894 | 0 | pNext = pSep + 1; |
1895 | 0 | } |
1896 | 0 | subtagLen = (int32_t)(pSep - pSubtag); |
1897 | |
|
1898 | 0 | if (next & LANG) { |
1899 | 0 | if (_isLanguageSubtag(pSubtag, subtagLen)) { |
1900 | 0 | *pSep = 0; /* terminate */ |
1901 | 0 | t->language = T_CString_toLowerCase(pSubtag); |
1902 | |
|
1903 | 0 | pLastGoodPosition = pSep; |
1904 | 0 | next = EXTL | SCRT | REGN | VART | EXTS | PRIV; |
1905 | 0 | continue; |
1906 | 0 | } |
1907 | 0 | } |
1908 | 0 | if (next & EXTL) { |
1909 | 0 | if (_isExtlangSubtag(pSubtag, subtagLen)) { |
1910 | 0 | *pSep = 0; |
1911 | 0 | t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); |
1912 | |
|
1913 | 0 | pLastGoodPosition = pSep; |
1914 | 0 | if (extlangIdx < 3) { |
1915 | 0 | next = EXTL | SCRT | REGN | VART | EXTS | PRIV; |
1916 | 0 | } else { |
1917 | 0 | next = SCRT | REGN | VART | EXTS | PRIV; |
1918 | 0 | } |
1919 | 0 | continue; |
1920 | 0 | } |
1921 | 0 | } |
1922 | 0 | if (next & SCRT) { |
1923 | 0 | if (_isScriptSubtag(pSubtag, subtagLen)) { |
1924 | 0 | char *p = pSubtag; |
1925 | |
|
1926 | 0 | *pSep = 0; |
1927 | | |
1928 | | /* to title case */ |
1929 | 0 | *p = uprv_toupper(*p); |
1930 | 0 | p++; |
1931 | 0 | for (; *p; p++) { |
1932 | 0 | *p = uprv_tolower(*p); |
1933 | 0 | } |
1934 | |
|
1935 | 0 | t->script = pSubtag; |
1936 | |
|
1937 | 0 | pLastGoodPosition = pSep; |
1938 | 0 | next = REGN | VART | EXTS | PRIV; |
1939 | 0 | continue; |
1940 | 0 | } |
1941 | 0 | } |
1942 | 0 | if (next & REGN) { |
1943 | 0 | if (_isRegionSubtag(pSubtag, subtagLen)) { |
1944 | 0 | *pSep = 0; |
1945 | 0 | t->region = T_CString_toUpperCase(pSubtag); |
1946 | |
|
1947 | 0 | pLastGoodPosition = pSep; |
1948 | 0 | next = VART | EXTS | PRIV; |
1949 | 0 | continue; |
1950 | 0 | } |
1951 | 0 | } |
1952 | 0 | if (next & VART) { |
1953 | 0 | if (_isVariantSubtag(pSubtag, subtagLen) || |
1954 | 0 | (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) { |
1955 | 0 | VariantListEntry *var; |
1956 | 0 | UBool isAdded; |
1957 | |
|
1958 | 0 | var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); |
1959 | 0 | if (var == NULL) { |
1960 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
1961 | 0 | goto error; |
1962 | 0 | } |
1963 | 0 | *pSep = 0; |
1964 | 0 | var->variant = T_CString_toUpperCase(pSubtag); |
1965 | 0 | isAdded = _addVariantToList(&(t->variants), var); |
1966 | 0 | if (!isAdded) { |
1967 | | /* duplicated variant entry */ |
1968 | 0 | uprv_free(var); |
1969 | 0 | break; |
1970 | 0 | } |
1971 | 0 | pLastGoodPosition = pSep; |
1972 | 0 | next = VART | EXTS | PRIV; |
1973 | 0 | continue; |
1974 | 0 | } |
1975 | 0 | } |
1976 | 0 | if (next & EXTS) { |
1977 | 0 | if (_isExtensionSingleton(pSubtag, subtagLen)) { |
1978 | 0 | if (pExtension != NULL) { |
1979 | 0 | if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { |
1980 | | /* the previous extension is incomplete */ |
1981 | 0 | uprv_free(pExtension); |
1982 | 0 | pExtension = NULL; |
1983 | 0 | break; |
1984 | 0 | } |
1985 | | |
1986 | | /* terminate the previous extension value */ |
1987 | 0 | *pExtValueSubtagEnd = 0; |
1988 | 0 | pExtension->value = T_CString_toLowerCase(pExtValueSubtag); |
1989 | | |
1990 | | /* insert the extension to the list */ |
1991 | 0 | if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { |
1992 | 0 | pLastGoodPosition = pExtValueSubtagEnd; |
1993 | 0 | } else { |
1994 | | /* stop parsing here */ |
1995 | 0 | uprv_free(pExtension); |
1996 | 0 | pExtension = NULL; |
1997 | 0 | break; |
1998 | 0 | } |
1999 | 0 | } |
2000 | | |
2001 | | /* create a new extension */ |
2002 | 0 | pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); |
2003 | 0 | if (pExtension == NULL) { |
2004 | 0 | *status = U_MEMORY_ALLOCATION_ERROR; |
2005 | 0 | goto error; |
2006 | 0 | } |
2007 | 0 | *pSep = 0; |
2008 | 0 | pExtension->key = T_CString_toLowerCase(pSubtag); |
2009 | 0 | pExtension->value = NULL; /* will be set later */ |
2010 | | |
2011 | | /* |
2012 | | * reset the start and the end location of extension value |
2013 | | * subtags for this extension |
2014 | | */ |
2015 | 0 | pExtValueSubtag = NULL; |
2016 | 0 | pExtValueSubtagEnd = NULL; |
2017 | |
|
2018 | 0 | next = EXTV; |
2019 | 0 | continue; |
2020 | 0 | } |
2021 | 0 | } |
2022 | 0 | if (next & EXTV) { |
2023 | 0 | if (_isExtensionSubtag(pSubtag, subtagLen)) { |
2024 | 0 | if (pExtValueSubtag == NULL) { |
2025 | | /* if the start position of this extension's value is not set yet,
2026 | | this one is the first value subtag */
2027 | 0 | pExtValueSubtag = pSubtag; |
2028 | 0 | } |
2029 | | |
2030 | | /* Mark the end of this subtag */ |
2031 | 0 | pExtValueSubtagEnd = pSep; |
2032 | 0 | next = EXTS | EXTV | PRIV; |
2033 | |
|
2034 | 0 | continue; |
2035 | 0 | } |
2036 | 0 | } |
2037 | 0 | if (next & PRIV) { |
2038 | 0 | if (uprv_tolower(*pSubtag) == PRIVATEUSE) { |
2039 | 0 | char *pPrivuseVal; |
2040 | |
|
2041 | 0 | if (pExtension != NULL) { |
2042 | | /* Process the last extension */ |
2043 | 0 | if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { |
2044 | | /* the previous extension is incomplete */ |
2045 | 0 | uprv_free(pExtension); |
2046 | 0 | pExtension = NULL; |
2047 | 0 | break; |
2048 | 0 | } else { |
2049 | | /* terminate the previous extension value */ |
2050 | 0 | *pExtValueSubtagEnd = 0; |
2051 | 0 | pExtension->value = T_CString_toLowerCase(pExtValueSubtag); |
2052 | | |
2053 | | /* insert the extension to the list */ |
2054 | 0 | if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { |
2055 | 0 | pLastGoodPosition = pExtValueSubtagEnd; |
2056 | 0 | pExtension = NULL; |
2057 | 0 | } else { |
2058 | | /* stop parsing here */ |
2059 | 0 | uprv_free(pExtension); |
2060 | 0 | pExtension = NULL; |
2061 | 0 | break; |
2062 | 0 | } |
2063 | 0 | } |
2064 | 0 | } |
2065 | | |
2066 | | /* The rest of part will be private use value subtags */ |
2067 | 0 | if (pNext == NULL) { |
2068 | | /* empty private use subtag */ |
2069 | 0 | break; |
2070 | 0 | } |
2071 | | /* back up the private use value start position */ |
2072 | 0 | pPrivuseVal = pNext; |
2073 | | |
2074 | | /* validate private use value subtags */ |
2075 | 0 | while (pNext) { |
2076 | 0 | pSubtag = pNext; |
2077 | 0 | pSep = pSubtag; |
2078 | 0 | while (*pSep) { |
2079 | 0 | if (*pSep == SEP) { |
2080 | 0 | break; |
2081 | 0 | } |
2082 | 0 | pSep++; |
2083 | 0 | } |
2084 | 0 | if (*pSep == 0) { |
2085 | | /* last subtag */ |
2086 | 0 | pNext = NULL; |
2087 | 0 | } else { |
2088 | 0 | pNext = pSep + 1; |
2089 | 0 | } |
2090 | 0 | subtagLen = (int32_t)(pSep - pSubtag); |
2091 | |
|
2092 | 0 | if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) { |
2093 | 0 | *pSep = 0; |
2094 | 0 | next = VART; |
2095 | 0 | privateuseVar = TRUE; |
2096 | 0 | break; |
2097 | 0 | } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { |
2098 | 0 | pLastGoodPosition = pSep; |
2099 | 0 | } else { |
2100 | 0 | break; |
2101 | 0 | } |
2102 | 0 | } |
2103 | |
|
2104 | 0 | if (next == VART) { |
2105 | 0 | continue; |
2106 | 0 | } |
2107 | | |
2108 | 0 | if (pLastGoodPosition - pPrivuseVal > 0) { |
2109 | 0 | *pLastGoodPosition = 0; |
2110 | 0 | t->privateuse = T_CString_toLowerCase(pPrivuseVal); |
2111 | 0 | } |
2112 | | /* No more subtags, exiting the parse loop */ |
2113 | 0 | break; |
2114 | 0 | } |
2115 | 0 | break; |
2116 | 0 | } |
2117 | | |
2118 | | /* If we fell through here, it means this subtag is illegal - quit parsing */ |
2119 | 0 | break; |
2120 | 0 | } |
2121 | | |
2122 | 0 | if (pExtension != NULL) { |
2123 | | /* Process the last extension */ |
2124 | 0 | if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { |
2125 | | /* the previous extension is incomplete */ |
2126 | 0 | uprv_free(pExtension); |
2127 | 0 | } else { |
2128 | | /* terminate the previous extension value */ |
2129 | 0 | *pExtValueSubtagEnd = 0; |
2130 | 0 | pExtension->value = T_CString_toLowerCase(pExtValueSubtag); |
2131 | | /* insert the extension to the list */ |
2132 | 0 | if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { |
2133 | 0 | pLastGoodPosition = pExtValueSubtagEnd; |
2134 | 0 | } else { |
2135 | 0 | uprv_free(pExtension); |
2136 | 0 | } |
2137 | 0 | } |
2138 | 0 | } |
2139 | |
|
2140 | 0 | if (parsedLen != NULL) { |
2141 | 0 | *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf); |
2142 | 0 | } |
2143 | |
|
2144 | 0 | return t; |
2145 | | |
2146 | 0 | error: |
2147 | 0 | ultag_close(t); |
2148 | 0 | return NULL; |
2149 | 0 | } |
2150 | | |
2151 | | /** |
2152 | | * Ticket #12705 - Turn optimization back on. |
2153 | | */ |
2154 | | #if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) |
2155 | | #pragma optimize( "", on ) |
2156 | | #endif |
2157 | | |
2158 | | static void |
2159 | 0 | ultag_close(ULanguageTag* langtag) { |
2160 | |
|
2161 | 0 | if (langtag == NULL) { |
2162 | 0 | return; |
2163 | 0 | } |
2164 | | |
2165 | 0 | uprv_free(langtag->buf); |
2166 | |
|
2167 | 0 | if (langtag->variants) { |
2168 | 0 | VariantListEntry *curVar = langtag->variants; |
2169 | 0 | while (curVar) { |
2170 | 0 | VariantListEntry *nextVar = curVar->next; |
2171 | 0 | uprv_free(curVar); |
2172 | 0 | curVar = nextVar; |
2173 | 0 | } |
2174 | 0 | } |
2175 | |
|
2176 | 0 | if (langtag->extensions) { |
2177 | 0 | ExtensionListEntry *curExt = langtag->extensions; |
2178 | 0 | while (curExt) { |
2179 | 0 | ExtensionListEntry *nextExt = curExt->next; |
2180 | 0 | uprv_free(curExt); |
2181 | 0 | curExt = nextExt; |
2182 | 0 | } |
2183 | 0 | } |
2184 | |
|
2185 | 0 | uprv_free(langtag); |
2186 | 0 | } |
2187 | | |
2188 | | static const char* |
2189 | 0 | ultag_getLanguage(const ULanguageTag* langtag) { |
2190 | 0 | return langtag->language; |
2191 | 0 | } |
2192 | | |
#if 0
/*
 * Currently compiled out.
 * Walks DEPRECATEDLANGS two entries at a time; when the first entry of a
 * pair compares equal to the tag's language, the second entry of that pair
 * is returned. Without a match the tag's own language is returned.
 */
static const char*
ultag_getJDKLanguage(const ULanguageTag* langtag) {
    int32_t pairStart = 0;
    while (DEPRECATEDLANGS[pairStart] != NULL) {
        if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[pairStart], langtag->language) == 0) {
            return DEPRECATEDLANGS[pairStart + 1];
        }
        pairStart += 2;
    }
    return langtag->language;
}
#endif
2205 | | |
2206 | | static const char* |
2207 | 0 | ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { |
2208 | 0 | if (idx >= 0 && idx < MAXEXTLANG) { |
2209 | 0 | return langtag->extlang[idx]; |
2210 | 0 | } |
2211 | 0 | return NULL; |
2212 | 0 | } |
2213 | | |
2214 | | static int32_t |
2215 | 0 | ultag_getExtlangSize(const ULanguageTag* langtag) { |
2216 | 0 | int32_t size = 0; |
2217 | 0 | int32_t i; |
2218 | 0 | for (i = 0; i < MAXEXTLANG; i++) { |
2219 | 0 | if (langtag->extlang[i]) { |
2220 | 0 | size++; |
2221 | 0 | } |
2222 | 0 | } |
2223 | 0 | return size; |
2224 | 0 | } |
2225 | | |
2226 | | static const char* |
2227 | 0 | ultag_getScript(const ULanguageTag* langtag) { |
2228 | 0 | return langtag->script; |
2229 | 0 | } |
2230 | | |
2231 | | static const char* |
2232 | 0 | ultag_getRegion(const ULanguageTag* langtag) { |
2233 | 0 | return langtag->region; |
2234 | 0 | } |
2235 | | |
2236 | | static const char* |
2237 | 0 | ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { |
2238 | 0 | const char *var = NULL; |
2239 | 0 | VariantListEntry *cur = langtag->variants; |
2240 | 0 | int32_t i = 0; |
2241 | 0 | while (cur) { |
2242 | 0 | if (i == idx) { |
2243 | 0 | var = cur->variant; |
2244 | 0 | break; |
2245 | 0 | } |
2246 | 0 | cur = cur->next; |
2247 | 0 | i++; |
2248 | 0 | } |
2249 | 0 | return var; |
2250 | 0 | } |
2251 | | |
2252 | | static int32_t |
2253 | 0 | ultag_getVariantsSize(const ULanguageTag* langtag) { |
2254 | 0 | int32_t size = 0; |
2255 | 0 | VariantListEntry *cur = langtag->variants; |
2256 | 0 | while (TRUE) { |
2257 | 0 | if (cur == NULL) { |
2258 | 0 | break; |
2259 | 0 | } |
2260 | 0 | size++; |
2261 | 0 | cur = cur->next; |
2262 | 0 | } |
2263 | 0 | return size; |
2264 | 0 | } |
2265 | | |
2266 | | static const char* |
2267 | 0 | ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { |
2268 | 0 | const char *key = NULL; |
2269 | 0 | ExtensionListEntry *cur = langtag->extensions; |
2270 | 0 | int32_t i = 0; |
2271 | 0 | while (cur) { |
2272 | 0 | if (i == idx) { |
2273 | 0 | key = cur->key; |
2274 | 0 | break; |
2275 | 0 | } |
2276 | 0 | cur = cur->next; |
2277 | 0 | i++; |
2278 | 0 | } |
2279 | 0 | return key; |
2280 | 0 | } |
2281 | | |
2282 | | static const char* |
2283 | 0 | ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { |
2284 | 0 | const char *val = NULL; |
2285 | 0 | ExtensionListEntry *cur = langtag->extensions; |
2286 | 0 | int32_t i = 0; |
2287 | 0 | while (cur) { |
2288 | 0 | if (i == idx) { |
2289 | 0 | val = cur->value; |
2290 | 0 | break; |
2291 | 0 | } |
2292 | 0 | cur = cur->next; |
2293 | 0 | i++; |
2294 | 0 | } |
2295 | 0 | return val; |
2296 | 0 | } |
2297 | | |
2298 | | static int32_t |
2299 | 0 | ultag_getExtensionsSize(const ULanguageTag* langtag) { |
2300 | 0 | int32_t size = 0; |
2301 | 0 | ExtensionListEntry *cur = langtag->extensions; |
2302 | 0 | while (TRUE) { |
2303 | 0 | if (cur == NULL) { |
2304 | 0 | break; |
2305 | 0 | } |
2306 | 0 | size++; |
2307 | 0 | cur = cur->next; |
2308 | 0 | } |
2309 | 0 | return size; |
2310 | 0 | } |
2311 | | |
2312 | | static const char* |
2313 | 0 | ultag_getPrivateUse(const ULanguageTag* langtag) { |
2314 | 0 | return langtag->privateuse; |
2315 | 0 | } |
2316 | | |
#if 0
/* Currently compiled out. Returns the grandfathered field of the parsed tag as stored. */
static const char*
ultag_getGrandfathered(const ULanguageTag* langtag) {
    const char *grandfatheredTag = langtag->grandfathered;
    return grandfatheredTag;
}
#endif
2323 | | |
2324 | | |
2325 | | /* |
2326 | | * ------------------------------------------------- |
2327 | | * |
2328 | | * Locale/BCP47 conversion APIs, exposed as uloc_* |
2329 | | * |
2330 | | * ------------------------------------------------- |
2331 | | */ |
2332 | | U_CAPI int32_t U_EXPORT2 |
2333 | | uloc_toLanguageTag(const char* localeID, |
2334 | | char* langtag, |
2335 | | int32_t langtagCapacity, |
2336 | | UBool strict, |
2337 | 0 | UErrorCode* status) { |
2338 | | /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */ |
2339 | 0 | char canonical[256]; |
2340 | 0 | int32_t reslen = 0; |
2341 | 0 | UErrorCode tmpStatus = U_ZERO_ERROR; |
2342 | 0 | UBool hadPosix = FALSE; |
2343 | 0 | const char* pKeywordStart; |
2344 | | |
2345 | | /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ |
2346 | 0 | canonical[0] = 0; |
2347 | 0 | if (uprv_strlen(localeID) > 0) { |
2348 | 0 | uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus); |
2349 | 0 | if (tmpStatus != U_ZERO_ERROR) { |
2350 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
2351 | 0 | return 0; |
2352 | 0 | } |
2353 | 0 | } |
2354 | | |
2355 | | /* For handling special case - private use only tag */ |
2356 | 0 | pKeywordStart = locale_getKeywordsStart(canonical); |
2357 | 0 | if (pKeywordStart == canonical) { |
2358 | 0 | UEnumeration *kwdEnum; |
2359 | 0 | int kwdCnt = 0; |
2360 | 0 | UBool done = FALSE; |
2361 | |
|
2362 | 0 | kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus); |
2363 | 0 | if (kwdEnum != NULL) { |
2364 | 0 | kwdCnt = uenum_count(kwdEnum, &tmpStatus); |
2365 | 0 | if (kwdCnt == 1) { |
2366 | 0 | const char *key; |
2367 | 0 | int32_t len = 0; |
2368 | |
|
2369 | 0 | key = uenum_next(kwdEnum, &len, &tmpStatus); |
2370 | 0 | if (len == 1 && *key == PRIVATEUSE) { |
2371 | 0 | char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; |
2372 | 0 | buf[0] = PRIVATEUSE; |
2373 | 0 | buf[1] = SEP; |
2374 | 0 | len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus); |
2375 | 0 | if (U_SUCCESS(tmpStatus)) { |
2376 | 0 | if (_isPrivateuseValueSubtags(&buf[2], len)) { |
2377 | | /* return private use only tag */ |
2378 | 0 | reslen = len + 2; |
2379 | 0 | uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity)); |
2380 | 0 | u_terminateChars(langtag, langtagCapacity, reslen, status); |
2381 | 0 | done = TRUE; |
2382 | 0 | } else if (strict) { |
2383 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
2384 | 0 | done = TRUE; |
2385 | 0 | } |
2386 | | /* if not strict mode, then "und" will be returned */ |
2387 | 0 | } else { |
2388 | 0 | *status = U_ILLEGAL_ARGUMENT_ERROR; |
2389 | 0 | done = TRUE; |
2390 | 0 | } |
2391 | 0 | } |
2392 | 0 | } |
2393 | 0 | uenum_close(kwdEnum); |
2394 | 0 | if (done) { |
2395 | 0 | return reslen; |
2396 | 0 | } |
2397 | 0 | } |
2398 | 0 | } |
2399 | | |
2400 | 0 | reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status); |
2401 | 0 | reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); |
2402 | 0 | reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status); |
2403 | 0 | reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); |
2404 | 0 | reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); |
2405 | 0 | reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); |
2406 | |
|
2407 | 0 | return reslen; |
2408 | 0 | } |
2409 | | |
2410 | | |
2411 | | U_CAPI int32_t U_EXPORT2 |
2412 | | uloc_forLanguageTag(const char* langtag, |
2413 | | char* localeID, |
2414 | | int32_t localeIDCapacity, |
2415 | | int32_t* parsedLength, |
2416 | 0 | UErrorCode* status) { |
2417 | 0 | ULanguageTag *lt; |
2418 | 0 | int32_t reslen = 0; |
2419 | 0 | const char *subtag, *p; |
2420 | 0 | int32_t len; |
2421 | 0 | int32_t i, n; |
2422 | 0 | UBool noRegion = TRUE; |
2423 | |
|
2424 | 0 | lt = ultag_parse(langtag, -1, parsedLength, status); |
2425 | 0 | if (U_FAILURE(*status)) { |
2426 | 0 | return 0; |
2427 | 0 | } |
2428 | | |
2429 | | /* language */ |
2430 | 0 | subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); |
2431 | 0 | if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { |
2432 | 0 | len = (int32_t)uprv_strlen(subtag); |
2433 | 0 | if (len > 0) { |
2434 | 0 | if (reslen < localeIDCapacity) { |
2435 | 0 | uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); |
2436 | 0 | } |
2437 | 0 | reslen += len; |
2438 | 0 | } |
2439 | 0 | } |
2440 | | |
2441 | | /* script */ |
2442 | 0 | subtag = ultag_getScript(lt); |
2443 | 0 | len = (int32_t)uprv_strlen(subtag); |
2444 | 0 | if (len > 0) { |
2445 | 0 | if (reslen < localeIDCapacity) { |
2446 | 0 | *(localeID + reslen) = LOCALE_SEP; |
2447 | 0 | } |
2448 | 0 | reslen++; |
2449 | | |
2450 | | /* write out the script in title case */ |
2451 | 0 | p = subtag; |
2452 | 0 | while (*p) { |
2453 | 0 | if (reslen < localeIDCapacity) { |
2454 | 0 | if (p == subtag) { |
2455 | 0 | *(localeID + reslen) = uprv_toupper(*p); |
2456 | 0 | } else { |
2457 | 0 | *(localeID + reslen) = *p; |
2458 | 0 | } |
2459 | 0 | } |
2460 | 0 | reslen++; |
2461 | 0 | p++; |
2462 | 0 | } |
2463 | 0 | } |
2464 | | |
2465 | | /* region */ |
2466 | 0 | subtag = ultag_getRegion(lt); |
2467 | 0 | len = (int32_t)uprv_strlen(subtag); |
2468 | 0 | if (len > 0) { |
2469 | 0 | if (reslen < localeIDCapacity) { |
2470 | 0 | *(localeID + reslen) = LOCALE_SEP; |
2471 | 0 | } |
2472 | 0 | reslen++; |
2473 | | /* write out the retion in upper case */ |
2474 | 0 | p = subtag; |
2475 | 0 | while (*p) { |
2476 | 0 | if (reslen < localeIDCapacity) { |
2477 | 0 | *(localeID + reslen) = uprv_toupper(*p); |
2478 | 0 | } |
2479 | 0 | reslen++; |
2480 | 0 | p++; |
2481 | 0 | } |
2482 | 0 | noRegion = FALSE; |
2483 | 0 | } |
2484 | | |
2485 | | /* variants */ |
2486 | 0 | n = ultag_getVariantsSize(lt); |
2487 | 0 | if (n > 0) { |
2488 | 0 | if (noRegion) { |
2489 | 0 | if (reslen < localeIDCapacity) { |
2490 | 0 | *(localeID + reslen) = LOCALE_SEP; |
2491 | 0 | } |
2492 | 0 | reslen++; |
2493 | 0 | } |
2494 | |
|
2495 | 0 | for (i = 0; i < n; i++) { |
2496 | 0 | subtag = ultag_getVariant(lt, i); |
2497 | 0 | if (reslen < localeIDCapacity) { |
2498 | 0 | *(localeID + reslen) = LOCALE_SEP; |
2499 | 0 | } |
2500 | 0 | reslen++; |
2501 | | /* write out the variant in upper case */ |
2502 | 0 | p = subtag; |
2503 | 0 | while (*p) { |
2504 | 0 | if (reslen < localeIDCapacity) { |
2505 | 0 | *(localeID + reslen) = uprv_toupper(*p); |
2506 | 0 | } |
2507 | 0 | reslen++; |
2508 | 0 | p++; |
2509 | 0 | } |
2510 | 0 | } |
2511 | 0 | } |
2512 | | |
2513 | | /* keywords */ |
2514 | 0 | n = ultag_getExtensionsSize(lt); |
2515 | 0 | subtag = ultag_getPrivateUse(lt); |
2516 | 0 | if (n > 0 || uprv_strlen(subtag) > 0) { |
2517 | 0 | if (reslen == 0 && n > 0) { |
2518 | | /* need a language */ |
2519 | 0 | if (reslen < localeIDCapacity) { |
2520 | 0 | uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); |
2521 | 0 | } |
2522 | 0 | reslen += LANG_UND_LEN; |
2523 | 0 | } |
2524 | 0 | len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); |
2525 | 0 | reslen += len; |
2526 | 0 | } |
2527 | |
|
2528 | 0 | ultag_close(lt); |
2529 | 0 | return u_terminateChars(localeID, localeIDCapacity, reslen, status); |
2530 | 0 | } |
2531 | | |
2532 | | |