/src/icu/source/common/uloc.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  | **********************************************************************  | 
5  |  | *   Copyright (C) 1997-2016, International Business Machines  | 
6  |  | *   Corporation and others.  All Rights Reserved.  | 
7  |  | **********************************************************************  | 
8  |  | *  | 
9  |  | * File ULOC.CPP  | 
10  |  | *  | 
11  |  | * Modification History:  | 
12  |  | *  | 
13  |  | *   Date        Name        Description  | 
14  |  | *   04/01/97    aliu        Creation.  | 
15  |  | *   08/21/98    stephen     JDK 1.2 sync  | 
16  |  | *   12/08/98    rtg         New Locale implementation and C API  | 
17  |  | *   03/15/99    damiba      overhaul.  | 
18  |  | *   04/06/99    stephen     changed setDefault() to realloc and copy  | 
19  |  | *   06/14/99    stephen     Changed calls to ures_open for new params  | 
20  |  | *   07/21/99    stephen     Modified setDefault() to propagate to C++  | 
21  |  | *   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,  | 
22  |  | *                           brought canonicalization code into line with spec  | 
23  |  | *****************************************************************************/  | 
24  |  |  | 
25  |  | /*  | 
26  |  |    POSIX's locale format, from putil.c: [no spaces]  | 
27  |  |  | 
28  |  |      ll [ _CC ] [ . MM ] [ @ VV]  | 
29  |  |  | 
30  |  |      l = lang, C = ctry, M = charmap, V = variant  | 
31  |  | */  | 
32  |  |  | 
33  |  | #include "unicode/bytestream.h"  | 
34  |  | #include "unicode/errorcode.h"  | 
35  |  | #include "unicode/stringpiece.h"  | 
36  |  | #include "unicode/utypes.h"  | 
37  |  | #include "unicode/ustring.h"  | 
38  |  | #include "unicode/uloc.h"  | 
39  |  |  | 
40  |  | #include "bytesinkutil.h"  | 
41  |  | #include "putilimp.h"  | 
42  |  | #include "ustr_imp.h"  | 
43  |  | #include "ulocimp.h"  | 
44  |  | #include "umutex.h"  | 
45  |  | #include "cstring.h"  | 
46  |  | #include "cmemory.h"  | 
47  |  | #include "locmap.h"  | 
48  |  | #include "uarrsort.h"  | 
49  |  | #include "uenumimp.h"  | 
50  |  | #include "uassert.h"  | 
51  |  | #include "charstr.h"  | 
52  |  |  | 
53  |  | U_NAMESPACE_USE  | 
54  |  |  | 
55  |  | /* ### Declarations **************************************************/  | 
56  |  |  | 
57  |  | /* Locale stuff from locid.cpp */  | 
58  |  | U_CFUNC void locale_set_default(const char *id);  | 
59  |  | U_CFUNC const char *locale_get_default(void);  | 
60  |  |  | 
61  |  | /* ### Data tables **************************************************/  | 
62  |  |  | 
63  |  | /**  | 
64  |  |  * Table of language codes, both 2- and 3-letter, with preference  | 
65  |  |  * given to 2-letter codes where possible.  Includes 3-letter codes  | 
66  |  |  * that lack a 2-letter equivalent.  | 
67  |  |  *  | 
68  |  |  * This list must be in sorted order.  This list is returned directly  | 
69  |  |  * to the user by some API.  | 
70  |  |  *  | 
71  |  |  * This list must be kept in sync with LANGUAGES_3, with corresponding  | 
72  |  |  * entries matched.  | 
73  |  |  *  | 
74  |  |  * This table should be terminated with a NULL entry, followed by a  | 
75  |  |  * second list, and another NULL entry.  The first list is visible to  | 
76  |  |  * user code when this array is returned by API.  The second list  | 
77  |  |  * contains codes we support, but do not expose through user API.  | 
78  |  |  *  | 
79  |  |  * Notes  | 
80  |  |  *  | 
81  |  |  * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to  | 
82  |  |  * include the revisions up to 2001/7/27 *CWB*  | 
83  |  |  *  | 
84  |  |  * The 3 character codes are the terminology codes like RFC 3066.  This  | 
85  |  |  * is compatible with prior ICU codes.  | 
86  |  |  *  | 
87  |  |  * "in", "iw", "ji", "jw" & "sh" have been withdrawn.  They are still in  | 
88  |  |  * the table, but now at the end, because their 3 character codes are  | 
89  |  |  * duplicates.  This avoids bad searches going from 3 to 2 character  | 
90  |  |  * codes.  | 
91  |  |  *  | 
92  |  |  * The range qaa-qtz is reserved for local use  | 
93  |  |  */  | 
94  |  | /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */  | 
95  |  | /* ISO639 table version is 20150505 */  | 
96  |  | /* Subsequent hand addition of selected languages */  | 
97  |  | static const char * const LANGUAGES[] = { /* must stay sorted; holds the 2-letter code where one exists, otherwise the 3-letter code; LANGUAGES_3 holds the matching 3-letter code at the same index */ | 
98  |  |     "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",  | 
99  |  |     "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",  | 
100  |  |     "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",  | 
101  |  |     "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",  | 
102  |  |     "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",  | 
103  |  |     "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",  | 
104  |  |     "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",  | 
105  |  |     "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",  | 
106  |  |     "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",  | 
107  |  |     "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",  | 
108  |  |     "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",  | 
109  |  |     "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",  | 
110  |  |     "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",  | 
111  |  |     "cs",  "csb", "cu",  "cv",  "cy",  | 
112  |  |     "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",  | 
113  |  |     "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",  | 
114  |  |     "dyo", "dyu", "dz",  "dzg",  | 
115  |  |     "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",  | 
116  |  |     "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",  | 
117  |  |     "ext",  | 
118  |  |     "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",  | 
119  |  |     "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",  | 
120  |  |     "frs", "fur", "fy",  | 
121  |  |     "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",  | 
122  |  |     "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",  | 
123  |  |     "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",  | 
124  |  |     "gur", "guz", "gv",  "gwi",  | 
125  |  |     "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",  | 
126  |  |     "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",  | 
127  |  |     "hup", "hy",  "hz",  | 
128  |  |     "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",  | 
129  |  |     "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",  | 
130  |  |     "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",  | 
131  |  |     "jv",  | 
132  |  |     "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",  | 
133  |  |     "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",  | 
134  |  |     "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",  | 
135  |  |     "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",  | 
136  |  |     "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",  | 
137  |  |     "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",  | 
138  |  |     "kv",  "kw",  "ky",  | 
139  |  |     "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",  | 
140  |  |     "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",  | 
141  |  |     "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",  | 
142  |  |     "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",  | 
143  |  |     "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",  | 
144  |  |     "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",  | 
145  |  |     "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",  | 
146  |  |     "ml",  "mn",  "mnc", "mni",  | 
147  |  |     "moh", "mos", "mr",  "mrj",  | 
148  |  |     "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",  | 
149  |  |     "my",  "mye", "myv", "mzn",  | 
150  |  |     "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",  | 
151  |  |     "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",  | 
152  |  |     "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",  | 
153  |  |     "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",  | 
154  |  |     "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",  | 
155  |  |     "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",  | 
156  |  |     "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",  | 
157  |  |     "pon", "prg", "pro", "ps",  "pt",  | 
158  |  |     "qu",  "quc", "qug",  | 
159  |  |     "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",  | 
160  |  |     "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",  | 
161  |  |     "rw",  "rwk",  | 
162  |  |     "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",  | 
163  |  |     "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",  | 
164  |  |     "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",  | 
165  |  |     "sgs", "shi", "shn", "shu", "si",  "sid", "sk",  | 
166  |  |     "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",  | 
167  |  |     "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",  | 
168  |  |     "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",  | 
169  |  |     "sv",  "sw",  "swb", "syc", "syr", "szl",  | 
170  |  |     "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",  | 
171  |  |     "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr",  | 
172  |  |     "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",  | 
173  |  |     "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",  | 
174  |  |     "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",  | 
175  |  |     "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",  | 
176  |  |     "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",  | 
177  |  |     "vot", "vro", "vun",  | 
178  |  |     "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",  | 
179  |  |     "xal", "xh",  "xmf", "xog",  | 
180  |  |     "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",  | 
181  |  |     "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",  | 
182  |  |     "zun", "zxx", "zza",  | 
183  |  | NULL, /* ends the first list (the one visible to user code); the second list follows */  | 
184  |  |     "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  /* obsolete language codes */  | 
185  |  | NULL /* ends the second list (codes supported but not exposed through user API) */  | 
186  |  | };  | 
187  |  |  | 
188  |  | static const char* const DEPRECATED_LANGUAGES[]={ /* withdrawn 2-letter language codes; the array ends with two NULL entries */ | 
189  |  |     "in", "iw", "ji", "jw", NULL, NULL  | 
190  |  | };  | 
191  |  | static const char* const REPLACEMENT_LANGUAGES[]={ /* same length as DEPRECATED_LANGUAGES; presumably entry i is the current code for DEPRECATED_LANGUAGES[i] -- the lookup code is outside this view, TODO confirm */ | 
192  |  |     "id", "he", "yi", "jv", NULL, NULL  | 
193  |  | };  | 
194  |  |  | 
195  |  | /**  | 
196  |  |  * Table of 3-letter language codes.  | 
197  |  |  *  | 
198  |  |  * This is a lookup table used to convert 3-letter language codes to  | 
199  |  |  * their 2-letter equivalent, where possible.  It must be kept in sync  | 
200  |  |  * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the  | 
201  |  |  * same language as LANGUAGES_3[i].  The commented-out lines are  | 
202  |  |  * copied from LANGUAGES to make eyeballing this baby easier.  | 
203  |  |  *  | 
204  |  |  * Where a 3-letter language code has no 2-letter equivalent, the  | 
205  |  |  * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].  | 
206  |  |  *  | 
207  |  |  * This table should be terminated with a NULL entry, followed by a  | 
208  |  |  * second list, and another NULL entry.  The two lists correspond to  | 
209  |  |  * the two lists in LANGUAGES.  | 
210  |  |  */  | 
211  |  | /* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */  | 
212  |  | /* ISO639 table version is 20150505 */  | 
213  |  | /* Subsequent hand addition of selected languages */  | 
214  |  | static const char * const LANGUAGES_3[] = { /* 3-letter code for the language at the same index in LANGUAGES; a code with no 2-letter form appears unchanged in both tables */ | 
215  |  |     "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",  | 
216  |  |     "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",  | 
217  |  |     "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",  | 
218  |  |     "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",  | 
219  |  |     "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",  | 
220  |  |     "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",  | 
221  |  |     "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",  | 
222  |  |     "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",  | 
223  |  |     "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",  | 
224  |  |     "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",  | 
225  |  |     "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",  | 
226  |  |     "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",  | 
227  |  |     "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",  | 
228  |  |     "ces", "csb", "chu", "chv", "cym",  | 
229  |  |     "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",  | 
230  |  |     "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",  | 
231  |  |     "dyo", "dyu", "dzo", "dzg",  | 
232  |  |     "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",  | 
233  |  |     "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",  | 
234  |  |     "ext",  | 
235  |  |     "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",  | 
236  |  |     "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",  | 
237  |  |     "frs", "fur", "fry",  | 
238  |  |     "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",  | 
239  |  |     "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",  | 
240  |  |     "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",  | 
241  |  |     "gur", "guz", "glv", "gwi",  | 
242  |  |     "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",  | 
243  |  |     "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",  | 
244  |  |     "hup", "hye", "her",  | 
245  |  |     "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",  | 
246  |  |     "ilo", "inh", "ido", "isl", "ita", "iku", "izh",  | 
247  |  |     "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",  | 
248  |  |     "jav",  | 
249  |  |     "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",  | 
250  |  |     "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",  | 
251  |  |     "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",  | 
252  |  |     "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",  | 
253  |  |     "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",  | 
254  |  |     "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",  | 
255  |  |     "kom", "cor", "kir",  | 
256  |  |     "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",  | 
257  |  |     "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",  | 
258  |  |     "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",  | 
259  |  |     "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",  | 
260  |  |     "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",  | 
261  |  |     "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",  | 
262  |  |     "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",  | 
263  |  |     "mal", "mon", "mnc", "mni",  | 
264  |  |     "moh", "mos", "mar", "mrj",  | 
265  |  |     "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",  | 
266  |  |     "mya", "mye", "myv", "mzn",  | 
267  |  |     "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",  | 
268  |  |     "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",  | 
269  |  |     "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",  | 
270  |  |     "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",  | 
271  |  |     "oci", "oji", "orm", "ori", "oss", "osa", "ota",  | 
272  |  |     "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",  | 
273  |  |     "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",  | 
274  |  |     "pon", "prg", "pro", "pus", "por",  | 
275  |  |     "que", "quc", "qug",  | 
276  |  |     "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",  | 
277  |  |     "rof", "rom", "rtm", "rus", "rue", "rug", "rup",  | 
278  |  |     "kin", "rwk",  | 
279  |  |     "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",  | 
280  |  |     "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",  | 
281  |  |     "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",  | 
282  |  |     "sgs", "shi", "shn", "shu", "sin", "sid", "slk",  | 
283  |  |     "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",  | 
284  |  |     "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",  | 
285  |  |     "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",  | 
286  |  |     "swe", "swa", "swb", "syc", "syr", "szl",  | 
287  |  |     "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",  | 
288  |  |     "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",  | 
289  |  |     "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",  | 
290  |  |     "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",  | 
291  |  |     "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",  | 
292  |  |     "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",  | 
293  |  |     "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",  | 
294  |  |     "vot", "vro", "vun",  | 
295  |  |     "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",  | 
296  |  |     "xal", "xho", "xmf", "xog",  | 
297  |  |     "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",  | 
298  |  |     "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",  | 
299  |  |     "zun", "zxx", "zza",  | 
300  |  | NULL, /* ends the first list; the second list below lines up with the second list in LANGUAGES */  | 
301  |  | /*  "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  */  | 
302  |  |     "ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl",  | 
303  |  | NULL /* ends the second list */  | 
304  |  | };  | 
305  |  |  | 
306  |  | /**  | 
307  |  |  * Table of 2-letter country codes.  | 
308  |  |  *  | 
309  |  |  * This list must be in sorted order.  This list is returned directly  | 
310  |  |  * to the user by some API.  | 
311  |  |  *  | 
312  |  |  * This list must be kept in sync with COUNTRIES_3, with corresponding  | 
313  |  |  * entries matched.  | 
314  |  |  *  | 
315  |  |  * This table should be terminated with a NULL entry, followed by a  | 
316  |  |  * second list, and another NULL entry.  The first list is visible to  | 
317  |  |  * user code when this array is returned by API.  The second list  | 
318  |  |  * contains codes we support, but do not expose through user API.  | 
319  |  |  *  | 
320  |  |  * Notes:  | 
321  |  |  *  | 
322  |  |  * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE), as per  | 
323  |  |  * http://www.evertype.com/standards/iso3166/iso3166-1-en.html.  Added  | 
324  |  |  * new codes, keeping the old ones for compatibility.  Updated to include  | 
325  |  |  * the 1999/12/03 revisions.  *CWB*  | 
326  |  |  *  | 
327  |  |  * RO(ROM) is now RO(ROU) according to  | 
328  |  |  * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html  | 
329  |  |  */  | 
330  |  | static const char * const COUNTRIES[] = { /* must stay sorted; COUNTRIES_3 holds the matching 3-letter code at the same index */ | 
331  |  |     "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",  | 
332  |  |     "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",  | 
333  |  |     "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",  | 
334  |  |     "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",  | 
335  |  |     "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",  | 
336  |  |     "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",  | 
337  |  |     "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",  | 
338  |  |     "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",  | 
339  |  |     "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",  | 
340  |  |     "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",  | 
341  |  |     "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",  | 
342  |  |     "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",  | 
343  |  |     "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",  | 
344  |  |     "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",  | 
345  |  |     "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",  | 
346  |  |     "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",  | 
347  |  |     "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",  | 
348  |  |     "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",  | 
349  |  |     "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",  | 
350  |  |     "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",  | 
351  |  |     "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",  | 
352  |  |     "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",  | 
353  |  |     "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",  | 
354  |  |     "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",  | 
355  |  |     "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",  | 
356  |  |     "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",  | 
357  |  |     "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",  | 
358  |  |     "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",  | 
359  |  |     "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",  | 
360  |  |     "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",  | 
361  |  | NULL, /* ends the first list (the one visible to user code); the second list follows */  | 
362  |  |     "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */  | 
363  |  | NULL /* ends the second list (codes supported but not exposed through user API) */  | 
364  |  | };  | 
365  |  |  | 
366  |  | static const char* const DEPRECATED_COUNTRIES[] = { /* the array ends with two NULL entries */ | 
367  |  |     "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */  | 
368  |  | };  | 
369  |  | static const char* const REPLACEMENT_COUNTRIES[] = { /* same length as DEPRECATED_COUNTRIES; the commented row below repeats the deprecated codes so each replacement can be read off by position */ | 
370  |  | /*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */  | 
371  |  |     "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */  | 
372  |  | };  | 
373  |  |  | 
374  |  | /**  | 
375  |  |  * Table of 3-letter country codes.  | 
376  |  |  *  | 
377  |  |  * This is a lookup table used to convert 3-letter country codes to  | 
378  |  |  * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.  | 
379  |  |  * For all valid i, COUNTRIES[i] must refer to the same country as  | 
380  |  |  * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES  | 
381  |  |  * to make eyeballing this baby easier.  | 
382  |  |  *  | 
383  |  |  * This table should be terminated with a NULL entry, followed by a  | 
384  |  |  * second list, and another NULL entry.  The two lists correspond to  | 
385  |  |  * the two lists in COUNTRIES.  | 
386  |  |  */  | 
387  |  | static const char * const COUNTRIES_3[] = { /* 3-letter code for the country at the same index in COUNTRIES; each commented row repeats the COUNTRIES entries for the data row beneath it */ | 
388  |  | /*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */  | 
389  |  |     "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",  | 
390  |  | /*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */  | 
391  |  |     "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",  | 
392  |  | /*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */  | 
393  |  |     "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",  | 
394  |  | /*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */  | 
395  |  |     "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",  | 
396  |  | /*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */  | 
397  |  |     "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",  | 
398  |  | /*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */  | 
399  |  |     "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",  | 
400  |  | /*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",     */  | 
401  |  |     "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",  | 
402  |  | /*  "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",     */  | 
403  |  |     "DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI",  | 
404  |  | /*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */  | 
405  |  |     "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",  | 
406  |  | /*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */  | 
407  |  |     "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",  | 
408  |  | /*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */  | 
409  |  |     "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",  | 
410  |  | /*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */  | 
411  |  |     "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",  | 
412  |  | /*  "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */  | 
413  |  |     "XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",  | 
414  |  | /*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */  | 
415  |  |     "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",  | 
416  |  | /*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */  | 
417  |  |     "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",  | 
418  |  | /*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */  | 
419  |  |     "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",  | 
420  |  | /*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */  | 
421  |  |     "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",  | 
422  |  | /*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */  | 
423  |  |     "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",  | 
424  |  | /*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */  | 
425  |  |     "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",  | 
426  |  | /*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */  | 
427  |  |     "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",  | 
428  |  | /*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */  | 
429  |  |     "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",  | 
430  |  | /*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */  | 
431  |  |     "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",  | 
432  |  | /*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */  | 
433  |  |     "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",  | 
434  |  | /*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */  | 
435  |  |     "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",  | 
436  |  | /*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */  | 
437  |  |     "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",  | 
438  |  | /*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */  | 
439  |  |     "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",  | 
440  |  | /*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */  | 
441  |  |     "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",  | 
442  |  | /*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */  | 
443  |  |     "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",  | 
444  |  | /*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */  | 
445  |  |     "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",  | 
446  |  | /*  "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */  | 
447  |  |     "WSM", "XXK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",  | 
448  |  | NULL, /* ends the first list; the second list below lines up with the second list in COUNTRIES */  | 
449  |  | /*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */  | 
450  |  |     "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",  | 
451  |  | NULL /* ends the second list */  | 
452  |  | };  | 
453  |  |  | 
454  |  | typedef struct CanonicalizationMap { /* element type of CANONICALIZE_MAP: pairs a locale ID with the ID it canonicalizes to */ | 
455  |  |     const char *id;          /* input ID */  | 
456  |  |     const char *canonicalID; /* canonicalized output ID */  | 
457  |  | } CanonicalizationMap;  | 
458  |  |  | 
459  |  | /**  | 
460  |  |  * A map to canonicalize locale IDs.  This handles a variety of  | 
461  |  |  * different semantic kinds of transformations.  | 
462  |  |  */  | 
463  |  | static const CanonicalizationMap CANONICALIZE_MAP[] = { /* each entry is { input ID, canonicalized output ID }; the array has no terminating entry */ | 
464  |  |     { "art__LOJBAN",    "jbo" }, /* registered name */ | 
465  |  |     { "hy__AREVELA",    "hy" }, /* Registered IANA variant */ | 
466  |  |     { "hy__AREVMDA",    "hyw" }, /* Registered IANA variant */ | 
467  |  |     { "zh__GUOYU",      "zh" }, /* registered name */ | 
468  |  |     { "zh__HAKKA",      "hak" }, /* registered name */ | 
469  |  |     { "zh__XIANG",      "hsn" }, /* registered name */ | 
470  |  |     // subtags with 3 chars won't be treated as variants.  | 
471  |  |     { "zh_GAN",         "gan" }, /* registered name */ | 
472  |  |     { "zh_MIN_NAN",     "nan" }, /* registered name */ | 
473  |  |     { "zh_WUU",         "wuu" }, /* registered name */ | 
474  |  |     { "zh_YUE",         "yue" }, /* registered name */ | 
475  |  | };  | 
476  |  |  | 
477  |  | /* ### BCP47 Conversion *******************************************/  | 
478  |  | /* Test if the locale id has BCP47 u extension and does not have '@'. True when id is non-NULL, contains no '@', and getShortestSubtagLength(id) is 1. Expands using only its own parameter (no reliance on a caller variable named localeID); id is evaluated more than once, so pass an expression without side effects. */  | 
479  | 0  | #define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)  | 
480  |  | /* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails. finalID receives buffer on success and id otherwise; the return value is whatever uloc_forLanguageTag reported as the size. */  | 
481  |  | static int32_t _ConvertBCP47(  | 
482  | 0  |             const char*& finalID, const char* id, char* buffer, int32_t length, UErrorCode* err) { | 
483  | 0  |     int32_t localeIDSize = uloc_forLanguageTag(id, buffer, length, NULL, err); /* NULL: the parsed-length output is not requested */  | 
484  | 0  |     if (localeIDSize <= 0 || U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { /* empty result, failure, or a result that filled buffer without a terminator */ | 
485  | 0  |         finalID=id; /* fall back to the caller's original string */  | 
486  | 0  |         if (*err == U_STRING_NOT_TERMINATED_WARNING) { | 
487  | 0  |             *err = U_BUFFER_OVERFLOW_ERROR; /* promote the warning to an error for the caller */  | 
488  | 0  |         }  | 
489  | 0  |     } else { | 
490  | 0  |         finalID=buffer; /* conversion succeeded: use the converted ID */  | 
491  | 0  |     }  | 
492  | 0  |     return localeIDSize;  | 
493  | 0  | }  | 
494  |  | /* Gets the size of the shortest subtag in the given localeID. Subtags are delimited by '_' or '-'. A subtag is only measured when the separator after it is reached, so the text following the last separator is never compared; an ID without any separator yields its full length. */  | 
495  | 0  | static int32_t getShortestSubtagLength(const char *localeID) { | 
496  | 0  |     int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));  | 
497  | 0  |     int32_t length = localeIDLength; /* running minimum, starts at the whole-string length */  | 
498  | 0  |     int32_t tmpLength = 0; /* length of the subtag currently being scanned */  | 
499  | 0  |     int32_t i;  | 
500  | 0  |     UBool reset = TRUE; /* TRUE when the next non-separator character starts a new subtag */  | 
501  |  | 
  | 
502  | 0  |     for (i = 0; i < localeIDLength; i++) { | 
503  | 0  |         if (localeID[i] != '_' && localeID[i] != '-') { | 
504  | 0  |             if (reset) { | 
505  | 0  |                 tmpLength = 0;  | 
506  | 0  |                 reset = FALSE;  | 
507  | 0  |             }  | 
508  | 0  |             tmpLength++;  | 
509  | 0  |         } else { | 
510  | 0  |             if (tmpLength != 0 && tmpLength < length) { /* separator reached: fold the finished subtag into the minimum; tmpLength is 0 only when no subtag character has been seen yet */ | 
511  | 0  |                 length = tmpLength;  | 
512  | 0  |             }  | 
513  | 0  |             reset = TRUE;  | 
514  | 0  |         }  | 
515  | 0  |     }  | 
516  |  | 
  | 
517  | 0  |     return length;  | 
518  | 0  | }  | 
519  |  |  | 
520  |  | /* ### Keywords **************************************************/  | 
521  | 0  | #define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) /* true for '0'..'9'; c is evaluated twice */  | 
522  | 0  | #define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) /* letter test delegated to uprv_isASCIILetter; c may be evaluated several times */  | 
523  |  | /* Punctuation/symbols allowed in legacy key values */  | 
524  | 0  | #define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')  | 
525  |  |  | 
526  | 0  | #define ULOC_KEYWORD_BUFFER_LEN 25 /* size of the keyword-name buffer handed to locale_canonKeywordName */  | 
527  | 0  | #define ULOC_MAX_NO_KEYWORDS 25 /* presumably the maximum number of keywords handled per locale ID -- usage is outside this view, TODO confirm */  | 
528  |  |  | 
529  |  | U_CAPI const char * U_EXPORT2  | 
530  | 0  | locale_getKeywordsStart(const char *localeID) { | 
531  | 0  |     const char *result = NULL;  | 
532  | 0  |     if((result = uprv_strchr(localeID, '@')) != NULL) { | 
533  | 0  |         return result;  | 
534  | 0  |     }  | 
535  |  | #if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)  | 
536  |  |     else { | 
537  |  |         /* We do this because the @ sign is variant, and the @ sign used on one  | 
538  |  |         EBCDIC machine won't be compiled the same way on other EBCDIC based  | 
539  |  |         machines. */  | 
540  |  |         static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 }; | 
541  |  |         const uint8_t *charToFind = ebcdicSigns;  | 
542  |  |         while(*charToFind) { | 
543  |  |             if((result = uprv_strchr(localeID, *charToFind)) != NULL) { | 
544  |  |                 return result;  | 
545  |  |             }  | 
546  |  |             charToFind++;  | 
547  |  |         }  | 
548  |  |     }  | 
549  |  | #endif  | 
550  | 0  |     return NULL;  | 
551  | 0  | }  | 
552  |  |  | 
553  |  | /**  | 
554  |  |  * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]  | 
555  |  |  * @param keywordName incoming name to be canonicalized  | 
556  |  |  * @param status return status (keyword too long)  | 
557  |  |  * @return length of the keyword name  | 
558  |  |  */  | 
559  |  | static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)  | 
560  | 0  | { | 
561  | 0  |   int32_t keywordNameLen = 0;  | 
562  |  | 
  | 
563  | 0  |   for (; *keywordName != 0; keywordName++) { | 
564  | 0  |     if (!UPRV_ISALPHANUM(*keywordName)) { | 
565  | 0  |       *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */  | 
566  | 0  |       return 0;  | 
567  | 0  |     }  | 
568  | 0  |     if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) { | 
569  | 0  |       buf[keywordNameLen++] = uprv_tolower(*keywordName);  | 
570  | 0  |     } else { | 
571  |  |       /* keyword name too long for internal buffer */  | 
572  | 0  |       *status = U_INTERNAL_PROGRAM_ERROR;  | 
573  | 0  |       return 0;  | 
574  | 0  |     }  | 
575  | 0  |   }  | 
576  | 0  |   if (keywordNameLen == 0) { | 
577  | 0  |     *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */  | 
578  | 0  |     return 0;  | 
579  | 0  |   }  | 
580  | 0  |   buf[keywordNameLen] = 0; /* terminate */  | 
581  |  | 
  | 
582  | 0  |   return keywordNameLen;  | 
583  | 0  | }  | 
584  |  |  | 
/* One parsed "keyword=value" pair, as collected by ulocimp_getKeywords(). */
typedef struct {
    /* Keyword name, lowercased with spaces removed, NUL-terminated. */
    char keyword[ULOC_KEYWORD_BUFFER_LEN];
    /* Number of characters in keyword, not counting the NUL. */
    int32_t keywordLen;
    /* Points into the parsed locale ID at the first non-space character of the value. */
    const char *valueStart;
    /* Length of the value starting at valueStart, trailing spaces excluded. */
    int32_t valueLen;
} KeywordStruct;
591  |  |  | 
592  |  | static int32_t U_CALLCONV  | 
593  | 0  | compareKeywordStructs(const void * /*context*/, const void *left, const void *right) { | 
594  | 0  |     const char* leftString = ((const KeywordStruct *)left)->keyword;  | 
595  | 0  |     const char* rightString = ((const KeywordStruct *)right)->keyword;  | 
596  | 0  |     return uprv_strcmp(leftString, rightString);  | 
597  | 0  | }  | 
598  |  |  | 
599  |  | U_CFUNC void  | 
600  |  | ulocimp_getKeywords(const char *localeID,  | 
601  |  |                     char prev,  | 
602  |  |                     ByteSink& sink,  | 
603  |  |                     UBool valuesToo,  | 
604  |  |                     UErrorCode *status)  | 
605  | 0  | { | 
606  | 0  |     KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];  | 
607  |  | 
  | 
608  | 0  |     int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;  | 
609  | 0  |     int32_t numKeywords = 0;  | 
610  | 0  |     const char* pos = localeID;  | 
611  | 0  |     const char* equalSign = NULL;  | 
612  | 0  |     const char* semicolon = NULL;  | 
613  | 0  |     int32_t i = 0, j, n;  | 
614  |  | 
  | 
615  | 0  |     if(prev == '@') { /* start of keyword definition */ | 
616  |  |         /* we will grab pairs, trim spaces, lowercase keywords, sort and return */  | 
617  | 0  |         do { | 
618  | 0  |             UBool duplicate = FALSE;  | 
619  |  |             /* skip leading spaces */  | 
620  | 0  |             while(*pos == ' ') { | 
621  | 0  |                 pos++;  | 
622  | 0  |             }  | 
623  | 0  |             if (!*pos) { /* handle trailing "; " */ | 
624  | 0  |                 break;  | 
625  | 0  |             }  | 
626  | 0  |             if(numKeywords == maxKeywords) { | 
627  | 0  |                 *status = U_INTERNAL_PROGRAM_ERROR;  | 
628  | 0  |                 return;  | 
629  | 0  |             }  | 
630  | 0  |             equalSign = uprv_strchr(pos, '=');  | 
631  | 0  |             semicolon = uprv_strchr(pos, ';');  | 
632  |  |             /* lack of '=' [foo@currency] is illegal */  | 
633  |  |             /* ';' before '=' [foo@currency;collation=pinyin] is illegal */  | 
634  | 0  |             if(!equalSign || (semicolon && semicolon<equalSign)) { | 
635  | 0  |                 *status = U_INVALID_FORMAT_ERROR;  | 
636  | 0  |                 return;  | 
637  | 0  |             }  | 
638  |  |             /* need to normalize both keyword and keyword name */  | 
639  | 0  |             if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) { | 
640  |  |                 /* keyword name too long for internal buffer */  | 
641  | 0  |                 *status = U_INTERNAL_PROGRAM_ERROR;  | 
642  | 0  |                 return;  | 
643  | 0  |             }  | 
644  | 0  |             for(i = 0, n = 0; i < equalSign - pos; ++i) { | 
645  | 0  |                 if (pos[i] != ' ') { | 
646  | 0  |                     keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);  | 
647  | 0  |                 }  | 
648  | 0  |             }  | 
649  |  |  | 
650  |  |             /* zero-length keyword is an error. */  | 
651  | 0  |             if (n == 0) { | 
652  | 0  |                 *status = U_INVALID_FORMAT_ERROR;  | 
653  | 0  |                 return;  | 
654  | 0  |             }  | 
655  |  |  | 
656  | 0  |             keywordList[numKeywords].keyword[n] = 0;  | 
657  | 0  |             keywordList[numKeywords].keywordLen = n;  | 
658  |  |             /* now grab the value part. First we skip the '=' */  | 
659  | 0  |             equalSign++;  | 
660  |  |             /* then we leading spaces */  | 
661  | 0  |             while(*equalSign == ' ') { | 
662  | 0  |                 equalSign++;  | 
663  | 0  |             }  | 
664  |  |  | 
665  |  |             /* Premature end or zero-length value */  | 
666  | 0  |             if (!*equalSign || equalSign == semicolon) { | 
667  | 0  |                 *status = U_INVALID_FORMAT_ERROR;  | 
668  | 0  |                 return;  | 
669  | 0  |             }  | 
670  |  |  | 
671  | 0  |             keywordList[numKeywords].valueStart = equalSign;  | 
672  |  | 
  | 
673  | 0  |             pos = semicolon;  | 
674  | 0  |             i = 0;  | 
675  | 0  |             if(pos) { | 
676  | 0  |                 while(*(pos - i - 1) == ' ') { | 
677  | 0  |                     i++;  | 
678  | 0  |                 }  | 
679  | 0  |                 keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);  | 
680  | 0  |                 pos++;  | 
681  | 0  |             } else { | 
682  | 0  |                 i = (int32_t)uprv_strlen(equalSign);  | 
683  | 0  |                 while(i && equalSign[i-1] == ' ') { | 
684  | 0  |                     i--;  | 
685  | 0  |                 }  | 
686  | 0  |                 keywordList[numKeywords].valueLen = i;  | 
687  | 0  |             }  | 
688  |  |             /* If this is a duplicate keyword, then ignore it */  | 
689  | 0  |             for (j=0; j<numKeywords; ++j) { | 
690  | 0  |                 if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) { | 
691  | 0  |                     duplicate = TRUE;  | 
692  | 0  |                     break;  | 
693  | 0  |                 }  | 
694  | 0  |             }  | 
695  | 0  |             if (!duplicate) { | 
696  | 0  |                 ++numKeywords;  | 
697  | 0  |             }  | 
698  | 0  |         } while(pos);  | 
699  |  |  | 
700  |  |         /* now we have a list of keywords */  | 
701  |  |         /* we need to sort it */  | 
702  | 0  |         uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);  | 
703  |  |  | 
704  |  |         /* Now construct the keyword part */  | 
705  | 0  |         for(i = 0; i < numKeywords; i++) { | 
706  | 0  |             sink.Append(keywordList[i].keyword, keywordList[i].keywordLen);  | 
707  | 0  |             if(valuesToo) { | 
708  | 0  |                 sink.Append("=", 1); | 
709  | 0  |                 sink.Append(keywordList[i].valueStart, keywordList[i].valueLen);  | 
710  | 0  |                 if(i < numKeywords - 1) { | 
711  | 0  |                     sink.Append(";", 1); | 
712  | 0  |                 }  | 
713  | 0  |             } else { | 
714  | 0  |                 sink.Append("\0", 1); | 
715  | 0  |             }  | 
716  | 0  |         }  | 
717  | 0  |     }  | 
718  | 0  | }  | 
719  |  |  | 
720  |  | U_CAPI int32_t U_EXPORT2  | 
721  |  | uloc_getKeywordValue(const char* localeID,  | 
722  |  |                      const char* keywordName,  | 
723  |  |                      char* buffer, int32_t bufferCapacity,  | 
724  |  |                      UErrorCode* status)  | 
725  | 0  | { | 
726  | 0  |     if (U_FAILURE(*status)) { | 
727  | 0  |         return 0;  | 
728  | 0  |     }  | 
729  |  |  | 
730  | 0  |     CheckedArrayByteSink sink(buffer, bufferCapacity);  | 
731  | 0  |     ulocimp_getKeywordValue(localeID, keywordName, sink, status);  | 
732  |  | 
  | 
733  | 0  |     int32_t reslen = sink.NumberOfBytesAppended();  | 
734  |  | 
  | 
735  | 0  |     if (U_FAILURE(*status)) { | 
736  | 0  |         return reslen;  | 
737  | 0  |     }  | 
738  |  |  | 
739  | 0  |     if (sink.Overflowed()) { | 
740  | 0  |         *status = U_BUFFER_OVERFLOW_ERROR;  | 
741  | 0  |     } else { | 
742  | 0  |         u_terminateChars(buffer, bufferCapacity, reslen, status);  | 
743  | 0  |     }  | 
744  |  | 
  | 
745  | 0  |     return reslen;  | 
746  | 0  | }  | 
747  |  |  | 
/**
 * Looks up one keyword in the keyword section of a locale ID and appends its
 * value to sink.
 *
 * Nothing is done when status is NULL, already holds a failure, or localeID
 * is NULL. Keyword names are compared after lowercasing; the value is
 * appended as written in the locale ID, without surrounding spaces.
 *
 * @param localeID    locale ID to search
 * @param keywordName keyword to look for; must be non-empty and alphanumeric
 * @param sink        receives the value, one byte per Append call. Bytes
 *                    appended before a malformed character is found in the
 *                    value stay in the sink.
 * @param status      in/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR for a
 *                    NULL/empty/malformed keywordName, or when a keyword entry
 *                    scanned in the locale ID has no '=', an empty or malformed
 *                    name, or (for the matching entry) an empty or malformed
 *                    value. Set to U_INTERNAL_PROGRAM_ERROR when a keyword name
 *                    is too long for the internal buffer. Left unchanged when
 *                    the keyword is simply not present.
 */
U_CAPI void U_EXPORT2
ulocimp_getKeywordValue(const char* localeID,
                        const char* keywordName,
                        icu::ByteSink& sink,
                        UErrorCode* status)
{
    const char* startSearchHere = NULL;
    const char* nextSeparator = NULL;
    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

    if(status && U_SUCCESS(*status) && localeID) {
      char tempBuffer[ULOC_FULLNAME_CAPACITY];
      const char* tmpLocaleID;

      if (keywordName == NULL || keywordName[0] == 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
      }

      /* lowercase and validate the requested name once, up front */
      locale_canonKeywordName(keywordNameBuffer, keywordName, status);
      if(U_FAILURE(*status)) {
        return;
      }

      /* NOTE(review): _ConvertBCP47 presumably points tmpLocaleID either at the
         converted ID in tempBuffer or back at localeID - confirm against its
         definition. Its effect on status is not checked here. */
      if (_hasBCP47Extension(localeID)) {
          _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
      } else {
          tmpLocaleID=localeID;
      }

      startSearchHere = locale_getKeywordsStart(tmpLocaleID);
      if(startSearchHere == NULL) {
          /* no keywords, return at once */
          return;
      }

      /* walk the key=value entries in order until the requested keyword is found */
      while(startSearchHere) {
          const char* keyValueTail;
          int32_t keyValueLen;

          startSearchHere++; /* skip @ or ; */
          nextSeparator = uprv_strchr(startSearchHere, '=');
          if(!nextSeparator) {
              *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
              return;
          }
          /* strip leading & trailing spaces (TC decided to tolerate these) */
          while(*startSearchHere == ' ') {
              startSearchHere++;
          }
          keyValueTail = nextSeparator;
          while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
              keyValueTail--;
          }
          /* now keyValueTail points to first char after the keyName */
          /* copy & normalize keyName from locale */
          if (startSearchHere == keyValueTail) {
              *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
              return;
          }
          keyValueLen = 0;
          while (startSearchHere < keyValueTail) {
            if (!UPRV_ISALPHANUM(*startSearchHere)) {
              *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
              return;
            }
            if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
              localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
            } else {
              /* keyword name too long for internal buffer */
              *status = U_INTERNAL_PROGRAM_ERROR;
              return;
            }
          }
          localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */

          /* from here on startSearchHere is the ';' that ends this entry,
             or NULL when this is the last entry */
          startSearchHere = uprv_strchr(nextSeparator, ';');

          if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
               /* current entry matches the keyword. */
             nextSeparator++; /* skip '=' */
              /* First strip leading & trailing spaces (TC decided to tolerate these) */
              while(*nextSeparator == ' ') {
                nextSeparator++;
              }
              /* the value ends at the next ';' or, for the last entry, at the end of the string */
              keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
              while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
                keyValueTail--;
              }
              /* Now copy the value, but check well-formedness */
              if (nextSeparator == keyValueTail) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
                return;
              }
              while (nextSeparator < keyValueTail) {
                if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
                  *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                  return;
                }
                /* Should we lowercase value to return here? Tests expect as-is. */
                sink.Append(nextSeparator++, 1);
              }
              return;
          }
      }
    }
}
857  |  |  | 
/**
 * Sets, replaces or removes one keyword in the locale ID held in buffer,
 * rewriting buffer in place.
 *
 * A NULL or empty keywordValue requests removal of the keyword.
 *
 * @param keywordName    keyword to set; must be non-empty and alphanumeric
 * @param keywordValue   new value, or NULL/empty to remove the keyword. Only
 *                       alphanumerics and '_', '-', '+', '/' are accepted.
 * @param buffer         NUL-terminated locale ID, modified in place
 * @param bufferCapacity capacity of buffer; must be greater than 1 and not
 *                       smaller than the current string length
 * @param status         in/out error code. An incoming
 *                       U_STRING_NOT_TERMINATED_WARNING is reset to
 *                       U_ZERO_ERROR. Set to U_ILLEGAL_ARGUMENT_ERROR for bad
 *                       arguments or a malformed keyword section in buffer,
 *                       U_INTERNAL_PROGRAM_ERROR when a name or value is too
 *                       long for the internal buffers, and
 *                       U_BUFFER_OVERFLOW_ERROR when the result plus its
 *                       terminating NUL does not fit.
 * @return -1 when status already held a failure on entry; 0 on argument or
 *         parse errors; the needed length on overflow (buffer unchanged); the
 *         original length when nothing had to change or an append failed;
 *         otherwise the length of the updated locale ID
 */
U_CAPI int32_t U_EXPORT2
uloc_setKeywordValue(const char* keywordName,
                     const char* keywordValue,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status)
{
    /* TODO: sorting. removal. */
    int32_t keywordNameLen;
    int32_t keywordValueLen;
    int32_t bufLen;
    int32_t needLen = 0;
    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    int32_t rc;
    char* nextSeparator = NULL;
    char* nextEqualsign = NULL;
    char* startSearchHere = NULL;
    char* keywordStart = NULL;
    /* the rebuilt keyword section, starting with its '@' */
    CharString updatedKeysAndValues;
    /* TRUE once the input keyword has been written to, or dropped from, the rebuilt section */
    UBool handledInputKeyAndValue = FALSE;
    /* separator to emit before the next pair: '@' for the first, ';' afterwards */
    char keyValuePrefix = '@';

    if(U_FAILURE(*status)) {
        return -1;
    }
    if (*status == U_STRING_NOT_TERMINATED_WARNING) {
        *status = U_ZERO_ERROR;
    }
    if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    bufLen = (int32_t)uprv_strlen(buffer);
    if(bufferCapacity<bufLen) {
        /* The capacity is less than the length?! Is this NULL terminated? */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    if(U_FAILURE(*status)) {
        return 0;
    }

    /* validate the new value and copy it as-is (no case change) into keywordValueBuffer */
    keywordValueLen = 0;
    if(keywordValue) {
        while (*keywordValue != 0) {
            if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                return 0;
            }
            if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
                /* Should we force lowercase in value to set? */
                keywordValueBuffer[keywordValueLen++] = *keywordValue++;
            } else {
                /* keywordValue too long for internal buffer */
                *status = U_INTERNAL_PROGRAM_ERROR;
                return 0;
            }
        }
    }
    keywordValueBuffer[keywordValueLen] = 0; /* terminate */

    /* shortcut: the locale has no keyword section, or only a bare trailing '@' */
    startSearchHere = (char*)locale_getKeywordsStart(buffer);
    if(startSearchHere == NULL || (startSearchHere[1]==0)) {
        if(keywordValueLen == 0) { /* no keywords = nothing to remove */
            U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
            return bufLen;
        }

        /* existing text + '@' + name + '=' + value */
        needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
        if(startSearchHere) { /* had a single @ */
            needLen--; /* already had the @ */
            /* startSearchHere points at the @ */
        } else {
            startSearchHere=buffer+bufLen;
        }
        /* ">=" so that room for the terminating NUL is required too */
        if(needLen >= bufferCapacity) {
            *status = U_BUFFER_OVERFLOW_ERROR;
            return needLen; /* no change */
        }
        *startSearchHere++ = '@';
        uprv_strcpy(startSearchHere, keywordNameBuffer);
        startSearchHere += keywordNameLen;
        *startSearchHere++ = '=';
        uprv_strcpy(startSearchHere, keywordValueBuffer);
        U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
        return needLen;
    } /* end shortcut - no @ */

    keywordStart = startSearchHere;
    /* search for keyword */
    while(keywordStart) {
        const char* keyValueTail;
        int32_t keyValueLen;

        keywordStart++; /* skip @ or ; */
        nextEqualsign = uprv_strchr(keywordStart, '=');
        if (!nextEqualsign) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
            return 0;
        }
        /* strip leading & trailing spaces (TC decided to tolerate these) */
        while(*keywordStart == ' ') {
            keywordStart++;
        }
        keyValueTail = nextEqualsign;
        while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
            keyValueTail--;
        }
        /* now keyValueTail points to first char after the keyName */
        /* copy & normalize keyName from locale */
        if (keywordStart == keyValueTail) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
            return 0;
        }
        keyValueLen = 0;
        while (keywordStart < keyValueTail) {
            if (!UPRV_ISALPHANUM(*keywordStart)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
                return 0;
            }
            if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
                localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
            } else {
                /* keyword name too long for internal buffer */
                *status = U_INTERNAL_PROGRAM_ERROR;
                return 0;
            }
        }
        localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */

        /* the ';' that ends this entry, or NULL when this is the last entry */
        nextSeparator = uprv_strchr(nextEqualsign, ';');

        /* start processing the value part */
        nextEqualsign++; /* skip '=' */
        /* First strip leading & trailing spaces (TC decided to tolerate these) */
        while(*nextEqualsign == ' ') {
            nextEqualsign++;
        }
        keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
        while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
            keyValueTail--;
        }
        if (nextEqualsign == keyValueTail) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
            return 0;
        }

        /* rc < 0: the input keyword sorts before this entry; rc == 0: same keyword */
        rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
        if(rc == 0) {
            /* Current entry matches the input keyword. Update the entry */
            if(keywordValueLen > 0) { /* updating a value */
                updatedKeysAndValues.append(keyValuePrefix, *status);
                keyValuePrefix = ';'; /* for any subsequent key-value pair */
                updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
                updatedKeysAndValues.append('=', *status);
                updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
            } /* else removing this entry, don't emit anything */
            handledInputKeyAndValue = TRUE;
        } else {
           /* input keyword sorts earlier than current entry, add before current entry */
            if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
                /* insert new entry at this location */
                updatedKeysAndValues.append(keyValuePrefix, *status);
                keyValuePrefix = ';'; /* for any subsequent key-value pair */
                updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
                updatedKeysAndValues.append('=', *status);
                updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
                handledInputKeyAndValue = TRUE;
            }
            /* copy the current entry */
            /* (name in its lowercased form, value without surrounding spaces) */
            updatedKeysAndValues.append(keyValuePrefix, *status);
            keyValuePrefix = ';'; /* for any subsequent key-value pair */
            updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
            updatedKeysAndValues.append('=', *status);
            updatedKeysAndValues.append(nextEqualsign, static_cast<int32_t>(keyValueTail-nextEqualsign), *status);
        }
        if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
            /* append new entry at the end, it sorts later than existing entries */
            updatedKeysAndValues.append(keyValuePrefix, *status);
            /* skip keyValuePrefix update, no subsequent key-value pair */
            updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
            updatedKeysAndValues.append('=', *status);
            updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
            handledInputKeyAndValue = TRUE;
        }
        keywordStart = nextSeparator;
    } /* end loop searching */

    /* Any error from updatedKeysAndValues.append above would be internal and not due to
     * problems with the passed-in locale. So if we did encounter problems with the
     * passed-in locale above, those errors took precedence and overrode any error
     * status from updatedKeysAndValues.append, and also caused a return of 0. If there
     * are errors here they are from updatedKeysAndValues.append; they do cause an
     * error return but the passed-in locale is unmodified and the original bufLen is
     * returned.
     */
    if (!handledInputKeyAndValue || U_FAILURE(*status)) {
        /* if input key/value specified removal of a keyword not present in locale, or
         * there was an error in CharString.append, leave original locale alone. */
        U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
        return bufLen;
    }

    // needLen = length of the part before '@'
    needLen = (int32_t)(startSearchHere - buffer);
    // Check whether the rebuilt keyword section fits at startSearchHere; if not, return
    // U_BUFFER_OVERFLOW_ERROR without copying updatedKeysAndValues into the buffer.
    // We do this because this API function does not behave like most others:
    // It promises never to set a U_STRING_NOT_TERMINATED_WARNING.
    // When the contents fit but the terminating NUL does not, we must leave
    // the buffer contents unchanged and return with a buffer overflow error.
    int32_t appendLength = updatedKeysAndValues.length();
    if (appendLength >= bufferCapacity - needLen) {
        *status = U_BUFFER_OVERFLOW_ERROR;
        return needLen + appendLength;
    }
    needLen += updatedKeysAndValues.extract(
                         startSearchHere, bufferCapacity - needLen, *status);
    U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
    return needLen;
}
1081  |  |  | 
1082  |  | /* ### ID parsing implementation **************************************************/  | 
1083  |  |  | 
/* TRUE if 'a' is one of the letters that can start a special prefix:
   x, X, i or I. Every use of the parameter is parenthesized so that an
   arbitrary expression can be passed; 'a' may be evaluated more than once. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-': a prefix letter followed by a character that
  _isIDSeparator() accepts */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 *
 * TRUE if 'a' is NUL, '.' or '@'. 'a' may be evaluated more than once.
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1094  |  |  | 
1095  |  | /**  | 
1096  |  |  * Lookup 'key' in the array 'list'.  The array 'list' should contain  | 
1097  |  |  * a NULL entry, followed by more entries, and a second NULL entry.  | 
1098  |  |  *  | 
1099  |  |  * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or  | 
1100  |  |  * COUNTRIES_3.  | 
1101  |  |  */  | 
1102  |  | static int16_t _findIndex(const char* const* list, const char* key)  | 
1103  | 0  | { | 
1104  | 0  |     const char* const* anchor = list;  | 
1105  | 0  |     int32_t pass = 0;  | 
1106  |  |  | 
1107  |  |     /* Make two passes through two NULL-terminated arrays at 'list' */  | 
1108  | 0  |     while (pass++ < 2) { | 
1109  | 0  |         while (*list) { | 
1110  | 0  |             if (uprv_strcmp(key, *list) == 0) { | 
1111  | 0  |                 return (int16_t)(list - anchor);  | 
1112  | 0  |             }  | 
1113  | 0  |             list++;  | 
1114  | 0  |         }  | 
1115  | 0  |         ++list;     /* skip final NULL *CWB*/  | 
1116  | 0  |     }  | 
1117  | 0  |     return -1;  | 
1118  | 0  | }  | 
1119  |  |  | 
1120  |  | U_CFUNC const char*  | 
1121  | 0  | uloc_getCurrentCountryID(const char* oldID){ | 
1122  | 0  |     int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);  | 
1123  | 0  |     if (offset >= 0) { | 
1124  | 0  |         return REPLACEMENT_COUNTRIES[offset];  | 
1125  | 0  |     }  | 
1126  | 0  |     return oldID;  | 
1127  | 0  | }  | 
1128  |  | U_CFUNC const char*  | 
1129  | 0  | uloc_getCurrentLanguageID(const char* oldID){ | 
1130  | 0  |     int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);  | 
1131  | 0  |     if (offset >= 0) { | 
1132  | 0  |         return REPLACEMENT_LANGUAGES[offset];  | 
1133  | 0  |     }  | 
1134  | 0  |     return oldID;  | 
1135  | 0  | }  | 
1136  |  | /*  | 
1137  |  |  * the internal functions _getLanguage(), _getCountry(), _getVariant()  | 
1138  |  |  * avoid duplicating code to handle the earlier locale ID pieces  | 
1139  |  |  * in the functions for the later ones by  | 
1140  |  |  * setting the *pEnd pointer to where they stopped parsing  | 
1141  |  |  *  | 
1142  |  |  * TODO try to use this in Locale  | 
1143  |  |  */  | 
1144  |  | CharString U_EXPORT2  | 
1145  |  | ulocimp_getLanguage(const char *localeID,  | 
1146  |  |                     const char **pEnd,  | 
1147  | 0  |                     UErrorCode &status) { | 
1148  | 0  |     CharString result;  | 
1149  |  | 
  | 
1150  | 0  |     if (uprv_stricmp(localeID, "root") == 0) { | 
1151  | 0  |         localeID += 4;  | 
1152  | 0  |     } else if (uprv_strnicmp(localeID, "und", 3) == 0 &&  | 
1153  | 0  |                (localeID[3] == '\0' ||  | 
1154  | 0  |                 localeID[3] == '-' ||  | 
1155  | 0  |                 localeID[3] == '_' ||  | 
1156  | 0  |                 localeID[3] == '@')) { | 
1157  | 0  |         localeID += 3;  | 
1158  | 0  |     }  | 
1159  |  |  | 
1160  |  |     /* if it starts with i- or x- then copy that prefix */  | 
1161  | 0  |     if(_isIDPrefix(localeID)) { | 
1162  | 0  |         result.append((char)uprv_tolower(*localeID), status);  | 
1163  | 0  |         result.append('-', status); | 
1164  | 0  |         localeID+=2;  | 
1165  | 0  |     }  | 
1166  |  |  | 
1167  |  |     /* copy the language as far as possible and count its length */  | 
1168  | 0  |     while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) { | 
1169  | 0  |         result.append((char)uprv_tolower(*localeID), status);  | 
1170  | 0  |         localeID++;  | 
1171  | 0  |     }  | 
1172  |  | 
  | 
1173  | 0  |     if(result.length()==3) { | 
1174  |  |         /* convert 3 character code to 2 character code if possible *CWB*/  | 
1175  | 0  |         int32_t offset = _findIndex(LANGUAGES_3, result.data());  | 
1176  | 0  |         if(offset>=0) { | 
1177  | 0  |             result.clear();  | 
1178  | 0  |             result.append(LANGUAGES[offset], status);  | 
1179  | 0  |         }  | 
1180  | 0  |     }  | 
1181  |  | 
  | 
1182  | 0  |     if(pEnd!=NULL) { | 
1183  | 0  |         *pEnd=localeID;  | 
1184  | 0  |     }  | 
1185  |  | 
  | 
1186  | 0  |     return result;  | 
1187  | 0  | }  | 
1188  |  |  | 
1189  |  | CharString U_EXPORT2  | 
1190  |  | ulocimp_getScript(const char *localeID,  | 
1191  |  |                   const char **pEnd,  | 
1192  | 0  |                   UErrorCode &status) { | 
1193  | 0  |     CharString result;  | 
1194  | 0  |     int32_t idLen = 0;  | 
1195  |  | 
  | 
1196  | 0  |     if (pEnd != NULL) { | 
1197  | 0  |         *pEnd = localeID;  | 
1198  | 0  |     }  | 
1199  |  |  | 
1200  |  |     /* copy the second item as far as possible and count its length */  | 
1201  | 0  |     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])  | 
1202  | 0  |             && uprv_isASCIILetter(localeID[idLen])) { | 
1203  | 0  |         idLen++;  | 
1204  | 0  |     }  | 
1205  |  |  | 
1206  |  |     /* If it's exactly 4 characters long, then it's a script and not a country. */  | 
1207  | 0  |     if (idLen == 4) { | 
1208  | 0  |         int32_t i;  | 
1209  | 0  |         if (pEnd != NULL) { | 
1210  | 0  |             *pEnd = localeID+idLen;  | 
1211  | 0  |         }  | 
1212  | 0  |         if (idLen >= 1) { | 
1213  | 0  |             result.append((char)uprv_toupper(*(localeID++)), status);  | 
1214  | 0  |         }  | 
1215  | 0  |         for (i = 1; i < idLen; i++) { | 
1216  | 0  |             result.append((char)uprv_tolower(*(localeID++)), status);  | 
1217  | 0  |         }  | 
1218  | 0  |     }  | 
1219  |  | 
  | 
1220  | 0  |     return result;  | 
1221  | 0  | }  | 
1222  |  |  | 
1223  |  | CharString U_EXPORT2  | 
1224  |  | ulocimp_getCountry(const char *localeID,  | 
1225  |  |                    const char **pEnd,  | 
1226  | 0  |                    UErrorCode &status) { | 
1227  | 0  |     CharString result;  | 
1228  | 0  |     int32_t idLen=0;  | 
1229  |  |  | 
1230  |  |     /* copy the country as far as possible and count its length */  | 
1231  | 0  |     while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) { | 
1232  | 0  |         result.append((char)uprv_toupper(localeID[idLen]), status);  | 
1233  | 0  |         idLen++;  | 
1234  | 0  |     }  | 
1235  |  |  | 
1236  |  |     /* the country should be either length 2 or 3 */  | 
1237  | 0  |     if (idLen == 2 || idLen == 3) { | 
1238  |  |         /* convert 3 character code to 2 character code if possible *CWB*/  | 
1239  | 0  |         if(idLen==3) { | 
1240  | 0  |             int32_t offset = _findIndex(COUNTRIES_3, result.data());  | 
1241  | 0  |             if(offset>=0) { | 
1242  | 0  |                 result.clear();  | 
1243  | 0  |                 result.append(COUNTRIES[offset], status);  | 
1244  | 0  |             }  | 
1245  | 0  |         }  | 
1246  | 0  |         localeID+=idLen;  | 
1247  | 0  |     } else { | 
1248  | 0  |         result.clear();  | 
1249  | 0  |     }  | 
1250  |  | 
  | 
1251  | 0  |     if(pEnd!=NULL) { | 
1252  | 0  |         *pEnd=localeID;  | 
1253  | 0  |     }  | 
1254  |  | 
  | 
1255  | 0  |     return result;  | 
1256  | 0  | }  | 
1257  |  |  | 
1258  |  | /**  | 
1259  |  |  * @param needSeparator if true, then add leading '_' if any variants  | 
1260  |  |  * are added to 'variant'  | 
1261  |  |  */  | 
1262  |  | static void  | 
1263  |  | _getVariant(const char *localeID,  | 
1264  |  |             char prev,  | 
1265  |  |             ByteSink& sink,  | 
1266  | 0  |             UBool needSeparator) { | 
1267  | 0  |     UBool hasVariant = FALSE;  | 
1268  |  |  | 
1269  |  |     /* get one or more variant tags and separate them with '_' */  | 
1270  | 0  |     if(_isIDSeparator(prev)) { | 
1271  |  |         /* get a variant string after a '-' or '_' */  | 
1272  | 0  |         while(!_isTerminator(*localeID)) { | 
1273  | 0  |             if (needSeparator) { | 
1274  | 0  |                 sink.Append("_", 1); | 
1275  | 0  |                 needSeparator = FALSE;  | 
1276  | 0  |             }  | 
1277  | 0  |             char c = (char)uprv_toupper(*localeID);  | 
1278  | 0  |             if (c == '-') c = '_';  | 
1279  | 0  |             sink.Append(&c, 1);  | 
1280  | 0  |             hasVariant = TRUE;  | 
1281  | 0  |             localeID++;  | 
1282  | 0  |         }  | 
1283  | 0  |     }  | 
1284  |  |  | 
1285  |  |     /* if there is no variant tag after a '-' or '_' then look for '@' */  | 
1286  | 0  |     if(!hasVariant) { | 
1287  | 0  |         if(prev=='@') { | 
1288  |  |             /* keep localeID */  | 
1289  | 0  |         } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) { | 
1290  | 0  |             ++localeID; /* point after the '@' */  | 
1291  | 0  |         } else { | 
1292  | 0  |             return;  | 
1293  | 0  |         }  | 
1294  | 0  |         while(!_isTerminator(*localeID)) { | 
1295  | 0  |             if (needSeparator) { | 
1296  | 0  |                 sink.Append("_", 1); | 
1297  | 0  |                 needSeparator = FALSE;  | 
1298  | 0  |             }  | 
1299  | 0  |             char c = (char)uprv_toupper(*localeID);  | 
1300  | 0  |             if (c == '-' || c == ',') c = '_';  | 
1301  | 0  |             sink.Append(&c, 1);  | 
1302  | 0  |             localeID++;  | 
1303  | 0  |         }  | 
1304  | 0  |     }  | 
1305  | 0  | }  | 
1306  |  |  | 
1307  |  | /* Keyword enumeration */  | 
1308  |  |  | 
/*
 * Per-enumeration state stored in UEnumeration::context by
 * uloc_openKeywordList() and released by uloc_kw_closeKeywords().
 */
typedef struct UKeywordsContext {
    /* start of the keyword buffer: NUL-terminated entries stored back to
       back, with an empty entry marking the end of the list */
    char *keywords;
    /* entry that the next call to uloc_kw_nextKeyword() will return */
    char *current;
} UKeywordsContext;
1313  |  |  | 
1314  |  | U_CDECL_BEGIN  | 
1315  |  |  | 
1316  |  | static void U_CALLCONV  | 
1317  | 0  | uloc_kw_closeKeywords(UEnumeration *enumerator) { | 
1318  | 0  |     uprv_free(((UKeywordsContext *)enumerator->context)->keywords);  | 
1319  | 0  |     uprv_free(enumerator->context);  | 
1320  | 0  |     uprv_free(enumerator);  | 
1321  | 0  | }  | 
1322  |  |  | 
1323  |  | static int32_t U_CALLCONV  | 
1324  | 0  | uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) { | 
1325  | 0  |     char *kw = ((UKeywordsContext *)en->context)->keywords;  | 
1326  | 0  |     int32_t result = 0;  | 
1327  | 0  |     while(*kw) { | 
1328  | 0  |         result++;  | 
1329  | 0  |         kw += uprv_strlen(kw)+1;  | 
1330  | 0  |     }  | 
1331  | 0  |     return result;  | 
1332  | 0  | }  | 
1333  |  |  | 
1334  |  | static const char * U_CALLCONV  | 
1335  |  | uloc_kw_nextKeyword(UEnumeration* en,  | 
1336  |  |                     int32_t* resultLength,  | 
1337  | 0  |                     UErrorCode* /*status*/) { | 
1338  | 0  |     const char* result = ((UKeywordsContext *)en->context)->current;  | 
1339  | 0  |     int32_t len = 0;  | 
1340  | 0  |     if(*result) { | 
1341  | 0  |         len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);  | 
1342  | 0  |         ((UKeywordsContext *)en->context)->current += len+1;  | 
1343  | 0  |     } else { | 
1344  | 0  |         result = NULL;  | 
1345  | 0  |     }  | 
1346  | 0  |     if (resultLength) { | 
1347  | 0  |         *resultLength = len;  | 
1348  | 0  |     }  | 
1349  | 0  |     return result;  | 
1350  | 0  | }  | 
1351  |  |  | 
1352  |  | static void U_CALLCONV  | 
1353  |  | uloc_kw_resetKeywords(UEnumeration* en,  | 
1354  | 0  |                       UErrorCode* /*status*/) { | 
1355  | 0  |     ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;  | 
1356  | 0  | }  | 
1357  |  |  | 
1358  |  | U_CDECL_END  | 
1359  |  |  | 
1360  |  |  | 
1361  |  | static const UEnumeration gKeywordsEnum = { | 
1362  |  |     NULL,  | 
1363  |  |     NULL,  | 
1364  |  |     uloc_kw_closeKeywords,  | 
1365  |  |     uloc_kw_countKeywords,  | 
1366  |  |     uenum_unextDefault,  | 
1367  |  |     uloc_kw_nextKeyword,  | 
1368  |  |     uloc_kw_resetKeywords  | 
1369  |  | };  | 
1370  |  |  | 
1371  |  | U_CAPI UEnumeration* U_EXPORT2  | 
1372  |  | uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)  | 
1373  | 0  | { | 
1374  | 0  |     LocalMemory<UKeywordsContext> myContext;  | 
1375  | 0  |     LocalMemory<UEnumeration> result;  | 
1376  |  | 
  | 
1377  | 0  |     if (U_FAILURE(*status)) { | 
1378  | 0  |         return nullptr;  | 
1379  | 0  |     }  | 
1380  | 0  |     myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));  | 
1381  | 0  |     result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));  | 
1382  | 0  |     if (myContext.isNull() || result.isNull()) { | 
1383  | 0  |         *status = U_MEMORY_ALLOCATION_ERROR;  | 
1384  | 0  |         return nullptr;  | 
1385  | 0  |     }  | 
1386  | 0  |     uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));  | 
1387  | 0  |     myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1));  | 
1388  | 0  |     if (myContext->keywords == nullptr) { | 
1389  | 0  |         *status = U_MEMORY_ALLOCATION_ERROR;  | 
1390  | 0  |         return nullptr;  | 
1391  | 0  |     }  | 
1392  | 0  |     uprv_memcpy(myContext->keywords, keywordList, keywordListSize);  | 
1393  | 0  |     myContext->keywords[keywordListSize] = 0;  | 
1394  | 0  |     myContext->current = myContext->keywords;  | 
1395  | 0  |     result->context = myContext.orphan();  | 
1396  | 0  |     return result.orphan();  | 
1397  | 0  | }  | 
1398  |  |  | 
1399  |  | U_CAPI UEnumeration* U_EXPORT2  | 
1400  |  | uloc_openKeywords(const char* localeID,  | 
1401  |  |                         UErrorCode* status)  | 
1402  | 0  | { | 
1403  | 0  |     char tempBuffer[ULOC_FULLNAME_CAPACITY];  | 
1404  | 0  |     const char* tmpLocaleID;  | 
1405  |  | 
  | 
1406  | 0  |     if(status==NULL || U_FAILURE(*status)) { | 
1407  | 0  |         return 0;  | 
1408  | 0  |     }  | 
1409  |  |  | 
1410  | 0  |     if (_hasBCP47Extension(localeID)) { | 
1411  | 0  |         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);  | 
1412  | 0  |     } else { | 
1413  | 0  |         if (localeID==NULL) { | 
1414  | 0  |            localeID=uloc_getDefault();  | 
1415  | 0  |         }  | 
1416  | 0  |         tmpLocaleID=localeID;  | 
1417  | 0  |     }  | 
1418  |  |  | 
1419  |  |     /* Skip the language */  | 
1420  | 0  |     ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status);  | 
1421  | 0  |     if (U_FAILURE(*status)) { | 
1422  | 0  |         return 0;  | 
1423  | 0  |     }  | 
1424  |  |  | 
1425  | 0  |     if(_isIDSeparator(*tmpLocaleID)) { | 
1426  | 0  |         const char *scriptID;  | 
1427  |  |         /* Skip the script if available */  | 
1428  | 0  |         ulocimp_getScript(tmpLocaleID+1, &scriptID, *status);  | 
1429  | 0  |         if (U_FAILURE(*status)) { | 
1430  | 0  |             return 0;  | 
1431  | 0  |         }  | 
1432  | 0  |         if(scriptID != tmpLocaleID+1) { | 
1433  |  |             /* Found optional script */  | 
1434  | 0  |             tmpLocaleID = scriptID;  | 
1435  | 0  |         }  | 
1436  |  |         /* Skip the Country */  | 
1437  | 0  |         if (_isIDSeparator(*tmpLocaleID)) { | 
1438  | 0  |             ulocimp_getCountry(tmpLocaleID+1, &tmpLocaleID, *status);  | 
1439  | 0  |             if (U_FAILURE(*status)) { | 
1440  | 0  |                 return 0;  | 
1441  | 0  |             }  | 
1442  | 0  |         }  | 
1443  | 0  |     }  | 
1444  |  |  | 
1445  |  |     /* keywords are located after '@' */  | 
1446  | 0  |     if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) { | 
1447  | 0  |         CharString keywords;  | 
1448  | 0  |         CharStringByteSink sink(&keywords);  | 
1449  | 0  |         ulocimp_getKeywords(tmpLocaleID+1, '@', sink, FALSE, status);  | 
1450  | 0  |         if (U_FAILURE(*status)) { | 
1451  | 0  |             return NULL;  | 
1452  | 0  |         }  | 
1453  | 0  |         return uloc_openKeywordList(keywords.data(), keywords.length(), status);  | 
1454  | 0  |     }  | 
1455  | 0  |     return NULL;  | 
1456  | 0  | }  | 
1457  |  |  | 
1458  |  |  | 
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* Nonzero when at least one bit of 'mask' is set in 'options'.
 * Both arguments are parenthesized so that compound expressions such as
 * (a | b) are combined before the '&' is applied.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The tag "i-default" as a plain character array without a terminating NUL;
 * it is compared with uprv_strncmp() over I_DEFAULT_LENGTH characters.
 */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1467  |  |  | 
1468  |  | /**  | 
1469  |  |  * Canonicalize the given localeID, to level 1 or to level 2,  | 
1470  |  |  * depending on the options.  To specify level 1, pass in options=0.  | 
1471  |  |  * To specify level 2, pass in options=_ULOC_CANONICALIZE.  | 
1472  |  |  *  | 
1473  |  |  * This is the code underlying uloc_getName and uloc_canonicalize.  | 
1474  |  |  */  | 
1475  |  | static void  | 
1476  |  | _canonicalize(const char* localeID,  | 
1477  |  |               ByteSink& sink,  | 
1478  |  |               uint32_t options,  | 
1479  | 0  |               UErrorCode* err) { | 
1480  | 0  |     int32_t j, fieldCount=0, scriptSize=0, variantSize=0;  | 
1481  | 0  |     PreflightingLocaleIDBuffer tempBuffer;  | 
1482  | 0  |     const char* origLocaleID;  | 
1483  | 0  |     const char* tmpLocaleID;  | 
1484  | 0  |     const char* keywordAssign = NULL;  | 
1485  | 0  |     const char* separatorIndicator = NULL;  | 
1486  |  | 
  | 
1487  | 0  |     if (U_FAILURE(*err)) { | 
1488  | 0  |         return;  | 
1489  | 0  |     }  | 
1490  |  |  | 
1491  | 0  |     if (_hasBCP47Extension(localeID)) { | 
1492  | 0  |         do { | 
1493  | 0  |             tempBuffer.requestedCapacity = _ConvertBCP47(tmpLocaleID, localeID,  | 
1494  | 0  |                 tempBuffer.getBuffer(), tempBuffer.getCapacity(), err);  | 
1495  | 0  |         } while (tempBuffer.needToTryAgain(err));  | 
1496  | 0  |     } else { | 
1497  | 0  |         if (localeID==NULL) { | 
1498  | 0  |            localeID=uloc_getDefault();  | 
1499  | 0  |         }  | 
1500  | 0  |         tmpLocaleID=localeID;  | 
1501  | 0  |     }  | 
1502  |  | 
  | 
1503  | 0  |     origLocaleID=tmpLocaleID;  | 
1504  |  |  | 
1505  |  |     /* get all pieces, one after another, and separate with '_' */  | 
1506  | 0  |     CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);  | 
1507  |  | 
  | 
1508  | 0  |     if (tag.length() == I_DEFAULT_LENGTH &&  | 
1509  | 0  |             uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) { | 
1510  | 0  |         tag.clear();  | 
1511  | 0  |         tag.append(uloc_getDefault(), *err);  | 
1512  | 0  |     } else if(_isIDSeparator(*tmpLocaleID)) { | 
1513  | 0  |         const char *scriptID;  | 
1514  |  | 
  | 
1515  | 0  |         ++fieldCount;  | 
1516  | 0  |         tag.append('_', *err); | 
1517  |  | 
  | 
1518  | 0  |         CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);  | 
1519  | 0  |         tag.append(script, *err);  | 
1520  | 0  |         scriptSize = script.length();  | 
1521  | 0  |         if(scriptSize > 0) { | 
1522  |  |             /* Found optional script */  | 
1523  | 0  |             tmpLocaleID = scriptID;  | 
1524  | 0  |             ++fieldCount;  | 
1525  | 0  |             if (_isIDSeparator(*tmpLocaleID)) { | 
1526  |  |                 /* If there is something else, then we add the _ */  | 
1527  | 0  |                 tag.append('_', *err); | 
1528  | 0  |             }  | 
1529  | 0  |         }  | 
1530  |  | 
  | 
1531  | 0  |         if (_isIDSeparator(*tmpLocaleID)) { | 
1532  | 0  |             const char *cntryID;  | 
1533  |  | 
  | 
1534  | 0  |             CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);  | 
1535  | 0  |             tag.append(country, *err);  | 
1536  | 0  |             if (!country.isEmpty()) { | 
1537  |  |                 /* Found optional country */  | 
1538  | 0  |                 tmpLocaleID = cntryID;  | 
1539  | 0  |             }  | 
1540  | 0  |             if(_isIDSeparator(*tmpLocaleID)) { | 
1541  |  |                 /* If there is something else, then we add the _  if we found country before. */  | 
1542  | 0  |                 if (!_isIDSeparator(*(tmpLocaleID+1))) { | 
1543  | 0  |                     ++fieldCount;  | 
1544  | 0  |                     tag.append('_', *err); | 
1545  | 0  |                 }  | 
1546  |  | 
  | 
1547  | 0  |                 variantSize = -tag.length();  | 
1548  | 0  |                 { | 
1549  | 0  |                     CharStringByteSink s(&tag);  | 
1550  | 0  |                     _getVariant(tmpLocaleID+1, *tmpLocaleID, s, FALSE);  | 
1551  | 0  |                 }  | 
1552  | 0  |                 variantSize += tag.length();  | 
1553  | 0  |                 if (variantSize > 0) { | 
1554  | 0  |                     tmpLocaleID += variantSize + 1; /* skip '_' and variant */  | 
1555  | 0  |                 }  | 
1556  | 0  |             }  | 
1557  | 0  |         }  | 
1558  | 0  |     }  | 
1559  |  |  | 
1560  |  |     /* Copy POSIX-style charset specifier, if any [mr.utf8] */  | 
1561  | 0  |     if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') { | 
1562  | 0  |         UBool done = FALSE;  | 
1563  | 0  |         do { | 
1564  | 0  |             char c = *tmpLocaleID;  | 
1565  | 0  |             switch (c) { | 
1566  | 0  |             case 0:  | 
1567  | 0  |             case '@':  | 
1568  | 0  |                 done = TRUE;  | 
1569  | 0  |                 break;  | 
1570  | 0  |             default:  | 
1571  | 0  |                 tag.append(c, *err);  | 
1572  | 0  |                 ++tmpLocaleID;  | 
1573  | 0  |                 break;  | 
1574  | 0  |             }  | 
1575  | 0  |         } while (!done);  | 
1576  | 0  |     }  | 
1577  |  |  | 
1578  |  |     /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'  | 
1579  |  |        After this, tmpLocaleID either points to '@' or is NULL */  | 
1580  | 0  |     if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) { | 
1581  | 0  |         keywordAssign = uprv_strchr(tmpLocaleID, '=');  | 
1582  | 0  |         separatorIndicator = uprv_strchr(tmpLocaleID, ';');  | 
1583  | 0  |     }  | 
1584  |  |  | 
1585  |  |     /* Copy POSIX-style variant, if any [mr@FOO] */  | 
1586  | 0  |     if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&  | 
1587  | 0  |         tmpLocaleID != NULL && keywordAssign == NULL) { | 
1588  | 0  |         for (;;) { | 
1589  | 0  |             char c = *tmpLocaleID;  | 
1590  | 0  |             if (c == 0) { | 
1591  | 0  |                 break;  | 
1592  | 0  |             }  | 
1593  | 0  |             tag.append(c, *err);  | 
1594  | 0  |             ++tmpLocaleID;  | 
1595  | 0  |         }  | 
1596  | 0  |     }  | 
1597  |  | 
  | 
1598  | 0  |     if (OPTION_SET(options, _ULOC_CANONICALIZE)) { | 
1599  |  |         /* Handle @FOO variant if @ is present and not followed by = */  | 
1600  | 0  |         if (tmpLocaleID!=NULL && keywordAssign==NULL) { | 
1601  |  |             /* Add missing '_' if needed */  | 
1602  | 0  |             if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) { | 
1603  | 0  |                 do { | 
1604  | 0  |                     tag.append('_', *err); | 
1605  | 0  |                     ++fieldCount;  | 
1606  | 0  |                 } while(fieldCount<2);  | 
1607  | 0  |             }  | 
1608  |  | 
  | 
1609  | 0  |             int32_t posixVariantSize = -tag.length();  | 
1610  | 0  |             { | 
1611  | 0  |                 CharStringByteSink s(&tag);  | 
1612  | 0  |                 _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));  | 
1613  | 0  |             }  | 
1614  | 0  |             posixVariantSize += tag.length();  | 
1615  | 0  |             if (posixVariantSize > 0) { | 
1616  | 0  |                 variantSize += posixVariantSize;  | 
1617  | 0  |             }  | 
1618  | 0  |         }  | 
1619  |  |  | 
1620  |  |         /* Look up the ID in the canonicalization map */  | 
1621  | 0  |         for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) { | 
1622  | 0  |             StringPiece id(CANONICALIZE_MAP[j].id);  | 
1623  | 0  |             if (tag == id) { | 
1624  | 0  |                 if (id.empty() && tmpLocaleID != NULL) { | 
1625  | 0  |                     break; /* Don't remap "" if keywords present */  | 
1626  | 0  |                 }  | 
1627  | 0  |                 tag.clear();  | 
1628  | 0  |                 tag.append(CANONICALIZE_MAP[j].canonicalID, *err);  | 
1629  | 0  |                 break;  | 
1630  | 0  |             }  | 
1631  | 0  |         }  | 
1632  | 0  |     }  | 
1633  |  | 
  | 
1634  | 0  |     sink.Append(tag.data(), tag.length());  | 
1635  |  | 
  | 
1636  | 0  |     if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) { | 
1637  | 0  |         if (tmpLocaleID!=NULL && keywordAssign!=NULL &&  | 
1638  | 0  |             (!separatorIndicator || separatorIndicator > keywordAssign)) { | 
1639  | 0  |             sink.Append("@", 1); | 
1640  | 0  |             ++fieldCount;  | 
1641  | 0  |             ulocimp_getKeywords(tmpLocaleID+1, '@', sink, TRUE, err);  | 
1642  | 0  |         }  | 
1643  | 0  |     }  | 
1644  | 0  | }  | 
1645  |  |  | 
1646  |  | /* ### ID parsing API **************************************************/  | 
1647  |  |  | 
1648  |  | U_CAPI int32_t  U_EXPORT2  | 
1649  |  | uloc_getParent(const char*    localeID,  | 
1650  |  |                char* parent,  | 
1651  |  |                int32_t parentCapacity,  | 
1652  |  |                UErrorCode* err)  | 
1653  | 0  | { | 
1654  | 0  |     const char *lastUnderscore;  | 
1655  | 0  |     int32_t i;  | 
1656  |  | 
  | 
1657  | 0  |     if (U_FAILURE(*err))  | 
1658  | 0  |         return 0;  | 
1659  |  |  | 
1660  | 0  |     if (localeID == NULL)  | 
1661  | 0  |         localeID = uloc_getDefault();  | 
1662  |  | 
  | 
1663  | 0  |     lastUnderscore=uprv_strrchr(localeID, '_');  | 
1664  | 0  |     if(lastUnderscore!=NULL) { | 
1665  | 0  |         i=(int32_t)(lastUnderscore-localeID);  | 
1666  | 0  |     } else { | 
1667  | 0  |         i=0;  | 
1668  | 0  |     }  | 
1669  |  | 
  | 
1670  | 0  |     if (i > 0) { | 
1671  | 0  |         if (uprv_strnicmp(localeID, "und_", 4) == 0) { | 
1672  | 0  |             localeID += 3;  | 
1673  | 0  |             i -= 3;  | 
1674  | 0  |             uprv_memmove(parent, localeID, uprv_min(i, parentCapacity));  | 
1675  | 0  |         } else if (parent != localeID) { | 
1676  | 0  |             uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));  | 
1677  | 0  |         }  | 
1678  | 0  |     }  | 
1679  |  | 
  | 
1680  | 0  |     return u_terminateChars(parent, parentCapacity, i, err);  | 
1681  | 0  | }  | 
1682  |  |  | 
1683  |  | U_CAPI int32_t U_EXPORT2  | 
1684  |  | uloc_getLanguage(const char*    localeID,  | 
1685  |  |          char* language,  | 
1686  |  |          int32_t languageCapacity,  | 
1687  |  |          UErrorCode* err)  | 
1688  | 0  | { | 
1689  |  |     /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/  | 
1690  |  | 
  | 
1691  | 0  |     if (err==NULL || U_FAILURE(*err)) { | 
1692  | 0  |         return 0;  | 
1693  | 0  |     }  | 
1694  |  |  | 
1695  | 0  |     if(localeID==NULL) { | 
1696  | 0  |         localeID=uloc_getDefault();  | 
1697  | 0  |     }  | 
1698  |  | 
  | 
1699  | 0  |     return ulocimp_getLanguage(localeID, NULL, *err).extract(language, languageCapacity, *err);  | 
1700  | 0  | }  | 
1701  |  |  | 
1702  |  | U_CAPI int32_t U_EXPORT2  | 
1703  |  | uloc_getScript(const char*    localeID,  | 
1704  |  |          char* script,  | 
1705  |  |          int32_t scriptCapacity,  | 
1706  |  |          UErrorCode* err)  | 
1707  | 0  | { | 
1708  | 0  |     if(err==NULL || U_FAILURE(*err)) { | 
1709  | 0  |         return 0;  | 
1710  | 0  |     }  | 
1711  |  |  | 
1712  | 0  |     if(localeID==NULL) { | 
1713  | 0  |         localeID=uloc_getDefault();  | 
1714  | 0  |     }  | 
1715  |  |  | 
1716  |  |     /* skip the language */  | 
1717  | 0  |     ulocimp_getLanguage(localeID, &localeID, *err);  | 
1718  | 0  |     if (U_FAILURE(*err)) { | 
1719  | 0  |         return 0;  | 
1720  | 0  |     }  | 
1721  |  |  | 
1722  | 0  |     if(_isIDSeparator(*localeID)) { | 
1723  | 0  |         return ulocimp_getScript(localeID+1, NULL, *err).extract(script, scriptCapacity, *err);  | 
1724  | 0  |     }  | 
1725  | 0  |     return u_terminateChars(script, scriptCapacity, 0, err);  | 
1726  | 0  | }  | 
1727  |  |  | 
1728  |  | U_CAPI int32_t  U_EXPORT2  | 
1729  |  | uloc_getCountry(const char* localeID,  | 
1730  |  |             char* country,  | 
1731  |  |             int32_t countryCapacity,  | 
1732  |  |             UErrorCode* err)  | 
1733  | 0  | { | 
1734  | 0  |     if(err==NULL || U_FAILURE(*err)) { | 
1735  | 0  |         return 0;  | 
1736  | 0  |     }  | 
1737  |  |  | 
1738  | 0  |     if(localeID==NULL) { | 
1739  | 0  |         localeID=uloc_getDefault();  | 
1740  | 0  |     }  | 
1741  |  |  | 
1742  |  |     /* Skip the language */  | 
1743  | 0  |     ulocimp_getLanguage(localeID, &localeID, *err);  | 
1744  | 0  |     if (U_FAILURE(*err)) { | 
1745  | 0  |         return 0;  | 
1746  | 0  |     }  | 
1747  |  |  | 
1748  | 0  |     if(_isIDSeparator(*localeID)) { | 
1749  | 0  |         const char *scriptID;  | 
1750  |  |         /* Skip the script if available */  | 
1751  | 0  |         ulocimp_getScript(localeID+1, &scriptID, *err);  | 
1752  | 0  |         if (U_FAILURE(*err)) { | 
1753  | 0  |             return 0;  | 
1754  | 0  |         }  | 
1755  | 0  |         if(scriptID != localeID+1) { | 
1756  |  |             /* Found optional script */  | 
1757  | 0  |             localeID = scriptID;  | 
1758  | 0  |         }  | 
1759  | 0  |         if(_isIDSeparator(*localeID)) { | 
1760  | 0  |             return ulocimp_getCountry(localeID+1, NULL, *err).extract(country, countryCapacity, *err);  | 
1761  | 0  |         }  | 
1762  | 0  |     }  | 
1763  | 0  |     return u_terminateChars(country, countryCapacity, 0, err);  | 
1764  | 0  | }  | 
1765  |  |  | 
1766  |  | U_CAPI int32_t  U_EXPORT2  | 
1767  |  | uloc_getVariant(const char* localeID,  | 
1768  |  |                 char* variant,  | 
1769  |  |                 int32_t variantCapacity,  | 
1770  |  |                 UErrorCode* err)  | 
1771  | 0  | { | 
1772  | 0  |     char tempBuffer[ULOC_FULLNAME_CAPACITY];  | 
1773  | 0  |     const char* tmpLocaleID;  | 
1774  | 0  |     int32_t i=0;  | 
1775  |  | 
  | 
1776  | 0  |     if(err==NULL || U_FAILURE(*err)) { | 
1777  | 0  |         return 0;  | 
1778  | 0  |     }  | 
1779  |  |  | 
1780  | 0  |     if (_hasBCP47Extension(localeID)) { | 
1781  | 0  |         _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);  | 
1782  | 0  |     } else { | 
1783  | 0  |         if (localeID==NULL) { | 
1784  | 0  |            localeID=uloc_getDefault();  | 
1785  | 0  |         }  | 
1786  | 0  |         tmpLocaleID=localeID;  | 
1787  | 0  |     }  | 
1788  |  |  | 
1789  |  |     /* Skip the language */  | 
1790  | 0  |     ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);  | 
1791  | 0  |     if (U_FAILURE(*err)) { | 
1792  | 0  |         return 0;  | 
1793  | 0  |     }  | 
1794  |  |  | 
1795  | 0  |     if(_isIDSeparator(*tmpLocaleID)) { | 
1796  | 0  |         const char *scriptID;  | 
1797  |  |         /* Skip the script if available */  | 
1798  | 0  |         ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);  | 
1799  | 0  |         if (U_FAILURE(*err)) { | 
1800  | 0  |             return 0;  | 
1801  | 0  |         }  | 
1802  | 0  |         if(scriptID != tmpLocaleID+1) { | 
1803  |  |             /* Found optional script */  | 
1804  | 0  |             tmpLocaleID = scriptID;  | 
1805  | 0  |         }  | 
1806  |  |         /* Skip the Country */  | 
1807  | 0  |         if (_isIDSeparator(*tmpLocaleID)) { | 
1808  | 0  |             const char *cntryID;  | 
1809  | 0  |             ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);  | 
1810  | 0  |             if (U_FAILURE(*err)) { | 
1811  | 0  |                 return 0;  | 
1812  | 0  |             }  | 
1813  | 0  |             if (cntryID != tmpLocaleID+1) { | 
1814  |  |                 /* Found optional country */  | 
1815  | 0  |                 tmpLocaleID = cntryID;  | 
1816  | 0  |             }  | 
1817  | 0  |             if(_isIDSeparator(*tmpLocaleID)) { | 
1818  |  |                 /* If there was no country ID, skip a possible extra IDSeparator */  | 
1819  | 0  |                 if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) { | 
1820  | 0  |                     tmpLocaleID++;  | 
1821  | 0  |                 }  | 
1822  |  | 
  | 
1823  | 0  |                 CheckedArrayByteSink sink(variant, variantCapacity);  | 
1824  | 0  |                 _getVariant(tmpLocaleID+1, *tmpLocaleID, sink, FALSE);  | 
1825  |  | 
  | 
1826  | 0  |                 i = sink.NumberOfBytesAppended();  | 
1827  |  | 
  | 
1828  | 0  |                 if (U_FAILURE(*err)) { | 
1829  | 0  |                     return i;  | 
1830  | 0  |                 }  | 
1831  |  |  | 
1832  | 0  |                 if (sink.Overflowed()) { | 
1833  | 0  |                     *err = U_BUFFER_OVERFLOW_ERROR;  | 
1834  | 0  |                     return i;  | 
1835  | 0  |                 }  | 
1836  | 0  |             }  | 
1837  | 0  |         }  | 
1838  | 0  |     }  | 
1839  |  |  | 
1840  | 0  |     return u_terminateChars(variant, variantCapacity, i, err);  | 
1841  | 0  | }  | 
1842  |  |  | 
1843  |  | U_CAPI int32_t  U_EXPORT2  | 
1844  |  | uloc_getName(const char* localeID,  | 
1845  |  |              char* name,  | 
1846  |  |              int32_t nameCapacity,  | 
1847  |  |              UErrorCode* err)  | 
1848  | 0  | { | 
1849  | 0  |     if (U_FAILURE(*err)) { | 
1850  | 0  |         return 0;  | 
1851  | 0  |     }  | 
1852  |  |  | 
1853  | 0  |     CheckedArrayByteSink sink(name, nameCapacity);  | 
1854  | 0  |     ulocimp_getName(localeID, sink, err);  | 
1855  |  | 
  | 
1856  | 0  |     int32_t reslen = sink.NumberOfBytesAppended();  | 
1857  |  | 
  | 
1858  | 0  |     if (U_FAILURE(*err)) { | 
1859  | 0  |         return reslen;  | 
1860  | 0  |     }  | 
1861  |  |  | 
1862  | 0  |     if (sink.Overflowed()) { | 
1863  | 0  |         *err = U_BUFFER_OVERFLOW_ERROR;  | 
1864  | 0  |     } else { | 
1865  | 0  |         u_terminateChars(name, nameCapacity, reslen, err);  | 
1866  | 0  |     }  | 
1867  |  | 
  | 
1868  | 0  |     return reslen;  | 
1869  | 0  | }  | 
1870  |  |  | 
1871  |  | U_CAPI void U_EXPORT2  | 
1872  |  | ulocimp_getName(const char* localeID,  | 
1873  |  |                 ByteSink& sink,  | 
1874  |  |                 UErrorCode* err)  | 
1875  | 0  | { | 
1876  | 0  |     _canonicalize(localeID, sink, 0, err);  | 
1877  | 0  | }  | 
1878  |  |  | 
1879  |  | U_CAPI int32_t  U_EXPORT2  | 
1880  |  | uloc_getBaseName(const char* localeID,  | 
1881  |  |                  char* name,  | 
1882  |  |                  int32_t nameCapacity,  | 
1883  |  |                  UErrorCode* err)  | 
1884  | 0  | { | 
1885  | 0  |     if (U_FAILURE(*err)) { | 
1886  | 0  |         return 0;  | 
1887  | 0  |     }  | 
1888  |  |  | 
1889  | 0  |     CheckedArrayByteSink sink(name, nameCapacity);  | 
1890  | 0  |     ulocimp_getBaseName(localeID, sink, err);  | 
1891  |  | 
  | 
1892  | 0  |     int32_t reslen = sink.NumberOfBytesAppended();  | 
1893  |  | 
  | 
1894  | 0  |     if (U_FAILURE(*err)) { | 
1895  | 0  |         return reslen;  | 
1896  | 0  |     }  | 
1897  |  |  | 
1898  | 0  |     if (sink.Overflowed()) { | 
1899  | 0  |         *err = U_BUFFER_OVERFLOW_ERROR;  | 
1900  | 0  |     } else { | 
1901  | 0  |         u_terminateChars(name, nameCapacity, reslen, err);  | 
1902  | 0  |     }  | 
1903  |  | 
  | 
1904  | 0  |     return reslen;  | 
1905  | 0  | }  | 
1906  |  |  | 
1907  |  | U_CAPI void U_EXPORT2  | 
1908  |  | ulocimp_getBaseName(const char* localeID,  | 
1909  |  |                     ByteSink& sink,  | 
1910  |  |                     UErrorCode* err)  | 
1911  | 0  | { | 
1912  | 0  |     _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err);  | 
1913  | 0  | }  | 
1914  |  |  | 
1915  |  | U_CAPI int32_t  U_EXPORT2  | 
1916  |  | uloc_canonicalize(const char* localeID,  | 
1917  |  |                   char* name,  | 
1918  |  |                   int32_t nameCapacity,  | 
1919  |  |                   UErrorCode* err)  | 
1920  | 0  | { | 
1921  | 0  |     if (U_FAILURE(*err)) { | 
1922  | 0  |         return 0;  | 
1923  | 0  |     }  | 
1924  |  |  | 
1925  | 0  |     CheckedArrayByteSink sink(name, nameCapacity);  | 
1926  | 0  |     ulocimp_canonicalize(localeID, sink, err);  | 
1927  |  | 
  | 
1928  | 0  |     int32_t reslen = sink.NumberOfBytesAppended();  | 
1929  |  | 
  | 
1930  | 0  |     if (U_FAILURE(*err)) { | 
1931  | 0  |         return reslen;  | 
1932  | 0  |     }  | 
1933  |  |  | 
1934  | 0  |     if (sink.Overflowed()) { | 
1935  | 0  |         *err = U_BUFFER_OVERFLOW_ERROR;  | 
1936  | 0  |     } else { | 
1937  | 0  |         u_terminateChars(name, nameCapacity, reslen, err);  | 
1938  | 0  |     }  | 
1939  |  | 
  | 
1940  | 0  |     return reslen;  | 
1941  | 0  | }  | 
1942  |  |  | 
1943  |  | U_CAPI void U_EXPORT2  | 
1944  |  | ulocimp_canonicalize(const char* localeID,  | 
1945  |  |                      ByteSink& sink,  | 
1946  |  |                      UErrorCode* err)  | 
1947  | 0  | { | 
1948  | 0  |     _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err);  | 
1949  | 0  | }  | 
1950  |  |  | 
1951  |  | U_CAPI const char*  U_EXPORT2  | 
1952  |  | uloc_getISO3Language(const char* localeID)  | 
1953  | 0  | { | 
1954  | 0  |     int16_t offset;  | 
1955  | 0  |     char lang[ULOC_LANG_CAPACITY];  | 
1956  | 0  |     UErrorCode err = U_ZERO_ERROR;  | 
1957  |  | 
  | 
1958  | 0  |     if (localeID == NULL)  | 
1959  | 0  |     { | 
1960  | 0  |         localeID = uloc_getDefault();  | 
1961  | 0  |     }  | 
1962  | 0  |     uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);  | 
1963  | 0  |     if (U_FAILURE(err))  | 
1964  | 0  |         return "";  | 
1965  | 0  |     offset = _findIndex(LANGUAGES, lang);  | 
1966  | 0  |     if (offset < 0)  | 
1967  | 0  |         return "";  | 
1968  | 0  |     return LANGUAGES_3[offset];  | 
1969  | 0  | }  | 
1970  |  |  | 
1971  |  | U_CAPI const char*  U_EXPORT2  | 
1972  |  | uloc_getISO3Country(const char* localeID)  | 
1973  | 0  | { | 
1974  | 0  |     int16_t offset;  | 
1975  | 0  |     char cntry[ULOC_LANG_CAPACITY];  | 
1976  | 0  |     UErrorCode err = U_ZERO_ERROR;  | 
1977  |  | 
  | 
1978  | 0  |     if (localeID == NULL)  | 
1979  | 0  |     { | 
1980  | 0  |         localeID = uloc_getDefault();  | 
1981  | 0  |     }  | 
1982  | 0  |     uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);  | 
1983  | 0  |     if (U_FAILURE(err))  | 
1984  | 0  |         return "";  | 
1985  | 0  |     offset = _findIndex(COUNTRIES, cntry);  | 
1986  | 0  |     if (offset < 0)  | 
1987  | 0  |         return "";  | 
1988  |  |  | 
1989  | 0  |     return COUNTRIES_3[offset];  | 
1990  | 0  | }  | 
1991  |  |  | 
1992  |  | U_CAPI uint32_t  U_EXPORT2  | 
1993  |  | uloc_getLCID(const char* localeID)  | 
1994  | 0  | { | 
1995  | 0  |     UErrorCode status = U_ZERO_ERROR;  | 
1996  | 0  |     char       langID[ULOC_FULLNAME_CAPACITY];  | 
1997  | 0  |     uint32_t   lcid = 0;  | 
1998  |  |  | 
1999  |  |     /* Check for incomplete id. */  | 
2000  | 0  |     if (!localeID || uprv_strlen(localeID) < 2) { | 
2001  | 0  |         return 0;  | 
2002  | 0  |     }  | 
2003  |  |  | 
2004  |  |     // First, attempt Windows platform lookup if available, but fall  | 
2005  |  |     // through to catch any special cases (ICU vs Windows name differences).  | 
2006  | 0  |     lcid = uprv_convertToLCIDPlatform(localeID, &status);  | 
2007  | 0  |     if (U_FAILURE(status)) { | 
2008  | 0  |         return 0;  | 
2009  | 0  |     }  | 
2010  | 0  |     if (lcid > 0) { | 
2011  |  |         // Windows found an LCID, return that  | 
2012  | 0  |         return lcid;  | 
2013  | 0  |     }  | 
2014  |  |  | 
2015  | 0  |     uloc_getLanguage(localeID, langID, sizeof(langID), &status);  | 
2016  | 0  |     if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) { | 
2017  | 0  |         return 0;  | 
2018  | 0  |     }  | 
2019  |  |  | 
2020  | 0  |     if (uprv_strchr(localeID, '@')) { | 
2021  |  |         // uprv_convertToLCID does not support keywords other than collation.  | 
2022  |  |         // Remove all keywords except collation.  | 
2023  | 0  |         int32_t len;  | 
2024  | 0  |         char tmpLocaleID[ULOC_FULLNAME_CAPACITY];  | 
2025  |  | 
  | 
2026  | 0  |         CharString collVal;  | 
2027  | 0  |         { | 
2028  | 0  |             CharStringByteSink sink(&collVal);  | 
2029  | 0  |             ulocimp_getKeywordValue(localeID, "collation", sink, &status);  | 
2030  | 0  |         }  | 
2031  |  | 
  | 
2032  | 0  |         if (U_SUCCESS(status) && !collVal.isEmpty()) { | 
2033  | 0  |             len = uloc_getBaseName(localeID, tmpLocaleID,  | 
2034  | 0  |                 UPRV_LENGTHOF(tmpLocaleID) - 1, &status);  | 
2035  |  | 
  | 
2036  | 0  |             if (U_SUCCESS(status) && len > 0) { | 
2037  | 0  |                 tmpLocaleID[len] = 0;  | 
2038  |  | 
  | 
2039  | 0  |                 len = uloc_setKeywordValue("collation", collVal.data(), tmpLocaleID, | 
2040  | 0  |                     UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);  | 
2041  |  | 
  | 
2042  | 0  |                 if (U_SUCCESS(status) && len > 0) { | 
2043  | 0  |                     tmpLocaleID[len] = 0;  | 
2044  | 0  |                     return uprv_convertToLCID(langID, tmpLocaleID, &status);  | 
2045  | 0  |                 }  | 
2046  | 0  |             }  | 
2047  | 0  |         }  | 
2048  |  |  | 
2049  |  |         // fall through - all keywords are simply ignored  | 
2050  | 0  |         status = U_ZERO_ERROR;  | 
2051  | 0  |     }  | 
2052  |  |  | 
2053  | 0  |     return uprv_convertToLCID(langID, localeID, &status);  | 
2054  | 0  | }  | 
2055  |  |  | 
2056  |  | U_CAPI int32_t U_EXPORT2  | 
2057  |  | uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,  | 
2058  |  |                 UErrorCode *status)  | 
2059  | 0  | { | 
2060  | 0  |     return uprv_convertToPosix(hostid, locale, localeCapacity, status);  | 
2061  | 0  | }  | 
2062  |  |  | 
2063  |  | /* ### Default locale **************************************************/  | 
2064  |  |  | 
2065  |  | U_CAPI const char*  U_EXPORT2  | 
2066  |  | uloc_getDefault()  | 
2067  | 0  | { | 
2068  | 0  |     return locale_get_default();  | 
2069  | 0  | }  | 
2070  |  |  | 
2071  |  | U_CAPI void  U_EXPORT2  | 
2072  |  | uloc_setDefault(const char*   newDefaultLocale,  | 
2073  |  |              UErrorCode* err)  | 
2074  | 0  | { | 
2075  | 0  |     if (U_FAILURE(*err))  | 
2076  | 0  |         return;  | 
2077  |  |     /* the error code isn't currently used for anything by this function*/  | 
2078  |  |  | 
2079  |  |     /* propagate change to C++ */  | 
2080  | 0  |     locale_set_default(newDefaultLocale);  | 
2081  | 0  | }  | 
2082  |  |  | 
2083  |  | /**  | 
2084  |  |  * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer  | 
2085  |  |  * to an array of pointers to arrays of char.  All of these pointers are owned  | 
2086  |  |  * by ICU-- do not delete them, and do not write through them.  The array is  | 
2087  |  |  * terminated with a null pointer.  | 
2088  |  |  */  | 
2089  |  | U_CAPI const char* const*  U_EXPORT2  | 
2090  |  | uloc_getISOLanguages()  | 
2091  | 0  | { | 
2092  | 0  |     return LANGUAGES;  | 
2093  | 0  | }  | 
2094  |  |  | 
2095  |  | /**  | 
2096  |  |  * Returns a list of all 2-letter country codes defined in ISO 639.  This is a  | 
2097  |  |  * pointer to an array of pointers to arrays of char.  All of these pointers are  | 
2098  |  |  * owned by ICU-- do not delete them, and do not write through them.  The array is  | 
2099  |  |  * terminated with a null pointer.  | 
2100  |  |  */  | 
2101  |  | U_CAPI const char* const*  U_EXPORT2  | 
2102  |  | uloc_getISOCountries()  | 
2103  | 0  | { | 
2104  | 0  |     return COUNTRIES;  | 
2105  | 0  | }  | 
2106  |  |  | 
2107  |  | U_CAPI const char* U_EXPORT2  | 
2108  |  | uloc_toUnicodeLocaleKey(const char* keyword)  | 
2109  | 0  | { | 
2110  | 0  |     const char* bcpKey = ulocimp_toBcpKey(keyword);  | 
2111  | 0  |     if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) { | 
2112  |  |         // unknown keyword, but syntax is fine..  | 
2113  | 0  |         return keyword;  | 
2114  | 0  |     }  | 
2115  | 0  |     return bcpKey;  | 
2116  | 0  | }  | 
2117  |  |  | 
2118  |  | U_CAPI const char* U_EXPORT2  | 
2119  |  | uloc_toUnicodeLocaleType(const char* keyword, const char* value)  | 
2120  | 0  | { | 
2121  | 0  |     const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);  | 
2122  | 0  |     if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) { | 
2123  |  |         // unknown keyword, but syntax is fine..  | 
2124  | 0  |         return value;  | 
2125  | 0  |     }  | 
2126  | 0  |     return bcpType;  | 
2127  | 0  | }  | 
2128  |  |  | 
2129  |  | static UBool  | 
2130  |  | isWellFormedLegacyKey(const char* legacyKey)  | 
2131  | 0  | { | 
2132  | 0  |     const char* p = legacyKey;  | 
2133  | 0  |     while (*p) { | 
2134  | 0  |         if (!UPRV_ISALPHANUM(*p)) { | 
2135  | 0  |             return FALSE;  | 
2136  | 0  |         }  | 
2137  | 0  |         p++;  | 
2138  | 0  |     }  | 
2139  | 0  |     return TRUE;  | 
2140  | 0  | }  | 
2141  |  |  | 
2142  |  | static UBool  | 
2143  |  | isWellFormedLegacyType(const char* legacyType)  | 
2144  | 0  | { | 
2145  | 0  |     const char* p = legacyType;  | 
2146  | 0  |     int32_t alphaNumLen = 0;  | 
2147  | 0  |     while (*p) { | 
2148  | 0  |         if (*p == '_' || *p == '/' || *p == '-') { | 
2149  | 0  |             if (alphaNumLen == 0) { | 
2150  | 0  |                 return FALSE;  | 
2151  | 0  |             }  | 
2152  | 0  |             alphaNumLen = 0;  | 
2153  | 0  |         } else if (UPRV_ISALPHANUM(*p)) { | 
2154  | 0  |             alphaNumLen++;  | 
2155  | 0  |         } else { | 
2156  | 0  |             return FALSE;  | 
2157  | 0  |         }  | 
2158  | 0  |         p++;  | 
2159  | 0  |     }  | 
2160  | 0  |     return (alphaNumLen != 0);  | 
2161  | 0  | }  | 
2162  |  |  | 
2163  |  | U_CAPI const char* U_EXPORT2  | 
2164  |  | uloc_toLegacyKey(const char* keyword)  | 
2165  | 0  | { | 
2166  | 0  |     const char* legacyKey = ulocimp_toLegacyKey(keyword);  | 
2167  | 0  |     if (legacyKey == NULL) { | 
2168  |  |         // Checks if the specified locale key is well-formed with the legacy locale syntax.  | 
2169  |  |         //  | 
2170  |  |         // Note:  | 
2171  |  |         //  LDML/CLDR provides some definition of keyword syntax in  | 
2172  |  |         //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and  | 
2173  |  |         //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax  | 
2174  |  |         //  Keys can only consist of [0-9a-zA-Z].  | 
2175  | 0  |         if (isWellFormedLegacyKey(keyword)) { | 
2176  | 0  |             return keyword;  | 
2177  | 0  |         }  | 
2178  | 0  |     }  | 
2179  | 0  |     return legacyKey;  | 
2180  | 0  | }  | 
2181  |  |  | 
2182  |  | U_CAPI const char* U_EXPORT2  | 
2183  |  | uloc_toLegacyType(const char* keyword, const char* value)  | 
2184  | 0  | { | 
2185  | 0  |     const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);  | 
2186  | 0  |     if (legacyType == NULL) { | 
2187  |  |         // Checks if the specified locale type is well-formed with the legacy locale syntax.  | 
2188  |  |         //  | 
2189  |  |         // Note:  | 
2190  |  |         //  LDML/CLDR provides some definition of keyword syntax in  | 
2191  |  |         //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and  | 
2192  |  |         //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax  | 
2193  |  |         //  Values (types) can only consist of [0-9a-zA-Z], plus for legacy values  | 
2194  |  |         //  we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")  | 
2195  | 0  |         if (isWellFormedLegacyType(value)) { | 
2196  | 0  |             return value;  | 
2197  | 0  |         }  | 
2198  | 0  |     }  | 
2199  | 0  |     return legacyType;  | 
2200  | 0  | }  | 
2201  |  |  | 
2202  |  | /*eof*/  |