Coverage Report

Created: 2023-02-22 06:51

/src/icu/source/common/uloc.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (C) 1997-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*
9
* File ULOC.CPP
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   04/01/97    aliu        Creation.
15
*   08/21/98    stephen     JDK 1.2 sync
16
*   12/08/98    rtg         New Locale implementation and C API
17
*   03/15/99    damiba      overhaul.
18
*   04/06/99    stephen     changed setDefault() to realloc and copy
19
*   06/14/99    stephen     Changed calls to ures_open for new params
20
*   07/21/99    stephen     Modified setDefault() to propagate to C++
21
*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
22
*                           brought canonicalization code into line with spec
23
*****************************************************************************/
24
25
/*
26
   POSIX's locale format, from putil.c: [no spaces]
27
28
     ll [ _CC ] [ . MM ] [ @ VV]
29
30
     l = lang, C = ctry, M = charmap, V = variant
31
*/
32
33
#include "unicode/bytestream.h"
34
#include "unicode/errorcode.h"
35
#include "unicode/stringpiece.h"
36
#include "unicode/utypes.h"
37
#include "unicode/ustring.h"
38
#include "unicode/uloc.h"
39
40
#include "bytesinkutil.h"
41
#include "putilimp.h"
42
#include "ustr_imp.h"
43
#include "ulocimp.h"
44
#include "umutex.h"
45
#include "cstring.h"
46
#include "cmemory.h"
47
#include "locmap.h"
48
#include "uarrsort.h"
49
#include "uenumimp.h"
50
#include "uassert.h"
51
#include "charstr.h"
52
53
U_NAMESPACE_USE
54
55
/* ### Declarations **************************************************/
56
57
/* Locale stuff from locid.cpp */
58
U_CFUNC void locale_set_default(const char *id);
59
U_CFUNC const char *locale_get_default(void);
60
61
/* ### Data tables **************************************************/
62
63
/**
64
 * Table of language codes, both 2- and 3-letter, with preference
65
 * given to 2-letter codes where possible.  Includes 3-letter codes
66
 * that lack a 2-letter equivalent.
67
 *
68
 * This list must be in sorted order.  This list is returned directly
69
 * to the user by some API.
70
 *
71
 * This list must be kept in sync with LANGUAGES_3, with corresponding
72
 * entries matched.
73
 *
74
 * This table should be terminated with a NULL entry, followed by a
75
 * second list, and another NULL entry.  The first list is visible to
76
 * user code when this array is returned by API.  The second list
77
 * contains codes we support, but do not expose through user API.
78
 *
79
 * Notes
80
 *
81
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
82
 * include the revisions up to 2001/7/27 *CWB*
83
 *
84
 * The 3 character codes are the terminology codes like RFC 3066.  This
85
 * is compatible with prior ICU codes
86
 *
87
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
88
 * table but now at the end of the table because 3 character codes are
89
 * duplicates.  This avoids bad searches going from 3 to 2 character
90
 * codes.
91
 *
92
 * The range qaa-qtz is reserved for local use
93
 */
94
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
95
/* ISO639 table version is 20150505 */
96
/* Subsequent hand addition of selected languages */
97
static const char * const LANGUAGES[] = {
    /* First list: codes visible to user code when this array is returned by API. */
    /* Entry i here must describe the same language as LANGUAGES_3[i]. */
98
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
99
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
100
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
101
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
102
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
103
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
104
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
105
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
106
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
107
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
108
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
109
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
110
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
111
    "cs",  "csb", "cu",  "cv",  "cy",
112
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
113
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
114
    "dyo", "dyu", "dz",  "dzg",
115
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
116
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
117
    "ext",
118
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
119
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
120
    "frs", "fur", "fy",
121
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
122
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
123
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
124
    "gur", "guz", "gv",  "gwi",
125
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
126
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
127
    "hup", "hy",  "hz",
128
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
129
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
130
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
131
    "jv",
132
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
133
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
134
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
135
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
136
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
137
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
138
    "kv",  "kw",  "ky",
139
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
140
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
141
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
142
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
143
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
144
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
145
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
146
    "ml",  "mn",  "mnc", "mni",
147
    "moh", "mos", "mr",  "mrj",
148
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
149
    "my",  "mye", "myv", "mzn",
150
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
151
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
152
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
153
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
154
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
155
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
156
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
157
    "pon", "prg", "pro", "ps",  "pt",
158
    "qu",  "quc", "qug",
159
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
160
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
161
    "rw",  "rwk",
162
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
163
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
164
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
165
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
166
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
167
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
168
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
169
    "sv",  "sw",  "swb", "syc", "syr", "szl",
170
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
171
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr",
172
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
173
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
174
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
175
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
176
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
177
    "vot", "vro", "vun",
178
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
179
    "xal", "xh",  "xmf", "xog",
180
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
181
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
182
    "zun", "zxx", "zza",
183
/* Terminates the first (user-visible) list. */
NULL,
184
    /* Second list: codes that are supported but not exposed through user API. */
    "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  /* obsolete language codes */
185
/* Terminates the second list and the table. */
NULL
186
};
187
188
/*
 * Withdrawn 2-letter language codes, closed by two NULL entries.
 * The layout mirrors REPLACEMENT_LANGUAGES position by position
 * (presumably entry i is superseded by REPLACEMENT_LANGUAGES[i]).
 */
static const char * const DEPRECATED_LANGUAGES[] = {
    "in",
    "iw",
    "ji",
    "jw",
    NULL,
    NULL
};
191
/*
 * Current 2-letter language codes, closed by two NULL entries.
 * The layout mirrors DEPRECATED_LANGUAGES position by position
 * (presumably entry i replaces DEPRECATED_LANGUAGES[i]).
 */
static const char * const REPLACEMENT_LANGUAGES[] = {
    "id",
    "he",
    "yi",
    "jv",
    NULL,
    NULL
};
194
195
/**
196
 * Table of 3-letter language codes.
197
 *
198
 * This is a lookup table used to convert 3-letter language codes to
199
 * their 2-letter equivalent, where possible.  It must be kept in sync
200
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
201
 * same language as LANGUAGES_3[i].  The commented-out lines are
202
 * copied from LANGUAGES to make eyeballing this baby easier.
203
 *
204
 * Where a 3-letter language code has no 2-letter equivalent, the
205
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
206
 *
207
 * This table should be terminated with a NULL entry, followed by a
208
 * second list, and another NULL entry.  The two lists correspond to
209
 * the two lists in LANGUAGES.
210
 */
211
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
212
/* ISO639 table version is 20150505 */
213
/* Subsequent hand addition of selected languages */
214
static const char * const LANGUAGES_3[] = {
    /* First list: 3-letter codes, one per entry of the first list in LANGUAGES. */
    /* Where no 2-letter code exists, the same 3-letter code appears in both tables. */
215
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
216
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
217
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
218
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
219
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
220
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
221
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
222
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
223
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
224
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
225
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
226
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
227
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
228
    "ces", "csb", "chu", "chv", "cym",
229
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
230
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
231
    "dyo", "dyu", "dzo", "dzg",
232
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
233
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
234
    "ext",
235
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
236
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
237
    "frs", "fur", "fry",
238
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
239
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
240
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
241
    "gur", "guz", "glv", "gwi",
242
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
243
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
244
    "hup", "hye", "her",
245
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
246
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
247
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
248
    "jav",
249
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
250
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
251
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
252
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
253
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
254
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
255
    "kom", "cor", "kir",
256
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
257
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
258
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
259
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
260
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
261
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
262
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
263
    "mal", "mon", "mnc", "mni",
264
    "moh", "mos", "mar", "mrj",
265
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
266
    "mya", "mye", "myv", "mzn",
267
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
268
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
269
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
270
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
271
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
272
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
273
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
274
    "pon", "prg", "pro", "pus", "por",
275
    "que", "quc", "qug",
276
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
277
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
278
    "kin", "rwk",
279
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
280
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
281
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
282
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
283
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
284
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
285
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
286
    "swe", "swa", "swb", "syc", "syr", "szl",
287
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
288
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",
289
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
290
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
291
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
292
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
293
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
294
    "vot", "vro", "vun",
295
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
296
    "xal", "xho", "xmf", "xog",
297
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
298
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
299
    "zun", "zxx", "zza",
300
/* Terminates the first list. */
NULL,
301
/* Second list: 3-letter codes for the second list in LANGUAGES, whose entries are repeated here for reference. */
/*  "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  */
302
    "ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl",
303
/* Terminates the second list and the table. */
NULL
304
};
305
306
/**
307
 * Table of 2-letter country codes.
308
 *
309
 * This list must be in sorted order.  This list is returned directly
310
 * to the user by some API.
311
 *
312
 * This list must be kept in sync with COUNTRIES_3, with corresponding
313
 * entries matched.
314
 *
315
 * This table should be terminated with a NULL entry, followed by a
316
 * second list, and another NULL entry.  The first list is visible to
317
 * user code when this array is returned by API.  The second list
318
 * contains codes we support, but do not expose through user API.
319
 *
320
 * Notes:
321
 *
322
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
323
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
324
 * new codes keeping the old ones for compatibility updated to include
325
 * 1999/12/03 revisions *CWB*
326
 *
327
 * RO(ROM) is now RO(ROU) according to
328
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
329
 */
330
static const char * const COUNTRIES[] = {
    /* First list: codes visible to user code when this array is returned by API. */
    /* Entry i here must describe the same country as COUNTRIES_3[i]. */
331
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
332
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
333
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
334
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
335
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
336
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
337
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",
338
    "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",
339
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
340
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
341
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
342
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
343
    "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
344
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
345
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
346
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
347
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
348
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
349
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
350
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
351
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
352
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
353
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
354
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
355
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
356
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
357
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
358
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
359
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
360
    "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
361
/* Terminates the first (user-visible) list. */
NULL,
362
    /* Second list: codes that are supported but not exposed through user API. */
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
363
/* Terminates the second list and the table. */
NULL
364
};
365
366
/*
 * Deprecated country list, closed by two NULL entries.
 * REPLACEMENT_COUNTRIES repeats these codes in a comment above its own
 * entries, so the two tables are laid out position by position.
 */
static const char * const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH",
    "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR",
    NULL,
    NULL
};
369
/*
 * Replacement country codes, closed by two NULL entries.
 * Each row is preceded by the deprecated codes (from DEPRECATED_COUNTRIES)
 * that occupy the same positions.
 */
static const char * const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU",
/*  "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD",
    NULL,
    NULL
};
373
374
/**
375
 * Table of 3-letter country codes.
376
 *
377
 * This is a lookup table used to convert 3-letter country codes to
378
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
379
 * For all valid i, COUNTRIES[i] must refer to the same country as
380
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
381
 * to make eyeballing this baby easier.
382
 *
383
 * This table should be terminated with a NULL entry, followed by a
384
 * second list, and another NULL entry.  The two lists correspond to
385
 * the two lists in COUNTRIES.
386
 */
387
static const char * const COUNTRIES_3[] = {
/* First list: each commented row repeats the COUNTRIES entries that the row below it corresponds to. */
388
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
389
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
390
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
391
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
392
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
393
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
394
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
395
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
396
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
397
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
398
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
399
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
400
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",     */
401
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
402
/*  "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",     */
403
    "DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI",
404
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
405
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
406
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
407
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
408
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
409
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
410
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
411
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
412
/*  "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
413
    "XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
414
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
415
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
416
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
417
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
418
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
419
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
420
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
421
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
422
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
423
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
424
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
425
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
426
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
427
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
428
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
429
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
430
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
431
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
432
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
433
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
434
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
435
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
436
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
437
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
438
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
439
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
440
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
441
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
442
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
443
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
444
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
445
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
446
/*  "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
447
    "WSM", "XXK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
448
/* Terminates the first list. */
NULL,
449
/* Second list: 3-letter codes for the second list in COUNTRIES, repeated in the comment below. */
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
450
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
451
/* Terminates the second list and the table. */
NULL
452
};
453
454
/* One row of a locale-ID canonicalization table: an input spelling and the ID it maps to. */
typedef struct CanonicalizationMap {
    /* Locale ID as it may be supplied by the caller. */
    const char *id;
    /* Canonical locale ID that replaces `id`. */
    const char *canonicalID;
} CanonicalizationMap;
458
459
/**
460
 * A map to canonicalize locale IDs.  This handles a variety of
461
 * different semantic kinds of transformations.
462
 */
463
static const CanonicalizationMap CANONICALIZE_MAP[] = {
    /* Registered names and IANA variants, written with an empty region ("__"). */
    { "art__LOJBAN", "jbo" },   /* registered name */
    { "hy__AREVELA", "hy"  },   /* Registered IANA variant */
    { "hy__AREVMDA", "hyw" },   /* Registered IANA variant */
    { "zh__GUOYU",   "zh"  },   /* registered name */
    { "zh__HAKKA",   "hak" },   /* registered name */
    { "zh__XIANG",   "hsn" },   /* registered name */

    /* Subtags with 3 chars won't be treated as variants, hence the single '_'. */
    { "zh_GAN",      "gan" },   /* registered name */
    { "zh_MIN_NAN",  "nan" },   /* registered name */
    { "zh_WUU",      "wuu" },   /* registered name */
    { "zh_YUE",      "yue" },   /* registered name */
};
476
477
/* ### BCP47 Conversion *******************************************/
478
/*
 * Test if the locale id has BCP47 u extension and does not have '@'.
 *
 * Evaluates to true when `id` is non-NULL, contains no '@', and
 * getShortestSubtagLength(id) reports a shortest subtag of exactly one
 * character.
 *
 * The subtag check now uses the macro argument. It previously named a
 * variable `localeID`, so the macro only compiled, and only tested the
 * right string, when the caller's variable had exactly that name.
 *
 * `id` is expanded more than once; pass an expression without side effects.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
480
/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
481
static int32_t _ConvertBCP47(
482
0
            const char*& finalID, const char* id, char* buffer, int32_t length, UErrorCode* err) {
483
0
    int32_t localeIDSize = uloc_forLanguageTag(id, buffer, length, NULL, err);
484
0
    if (localeIDSize <= 0 || U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) {
485
0
        finalID=id;
486
0
        if (*err == U_STRING_NOT_TERMINATED_WARNING) {
487
0
            *err = U_BUFFER_OVERFLOW_ERROR;
488
0
        }
489
0
    } else {
490
0
        finalID=buffer;
491
0
    }
492
0
    return localeIDSize;
493
0
}
494
/* Gets the size of the shortest subtag in the given localeID. */
495
0
static int32_t getShortestSubtagLength(const char *localeID) {
496
0
    int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
497
0
    int32_t length = localeIDLength;
498
0
    int32_t tmpLength = 0;
499
0
    int32_t i;
500
0
    UBool reset = TRUE;
501
502
0
    for (i = 0; i < localeIDLength; i++) {
503
0
        if (localeID[i] != '_' && localeID[i] != '-') {
504
0
            if (reset) {
505
0
                tmpLength = 0;
506
0
                reset = FALSE;
507
0
            }
508
0
            tmpLength++;
509
0
        } else {
510
0
            if (tmpLength != 0 && tmpLength < length) {
511
0
                length = tmpLength;
512
0
            }
513
0
            reset = TRUE;
514
0
        }
515
0
    }
516
517
0
    return length;
518
0
}
519
520
/* ### Keywords **************************************************/
521
0
/* True when c is one of the characters '0'..'9'. The argument is expanded twice. */
#define UPRV_ISDIGIT(c) ('0' <= (c) && (c) <= '9')
/* True when c is an ASCII letter (per uprv_isASCIILetter) or a digit. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c))
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) \
    ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size, including the terminating NUL, of the buffers that hold a keyword name. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Capacity of the keyword list collected by ulocimp_getKeywords(). */
#define ULOC_MAX_NO_KEYWORDS 25
528
529
/*
 * Locates the start of the keyword section of a locale ID.
 *
 * Returns a pointer to the first '@' in localeID, or NULL when there is none.
 * On EBCDIC builds, when the compiled '@' is not found, a list of alternate
 * code points is tried in order and the first match is returned.
 */
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID) {
    const char *atSign = uprv_strchr(localeID, '@');
    if (atSign != NULL) {
        return atSign;
    }
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    {
        /* We do this because the @ sign is variant, and the @ sign used on one
        EBCDIC machine won't be compiled the same way on other EBCDIC based
        machines. */
        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
        for (const uint8_t *candidate = ebcdicSigns; *candidate != 0; ++candidate) {
            const char *found = uprv_strchr(localeID, *candidate);
            if (found != NULL) {
                return found;
            }
        }
    }
#endif
    return NULL;
}
552
553
/**
554
 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
555
 * @param keywordName incoming name to be canonicalized
556
 * @param status return status (keyword too long)
557
 * @return length of the keyword name
558
 */
559
static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
560
0
{
561
0
  int32_t keywordNameLen = 0;
562
563
0
  for (; *keywordName != 0; keywordName++) {
564
0
    if (!UPRV_ISALPHANUM(*keywordName)) {
565
0
      *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
566
0
      return 0;
567
0
    }
568
0
    if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
569
0
      buf[keywordNameLen++] = uprv_tolower(*keywordName);
570
0
    } else {
571
      /* keyword name too long for internal buffer */
572
0
      *status = U_INTERNAL_PROGRAM_ERROR;
573
0
      return 0;
574
0
    }
575
0
  }
576
0
  if (keywordNameLen == 0) {
577
0
    *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
578
0
    return 0;
579
0
  }
580
0
  buf[keywordNameLen] = 0; /* terminate */
581
582
0
  return keywordNameLen;
583
0
}
584
585
typedef struct {
586
    char keyword[ULOC_KEYWORD_BUFFER_LEN];
587
    int32_t keywordLen;
588
    const char *valueStart;
589
    int32_t valueLen;
590
} KeywordStruct;
591
592
/**
 * Comparator for uprv_sortArray(): orders two KeywordStruct items by their
 * NUL-terminated keyword names using uprv_strcmp(). The context argument is unused.
 */
static int32_t U_CALLCONV
compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
    const KeywordStruct *lhs = static_cast<const KeywordStruct *>(left);
    const KeywordStruct *rhs = static_cast<const KeywordStruct *>(right);
    return uprv_strcmp(lhs->keyword, rhs->keyword);
}
598
599
/**
 * Parses the keyword section of a locale ID and writes the keywords, sorted
 * by name, to a sink.
 *
 * Nothing is parsed unless prev is '@'. Pairs are separated by ';' and have
 * the form key=value. Spaces are removed from the key and it is lowercased;
 * leading and trailing spaces are trimmed from the value. A pair whose key
 * repeats an earlier key is ignored.
 *
 * @param localeID  text to parse, starting right after the character `prev`
 * @param prev      character preceding localeID; must be '@' for any work to happen
 * @param sink      receives "key=value;key=value" when valuesToo is true,
 *                  otherwise each key followed by a single NUL byte
 * @param valuesToo whether values are emitted along with the keys
 * @param status    in/out error code. If it already indicates a failure the
 *                  function returns without touching the sink. Set to
 *                  U_INVALID_FORMAT_ERROR for a missing '=', a ';' before '=',
 *                  an empty key or an empty value; set to
 *                  U_INTERNAL_PROGRAM_ERROR when there are more than
 *                  ULOC_MAX_NO_KEYWORDS keywords or a key is too long.
 */
U_CFUNC void
ulocimp_getKeywords(const char *localeID,
                    char prev,
                    ByteSink& sink,
                    UBool valuesToo,
                    UErrorCode *status)
{
    /* Follow the usual error-code convention: do nothing on incoming failure.
     * Otherwise the sort below would be skipped and unsorted output emitted. */
    if (U_FAILURE(*status)) {
        return;
    }
    if (prev != '@') { /* not the start of a keyword definition */
        return;
    }

    KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
    const int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
    int32_t numKeywords = 0;
    const char* pos = localeID;

    /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
    do {
        /* skip leading spaces */
        while (*pos == ' ') {
            pos++;
        }
        if (!*pos) { /* handle trailing "; " */
            break;
        }
        if (numKeywords == maxKeywords) {
            *status = U_INTERNAL_PROGRAM_ERROR;
            return;
        }
        const char* equalSign = uprv_strchr(pos, '=');
        const char* semicolon = uprv_strchr(pos, ';');
        /* lack of '=' [foo@currency] is illegal */
        /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
        if (!equalSign || (semicolon && semicolon < equalSign)) {
            *status = U_INVALID_FORMAT_ERROR;
            return;
        }
        /* need to normalize both keyword and keyword name */
        if (equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
            /* keyword name too long for internal buffer */
            *status = U_INTERNAL_PROGRAM_ERROR;
            return;
        }
        KeywordStruct& entry = keywordList[numKeywords];
        int32_t n = 0;
        for (int32_t i = 0; i < equalSign - pos; ++i) {
            if (pos[i] != ' ') {
                entry.keyword[n++] = uprv_tolower(pos[i]);
            }
        }

        /* zero-length keyword is an error. */
        if (n == 0) {
            *status = U_INVALID_FORMAT_ERROR;
            return;
        }

        entry.keyword[n] = 0;
        entry.keywordLen = n;
        /* now grab the value part. First we skip the '=' */
        equalSign++;
        /* then we skip leading spaces */
        while (*equalSign == ' ') {
            equalSign++;
        }

        /* Premature end or zero-length value */
        if (!*equalSign || equalSign == semicolon) {
            *status = U_INVALID_FORMAT_ERROR;
            return;
        }

        entry.valueStart = equalSign;

        pos = semicolon;
        if (pos) {
            /* Trim spaces before the ';'. The value has at least one
             * non-space character, so this scan stops at or after equalSign. */
            int32_t trailing = 0;
            while (*(pos - trailing - 1) == ' ') {
                trailing++;
            }
            entry.valueLen = (int32_t)(pos - equalSign - trailing);
            pos++;
        } else {
            /* last pair: the value runs to the end of the string */
            int32_t len = (int32_t)uprv_strlen(equalSign);
            while (len && equalSign[len-1] == ' ') {
                len--;
            }
            entry.valueLen = len;
        }
        /* If this is a duplicate keyword, then ignore it */
        UBool duplicate = FALSE;
        for (int32_t j = 0; j < numKeywords; ++j) {
            if (uprv_strcmp(keywordList[j].keyword, entry.keyword) == 0) {
                duplicate = TRUE;
                break;
            }
        }
        if (!duplicate) {
            ++numKeywords;
        }
    } while (pos);

    /* now we have a list of keywords */
    /* we need to sort it */
    uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
    if (U_FAILURE(*status)) {
        /* do not emit a list that could not be sorted */
        return;
    }

    /* Now construct the keyword part */
    for (int32_t i = 0; i < numKeywords; i++) {
        sink.Append(keywordList[i].keyword, keywordList[i].keywordLen);
        if (valuesToo) {
            sink.Append("=", 1);
            sink.Append(keywordList[i].valueStart, keywordList[i].valueLen);
            if (i < numKeywords - 1) {
                sink.Append(";", 1);
            }
        } else {
            sink.Append("\0", 1);
        }
    }
}
719
720
/**
 * Buffer-based wrapper around ulocimp_getKeywordValue(): collects the value
 * of keywordName from localeID into buffer.
 *
 * @return the number of bytes the value needs (as reported by the sink), or
 *         0 when status already indicated a failure on entry. On overflow
 *         status is set to U_BUFFER_OVERFLOW_ERROR; otherwise the result is
 *         terminated via u_terminateChars().
 */
U_CAPI int32_t U_EXPORT2
uloc_getKeywordValue(const char* localeID,
                     const char* keywordName,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status)
{
    if (U_FAILURE(*status)) {
        return 0;
    }

    CheckedArrayByteSink sink(buffer, bufferCapacity);
    ulocimp_getKeywordValue(localeID, keywordName, sink, status);

    const int32_t length = sink.NumberOfBytesAppended();

    if (U_SUCCESS(*status)) {
        if (sink.Overflowed()) {
            *status = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(buffer, bufferCapacity, length, status);
        }
    }

    return length;
}
747
748
/**
 * Looks up keywordName in the keyword section of localeID and appends its
 * value, unchanged in case, to sink.
 *
 * Does nothing when status is NULL or already a failure, or when localeID is
 * NULL. Nothing is appended when the locale has no keywords or the keyword is
 * not present. Spaces around keyword names and values in the locale are
 * tolerated and stripped.
 *
 * status is set to U_ILLEGAL_ARGUMENT_ERROR for an empty/NULL keywordName, a
 * locale entry without '=', an empty or malformed name, or an empty or
 * malformed value; and to U_INTERNAL_PROGRAM_ERROR when a name is too long
 * for the internal buffer.
 */
U_CAPI void U_EXPORT2
ulocimp_getKeywordValue(const char* localeID,
                        const char* keywordName,
                        icu::ByteSink& sink,
                        UErrorCode* status)
{
    if (status == NULL || U_FAILURE(*status) || localeID == NULL) {
        return;
    }
    if (keywordName == NULL || keywordName[0] == 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* canonical (lowercased) form of the requested keyword */
    char wantedName[ULOC_KEYWORD_BUFFER_LEN];
    locale_canonKeywordName(wantedName, keywordName, status);
    if (U_FAILURE(*status)) {
        return;
    }

    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char* tmpLocaleID;
    if (_hasBCP47Extension(localeID)) {
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
    } else {
        tmpLocaleID = localeID;
    }

    /* cursor sits on the '@' or ';' that introduces the next entry,
     * or is NULL when there are no (more) keywords */
    const char* cursor = locale_getKeywordsStart(tmpLocaleID);
    while (cursor != NULL) {
        ++cursor; /* skip @ or ; */

        const char* equals = uprv_strchr(cursor, '=');
        if (equals == NULL) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
            return;
        }

        /* strip leading & trailing spaces (TC decided to tolerate these) */
        while (*cursor == ' ') {
            ++cursor;
        }
        const char* nameEnd = equals;
        while (nameEnd > cursor && nameEnd[-1] == ' ') {
            --nameEnd;
        }
        /* nameEnd now points to the first char after the key name */
        if (cursor == nameEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
            return;
        }

        /* copy & normalize the key name from the locale */
        char currentName[ULOC_KEYWORD_BUFFER_LEN];
        int32_t nameLen = 0;
        for (; cursor < nameEnd; ++cursor) {
            if (!UPRV_ISALPHANUM(*cursor)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
                return;
            }
            if (nameLen >= ULOC_KEYWORD_BUFFER_LEN - 1) {
                /* keyword name too long for internal buffer */
                *status = U_INTERNAL_PROGRAM_ERROR;
                return;
            }
            currentName[nameLen++] = uprv_tolower(*cursor);
        }
        currentName[nameLen] = 0; /* terminate */

        /* advance to the separator of the following entry, if any */
        cursor = uprv_strchr(equals, ';');

        if (uprv_strcmp(wantedName, currentName) != 0) {
            continue; /* not the keyword we are looking for */
        }

        /* current entry matches the keyword: isolate its value */
        const char* value = equals + 1; /* skip '=' */
        while (*value == ' ') {
            ++value;
        }
        const char* valueEnd = (cursor != NULL) ? cursor : value + uprv_strlen(value);
        while (valueEnd > value && valueEnd[-1] == ' ') {
            --valueEnd;
        }
        if (value == valueEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
            return;
        }

        /* Copy the value while checking well-formedness. */
        /* Should we lowercase value to return here? Tests expect as-is. */
        for (; value < valueEnd; ++value) {
            if (!UPRV_ISALPHANUM(*value) && !UPRV_OK_VALUE_PUNCTUATION(*value)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                return;
            }
            sink.Append(value, 1);
        }
        return;
    }
}
857
858
/**
 * Sets, replaces or removes a keyword in the NUL-terminated locale ID held
 * in buffer, editing the buffer in place.
 *
 * A non-empty keywordValue adds the keyword or replaces its value; a new
 * keyword is inserted before the first existing keyword that sorts after it,
 * or appended at the end. A NULL or empty keywordValue removes the keyword.
 * Existing entries are rewritten with lowercased names and without
 * surrounding spaces.
 *
 * @return -1 when status was already a failure on entry; 0 for invalid
 *         arguments or a malformed keyword section in buffer; the required
 *         length when status is set to U_BUFFER_OVERFLOW_ERROR (buffer is
 *         left unchanged); the unchanged length when there was nothing to
 *         do; otherwise the new length of the locale ID.
 */
U_CAPI int32_t U_EXPORT2
uloc_setKeywordValue(const char* keywordName,
                     const char* keywordValue,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status)
{
    /* TODO: sorting. removal. */
    if (U_FAILURE(*status)) {
        return -1;
    }
    if (*status == U_STRING_NOT_TERMINATED_WARNING) {
        *status = U_ZERO_ERROR;
    }
    if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    const int32_t bufLen = (int32_t)uprv_strlen(buffer);
    if (bufferCapacity < bufLen) {
        /* The capacity is less than the length?! Is this NULL terminated? */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* canonical form of the keyword to set */
    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    const int32_t keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    /* validated copy of the value to set; stays empty for removal */
    char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
    int32_t keywordValueLen = 0;
    if (keywordValue != NULL) {
        for (const char* v = keywordValue; *v != 0; ++v) {
            if (!UPRV_ISALPHANUM(*v) && !UPRV_OK_VALUE_PUNCTUATION(*v)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                return 0;
            }
            if (keywordValueLen >= ULOC_KEYWORDS_CAPACITY) {
                /* keywordValue too long for internal buffer */
                *status = U_INTERNAL_PROGRAM_ERROR;
                return 0;
            }
            /* Should we force lowercase in value to set? */
            keywordValueBuffer[keywordValueLen++] = *v;
        }
    }
    keywordValueBuffer[keywordValueLen] = 0; /* terminate */

    char* const keywordsStart = (char*)locale_getKeywordsStart(buffer);

    /* Shortcut: the locale has no keywords yet (no '@', or a bare trailing '@'). */
    if (keywordsStart == NULL || keywordsStart[1] == 0) {
        if (keywordValueLen == 0) { /* no keywords = nothing to remove */
            U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
            return bufLen;
        }

        int32_t needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
        char* out;
        if (keywordsStart != NULL) { /* had a single @ */
            needLen--; /* already had the @ */
            out = keywordsStart; /* points at the @ */
        } else {
            out = buffer + bufLen;
        }
        if (needLen >= bufferCapacity) {
            *status = U_BUFFER_OVERFLOW_ERROR;
            return needLen; /* no change */
        }
        *out++ = '@';
        uprv_strcpy(out, keywordNameBuffer);
        out += keywordNameLen;
        *out++ = '=';
        uprv_strcpy(out, keywordValueBuffer);
        U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
        return needLen;
    } /* end shortcut - no @ */

    /* Rebuild the whole keyword section into a scratch string. */
    CharString updatedKeysAndValues;
    UBool handledInputKeyAndValue = FALSE;
    char keyValuePrefix = '@';

    /* Appends <prefix>key=value; every pair after the first is prefixed with ';'. */
    auto appendPair = [&](const char* key, int32_t keyLen, const char* value, int32_t valueLen) {
        updatedKeysAndValues.append(keyValuePrefix, *status);
        keyValuePrefix = ';'; /* for any subsequent key-value pair */
        updatedKeysAndValues.append(key, keyLen, *status);
        updatedKeysAndValues.append('=', *status);
        updatedKeysAndValues.append(value, valueLen, *status);
    };

    /* search for keyword */
    char* entry = keywordsStart;
    while (entry != NULL) {
        entry++; /* skip @ or ; */
        char* equals = uprv_strchr(entry, '=');
        if (equals == NULL) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
            return 0;
        }
        /* strip leading & trailing spaces (TC decided to tolerate these) */
        while (*entry == ' ') {
            entry++;
        }
        const char* tail = equals;
        while (tail > entry && tail[-1] == ' ') {
            tail--;
        }
        /* now tail points to first char after the key name */
        if (entry == tail) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
            return 0;
        }

        /* copy & normalize the key name from the locale */
        char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
        int32_t localeNameLen = 0;
        for (; entry < tail; ++entry) {
            if (!UPRV_ISALPHANUM(*entry)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
                return 0;
            }
            if (localeNameLen >= ULOC_KEYWORD_BUFFER_LEN - 1) {
                /* keyword name too long for internal buffer */
                *status = U_INTERNAL_PROGRAM_ERROR;
                return 0;
            }
            localeKeywordNameBuffer[localeNameLen++] = uprv_tolower(*entry);
        }
        localeKeywordNameBuffer[localeNameLen] = 0; /* terminate */

        char* const nextSeparator = uprv_strchr(equals, ';');

        /* start processing the value part */
        char* value = equals + 1; /* skip '=' */
        while (*value == ' ') {
            value++;
        }
        tail = (nextSeparator != NULL) ? nextSeparator : value + uprv_strlen(value);
        while (tail > value && tail[-1] == ' ') {
            tail--;
        }
        if (value == tail) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
            return 0;
        }

        const int32_t rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
        if (rc == 0) {
            /* Current entry matches the input keyword. Update the entry;
             * when removing, emit nothing for it. */
            if (keywordValueLen > 0) {
                appendPair(keywordNameBuffer, keywordNameLen, keywordValueBuffer, keywordValueLen);
            }
            handledInputKeyAndValue = TRUE;
        } else {
            if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
                /* input keyword sorts earlier than current entry: insert it here */
                appendPair(keywordNameBuffer, keywordNameLen, keywordValueBuffer, keywordValueLen);
                handledInputKeyAndValue = TRUE;
            }
            /* copy the current entry */
            appendPair(localeKeywordNameBuffer, localeNameLen,
                       value, static_cast<int32_t>(tail - value));
        }
        if (nextSeparator == NULL && keywordValueLen > 0 && !handledInputKeyAndValue) {
            /* append new entry at the end, it sorts later than existing entries */
            appendPair(keywordNameBuffer, keywordNameLen, keywordValueBuffer, keywordValueLen);
            handledInputKeyAndValue = TRUE;
        }
        entry = nextSeparator;
    } /* end loop searching */

    /* Any error from updatedKeysAndValues.append above would be internal and not due to
     * problems with the passed-in locale. Problems with the passed-in locale have already
     * returned 0 above. Errors seen here come from updatedKeysAndValues.append; they do
     * cause an error return, but the passed-in locale is unmodified and the original
     * bufLen is returned.
     */
    if (!handledInputKeyAndValue || U_FAILURE(*status)) {
        /* if input key/value specified removal of a keyword not present in locale, or
         * there was an error in CharString.append, leave original locale alone. */
        U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
        return bufLen;
    }

    // length of the part before '@'
    const int32_t prefixLen = (int32_t)(keywordsStart - buffer);
    // This API promises never to set U_STRING_NOT_TERMINATED_WARNING. If the new
    // keyword section would not fit together with its terminating NUL, leave the
    // buffer untouched and report a buffer overflow instead.
    const int32_t appendLength = updatedKeysAndValues.length();
    if (appendLength >= bufferCapacity - prefixLen) {
        *status = U_BUFFER_OVERFLOW_ERROR;
        return prefixLen + appendLength;
    }
    const int32_t newLen = prefixLen + updatedKeysAndValues.extract(
                         keywordsStart, bufferCapacity - prefixLen, *status);
    U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
    return newLen;
}
1081
1082
/* ### ID parsing implementation **************************************************/
1083
1084
0
/* TRUE if 'a' is one of the letters that can start a special prefix (x, X, i, I).
   Arguments are parenthesized so that expressions can be passed safely. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns TRUE if one of the special prefixes is here (s=string)
   'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1094
1095
/**
1096
 * Lookup 'key' in the array 'list'.  The array 'list' should contain
1097
 * a NULL entry, followed by more entries, and a second NULL entry.
1098
 *
1099
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1100
 * COUNTRIES_3.
1101
 */
1102
static int16_t _findIndex(const char* const* list, const char* key)
1103
0
{
1104
0
    const char* const* anchor = list;
1105
0
    int32_t pass = 0;
1106
1107
    /* Make two passes through two NULL-terminated arrays at 'list' */
1108
0
    while (pass++ < 2) {
1109
0
        while (*list) {
1110
0
            if (uprv_strcmp(key, *list) == 0) {
1111
0
                return (int16_t)(list - anchor);
1112
0
            }
1113
0
            list++;
1114
0
        }
1115
0
        ++list;     /* skip final NULL *CWB*/
1116
0
    }
1117
0
    return -1;
1118
0
}
1119
1120
/**
 * Maps a country ID listed in DEPRECATED_COUNTRIES to the entry at the same
 * index in REPLACEMENT_COUNTRIES; any other ID is returned unchanged.
 */
U_CFUNC const char*
uloc_getCurrentCountryID(const char* oldID){
    const int32_t index = _findIndex(DEPRECATED_COUNTRIES, oldID);
    return (index >= 0) ? REPLACEMENT_COUNTRIES[index] : oldID;
}
1128
/**
 * Maps a language ID listed in DEPRECATED_LANGUAGES to the entry at the same
 * index in REPLACEMENT_LANGUAGES; any other ID is returned unchanged.
 */
U_CFUNC const char*
uloc_getCurrentLanguageID(const char* oldID){
    const int32_t index = _findIndex(DEPRECATED_LANGUAGES, oldID);
    return (index >= 0) ? REPLACEMENT_LANGUAGES[index] : oldID;
}
1136
/*
1137
 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1138
 * avoid duplicating code to handle the earlier locale ID pieces
1139
 * in the functions for the later ones by
1140
 * setting the *pEnd pointer to where they stopped parsing
1141
 *
1142
 * TODO try to use this in Locale
1143
 */
1144
/**
 * Extracts the lowercased language subtag from the start of localeID.
 *
 * A locale ID that is exactly "root" (any case), or a leading "und" followed
 * by end of string, '-', '_' or '@', is skipped and contributes nothing. An
 * "x-"/"i-" style prefix is kept as "<letter>-". A result of exactly three
 * characters that is found in LANGUAGES_3 is replaced by the LANGUAGES entry
 * at the same index.
 *
 * @param localeID the locale ID to parse
 * @param pEnd     if not NULL, receives the position where parsing stopped
 * @param status   passed on to the CharString append calls
 * @return the language code; possibly empty
 */
CharString U_EXPORT2
ulocimp_getLanguage(const char *localeID,
                    const char **pEnd,
                    UErrorCode &status) {
    CharString language;
    const char *cursor = localeID;

    if (uprv_stricmp(cursor, "root") == 0) {
        cursor += 4;
    } else if (uprv_strnicmp(cursor, "und", 3) == 0) {
        const char after = cursor[3];
        if (after == '\0' || after == '-' || after == '_' || after == '@') {
            cursor += 3;
        }
    }

    /* if it starts with i- or x- then copy that prefix */
    if (_isIDPrefix(cursor)) {
        language.append((char)uprv_tolower(*cursor), status);
        language.append('-', status);
        cursor += 2;
    }

    /* copy the language as far as possible */
    for (; !_isTerminator(*cursor) && !_isIDSeparator(*cursor); ++cursor) {
        language.append((char)uprv_tolower(*cursor), status);
    }

    if (language.length() == 3) {
        /* convert 3 character code to 2 character code if possible *CWB*/
        const int32_t index = _findIndex(LANGUAGES_3, language.data());
        if (index >= 0) {
            language.clear();
            language.append(LANGUAGES[index], status);
        }
    }

    if (pEnd != NULL) {
        *pEnd = cursor;
    }

    return language;
}
1188
1189
/**
 * Extracts a script subtag from the start of localeID.
 *
 * The subtag is recognized only if it consists of exactly four ASCII
 * letters; it is returned in title case (first letter upper, rest lower).
 *
 * @param localeID position in the locale ID where a script may start
 * @param pEnd     if not NULL, receives the position after the script, or
 *                 localeID itself when no script was found
 * @param status   passed on to the CharString append calls
 * @return the script code, or an empty string
 */
CharString U_EXPORT2
ulocimp_getScript(const char *localeID,
                  const char **pEnd,
                  UErrorCode &status) {
    CharString script;
    const char *end = localeID;

    /* measure the run of ASCII letters that makes up the next subtag */
    int32_t letters = 0;
    while (!_isTerminator(localeID[letters]) && !_isIDSeparator(localeID[letters])
            && uprv_isASCIILetter(localeID[letters])) {
        ++letters;
    }

    /* If it's exactly 4 characters long, then it's a script and not a country. */
    if (letters == 4) {
        script.append((char)uprv_toupper(localeID[0]), status);
        for (int32_t i = 1; i < letters; ++i) {
            script.append((char)uprv_tolower(localeID[i]), status);
        }
        end = localeID + letters;
    }

    if (pEnd != NULL) {
        *pEnd = end;
    }

    return script;
}
1222
1223
/**
 * Extracts an uppercased country/region subtag from the start of localeID.
 *
 * The subtag is accepted only when it is two or three characters long; a
 * three-character code found in COUNTRIES_3 is replaced by the COUNTRIES
 * entry at the same index. Otherwise the result is empty.
 *
 * @param localeID position in the locale ID where a country may start
 * @param pEnd     if not NULL, receives the position after the country, or
 *                 localeID itself when no country was accepted
 * @param status   passed on to the CharString append calls
 * @return the country code, or an empty string
 */
CharString U_EXPORT2
ulocimp_getCountry(const char *localeID,
                   const char **pEnd,
                   UErrorCode &status) {
    CharString country;
    int32_t idLen = 0;

    /* copy the country as far as possible and count its length */
    for (; !_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen]); ++idLen) {
        country.append((char)uprv_toupper(localeID[idLen]), status);
    }

    /* the country should be either length 2 or 3 */
    const UBool accepted = (idLen == 2 || idLen == 3);
    if (!accepted) {
        country.clear();
    } else if (idLen == 3) {
        /* convert 3 character code to 2 character code if possible *CWB*/
        const int32_t index = _findIndex(COUNTRIES_3, country.data());
        if (index >= 0) {
            country.clear();
            country.append(COUNTRIES[index], status);
        }
    }

    if (pEnd != NULL) {
        *pEnd = accepted ? localeID + idLen : localeID;
    }

    return country;
}
1257
1258
/**
1259
 * @param needSeparator if true, then add leading '_' if any variants
1260
 * are added to 'variant'
1261
 */
1262
static void
1263
_getVariant(const char *localeID,
1264
            char prev,
1265
            ByteSink& sink,
1266
0
            UBool needSeparator) {
1267
0
    UBool hasVariant = FALSE;
1268
1269
    /* get one or more variant tags and separate them with '_' */
1270
0
    if(_isIDSeparator(prev)) {
1271
        /* get a variant string after a '-' or '_' */
1272
0
        while(!_isTerminator(*localeID)) {
1273
0
            if (needSeparator) {
1274
0
                sink.Append("_", 1);
1275
0
                needSeparator = FALSE;
1276
0
            }
1277
0
            char c = (char)uprv_toupper(*localeID);
1278
0
            if (c == '-') c = '_';
1279
0
            sink.Append(&c, 1);
1280
0
            hasVariant = TRUE;
1281
0
            localeID++;
1282
0
        }
1283
0
    }
1284
1285
    /* if there is no variant tag after a '-' or '_' then look for '@' */
1286
0
    if(!hasVariant) {
1287
0
        if(prev=='@') {
1288
            /* keep localeID */
1289
0
        } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1290
0
            ++localeID; /* point after the '@' */
1291
0
        } else {
1292
0
            return;
1293
0
        }
1294
0
        while(!_isTerminator(*localeID)) {
1295
0
            if (needSeparator) {
1296
0
                sink.Append("_", 1);
1297
0
                needSeparator = FALSE;
1298
0
            }
1299
0
            char c = (char)uprv_toupper(*localeID);
1300
0
            if (c == '-' || c == ',') c = '_';
1301
0
            sink.Append(&c, 1);
1302
0
            localeID++;
1303
0
        }
1304
0
    }
1305
0
}
1306
1307
/* Keyword enumeration */
1308
1309
/**
 * State of a keyword enumeration, stored in UEnumeration::context.
 * `keywords` is a heap buffer holding consecutive NUL-terminated keyword
 * names followed by an empty string; `current` walks through that buffer.
 */
typedef struct UKeywordsContext {
1310
    char* keywords; /* start of the list; allocated in uloc_openKeywordList, freed in uloc_kw_closeKeywords */
1311
    char* current;  /* next keyword to be returned by uloc_kw_nextKeyword */
1312
} UKeywordsContext;
1313
1314
U_CDECL_BEGIN
1315
1316
static void U_CALLCONV
1317
0
uloc_kw_closeKeywords(UEnumeration *enumerator) {
1318
0
    uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1319
0
    uprv_free(enumerator->context);
1320
0
    uprv_free(enumerator);
1321
0
}
1322
1323
/**
 * Count callback of the keyword enumeration: counts the NUL-terminated
 * entries in the keyword buffer up to the first empty string. The status
 * argument is unused.
 */
static int32_t U_CALLCONV
uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
    const UKeywordsContext *ctx = static_cast<const UKeywordsContext *>(en->context);
    int32_t count = 0;
    for (const char *kw = ctx->keywords; *kw != 0; kw += uprv_strlen(kw) + 1) {
        ++count;
    }
    return count;
}
1333
1334
static const char * U_CALLCONV
1335
uloc_kw_nextKeyword(UEnumeration* en,
1336
                    int32_t* resultLength,
1337
0
                    UErrorCode* /*status*/) {
1338
0
    const char* result = ((UKeywordsContext *)en->context)->current;
1339
0
    int32_t len = 0;
1340
0
    if(*result) {
1341
0
        len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1342
0
        ((UKeywordsContext *)en->context)->current += len+1;
1343
0
    } else {
1344
0
        result = NULL;
1345
0
    }
1346
0
    if (resultLength) {
1347
0
        *resultLength = len;
1348
0
    }
1349
0
    return result;
1350
0
}
1351
1352
static void U_CALLCONV
1353
uloc_kw_resetKeywords(UEnumeration* en,
1354
0
                      UErrorCode* /*status*/) {
1355
0
    ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1356
0
}
1357
1358
U_CDECL_END
1359
1360
1361
/*
 * Template UEnumeration for keyword lists. uloc_openKeywordList() copies it
 * byte-for-byte into a freshly allocated UEnumeration and then fills in the
 * `context` member with a UKeywordsContext.
 * NOTE(review): the two leading NULL initializers are presumably the
 * baseContext/context slots of UEnumeration -- confirm against uenumimp.h.
 */
static const UEnumeration gKeywordsEnum = {
1362
    NULL,
1363
    NULL,
1364
    uloc_kw_closeKeywords,
1365
    uloc_kw_countKeywords,
1366
    uenum_unextDefault,
1367
    uloc_kw_nextKeyword,
1368
    uloc_kw_resetKeywords
1369
};
1370
1371
/**
 * Creates a UEnumeration over a packed keyword list.
 *
 * The first keywordListSize bytes of keywordList are copied into a private
 * buffer and one extra NUL byte is appended, so the enumeration does not
 * reference the caller's memory after this function returns. The list is
 * consumed as consecutive NUL-terminated strings.
 *
 * @param keywordList     bytes to copy; may be NULL only when keywordListSize is 0
 * @param keywordListSize number of bytes to copy; must not be negative
 * @param status          in/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR for
 *                        invalid arguments and to U_MEMORY_ALLOCATION_ERROR when
 *                        an allocation fails.
 * @return a new enumeration (released through its close callback), or nullptr on error
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
{
    if (status == nullptr || U_FAILURE(*status)) {
        return nullptr;
    }
    /* A negative size would turn into a huge allocation/copy length. */
    if (keywordListSize < 0 || (keywordList == nullptr && keywordListSize > 0)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }

    LocalMemory<UKeywordsContext> myContext;
    LocalMemory<UEnumeration> result;
    myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
    result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
    if (myContext.isNull() || result.isNull()) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));

    /* Size computed in size_t so that "+1" cannot overflow int32_t. */
    myContext->keywords = static_cast<char *>(uprv_malloc(static_cast<size_t>(keywordListSize) + 1));
    if (myContext->keywords == nullptr) {
        /* The LocalMemory owners release the context and the enumeration. */
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    if (keywordListSize > 0) {
        /* Skipped for an empty list so a NULL source is never handed to memcpy. */
        uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
    }
    myContext->keywords[keywordListSize] = 0;
    myContext->current = myContext->keywords;

    /* Ownership of the context moves into the enumeration, which is handed to the caller. */
    result->context = myContext.orphan();
    return result.orphan();
}
1398
1399
/**
 * Opens an enumeration over the keywords of a locale ID.
 *
 * The language, optional script and optional country subtags are skipped,
 * then the keyword section following '@' is extracted and handed to
 * uloc_openKeywordList().
 *
 * @param localeID locale ID to inspect; NULL selects the default locale.
 *                 IDs for which _hasBCP47Extension() is true are first
 *                 passed through _ConvertBCP47().
 * @param status   in/out error code; may not be NULL
 * @return a new enumeration, or NULL when an error occurred or no keyword
 *         section was found
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char* localeID,
                        UErrorCode* status)
{
    if (status == NULL || U_FAILURE(*status)) {
        return 0;
    }

    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char* tmpLocaleID;
    if (!_hasBCP47Extension(localeID)) {
        if (localeID == NULL) {
            localeID = uloc_getDefault();
        }
        tmpLocaleID = localeID;
    } else {
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
    }

    /* Step over the language subtag. */
    ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    if (_isIDSeparator(*tmpLocaleID)) {
        /* Step over the script subtag when one is present. */
        const char *afterScript;
        ulocimp_getScript(tmpLocaleID + 1, &afterScript, *status);
        if (U_FAILURE(*status)) {
            return 0;
        }
        if (afterScript != tmpLocaleID + 1) {
            tmpLocaleID = afterScript;
        }

        /* Step over the country subtag. */
        if (_isIDSeparator(*tmpLocaleID)) {
            ulocimp_getCountry(tmpLocaleID + 1, &tmpLocaleID, *status);
            if (U_FAILURE(*status)) {
                return 0;
            }
        }
    }

    /* Keywords, if any, start at '@'. */
    tmpLocaleID = locale_getKeywordsStart(tmpLocaleID);
    if (tmpLocaleID == NULL) {
        return NULL;
    }

    CharString keywords;
    CharStringByteSink sink(&keywords);
    ulocimp_getKeywords(tmpLocaleID + 1, '@', sink, FALSE, status);
    if (U_FAILURE(*status)) {
        return NULL;
    }
    return uloc_openKeywordList(keywords.data(), keywords.length(), status);
}
1457
1458
1459
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * True when any bit of `mask` is set in `options`.
 * Both arguments are parenthesized so that compound expressions such as
 * OPTION_SET(opts, A | B) are evaluated as (opts & (A | B)).
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The special language tag "i-default" (not NUL-terminated; compare by length). */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1467
1468
/**
1469
 * Canonicalize the given localeID, to level 1 or to level 2,
1470
 * depending on the options.  To specify level 1, pass in options=0.
1471
 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1472
 *
1473
 * This is the code underlying uloc_getName and uloc_canonicalize.
1474
 */
1475
static void
1476
_canonicalize(const char* localeID,
1477
              ByteSink& sink,
1478
              uint32_t options,
1479
0
              UErrorCode* err) {
1480
0
    int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
1481
0
    PreflightingLocaleIDBuffer tempBuffer;
1482
0
    const char* origLocaleID;
1483
0
    const char* tmpLocaleID;
1484
0
    const char* keywordAssign = NULL;
1485
0
    const char* separatorIndicator = NULL;
1486
1487
0
    if (U_FAILURE(*err)) {
1488
0
        return;
1489
0
    }
1490
1491
0
    if (_hasBCP47Extension(localeID)) {
1492
0
        do {
1493
0
            tempBuffer.requestedCapacity = _ConvertBCP47(tmpLocaleID, localeID,
1494
0
                tempBuffer.getBuffer(), tempBuffer.getCapacity(), err);
1495
0
        } while (tempBuffer.needToTryAgain(err));
1496
0
    } else {
1497
0
        if (localeID==NULL) {
1498
0
           localeID=uloc_getDefault();
1499
0
        }
1500
0
        tmpLocaleID=localeID;
1501
0
    }
1502
1503
0
    origLocaleID=tmpLocaleID;
1504
1505
    /* get all pieces, one after another, and separate with '_' */
1506
0
    CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
1507
1508
0
    if (tag.length() == I_DEFAULT_LENGTH &&
1509
0
            uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
1510
0
        tag.clear();
1511
0
        tag.append(uloc_getDefault(), *err);
1512
0
    } else if(_isIDSeparator(*tmpLocaleID)) {
1513
0
        const char *scriptID;
1514
1515
0
        ++fieldCount;
1516
0
        tag.append('_', *err);
1517
1518
0
        CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1519
0
        tag.append(script, *err);
1520
0
        scriptSize = script.length();
1521
0
        if(scriptSize > 0) {
1522
            /* Found optional script */
1523
0
            tmpLocaleID = scriptID;
1524
0
            ++fieldCount;
1525
0
            if (_isIDSeparator(*tmpLocaleID)) {
1526
                /* If there is something else, then we add the _ */
1527
0
                tag.append('_', *err);
1528
0
            }
1529
0
        }
1530
1531
0
        if (_isIDSeparator(*tmpLocaleID)) {
1532
0
            const char *cntryID;
1533
1534
0
            CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1535
0
            tag.append(country, *err);
1536
0
            if (!country.isEmpty()) {
1537
                /* Found optional country */
1538
0
                tmpLocaleID = cntryID;
1539
0
            }
1540
0
            if(_isIDSeparator(*tmpLocaleID)) {
1541
                /* If there is something else, then we add the _  if we found country before. */
1542
0
                if (!_isIDSeparator(*(tmpLocaleID+1))) {
1543
0
                    ++fieldCount;
1544
0
                    tag.append('_', *err);
1545
0
                }
1546
1547
0
                variantSize = -tag.length();
1548
0
                {
1549
0
                    CharStringByteSink s(&tag);
1550
0
                    _getVariant(tmpLocaleID+1, *tmpLocaleID, s, FALSE);
1551
0
                }
1552
0
                variantSize += tag.length();
1553
0
                if (variantSize > 0) {
1554
0
                    tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1555
0
                }
1556
0
            }
1557
0
        }
1558
0
    }
1559
1560
    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1561
0
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1562
0
        UBool done = FALSE;
1563
0
        do {
1564
0
            char c = *tmpLocaleID;
1565
0
            switch (c) {
1566
0
            case 0:
1567
0
            case '@':
1568
0
                done = TRUE;
1569
0
                break;
1570
0
            default:
1571
0
                tag.append(c, *err);
1572
0
                ++tmpLocaleID;
1573
0
                break;
1574
0
            }
1575
0
        } while (!done);
1576
0
    }
1577
1578
    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1579
       After this, tmpLocaleID either points to '@' or is NULL */
1580
0
    if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1581
0
        keywordAssign = uprv_strchr(tmpLocaleID, '=');
1582
0
        separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1583
0
    }
1584
1585
    /* Copy POSIX-style variant, if any [mr@FOO] */
1586
0
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1587
0
        tmpLocaleID != NULL && keywordAssign == NULL) {
1588
0
        for (;;) {
1589
0
            char c = *tmpLocaleID;
1590
0
            if (c == 0) {
1591
0
                break;
1592
0
            }
1593
0
            tag.append(c, *err);
1594
0
            ++tmpLocaleID;
1595
0
        }
1596
0
    }
1597
1598
0
    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1599
        /* Handle @FOO variant if @ is present and not followed by = */
1600
0
        if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1601
            /* Add missing '_' if needed */
1602
0
            if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1603
0
                do {
1604
0
                    tag.append('_', *err);
1605
0
                    ++fieldCount;
1606
0
                } while(fieldCount<2);
1607
0
            }
1608
1609
0
            int32_t posixVariantSize = -tag.length();
1610
0
            {
1611
0
                CharStringByteSink s(&tag);
1612
0
                _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
1613
0
            }
1614
0
            posixVariantSize += tag.length();
1615
0
            if (posixVariantSize > 0) {
1616
0
                variantSize += posixVariantSize;
1617
0
            }
1618
0
        }
1619
1620
        /* Look up the ID in the canonicalization map */
1621
0
        for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1622
0
            StringPiece id(CANONICALIZE_MAP[j].id);
1623
0
            if (tag == id) {
1624
0
                if (id.empty() && tmpLocaleID != NULL) {
1625
0
                    break; /* Don't remap "" if keywords present */
1626
0
                }
1627
0
                tag.clear();
1628
0
                tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
1629
0
                break;
1630
0
            }
1631
0
        }
1632
0
    }
1633
1634
0
    sink.Append(tag.data(), tag.length());
1635
1636
0
    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1637
0
        if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1638
0
            (!separatorIndicator || separatorIndicator > keywordAssign)) {
1639
0
            sink.Append("@", 1);
1640
0
            ++fieldCount;
1641
0
            ulocimp_getKeywords(tmpLocaleID+1, '@', sink, TRUE, err);
1642
0
        }
1643
0
    }
1644
0
}
1645
1646
/* ### ID parsing API **************************************************/
1647
1648
/**
 * Truncates a locale ID at its last '_' to produce the parent locale ID.
 *
 * A leading "und" (matched case-insensitively as "und_") is dropped from the
 * result. When the ID contains no '_' (or only a leading one) the parent is
 * the empty string.
 *
 * @param localeID       locale ID to truncate; NULL selects the default locale
 * @param parent         destination buffer; may be NULL when parentCapacity is 0
 *                       (preflighting), and may alias localeID
 * @param parentCapacity capacity of `parent` in bytes; must not be negative
 * @param err            in/out error code; set to U_ILLEGAL_ARGUMENT_ERROR for
 *                       an invalid buffer description, otherwise as reported
 *                       by u_terminateChars()
 * @return length of the parent ID, which may exceed parentCapacity
 */
U_CAPI int32_t  U_EXPORT2
uloc_getParent(const char*    localeID,
               char* parent,
               int32_t parentCapacity,
               UErrorCode* err)
{
    if (err == NULL || U_FAILURE(*err)) {
        return 0;
    }
    /* A negative capacity would otherwise be converted to a huge copy length. */
    if (parentCapacity < 0 || (parent == NULL && parentCapacity > 0)) {
        *err = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    if (localeID == NULL) {
        localeID = uloc_getDefault();
    }

    const char *lastUnderscore = uprv_strrchr(localeID, '_');
    int32_t i = (lastUnderscore != NULL) ? (int32_t)(lastUnderscore - localeID) : 0;

    if (i > 0) {
        if (uprv_strnicmp(localeID, "und_", 4) == 0) {
            /* Drop "und" but keep the separator that follows it. */
            localeID += 3;
            i -= 3;
        }
        const int32_t copyLength = uprv_min(i, parentCapacity);
        /* Nothing is copied when preflighting (NULL/zero-capacity buffer) or
           when source and destination already coincide; memmove is used
           because the caller may pass overlapping buffers. */
        if (copyLength > 0 && parent != localeID) {
            uprv_memmove(parent, localeID, copyLength);
        }
    }

    return u_terminateChars(parent, parentCapacity, i, err);
}
1682
1683
/**
 * Extracts the language subtag of a locale ID into `language`.
 * Per the original note, a 2-character ISO 639 code is returned if one exists.
 *
 * @param localeID         locale ID to parse; NULL selects the default locale
 * @param language         destination buffer
 * @param languageCapacity capacity of `language`
 * @param err              in/out error code; 0 is returned when it is NULL
 *                         or already a failure
 * @return the length reported by CharString::extract()
 */
U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char*    localeID,
         char* language,
         int32_t languageCapacity,
         UErrorCode* err)
{
    if (err != NULL && U_SUCCESS(*err)) {
        const char *id = (localeID != NULL) ? localeID : uloc_getDefault();
        return ulocimp_getLanguage(id, NULL, *err).extract(language, languageCapacity, *err);
    }
    return 0;
}
1701
1702
/**
 * Extracts the script subtag of a locale ID into `script`.
 *
 * @param localeID       locale ID to parse; NULL selects the default locale
 * @param script         destination buffer
 * @param scriptCapacity capacity of `script`
 * @param err            in/out error code; 0 is returned when it is NULL
 *                       or already a failure
 * @return length of the script subtag; when the language is not followed by
 *         a separator the result is the empty string (length 0)
 */
U_CAPI int32_t U_EXPORT2
uloc_getScript(const char*    localeID,
         char* script,
         int32_t scriptCapacity,
         UErrorCode* err)
{
    if (err == NULL || U_FAILURE(*err)) {
        return 0;
    }

    const char *cursor = (localeID != NULL) ? localeID : uloc_getDefault();

    /* Step over the language subtag. */
    ulocimp_getLanguage(cursor, &cursor, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }

    if (!_isIDSeparator(*cursor)) {
        return u_terminateChars(script, scriptCapacity, 0, err);
    }
    return ulocimp_getScript(cursor + 1, NULL, *err).extract(script, scriptCapacity, *err);
}
1727
1728
/**
 * Extracts the country subtag of a locale ID into `country`.
 *
 * @param localeID        locale ID to parse; NULL selects the default locale
 * @param country         destination buffer
 * @param countryCapacity capacity of `country`
 * @param err             in/out error code; 0 is returned when it is NULL
 *                        or already a failure
 * @return length of the country subtag; the empty string (length 0) is
 *         produced when no separator leads to a country position
 */
U_CAPI int32_t  U_EXPORT2
uloc_getCountry(const char* localeID,
            char* country,
            int32_t countryCapacity,
            UErrorCode* err)
{
    if (err == NULL || U_FAILURE(*err)) {
        return 0;
    }

    const char *cursor = (localeID != NULL) ? localeID : uloc_getDefault();

    /* Step over the language subtag. */
    ulocimp_getLanguage(cursor, &cursor, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }
    if (!_isIDSeparator(*cursor)) {
        return u_terminateChars(country, countryCapacity, 0, err);
    }

    /* Step over the script subtag when one is present. */
    const char *afterScript;
    ulocimp_getScript(cursor + 1, &afterScript, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }
    if (afterScript != cursor + 1) {
        cursor = afterScript;
    }
    if (!_isIDSeparator(*cursor)) {
        return u_terminateChars(country, countryCapacity, 0, err);
    }

    return ulocimp_getCountry(cursor + 1, NULL, *err).extract(country, countryCapacity, *err);
}
1765
1766
/**
 * Extracts the variant part of a locale ID into `variant`.
 *
 * The language, optional script and optional country subtags are skipped;
 * whatever _getVariant() produces from the remainder is written to the buffer.
 *
 * @param localeID        locale ID to parse; NULL selects the default locale.
 *                        IDs for which _hasBCP47Extension() is true are first
 *                        passed through _ConvertBCP47().
 * @param variant         destination buffer
 * @param variantCapacity capacity of `variant`
 * @param err             in/out error code; set to U_BUFFER_OVERFLOW_ERROR
 *                        when the variant does not fit
 * @return number of bytes appended for the variant (0 when there is none)
 */
U_CAPI int32_t  U_EXPORT2
uloc_getVariant(const char* localeID,
                char* variant,
                int32_t variantCapacity,
                UErrorCode* err)
{
    if (err == NULL || U_FAILURE(*err)) {
        return 0;
    }

    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char* tmpLocaleID;
    if (!_hasBCP47Extension(localeID)) {
        if (localeID == NULL) {
            localeID = uloc_getDefault();
        }
        tmpLocaleID = localeID;
    } else {
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
    }

    /* Step over the language subtag. */
    ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }
    if (!_isIDSeparator(*tmpLocaleID)) {
        return u_terminateChars(variant, variantCapacity, 0, err);
    }

    /* Step over the script subtag when one is present. */
    const char *afterScript;
    ulocimp_getScript(tmpLocaleID + 1, &afterScript, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }
    if (afterScript != tmpLocaleID + 1) {
        tmpLocaleID = afterScript;
    }
    if (!_isIDSeparator(*tmpLocaleID)) {
        return u_terminateChars(variant, variantCapacity, 0, err);
    }

    /* Step over the country subtag when one is present. */
    const char *afterCountry;
    ulocimp_getCountry(tmpLocaleID + 1, &afterCountry, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }
    if (afterCountry != tmpLocaleID + 1) {
        tmpLocaleID = afterCountry;
    }
    if (!_isIDSeparator(*tmpLocaleID)) {
        return u_terminateChars(variant, variantCapacity, 0, err);
    }

    /* If there was no country ID, skip a possible extra IDSeparator */
    if (tmpLocaleID != afterCountry && _isIDSeparator(tmpLocaleID[1])) {
        tmpLocaleID++;
    }

    CheckedArrayByteSink sink(variant, variantCapacity);
    _getVariant(tmpLocaleID + 1, *tmpLocaleID, sink, FALSE);

    const int32_t length = sink.NumberOfBytesAppended();
    if (U_FAILURE(*err)) {
        return length;
    }
    if (sink.Overflowed()) {
        *err = U_BUFFER_OVERFLOW_ERROR;
        return length;
    }
    return u_terminateChars(variant, variantCapacity, length, err);
}
1842
1843
/**
 * Writes the full name of a locale ID (as produced by ulocimp_getName())
 * into a caller-supplied buffer.
 *
 * @param localeID     locale ID to process
 * @param name         destination buffer
 * @param nameCapacity capacity of `name`
 * @param err          in/out error code; set to U_BUFFER_OVERFLOW_ERROR when
 *                     the result does not fit
 * @return number of bytes the result needs (0 if `err` was already a failure)
 */
U_CAPI int32_t  U_EXPORT2
uloc_getName(const char* localeID,
             char* name,
             int32_t nameCapacity,
             UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    CheckedArrayByteSink sink(name, nameCapacity);
    ulocimp_getName(localeID, sink, err);

    const int32_t length = sink.NumberOfBytesAppended();
    if (U_SUCCESS(*err)) {
        if (sink.Overflowed()) {
            *err = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(name, nameCapacity, length, err);
        }
    }
    return length;
}
1870
1871
/**
 * ByteSink flavour of uloc_getName(): forwards to _canonicalize() with
 * options == 0, i.e. level 1 canonicalization with keywords kept.
 *
 * @param localeID locale ID to process
 * @param sink     receives the resulting name
 * @param err      in/out error code, passed through to _canonicalize()
 */
U_CAPI void U_EXPORT2
1872
ulocimp_getName(const char* localeID,
1873
                ByteSink& sink,
1874
                UErrorCode* err)
1875
0
{
1876
0
    _canonicalize(localeID, sink, 0, err);
1877
0
}
1878
1879
/**
 * Writes the base name of a locale ID (as produced by ulocimp_getBaseName(),
 * i.e. without the keyword section) into a caller-supplied buffer.
 *
 * @param localeID     locale ID to process
 * @param name         destination buffer
 * @param nameCapacity capacity of `name`
 * @param err          in/out error code; set to U_BUFFER_OVERFLOW_ERROR when
 *                     the result does not fit
 * @return number of bytes the result needs (0 if `err` was already a failure)
 */
U_CAPI int32_t  U_EXPORT2
uloc_getBaseName(const char* localeID,
                 char* name,
                 int32_t nameCapacity,
                 UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    CheckedArrayByteSink sink(name, nameCapacity);
    ulocimp_getBaseName(localeID, sink, err);

    const int32_t length = sink.NumberOfBytesAppended();
    if (U_SUCCESS(*err)) {
        if (sink.Overflowed()) {
            *err = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(name, nameCapacity, length, err);
        }
    }
    return length;
}
1906
1907
/**
 * ByteSink flavour of uloc_getBaseName(): forwards to _canonicalize() with
 * _ULOC_STRIP_KEYWORDS, so the keyword section is not written to the sink.
 *
 * @param localeID locale ID to process
 * @param sink     receives the resulting base name
 * @param err      in/out error code, passed through to _canonicalize()
 */
U_CAPI void U_EXPORT2
1908
ulocimp_getBaseName(const char* localeID,
1909
                    ByteSink& sink,
1910
                    UErrorCode* err)
1911
0
{
1912
0
    _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err);
1913
0
}
1914
1915
/**
 * Writes the level 2 canonical form of a locale ID (as produced by
 * ulocimp_canonicalize()) into a caller-supplied buffer.
 *
 * @param localeID     locale ID to process
 * @param name         destination buffer
 * @param nameCapacity capacity of `name`
 * @param err          in/out error code; set to U_BUFFER_OVERFLOW_ERROR when
 *                     the result does not fit
 * @return number of bytes the result needs (0 if `err` was already a failure)
 */
U_CAPI int32_t  U_EXPORT2
uloc_canonicalize(const char* localeID,
                  char* name,
                  int32_t nameCapacity,
                  UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    CheckedArrayByteSink sink(name, nameCapacity);
    ulocimp_canonicalize(localeID, sink, err);

    const int32_t length = sink.NumberOfBytesAppended();
    if (U_SUCCESS(*err)) {
        if (sink.Overflowed()) {
            *err = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(name, nameCapacity, length, err);
        }
    }
    return length;
}
1942
1943
/**
 * ByteSink flavour of uloc_canonicalize(): forwards to _canonicalize() with
 * _ULOC_CANONICALIZE, i.e. level 2 canonicalization with keywords kept.
 *
 * @param localeID locale ID to process
 * @param sink     receives the canonical ID
 * @param err      in/out error code, passed through to _canonicalize()
 */
U_CAPI void U_EXPORT2
1944
ulocimp_canonicalize(const char* localeID,
1945
                     ByteSink& sink,
1946
                     UErrorCode* err)
1947
0
{
1948
0
    _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err);
1949
0
}
1950
1951
/**
 * Returns the 3-letter language code that corresponds to the language of a
 * locale ID.
 *
 * The language subtag is looked up in LANGUAGES and the entry at the same
 * index of LANGUAGES_3 is returned.
 *
 * @param localeID locale ID to inspect; NULL selects the default locale
 * @return a pointer into LANGUAGES_3, or "" when the language cannot be
 *         extracted, does not fit the local buffer, or is not in LANGUAGES
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Language(const char* localeID)
{
    int16_t offset;
    char lang[ULOC_LANG_CAPACITY];
    UErrorCode err = U_ZERO_ERROR;

    if (localeID == NULL)
    {
        localeID = uloc_getDefault();
    }
    uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
    /* U_STRING_NOT_TERMINATED_WARNING is not a failure code, but it means
       `lang` has no NUL terminator and must not be used as a C string. */
    if (U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING)
        return "";
    offset = _findIndex(LANGUAGES, lang);
    if (offset < 0)
        return "";
    return LANGUAGES_3[offset];
}
1970
1971
/**
 * Returns the 3-letter country code that corresponds to the country of a
 * locale ID.
 *
 * The country subtag is looked up in COUNTRIES and the entry at the same
 * index of COUNTRIES_3 is returned.
 *
 * @param localeID locale ID to inspect; NULL selects the default locale
 * @return a pointer into COUNTRIES_3, or "" when the country cannot be
 *         extracted or is not in COUNTRIES
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Country(const char* localeID)
{
    if (localeID == NULL) {
        localeID = uloc_getDefault();
    }

    /* NOTE(review): the buffer is sized with ULOC_LANG_CAPACITY, not a
       country-specific constant; kept exactly as in the original. */
    char cntry[ULOC_LANG_CAPACITY];
    UErrorCode err = U_ZERO_ERROR;
    uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);

    if (U_SUCCESS(err)) {
        const int16_t offset = _findIndex(COUNTRIES, cntry);
        if (offset >= 0) {
            return COUNTRIES_3[offset];
        }
    }
    return "";
}
1991
1992
/**
 * Maps a locale ID to a Windows LCID.
 *
 * The platform lookup (uprv_convertToLCIDPlatform) is tried first; if it
 * yields nothing, the ICU mapping (uprv_convertToLCID) is used. When the ID
 * contains keywords, only a non-empty "collation" keyword is kept for the
 * ICU lookup; all other keywords are ignored.
 *
 * @param localeID locale ID to convert
 * @return the LCID, or 0 when localeID is NULL, shorter than two characters,
 *         or cannot be converted
 */
U_CAPI uint32_t  U_EXPORT2
uloc_getLCID(const char* localeID)
{
    UErrorCode status = U_ZERO_ERROR;
    char       langID[ULOC_FULLNAME_CAPACITY];
    uint32_t   lcid = 0;

    /* Check for incomplete id. */
    if (!localeID || uprv_strlen(localeID) < 2) {
        return 0;
    }

    // First, attempt Windows platform lookup if available, but fall
    // through to catch any special cases (ICU vs Windows name differences).
    lcid = uprv_convertToLCIDPlatform(localeID, &status);
    if (U_FAILURE(status)) {
        return 0;
    }
    if (lcid > 0) {
        // Windows found an LCID, return that
        return lcid;
    }

    uloc_getLanguage(localeID, langID, sizeof(langID), &status);
    if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
        return 0;
    }

    if (uprv_strchr(localeID, '@')) {
        // uprv_convertToLCID does not support keywords other than collation.
        // Remove all keywords except collation.
        char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
        // One byte is held back so that tmpLocaleID[len] = 0 below is always in bounds.
        const int32_t usableCapacity = UPRV_LENGTHOF(tmpLocaleID) - 1;

        CharString collVal;
        {
            CharStringByteSink sink(&collVal);
            ulocimp_getKeywordValue(localeID, "collation", sink, &status);
        }

        if (U_SUCCESS(status) && !collVal.isEmpty()) {
            int32_t len = uloc_getBaseName(localeID, tmpLocaleID, usableCapacity, &status);

            if (U_SUCCESS(status) && len > 0) {
                tmpLocaleID[len] = 0;

                // uloc_setKeywordValue() takes the capacity of the whole
                // buffer, not the space left after the base name; passing
                // the remainder made long base names fail spuriously.
                len = uloc_setKeywordValue("collation", collVal.data(), tmpLocaleID,
                    usableCapacity, &status);

                if (U_SUCCESS(status) && len > 0) {
                    tmpLocaleID[len] = 0;
                    return uprv_convertToLCID(langID, tmpLocaleID, &status);
                }
            }
        }

        // fall through - all keywords are simply ignored
        status = U_ZERO_ERROR;
    }

    return uprv_convertToLCID(langID, localeID, &status);
}
2055
2056
/**
 * Converts a host LCID to a locale ID string by delegating to
 * uprv_convertToPosix(); all arguments and the return value are passed
 * through unchanged.
 *
 * @param hostid         LCID to convert
 * @param locale         destination buffer
 * @param localeCapacity capacity of `locale`
 * @param status         in/out error code
 * @return the value returned by uprv_convertToPosix()
 */
U_CAPI int32_t U_EXPORT2
2057
uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2058
                UErrorCode *status)
2059
0
{
2060
0
    return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2061
0
}
2062
2063
/* ### Default locale **************************************************/
2064
2065
/**
 * Returns the ID of the default locale, as reported by locale_get_default().
 */
U_CAPI const char*  U_EXPORT2
2066
uloc_getDefault()
2067
0
{
2068
0
    return locale_get_default();
2069
0
}
2070
2071
/**
 * Sets the default locale by forwarding to locale_set_default(), which
 * propagates the change to the C++ side.
 *
 * @param newDefaultLocale ID of the new default locale
 * @param err              in/out error code; nothing happens if it already
 *                         indicates a failure. It is not otherwise modified
 *                         by this function.
 */
U_CAPI void  U_EXPORT2
uloc_setDefault(const char*   newDefaultLocale,
             UErrorCode* err)
{
    if (U_SUCCESS(*err)) {
        /* propagate change to C++ */
        locale_set_default(newDefaultLocale);
    }
}
2082
2083
/**
 * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
 * to an array of pointers to arrays of char.  All of these pointers are owned
 * by ICU -- do not delete them, and do not write through them.  The array is
 * terminated with a null pointer.
 *
 * The returned pointer is the file-level LANGUAGES table.
 */
2089
U_CAPI const char* const*  U_EXPORT2
2090
uloc_getISOLanguages()
2091
0
{
2092
0
    return LANGUAGES;
2093
0
}
2094
2095
/**
 * Returns a list of all 2-letter country codes defined in ISO 3166.  This is a
 * pointer to an array of pointers to arrays of char.  All of these pointers are
 * owned by ICU -- do not delete them, and do not write through them.  The array is
 * terminated with a null pointer.
 *
 * The returned pointer is the file-level COUNTRIES table.
 */
2101
U_CAPI const char* const*  U_EXPORT2
2102
uloc_getISOCountries()
2103
0
{
2104
0
    return COUNTRIES;
2105
0
}
2106
2107
/**
 * Converts a locale keyword to its Unicode locale (BCP 47) key.
 *
 * @param keyword keyword to convert
 * @return the key found by ulocimp_toBcpKey(); if there is no mapping but
 *         `keyword` itself passes ultag_isUnicodeLocaleKey(), `keyword` is
 *         returned unchanged; otherwise NULL
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char* keyword)
{
    const char* bcpKey = ulocimp_toBcpKey(keyword);
    if (bcpKey != NULL) {
        return bcpKey;
    }
    if (ultag_isUnicodeLocaleKey(keyword, -1)) {
        // unknown keyword, but syntax is fine..
        return keyword;
    }
    return NULL;
}
2117
2118
/**
 * Converts a locale keyword value to its Unicode locale (BCP 47) type.
 *
 * @param keyword keyword the value belongs to
 * @param value   value to convert
 * @return the type found by ulocimp_toBcpType(); if there is no mapping but
 *         `value` itself passes ultag_isUnicodeLocaleType(), `value` is
 *         returned unchanged; otherwise NULL
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char* keyword, const char* value)
{
    const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
    if (bcpType != NULL) {
        return bcpType;
    }
    if (ultag_isUnicodeLocaleType(value, -1)) {
        // unknown value, but syntax is fine..
        return value;
    }
    return NULL;
}
2128
2129
/**
 * Tells whether a legacy locale key consists solely of characters accepted
 * by UPRV_ISALPHANUM. An empty string is accepted.
 *
 * @param legacyKey NUL-terminated key to check
 * @return TRUE if every character passes UPRV_ISALPHANUM, FALSE otherwise
 */
static UBool
isWellFormedLegacyKey(const char* legacyKey)
{
    for (const char* p = legacyKey; *p != 0; ++p) {
        if (!UPRV_ISALPHANUM(*p)) {
            return FALSE;
        }
    }
    return TRUE;
}
2141
2142
/**
 * Tells whether a legacy locale type is well-formed: one or more non-empty
 * runs of UPRV_ISALPHANUM characters, separated by single '_', '/' or '-'
 * characters. Leading, trailing and doubled separators are rejected, as is
 * the empty string.
 *
 * @param legacyType NUL-terminated type to check
 * @return TRUE if the type matches the syntax above, FALSE otherwise
 */
static UBool
isWellFormedLegacyType(const char* legacyType)
{
    int32_t runLength = 0;   /* alphanumerics seen since the last separator */
    for (const char* p = legacyType; *p != 0; ++p) {
        const char c = *p;
        if (UPRV_ISALPHANUM(c)) {
            ++runLength;
            continue;
        }
        if (c != '_' && c != '/' && c != '-') {
            return FALSE;
        }
        /* A separator must be preceded by at least one alphanumeric. */
        if (runLength == 0) {
            return FALSE;
        }
        runLength = 0;
    }
    return (runLength != 0);
}
2162
2163
/**
 * Converts a Unicode locale key to the corresponding legacy key.
 *
 * @param keyword key to convert
 * @return the key found by ulocimp_toLegacyKey(); if there is no mapping but
 *         `keyword` is well-formed under the legacy syntax, `keyword` is
 *         returned unchanged; otherwise NULL
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword)
{
    const char* legacyKey = ulocimp_toLegacyKey(keyword);
    if (legacyKey != NULL) {
        return legacyKey;
    }

    // No mapping: accept the input if it is well-formed with the legacy
    // locale syntax.
    //
    // Note:
    //  LDML/CLDR provides some definition of keyword syntax in
    //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
    //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
    //  Keys can only consist of [0-9a-zA-Z].
    if (isWellFormedLegacyKey(keyword)) {
        return keyword;
    }
    return NULL;
}
2181
2182
/**
 * Converts a Unicode locale type to the corresponding legacy type.
 *
 * @param keyword key the value belongs to
 * @param value   type to convert
 * @return the type found by ulocimp_toLegacyType(); if there is no mapping
 *         but `value` is well-formed under the legacy syntax, `value` is
 *         returned unchanged; otherwise NULL
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char* keyword, const char* value)
{
    const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
    if (legacyType != NULL) {
        return legacyType;
    }

    // No mapping: accept the input if it is well-formed with the legacy
    // locale syntax.
    //
    // Note:
    //  LDML/CLDR provides some definition of keyword syntax in
    //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
    //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
    //  Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
    //  separators in the middle (e.g. "Asia/Tel_Aviv"); see
    //  isWellFormedLegacyType() for the separators actually accepted.
    if (isWellFormedLegacyType(value)) {
        return value;
    }
    return NULL;
}
2201
2202
/*eof*/