Coverage Report

Created: 2024-04-24 06:23

/src/icu/source/common/uloc.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (C) 1997-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*
9
* File ULOC.CPP
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   04/01/97    aliu        Creation.
15
*   08/21/98    stephen     JDK 1.2 sync
16
*   12/08/98    rtg         New Locale implementation and C API
17
*   03/15/99    damiba      overhaul.
18
*   04/06/99    stephen     changed setDefault() to realloc and copy
19
*   06/14/99    stephen     Changed calls to ures_open for new params
20
*   07/21/99    stephen     Modified setDefault() to propagate to C++
21
*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
22
*                           brought canonicalization code into line with spec
23
*****************************************************************************/
24
25
/*
26
   POSIX's locale format, from putil.c: [no spaces]
27
28
     ll [ _CC ] [ . MM ] [ @ VV]
29
30
     l = lang, C = ctry, M = charmap, V = variant
31
*/
32
33
#include "unicode/bytestream.h"
34
#include "unicode/errorcode.h"
35
#include "unicode/stringpiece.h"
36
#include "unicode/utypes.h"
37
#include "unicode/ustring.h"
38
#include "unicode/uloc.h"
39
40
#include "bytesinkutil.h"
41
#include "putilimp.h"
42
#include "ustr_imp.h"
43
#include "ulocimp.h"
44
#include "umutex.h"
45
#include "cstring.h"
46
#include "cmemory.h"
47
#include "locmap.h"
48
#include "uarrsort.h"
49
#include "uenumimp.h"
50
#include "uassert.h"
51
#include "charstr.h"
52
53
U_NAMESPACE_USE
54
55
/* ### Declarations **************************************************/
56
57
/* Locale stuff from locid.cpp */
58
U_CFUNC void locale_set_default(const char *id);
59
U_CFUNC const char *locale_get_default(void);
60
61
/* ### Data tables **************************************************/
62
63
/**
64
 * Table of language codes, both 2- and 3-letter, with preference
65
 * given to 2-letter codes where possible.  Includes 3-letter codes
66
 * that lack a 2-letter equivalent.
67
 *
68
 * This list must be in sorted order.  This list is returned directly
69
 * to the user by some API.
70
 *
71
 * This list must be kept in sync with LANGUAGES_3, with corresponding
72
 * entries matched.
73
 *
74
 * This table should be terminated with a NULL entry, followed by a
75
 * second list, and another NULL entry.  The first list is visible to
76
 * user code when this array is returned by API.  The second list
77
 * contains codes we support, but do not expose through user API.
78
 *
79
 * Notes
80
 *
81
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
82
 * include the revisions up to 2001/7/27 *CWB*
83
 *
84
 * The 3 character codes are the terminology codes like RFC 3066.  This
85
 * is compatible with prior ICU codes
86
 *
87
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
88
 * table but now at the end of the table because 3 character codes are
89
 * duplicates.  This avoids bad searches going from 3 to 2 character
90
 * codes.
91
 *
92
 * The range qaa-qtz is reserved for local use
93
 */
94
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
95
/* ISO639 table version is 20150505 */
96
/* Subsequent hand addition of selected languages */
97
static const char * const LANGUAGES[] = {
98
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
99
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
100
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
101
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
102
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
103
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
104
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
105
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
106
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
107
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
108
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
109
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
110
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
111
    "cs",  "csb", "cu",  "cv",  "cy",
112
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
113
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
114
    "dyo", "dyu", "dz",  "dzg",
115
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
116
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
117
    "ext",
118
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
119
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
120
    "frs", "fur", "fy",
121
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
122
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
123
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
124
    "gur", "guz", "gv",  "gwi",
125
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
126
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
127
    "hup", "hy",  "hz",
128
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
129
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
130
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
131
    "jv",
132
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
133
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
134
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
135
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
136
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
137
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
138
    "kv",  "kw",  "ky",
139
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
140
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
141
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
142
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
143
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
144
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
145
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
146
    "ml",  "mn",  "mnc", "mni",
147
    "moh", "mos", "mr",  "mrj",
148
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
149
    "my",  "mye", "myv", "mzn",
150
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
151
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
152
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
153
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
154
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
155
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
156
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
157
    "pon", "prg", "pro", "ps",  "pt",
158
    "qu",  "quc", "qug",
159
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
160
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
161
    "rw",  "rwk",
162
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
163
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
164
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
165
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
166
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
167
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
168
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
169
    "sv",  "sw",  "swb", "syc", "syr", "szl",
170
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
171
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr",
172
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
173
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
174
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
175
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
176
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
177
    "vot", "vro", "vun",
178
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
179
    "xal", "xh",  "xmf", "xog",
180
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
181
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
182
    "zun", "zxx", "zza",
183
NULL,
184
    "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  /* obsolete language codes */
185
NULL
186
};
187
188
static const char* const DEPRECATED_LANGUAGES[]={
189
    "in", "iw", "ji", "jw", "mo", NULL, NULL
190
};
191
static const char* const REPLACEMENT_LANGUAGES[]={
192
    "id", "he", "yi", "jv", "ro", NULL, NULL
193
};
194
195
/**
196
 * Table of 3-letter language codes.
197
 *
198
 * This is a lookup table used to convert 3-letter language codes to
199
 * their 2-letter equivalent, where possible.  It must be kept in sync
200
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
201
 * same language as LANGUAGES_3[i].  The commented-out lines are
202
 * copied from LANGUAGES to make eyeballing this baby easier.
203
 *
204
 * Where a 3-letter language code has no 2-letter equivalent, the
205
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
206
 *
207
 * This table should be terminated with a NULL entry, followed by a
208
 * second list, and another NULL entry.  The two lists correspond to
209
 * the two lists in LANGUAGES.
210
 */
211
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
212
/* ISO639 table version is 20150505 */
213
/* Subsequent hand addition of selected languages */
214
static const char * const LANGUAGES_3[] = {
215
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
216
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
217
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
218
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
219
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
220
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
221
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
222
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
223
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
224
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
225
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
226
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
227
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
228
    "ces", "csb", "chu", "chv", "cym",
229
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
230
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
231
    "dyo", "dyu", "dzo", "dzg",
232
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
233
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
234
    "ext",
235
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
236
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
237
    "frs", "fur", "fry",
238
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
239
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
240
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
241
    "gur", "guz", "glv", "gwi",
242
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
243
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
244
    "hup", "hye", "her",
245
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
246
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
247
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
248
    "jav",
249
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
250
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
251
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
252
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
253
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
254
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
255
    "kom", "cor", "kir",
256
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
257
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
258
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
259
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
260
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
261
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
262
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
263
    "mal", "mon", "mnc", "mni",
264
    "moh", "mos", "mar", "mrj",
265
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
266
    "mya", "mye", "myv", "mzn",
267
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
268
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
269
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
270
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
271
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
272
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
273
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
274
    "pon", "prg", "pro", "pus", "por",
275
    "que", "quc", "qug",
276
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
277
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
278
    "kin", "rwk",
279
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
280
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
281
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
282
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
283
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
284
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
285
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
286
    "swe", "swa", "swb", "syc", "syr", "szl",
287
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
288
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",
289
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
290
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
291
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
292
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
293
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
294
    "vot", "vro", "vun",
295
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
296
    "xal", "xho", "xmf", "xog",
297
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
298
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
299
    "zun", "zxx", "zza",
300
NULL,
301
/*  "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  */
302
    "ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl",
303
NULL
304
};
305
306
/**
307
 * Table of 2-letter country codes.
308
 *
309
 * This list must be in sorted order.  This list is returned directly
310
 * to the user by some API.
311
 *
312
 * This list must be kept in sync with COUNTRIES_3, with corresponding
313
 * entries matched.
314
 *
315
 * This table should be terminated with a NULL entry, followed by a
316
 * second list, and another NULL entry.  The first list is visible to
317
 * user code when this array is returned by API.  The second list
318
 * contains codes we support, but do not expose through user API.
319
 *
320
 * Notes:
321
 *
322
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
323
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
324
 * new codes keeping the old ones for compatibility updated to include
325
 * 1999/12/03 revisions *CWB*
326
 *
327
 * RO(ROM) is now RO(ROU) according to
328
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
329
 */
330
static const char * const COUNTRIES[] = {
331
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
332
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
333
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
334
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
335
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
336
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
337
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",
338
    "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",
339
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
340
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
341
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
342
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
343
    "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
344
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
345
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
346
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
347
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
348
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
349
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
350
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
351
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
352
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
353
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
354
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
355
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
356
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
357
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
358
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
359
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
360
    "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
361
NULL,
362
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
363
NULL
364
};
365
366
static const char* const DEPRECATED_COUNTRIES[] = {
367
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
368
};
369
static const char* const REPLACEMENT_COUNTRIES[] = {
370
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
371
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
372
};
373
374
/**
375
 * Table of 3-letter country codes.
376
 *
377
 * This is a lookup table used to convert 3-letter country codes to
378
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
379
 * For all valid i, COUNTRIES[i] must refer to the same country as
380
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
381
 * to make eyeballing this baby easier.
382
 *
383
 * This table should be terminated with a NULL entry, followed by a
384
 * second list, and another NULL entry.  The two lists correspond to
385
 * the two lists in COUNTRIES.
386
 */
387
static const char * const COUNTRIES_3[] = {
388
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
389
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
390
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
391
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
392
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
393
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
394
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
395
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
396
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
397
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
398
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
399
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
400
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",     */
401
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
402
/*  "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",     */
403
    "DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI",
404
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
405
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
406
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
407
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
408
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
409
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
410
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
411
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
412
/*  "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
413
    "XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
414
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
415
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
416
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
417
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
418
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
419
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
420
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
421
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
422
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
423
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
424
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
425
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
426
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
427
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
428
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
429
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
430
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
431
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
432
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
433
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
434
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
435
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
436
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
437
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
438
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
439
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
440
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
441
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
442
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
443
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
444
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
445
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
446
/*  "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
447
    "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
448
NULL,
449
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
450
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
451
NULL
452
};
453
454
/* One row of a locale-ID canonicalization table: an input ID and its replacement. */
typedef struct CanonicalizationMap {
    /* Locale ID as it may be supplied by the caller. */
    const char *id;
    /* Canonical locale ID that `id` maps to. */
    const char *canonicalID;
} CanonicalizationMap;
458
459
/**
460
 * A map to canonicalize locale IDs.  This handles a variety of
461
 * different semantic kinds of transformations.
462
 */
463
static const CanonicalizationMap CANONICALIZE_MAP[] = {
464
    { "art__LOJBAN",    "jbo" }, /* registered name */
465
    { "hy__AREVELA",    "hy" }, /* Registered IANA variant */
466
    { "hy__AREVMDA",    "hyw" }, /* Registered IANA variant */
467
    { "zh__GUOYU",      "zh" }, /* registered name */
468
    { "zh__HAKKA",      "hak" }, /* registered name */
469
    { "zh__XIANG",      "hsn" }, /* registered name */
470
    // subtags with 3 chars won't be treated as variants.
471
    { "zh_GAN",         "gan" }, /* registered name */
472
    { "zh_MIN_NAN",     "nan" }, /* registered name */
473
    { "zh_WUU",         "wuu" }, /* registered name */
474
    { "zh_YUE",         "yue" }, /* registered name */
475
};
476
477
/* ### BCP47 Conversion *******************************************/
478
/*
 * Test whether a locale id looks like a BCP47 tag carrying an extension:
 * the id is non-NULL, contains no '@', and getShortestSubtagLength() reports
 * a one-character subtag for it.
 *
 * The argument is expanded more than once, so pass an expression without
 * side effects.  The expansion is fully parenthesized so the macro can be
 * negated or combined with other operators safely.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
480
/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
481
static const char* _ConvertBCP47(
482
        const char* id, char* buffer, int32_t length,
483
0
        UErrorCode* err, int32_t* pLocaleIdSize) {
484
0
    const char* finalID;
485
0
    int32_t localeIDSize = uloc_forLanguageTag(id, buffer, length, NULL, err);
486
0
    if (localeIDSize <= 0 || U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) {
487
0
        finalID=id;
488
0
        if (*err == U_STRING_NOT_TERMINATED_WARNING) {
489
0
            *err = U_BUFFER_OVERFLOW_ERROR;
490
0
        }
491
0
    } else {
492
0
        finalID=buffer;
493
0
    }
494
0
    if (pLocaleIdSize != nullptr) {
495
0
        *pLocaleIdSize = localeIDSize;
496
0
    }
497
0
    return finalID;
498
0
}
499
/* Gets the size of the shortest subtag in the given localeID. */
500
0
static int32_t getShortestSubtagLength(const char *localeID) {
501
0
    int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
502
0
    int32_t length = localeIDLength;
503
0
    int32_t tmpLength = 0;
504
0
    int32_t i;
505
0
    UBool reset = TRUE;
506
507
0
    for (i = 0; i < localeIDLength; i++) {
508
0
        if (localeID[i] != '_' && localeID[i] != '-') {
509
0
            if (reset) {
510
0
                tmpLength = 0;
511
0
                reset = FALSE;
512
0
            }
513
0
            tmpLength++;
514
0
        } else {
515
0
            if (tmpLength != 0 && tmpLength < length) {
516
0
                length = tmpLength;
517
0
            }
518
0
            reset = TRUE;
519
0
        }
520
0
    }
521
522
0
    return length;
523
0
}
524
525
/* ### Keywords **************************************************/
526
0
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
527
0
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
528
/* Punctuation/symbols allowed in legacy key values */
529
0
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')
530
531
0
#define ULOC_KEYWORD_BUFFER_LEN 25
532
0
#define ULOC_MAX_NO_KEYWORDS 25
533
534
/*
 * Returns a pointer to the '@' that introduces the keyword section of
 * localeID, or NULL when no such character is present.
 */
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID) {
    const char *atSign = uprv_strchr(localeID, '@');
    if (atSign != NULL) {
        return atSign;
    }
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    /* The @ sign is a variant character: the code point compiled in on one
       EBCDIC machine may differ from the one used on another EBCDIC machine,
       so every known encoding of it is tried in turn. */
    static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
    for (const uint8_t *candidate = ebcdicSigns; *candidate != 0; ++candidate) {
        const char *hit = uprv_strchr(localeID, *candidate);
        if (hit != NULL) {
            return hit;
        }
    }
#endif
    return NULL;
}
557
558
/**
559
 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
560
 * @param keywordName incoming name to be canonicalized
561
 * @param status return status (keyword too long)
562
 * @return length of the keyword name
563
 */
564
static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
565
0
{
566
0
  int32_t keywordNameLen = 0;
567
568
0
  for (; *keywordName != 0; keywordName++) {
569
0
    if (!UPRV_ISALPHANUM(*keywordName)) {
570
0
      *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
571
0
      return 0;
572
0
    }
573
0
    if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
574
0
      buf[keywordNameLen++] = uprv_tolower(*keywordName);
575
0
    } else {
576
      /* keyword name too long for internal buffer */
577
0
      *status = U_INTERNAL_PROGRAM_ERROR;
578
0
      return 0;
579
0
    }
580
0
  }
581
0
  if (keywordNameLen == 0) {
582
0
    *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
583
0
    return 0;
584
0
  }
585
0
  buf[keywordNameLen] = 0; /* terminate */
586
587
0
  return keywordNameLen;
588
0
}
589
590
typedef struct {
591
    char keyword[ULOC_KEYWORD_BUFFER_LEN];
592
    int32_t keywordLen;
593
    const char *valueStart;
594
    int32_t valueLen;
595
} KeywordStruct;
596
597
static int32_t U_CALLCONV
598
0
compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
599
0
    const char* leftString = ((const KeywordStruct *)left)->keyword;
600
0
    const char* rightString = ((const KeywordStruct *)right)->keyword;
601
0
    return uprv_strcmp(leftString, rightString);
602
0
}
603
604
/*
 * Parses the "key=value;key=value" keyword list that starts at localeID,
 * lowercases the keyword names, drops duplicate keywords (the first
 * occurrence wins), sorts the entries by keyword and writes them to sink.
 *
 * Nothing is parsed or written unless prev is '@'.
 *
 * With valuesToo set, the output is "key=value" pairs joined by ';'.
 * Otherwise each keyword is written followed by a single NUL byte.
 *
 * status is set to U_INVALID_FORMAT_ERROR for a malformed list (missing '=',
 * ';' before '=', empty keyword or empty value) and to
 * U_INTERNAL_PROGRAM_ERROR when a keyword name is too long or there are more
 * than ULOC_MAX_NO_KEYWORDS keywords; in those cases nothing is written.
 */
U_CFUNC void
ulocimp_getKeywords(const char *localeID,
                    char prev,
                    ByteSink& sink,
                    UBool valuesToo,
                    UErrorCode *status)
{
    if (prev != '@') {
        /* not at the start of a keyword definition */
        return;
    }

    KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
    const int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
    int32_t numKeywords = 0;
    const char *pos = localeID;

    /* grab pairs, trim spaces, lowercase keywords; sorting happens afterwards */
    for (;;) {
        /* skip leading spaces */
        while (*pos == ' ') {
            ++pos;
        }
        if (*pos == 0) { /* handle trailing "; " */
            break;
        }
        if (numKeywords == maxKeywords) {
            *status = U_INTERNAL_PROGRAM_ERROR;
            return;
        }

        const char *equalSign = uprv_strchr(pos, '=');
        const char *semicolon = uprv_strchr(pos, ';');
        /* lack of '=' [foo@currency] is illegal */
        /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
        if (equalSign == NULL || (semicolon != NULL && semicolon < equalSign)) {
            *status = U_INVALID_FORMAT_ERROR;
            return;
        }
        if (equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
            /* keyword name too long for internal buffer */
            *status = U_INTERNAL_PROGRAM_ERROR;
            return;
        }

        /* the slot is only kept (numKeywords incremented) if the keyword is new */
        KeywordStruct &entry = keywordList[numKeywords];

        /* normalize the keyword name: remove spaces, lowercase the rest */
        int32_t nameLen = 0;
        for (const char *p = pos; p < equalSign; ++p) {
            if (*p != ' ') {
                entry.keyword[nameLen++] = uprv_tolower(*p);
            }
        }
        /* zero-length keyword is an error. */
        if (nameLen == 0) {
            *status = U_INVALID_FORMAT_ERROR;
            return;
        }
        entry.keyword[nameLen] = 0;
        entry.keywordLen = nameLen;

        /* the value starts after the '=' and any spaces that follow it */
        const char *value = equalSign + 1;
        while (*value == ' ') {
            ++value;
        }
        /* premature end or zero-length value */
        if (*value == 0 || value == semicolon) {
            *status = U_INVALID_FORMAT_ERROR;
            return;
        }
        entry.valueStart = value;

        if (semicolon != NULL) {
            /* the value ends at the ';'; back up over trailing spaces */
            const char *valueEnd = semicolon;
            while (*(valueEnd - 1) == ' ') {
                --valueEnd;
            }
            entry.valueLen = (int32_t)(valueEnd - value);
            pos = semicolon + 1;
        } else {
            /* last pair: the value runs to the end of the string */
            int32_t len = (int32_t)uprv_strlen(value);
            while (len != 0 && value[len - 1] == ' ') {
                --len;
            }
            entry.valueLen = len;
            pos = NULL;
        }

        /* If this is a duplicate keyword, then ignore it */
        bool duplicate = false;
        for (int32_t j = 0; j < numKeywords && !duplicate; ++j) {
            duplicate = (uprv_strcmp(keywordList[j].keyword, entry.keyword) == 0);
        }
        if (!duplicate) {
            ++numKeywords;
        }

        if (pos == NULL) {
            break;
        }
    }

    /* now we have a list of keywords; sort it by keyword name */
    uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);

    /* Now construct the keyword part */
    for (int32_t k = 0; k < numKeywords; k++) {
        const KeywordStruct &kw = keywordList[k];
        sink.Append(kw.keyword, kw.keywordLen);
        if (!valuesToo) {
            /* names only: each one is followed by a NUL byte */
            sink.Append("\0", 1);
            continue;
        }
        sink.Append("=", 1);
        sink.Append(kw.valueStart, kw.valueLen);
        if (k < numKeywords - 1) {
            sink.Append(";", 1);
        }
    }
}
724
725
/*
 * Buffer-based wrapper around ulocimp_getKeywordValue(): collects the value
 * of keywordName in localeID into buffer through a CheckedArrayByteSink.
 *
 * Returns 0 without doing anything if *status already indicates a failure.
 * Otherwise returns the number of bytes the sink reports as appended. If the
 * lookup succeeded, *status becomes U_BUFFER_OVERFLOW_ERROR when the sink
 * overflowed; otherwise the result is passed to u_terminateChars().
 */
U_CAPI int32_t U_EXPORT2
uloc_getKeywordValue(const char* localeID,
                     const char* keywordName,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status)
{
    if (U_FAILURE(*status)) {
        return 0;
    }

    CheckedArrayByteSink sink(buffer, bufferCapacity);
    ulocimp_getKeywordValue(localeID, keywordName, sink, status);

    const int32_t valueLength = sink.NumberOfBytesAppended();

    if (U_SUCCESS(*status)) {
        if (!sink.Overflowed()) {
            u_terminateChars(buffer, bufferCapacity, valueLength, status);
        } else {
            *status = U_BUFFER_OVERFLOW_ERROR;
        }
    }

    return valueLength;
}
752
753
/*
 * Looks up keywordName among the keywords of localeID and appends its value,
 * unchanged in case, to sink.
 *
 * Does nothing when status is NULL, *status already indicates a failure, or
 * localeID is NULL. Nothing is appended when the locale has no keywords or
 * the keyword is not present.
 *
 * Spaces around keyword names and values in the locale are tolerated.
 * *status is set to U_ILLEGAL_ARGUMENT_ERROR for an empty/malformed
 * keywordName or a malformed keyword list, and to U_INTERNAL_PROGRAM_ERROR
 * when a keyword name in the locale is too long for the internal buffer.
 * The value is appended one character at a time, so characters preceding a
 * malformed value character have already reached the sink when the error
 * is reported.
 */
U_CAPI void U_EXPORT2
ulocimp_getKeywordValue(const char* localeID,
                        const char* keywordName,
                        icu::ByteSink& sink,
                        UErrorCode* status)
{
    if (status == NULL || U_FAILURE(*status) || localeID == NULL) {
        return;
    }

    if (keywordName == NULL || keywordName[0] == 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* canonical (lowercase) form of the keyword being searched for */
    char wantedKey[ULOC_KEYWORD_BUFFER_LEN];
    locale_canonKeywordName(wantedKey, keywordName, status);
    if (U_FAILURE(*status)) {
        return;
    }

    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char* tmpLocaleID = localeID;
    if (_hasBCP47Extension(localeID)) {
        tmpLocaleID = _ConvertBCP47(localeID, tempBuffer,
                                    sizeof(tempBuffer), status, nullptr);
    }

    const char* cursor = locale_getKeywordsStart(tmpLocaleID);
    if (cursor == NULL) {
        /* no keywords, return at once */
        return;
    }

    /* walk the keyword list one "key=value" entry at a time */
    char localeKey[ULOC_KEYWORD_BUFFER_LEN];
    while (cursor != NULL) {
        cursor++; /* skip @ or ; */
        const char* equals = uprv_strchr(cursor, '=');
        if (equals == NULL) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
            return;
        }

        /* strip leading & trailing spaces (TC decided to tolerate these) */
        while (*cursor == ' ') {
            cursor++;
        }
        const char* keyEnd = equals;
        while (keyEnd > cursor && *(keyEnd - 1) == ' ') {
            keyEnd--;
        }
        /* now keyEnd points to first char after the key name */
        if (cursor == keyEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
            return;
        }

        /* copy & normalize the key name from the locale */
        int32_t keyLen = 0;
        for (; cursor < keyEnd; ++cursor) {
            if (!UPRV_ISALPHANUM(*cursor)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
                return;
            }
            if (keyLen >= ULOC_KEYWORD_BUFFER_LEN - 1) {
                /* keyword name too long for internal buffer */
                *status = U_INTERNAL_PROGRAM_ERROR;
                return;
            }
            localeKey[keyLen++] = uprv_tolower(*cursor);
        }
        localeKey[keyLen] = 0; /* terminate */

        /* the next entry, if any, starts at the ';' that follows this '=' */
        cursor = uprv_strchr(equals, ';');

        if (uprv_strcmp(wantedKey, localeKey) != 0) {
            continue;
        }

        /* current entry matches the keyword. */
        const char* value = equals + 1; /* skip '=' */
        /* First strip leading & trailing spaces (TC decided to tolerate these) */
        while (*value == ' ') {
            value++;
        }
        const char* valueEnd = (cursor != NULL) ? cursor : value + uprv_strlen(value);
        while (valueEnd > value && *(valueEnd - 1) == ' ') {
            valueEnd--;
        }
        if (value == valueEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
            return;
        }

        /* Now copy the value, but check well-formedness */
        for (; value < valueEnd; ++value) {
            if (!UPRV_ISALPHANUM(*value) && !UPRV_OK_VALUE_PUNCTUATION(*value)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                return;
            }
            /* Should we lowercase value to return here? Tests expect as-is. */
            sink.Append(value, 1);
        }
        return;
    }
}
863
864
/*
 * Adds, replaces or removes the keyword keywordName in the NUL-terminated
 * locale ID held in buffer. A NULL or empty keywordValue requests removal.
 *
 * The keyword section is rebuilt in a scratch string: existing entries are
 * copied with lowercased names and without surrounding spaces, and a new
 * entry is inserted before the first existing entry whose name compares
 * greater (or appended at the end). buffer is only modified once the full
 * result is known to fit including its terminating NUL.
 *
 * Returns:
 *   -1 if *status already indicates a failure;
 *   0 for illegal arguments or a malformed keyword list in buffer
 *     (*status U_ILLEGAL_ARGUMENT_ERROR or U_INTERNAL_PROGRAM_ERROR);
 *   the needed length with *status U_BUFFER_OVERFLOW_ERROR when the result
 *     does not fit (buffer is left unchanged);
 *   the original length when there was nothing to remove or the scratch
 *     string could not be built;
 *   otherwise the length of the updated locale ID.
 */
U_CAPI int32_t U_EXPORT2
uloc_setKeywordValue(const char* keywordName,
                     const char* keywordValue,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status)
{
    /* TODO: sorting. removal. */
    if (U_FAILURE(*status)) {
        return -1;
    }
    if (*status == U_STRING_NOT_TERMINATED_WARNING) {
        *status = U_ZERO_ERROR;
    }
    if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    const int32_t bufLen = (int32_t)uprv_strlen(buffer);
    if (bufferCapacity < bufLen) {
        /* The capacity is less than the length?! Is this NULL terminated? */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* canonical form of the keyword to set */
    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    const int32_t keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    /* validated copy of the value to set; stays empty when removing */
    char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
    int32_t keywordValueLen = 0;
    if (keywordValue != NULL) {
        for (const char* src = keywordValue; *src != 0; ++src) {
            if (!UPRV_ISALPHANUM(*src) && !UPRV_OK_VALUE_PUNCTUATION(*src)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                return 0;
            }
            if (keywordValueLen >= ULOC_KEYWORDS_CAPACITY) {
                /* keywordValue too long for internal buffer */
                *status = U_INTERNAL_PROGRAM_ERROR;
                return 0;
            }
            /* Should we force lowercase in value to set? */
            keywordValueBuffer[keywordValueLen++] = *src;
        }
    }
    keywordValueBuffer[keywordValueLen] = 0; /* terminate */

    char* startSearchHere = (char*)locale_getKeywordsStart(buffer);
    if (startSearchHere == NULL || startSearchHere[1] == 0) {
        /* shortcut: the locale has no keywords yet (no '@', or a bare trailing '@') */
        if (keywordValueLen == 0) { /* no keywords = nothing to remove */
            U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
            return bufLen;
        }

        int32_t needLen = bufLen + 1 + keywordNameLen + 1 + keywordValueLen;
        char* out;
        if (startSearchHere != NULL) { /* had a single @ */
            needLen--; /* already had the @ */
            out = startSearchHere; /* points at the @ */
        } else {
            out = buffer + bufLen;
        }
        if (needLen >= bufferCapacity) {
            *status = U_BUFFER_OVERFLOW_ERROR;
            return needLen; /* no change */
        }
        *out++ = '@';
        uprv_strcpy(out, keywordNameBuffer);
        out += keywordNameLen;
        *out++ = '=';
        uprv_strcpy(out, keywordValueBuffer);
        U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
        return needLen;
    } /* end shortcut - no @ */

    CharString updatedKeysAndValues;
    UBool handledInputKeyAndValue = FALSE;
    char keyValuePrefix = '@'; /* '@' before the first emitted pair, ';' afterwards */

    /* Emits the input "name=value" pair into the rebuilt keyword list. */
    auto emitInputPair = [&]() {
        updatedKeysAndValues.append(keyValuePrefix, *status);
        keyValuePrefix = ';'; /* for any subsequent key-value pair */
        updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
        updatedKeysAndValues.append('=', *status);
        updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
        handledInputKeyAndValue = TRUE;
    };

    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

    /* walk the existing entries; each iteration starts at a '@' or ';' */
    for (char* entry = startSearchHere; entry != NULL; ) {
        char* key = entry + 1; /* skip @ or ; */
        char* equals = uprv_strchr(key, '=');
        if (equals == NULL) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
            return 0;
        }

        /* strip leading & trailing spaces (TC decided to tolerate these) */
        while (*key == ' ') {
            key++;
        }
        const char* keyEnd = equals;
        while (keyEnd > key && *(keyEnd - 1) == ' ') {
            keyEnd--;
        }
        /* now keyEnd points to first char after the key name */
        if (key == keyEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
            return 0;
        }

        /* copy & normalize the key name from the locale */
        int32_t localeKeyLen = 0;
        for (; key < keyEnd; ++key) {
            if (!UPRV_ISALPHANUM(*key)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
                return 0;
            }
            if (localeKeyLen >= ULOC_KEYWORD_BUFFER_LEN - 1) {
                /* keyword name too long for internal buffer */
                *status = U_INTERNAL_PROGRAM_ERROR;
                return 0;
            }
            localeKeywordNameBuffer[localeKeyLen++] = uprv_tolower(*key);
        }
        localeKeywordNameBuffer[localeKeyLen] = 0; /* terminate */

        char* nextSeparator = uprv_strchr(equals, ';');

        /* value part: skip '=', then strip leading & trailing spaces */
        const char* value = equals + 1;
        while (*value == ' ') {
            value++;
        }
        const char* valueEnd = (nextSeparator != NULL) ? nextSeparator : value + uprv_strlen(value);
        while (valueEnd > value && *(valueEnd - 1) == ' ') {
            valueEnd--;
        }
        if (value == valueEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
            return 0;
        }

        const int32_t order = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
        if (order == 0) {
            /* Current entry matches the input keyword. Update the entry */
            if (keywordValueLen > 0) { /* updating a value */
                emitInputPair();
            } /* else removing this entry, don't emit anything */
            handledInputKeyAndValue = TRUE;
        } else {
            /* input keyword sorts earlier than current entry, add before current entry */
            if (order < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
                emitInputPair();
            }
            /* copy the current entry */
            updatedKeysAndValues.append(keyValuePrefix, *status);
            keyValuePrefix = ';'; /* for any subsequent key-value pair */
            updatedKeysAndValues.append(localeKeywordNameBuffer, localeKeyLen, *status);
            updatedKeysAndValues.append('=', *status);
            updatedKeysAndValues.append(value, static_cast<int32_t>(valueEnd - value), *status);
        }
        if (nextSeparator == NULL && keywordValueLen > 0 && !handledInputKeyAndValue) {
            /* append new entry at the end, it sorts later than existing entries */
            emitInputPair();
        }
        entry = nextSeparator;
    } /* end loop searching */

    /* Any error from updatedKeysAndValues.append above would be internal and not due to
     * problems with the passed-in locale. So if we did encounter problems with the
     * passed-in locale above, those errors took precedence and overrode any error
     * status from updatedKeysAndValues.append, and also caused a return of 0. If there
     * are errors here they are from updatedKeysAndValues.append; they do cause an
     * error return but the passed-in locale is unmodified and the original bufLen is
     * returned.
     */
    if (!handledInputKeyAndValue || U_FAILURE(*status)) {
        /* if input key/value specified removal of a keyword not present in locale, or
         * there was an error in CharString.append, leave original locale alone. */
        U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
        return bufLen;
    }

    // length of the part before '@'
    const int32_t prefixLen = (int32_t)(startSearchHere - buffer);
    // This API function does not behave like most others: it promises never to
    // set U_STRING_NOT_TERMINATED_WARNING. So when the rebuilt keyword list does
    // not fit together with its terminating NUL, report a buffer overflow and
    // leave the buffer contents untouched.
    const int32_t appendLength = updatedKeysAndValues.length();
    if (appendLength >= bufferCapacity - prefixLen) {
        *status = U_BUFFER_OVERFLOW_ERROR;
        return prefixLen + appendLength;
    }
    const int32_t written = updatedKeysAndValues.extract(
                         startSearchHere, bufferCapacity - prefixLen, *status);
    U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
    return prefixLen + written;
}
1087
1088
/* ### ID parsing implementation **************************************************/
1089
1090
0
/* Non-zero if 'a' is one of the special prefix letters 'x', 'X', 'i' or 'I'.
 * The argument is evaluated more than once, so it must not have side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* Returns TRUE if one of the special prefixes is here (s=string):
 * 'x-' or 'i-'. Relies on _isIDSeparator, which is defined elsewhere in this
 * file, to recognize the separator. */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant.
 * The argument is evaluated more than once, so it must not have side effects.
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1100
1101
/**
1102
 * Lookup 'key' in the array 'list'.  The array 'list' should contain
1103
 * a NULL entry, followed by more entries, and a second NULL entry.
1104
 *
1105
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1106
 * COUNTRIES_3.
1107
 */
1108
static int16_t _findIndex(const char* const* list, const char* key)
1109
0
{
1110
0
    const char* const* anchor = list;
1111
0
    int32_t pass = 0;
1112
1113
    /* Make two passes through two NULL-terminated arrays at 'list' */
1114
0
    while (pass++ < 2) {
1115
0
        while (*list) {
1116
0
            if (uprv_strcmp(key, *list) == 0) {
1117
0
                return (int16_t)(list - anchor);
1118
0
            }
1119
0
            list++;
1120
0
        }
1121
0
        ++list;     /* skip final NULL *CWB*/
1122
0
    }
1123
0
    return -1;
1124
0
}
1125
1126
/* Returns the REPLACEMENT_COUNTRIES entry for oldID when oldID is listed in
 * DEPRECATED_COUNTRIES; otherwise returns oldID itself. */
U_CFUNC const char*
uloc_getCurrentCountryID(const char* oldID){
    const int32_t index = _findIndex(DEPRECATED_COUNTRIES, oldID);
    return (index >= 0) ? REPLACEMENT_COUNTRIES[index] : oldID;
}
1134
/* Returns the REPLACEMENT_LANGUAGES entry for oldID when oldID is listed in
 * DEPRECATED_LANGUAGES; otherwise returns oldID itself. */
U_CFUNC const char*
uloc_getCurrentLanguageID(const char* oldID){
    const int32_t index = _findIndex(DEPRECATED_LANGUAGES, oldID);
    return (index >= 0) ? REPLACEMENT_LANGUAGES[index] : oldID;
}
1142
/*
1143
 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1144
 * avoid duplicating code to handle the earlier locale ID pieces
1145
 * in the functions for the later ones by
1146
 * setting the *pEnd pointer to where they stopped parsing
1147
 *
1148
 * TODO try to use this in Locale
1149
 */
1150
/*
 * Extracts the language subtag from the start of localeID and returns it in
 * lowercase.
 *
 * A locale ID that is exactly "root" (any case), or a leading "und" followed
 * by the end of the string, '-', '_' or '@', is skipped and contributes
 * nothing to the result. An "x-"/"i-" style prefix is kept, written with a
 * lowercase letter and '-'. A 3-character result found in LANGUAGES_3 is
 * replaced by the LANGUAGES entry at the same index.
 *
 * If pEnd is not NULL, *pEnd receives the position where parsing stopped.
 */
CharString U_EXPORT2
ulocimp_getLanguage(const char *localeID,
                    const char **pEnd,
                    UErrorCode &status) {
    CharString language;
    const char *cursor = localeID;

    if (uprv_stricmp(cursor, "root") == 0) {
        cursor += 4;
    } else if (uprv_strnicmp(cursor, "und", 3) == 0) {
        /* only look at cursor[3] once the first three characters matched */
        const char next = cursor[3];
        if (next == '\0' || next == '-' || next == '_' || next == '@') {
            cursor += 3;
        }
    }

    /* if it starts with i- or x- then copy that prefix */
    if (_isIDPrefix(cursor)) {
        language.append((char)uprv_tolower(*cursor), status);
        language.append('-', status);
        cursor += 2;
    }

    /* copy the language as far as possible */
    for (; !_isTerminator(*cursor) && !_isIDSeparator(*cursor); ++cursor) {
        language.append((char)uprv_tolower(*cursor), status);
    }

    if (language.length() == 3) {
        /* convert 3 character code to 2 character code if possible *CWB*/
        const int32_t offset = _findIndex(LANGUAGES_3, language.data());
        if (offset >= 0) {
            language.clear();
            language.append(LANGUAGES[offset], status);
        }
    }

    if (pEnd != NULL) {
        *pEnd = cursor;
    }

    return language;
}
1194
1195
/*
 * Extracts a script subtag from the start of localeID. The leading run of
 * ASCII letters counts as a script only if it is exactly 4 characters long;
 * it is then returned in title case (first letter upper, rest lower).
 * Otherwise the result is empty.
 *
 * If pEnd is not NULL, *pEnd is set to the end of the script when one was
 * found, and to localeID otherwise.
 */
CharString U_EXPORT2
ulocimp_getScript(const char *localeID,
                  const char **pEnd,
                  UErrorCode &status) {
    CharString script;

    if (pEnd != NULL) {
        *pEnd = localeID;
    }

    /* measure the second item as far as it consists of letters */
    int32_t idLen = 0;
    while (!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
            && uprv_isASCIILetter(localeID[idLen])) {
        idLen++;
    }

    /* If it's exactly 4 characters long, then it's a script and not a country. */
    if (idLen != 4) {
        return script;
    }

    if (pEnd != NULL) {
        *pEnd = localeID + idLen;
    }
    script.append((char)uprv_toupper(localeID[0]), status);
    for (int32_t i = 1; i < idLen; i++) {
        script.append((char)uprv_tolower(localeID[i]), status);
    }

    return script;
}
1228
1229
/*
 * Extracts the country subtag from the start of localeID and returns it in
 * uppercase. The subtag is accepted only if it is 2 or 3 characters long;
 * otherwise the result is empty and nothing is consumed. A 3-character code
 * found in COUNTRIES_3 is replaced by the COUNTRIES entry at the same index.
 *
 * If pEnd is not NULL, *pEnd receives the position after the accepted
 * country, or localeID when no country was accepted.
 */
CharString U_EXPORT2
ulocimp_getCountry(const char *localeID,
                   const char **pEnd,
                   UErrorCode &status) {
    CharString country;
    int32_t idLen = 0;

    /* copy the country as far as possible and count its length */
    for (; !_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen]); ++idLen) {
        country.append((char)uprv_toupper(localeID[idLen]), status);
    }

    const char *end = localeID;
    const bool plausibleLength = (idLen == 2 || idLen == 3);
    if (!plausibleLength) {
        /* the country should be either length 2 or 3 */
        country.clear();
    } else {
        if (idLen == 3) {
            /* convert 3 character code to 2 character code if possible *CWB*/
            const int32_t offset = _findIndex(COUNTRIES_3, country.data());
            if (offset >= 0) {
                country.clear();
                country.append(COUNTRIES[offset], status);
            }
        }
        end = localeID + idLen;
    }

    if (pEnd != NULL) {
        *pEnd = end;
    }

    return country;
}
1263
1264
/**
1265
 * @param needSeparator if true, then add leading '_' if any variants
1266
 * are added to 'variant'
1267
 */
1268
static void
1269
_getVariant(const char *localeID,
1270
            char prev,
1271
            ByteSink& sink,
1272
0
            UBool needSeparator) {
1273
0
    UBool hasVariant = FALSE;
1274
1275
    /* get one or more variant tags and separate them with '_' */
1276
0
    if(_isIDSeparator(prev)) {
1277
        /* get a variant string after a '-' or '_' */
1278
0
        while(!_isTerminator(*localeID)) {
1279
0
            if (needSeparator) {
1280
0
                sink.Append("_", 1);
1281
0
                needSeparator = FALSE;
1282
0
            }
1283
0
            char c = (char)uprv_toupper(*localeID);
1284
0
            if (c == '-') c = '_';
1285
0
            sink.Append(&c, 1);
1286
0
            hasVariant = TRUE;
1287
0
            localeID++;
1288
0
        }
1289
0
    }
1290
1291
    /* if there is no variant tag after a '-' or '_' then look for '@' */
1292
0
    if(!hasVariant) {
1293
0
        if(prev=='@') {
1294
            /* keep localeID */
1295
0
        } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1296
0
            ++localeID; /* point after the '@' */
1297
0
        } else {
1298
0
            return;
1299
0
        }
1300
0
        while(!_isTerminator(*localeID)) {
1301
0
            if (needSeparator) {
1302
0
                sink.Append("_", 1);
1303
0
                needSeparator = FALSE;
1304
0
            }
1305
0
            char c = (char)uprv_toupper(*localeID);
1306
0
            if (c == '-' || c == ',') c = '_';
1307
0
            sink.Append(&c, 1);
1308
0
            localeID++;
1309
0
        }
1310
0
    }
1311
0
}
1312
1313
/* Keyword enumeration */
1314
1315
/* Iteration state behind a keyword UEnumeration. */
typedef struct UKeywordsContext {
    /* start of the keyword list: NUL-terminated names stored back to back,
     * ended by an empty string; released with uprv_free() on close */
    char *keywords;
    /* the name the next call to uloc_kw_nextKeyword() will return */
    char *current;
} UKeywordsContext;
1319
1320
U_CDECL_BEGIN
1321
1322
static void U_CALLCONV
1323
0
uloc_kw_closeKeywords(UEnumeration *enumerator) {
1324
0
    uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1325
0
    uprv_free(enumerator->context);
1326
0
    uprv_free(enumerator);
1327
0
}
1328
1329
static int32_t U_CALLCONV
1330
0
uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1331
0
    char *kw = ((UKeywordsContext *)en->context)->keywords;
1332
0
    int32_t result = 0;
1333
0
    while(*kw) {
1334
0
        result++;
1335
0
        kw += uprv_strlen(kw)+1;
1336
0
    }
1337
0
    return result;
1338
0
}
1339
1340
static const char * U_CALLCONV
1341
uloc_kw_nextKeyword(UEnumeration* en,
1342
                    int32_t* resultLength,
1343
0
                    UErrorCode* /*status*/) {
1344
0
    const char* result = ((UKeywordsContext *)en->context)->current;
1345
0
    int32_t len = 0;
1346
0
    if(*result) {
1347
0
        len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1348
0
        ((UKeywordsContext *)en->context)->current += len+1;
1349
0
    } else {
1350
0
        result = NULL;
1351
0
    }
1352
0
    if (resultLength) {
1353
0
        *resultLength = len;
1354
0
    }
1355
0
    return result;
1356
0
}
1357
1358
static void U_CALLCONV
1359
uloc_kw_resetKeywords(UEnumeration* en,
1360
0
                      UErrorCode* /*status*/) {
1361
0
    ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1362
0
}
1363
1364
U_CDECL_END
1365
1366
1367
/*
 * Template UEnumeration for keyword lists.  uloc_openKeywordList copies it
 * byte-for-byte into each new enumeration and then fills in `context`.
 * NOTE(review): slot order follows the UEnumeration declaration, which is
 * not visible here -- confirm against uenumimp.h before reordering.
 */
static const UEnumeration gKeywordsEnum = {
    nullptr,
    nullptr,
    uloc_kw_closeKeywords,
    uloc_kw_countKeywords,
    uenum_unextDefault,
    uloc_kw_nextKeyword,
    uloc_kw_resetKeywords
};
1376
1377
/**
 * Creates an enumeration over a private copy of a keyword list.
 *
 * @param keywordList      keywordListSize bytes holding NUL-separated keywords
 * @param keywordListSize  number of bytes to copy; one extra NUL is appended
 * @param status           must not indicate a failure on entry; set to
 *                         U_MEMORY_ALLOCATION_ERROR if any allocation fails
 * @return the new enumeration, or nullptr on failure
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
{
    if (U_FAILURE(*status)) {
        return nullptr;
    }

    /* LocalMemory releases whatever was allocated on every early return. */
    LocalMemory<UKeywordsContext> context;
    LocalMemory<UEnumeration> enumeration;
    context.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
    enumeration.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
    if (context.isNull() || enumeration.isNull()) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }

    char *buffer = static_cast<char *>(uprv_malloc(keywordListSize+1));
    context->keywords = buffer;
    if (buffer == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    uprv_memcpy(buffer, keywordList, keywordListSize);
    buffer[keywordListSize] = 0;
    context->current = buffer;

    /* Start from the shared callback table, then attach this list's state. */
    uprv_memcpy(enumeration.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));
    enumeration->context = context.orphan();
    return enumeration.orphan();
}
1404
1405
/**
 * Opens an enumeration over the keywords of a locale ID.
 *
 * The language, optional script and optional country are parsed only to be
 * skipped; the keyword section is whatever locale_getKeywordsStart finds
 * after them.
 *
 * @param localeID  locale ID; NULL means the default locale
 * @param status    may be NULL, in which case nothing is done
 * @return a new enumeration, or NULL when status is NULL or failing, when
 *         parsing fails, or when the ID has no keyword section
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char* localeID,
                        UErrorCode* status)
{
    if (status == nullptr || U_FAILURE(*status)) {
        return nullptr;
    }

    char converted[ULOC_FULLNAME_CAPACITY];
    const char* cursor;
    if (_hasBCP47Extension(localeID)) {
        cursor = _ConvertBCP47(localeID, converted,
                               sizeof(converted), status, nullptr);
    } else {
        if (localeID == nullptr) {
            localeID = uloc_getDefault();
        }
        cursor = localeID;
    }

    /* Skip the language */
    ulocimp_getLanguage(cursor, &cursor, *status);
    if (U_FAILURE(*status)) {
        return nullptr;
    }

    if (_isIDSeparator(*cursor)) {
        /* Skip the script if available */
        const char *afterScript;
        ulocimp_getScript(cursor+1, &afterScript, *status);
        if (U_FAILURE(*status)) {
            return nullptr;
        }
        if (afterScript != cursor+1) {
            /* Found optional script */
            cursor = afterScript;
        }

        /* Skip the Country */
        if (_isIDSeparator(*cursor)) {
            ulocimp_getCountry(cursor+1, &cursor, *status);
            if (U_FAILURE(*status)) {
                return nullptr;
            }
        }
    }

    /* keywords are located after '@' */
    const char* keywordStart = locale_getKeywordsStart(cursor);
    if (keywordStart == nullptr) {
        return nullptr;
    }

    CharString keywords;
    CharStringByteSink sink(&keywords);
    ulocimp_getKeywords(keywordStart+1, '@', sink, FALSE, status);
    if (U_FAILURE(*status)) {
        return nullptr;
    }
    return uloc_openKeywordList(keywords.data(), keywords.length(), status);
}
1464
1465
1466
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/* True when at least one bit of `mask` is set in `options`.  Both arguments
   are parenthesized so that compound expressions such as `a | b` keep their
   meaning after expansion. */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The tag "i-default", deliberately stored WITHOUT a terminating NUL so that
   I_DEFAULT_LENGTH is exactly the number of characters; compare it with a
   length-bounded function only. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1474
1475
/**
1476
 * Canonicalize the given localeID, to level 1 or to level 2,
1477
 * depending on the options.  To specify level 1, pass in options=0.
1478
 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1479
 *
1480
 * This is the code underlying uloc_getName and uloc_canonicalize.
1481
 */
1482
static void
1483
_canonicalize(const char* localeID,
1484
              ByteSink& sink,
1485
              uint32_t options,
1486
0
              UErrorCode* err) {
1487
0
    if (U_FAILURE(*err)) {
1488
0
        return;
1489
0
    }
1490
1491
0
    int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
1492
0
    PreflightingLocaleIDBuffer tempBuffer;  // if localeID has a BCP47 extension, tmpLocaleID points to this
1493
0
    CharString localeIDWithHyphens;  // if localeID has a BPC47 extension and have _, tmpLocaleID points to this
1494
0
    const char* origLocaleID;
1495
0
    const char* tmpLocaleID;
1496
0
    const char* keywordAssign = NULL;
1497
0
    const char* separatorIndicator = NULL;
1498
1499
0
    if (_hasBCP47Extension(localeID)) {
1500
0
        const char* localeIDPtr = localeID;
1501
1502
        // convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string
1503
0
        if (uprv_strchr(localeID, '_') != nullptr && localeID[1] != '-' && localeID[1] != '_') {
1504
0
            localeIDWithHyphens.append(localeID, -1, *err);
1505
0
            if (U_SUCCESS(*err)) {
1506
0
                for (char* p = localeIDWithHyphens.data(); *p != '\0'; ++p) {
1507
0
                    if (*p == '_') {
1508
0
                        *p = '-';
1509
0
                    }
1510
0
                }
1511
0
                localeIDPtr = localeIDWithHyphens.data();
1512
0
            }
1513
0
        }
1514
1515
0
        do {
1516
            // After this call tmpLocaleID may point to localeIDPtr which may
1517
            // point to either localeID or localeIDWithHyphens.data().
1518
0
            tmpLocaleID = _ConvertBCP47(localeIDPtr, tempBuffer.getBuffer(),
1519
0
                                        tempBuffer.getCapacity(), err,
1520
0
                                        &(tempBuffer.requestedCapacity));
1521
0
        } while (tempBuffer.needToTryAgain(err));
1522
0
    } else {
1523
0
        if (localeID==NULL) {
1524
0
           localeID=uloc_getDefault();
1525
0
        }
1526
0
        tmpLocaleID=localeID;
1527
0
    }
1528
1529
0
    origLocaleID=tmpLocaleID;
1530
1531
    /* get all pieces, one after another, and separate with '_' */
1532
0
    CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
1533
1534
0
    if (tag.length() == I_DEFAULT_LENGTH &&
1535
0
            uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
1536
0
        tag.clear();
1537
0
        tag.append(uloc_getDefault(), *err);
1538
0
    } else if(_isIDSeparator(*tmpLocaleID)) {
1539
0
        const char *scriptID;
1540
1541
0
        ++fieldCount;
1542
0
        tag.append('_', *err);
1543
1544
0
        CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1545
0
        tag.append(script, *err);
1546
0
        scriptSize = script.length();
1547
0
        if(scriptSize > 0) {
1548
            /* Found optional script */
1549
0
            tmpLocaleID = scriptID;
1550
0
            ++fieldCount;
1551
0
            if (_isIDSeparator(*tmpLocaleID)) {
1552
                /* If there is something else, then we add the _ */
1553
0
                tag.append('_', *err);
1554
0
            }
1555
0
        }
1556
1557
0
        if (_isIDSeparator(*tmpLocaleID)) {
1558
0
            const char *cntryID;
1559
1560
0
            CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1561
0
            tag.append(country, *err);
1562
0
            if (!country.isEmpty()) {
1563
                /* Found optional country */
1564
0
                tmpLocaleID = cntryID;
1565
0
            }
1566
0
            if(_isIDSeparator(*tmpLocaleID)) {
1567
                /* If there is something else, then we add the _  if we found country before. */
1568
0
                if (!_isIDSeparator(*(tmpLocaleID+1))) {
1569
0
                    ++fieldCount;
1570
0
                    tag.append('_', *err);
1571
0
                }
1572
1573
0
                variantSize = -tag.length();
1574
0
                {
1575
0
                    CharStringByteSink s(&tag);
1576
0
                    _getVariant(tmpLocaleID+1, *tmpLocaleID, s, FALSE);
1577
0
                }
1578
0
                variantSize += tag.length();
1579
0
                if (variantSize > 0) {
1580
0
                    tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1581
0
                }
1582
0
            }
1583
0
        }
1584
0
    }
1585
1586
    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1587
0
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1588
0
        UBool done = FALSE;
1589
0
        do {
1590
0
            char c = *tmpLocaleID;
1591
0
            switch (c) {
1592
0
            case 0:
1593
0
            case '@':
1594
0
                done = TRUE;
1595
0
                break;
1596
0
            default:
1597
0
                tag.append(c, *err);
1598
0
                ++tmpLocaleID;
1599
0
                break;
1600
0
            }
1601
0
        } while (!done);
1602
0
    }
1603
1604
    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1605
       After this, tmpLocaleID either points to '@' or is NULL */
1606
0
    if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1607
0
        keywordAssign = uprv_strchr(tmpLocaleID, '=');
1608
0
        separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1609
0
    }
1610
1611
    /* Copy POSIX-style variant, if any [mr@FOO] */
1612
0
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1613
0
        tmpLocaleID != NULL && keywordAssign == NULL) {
1614
0
        for (;;) {
1615
0
            char c = *tmpLocaleID;
1616
0
            if (c == 0) {
1617
0
                break;
1618
0
            }
1619
0
            tag.append(c, *err);
1620
0
            ++tmpLocaleID;
1621
0
        }
1622
0
    }
1623
1624
0
    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1625
        /* Handle @FOO variant if @ is present and not followed by = */
1626
0
        if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1627
            /* Add missing '_' if needed */
1628
0
            if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1629
0
                do {
1630
0
                    tag.append('_', *err);
1631
0
                    ++fieldCount;
1632
0
                } while(fieldCount<2);
1633
0
            }
1634
1635
0
            int32_t posixVariantSize = -tag.length();
1636
0
            {
1637
0
                CharStringByteSink s(&tag);
1638
0
                _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
1639
0
            }
1640
0
            posixVariantSize += tag.length();
1641
0
            if (posixVariantSize > 0) {
1642
0
                variantSize += posixVariantSize;
1643
0
            }
1644
0
        }
1645
1646
        /* Look up the ID in the canonicalization map */
1647
0
        for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1648
0
            StringPiece id(CANONICALIZE_MAP[j].id);
1649
0
            if (tag == id) {
1650
0
                if (id.empty() && tmpLocaleID != NULL) {
1651
0
                    break; /* Don't remap "" if keywords present */
1652
0
                }
1653
0
                tag.clear();
1654
0
                tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
1655
0
                break;
1656
0
            }
1657
0
        }
1658
0
    }
1659
1660
0
    sink.Append(tag.data(), tag.length());
1661
1662
0
    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1663
0
        if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1664
0
            (!separatorIndicator || separatorIndicator > keywordAssign)) {
1665
0
            sink.Append("@", 1);
1666
0
            ++fieldCount;
1667
0
            ulocimp_getKeywords(tmpLocaleID+1, '@', sink, TRUE, err);
1668
0
        }
1669
0
    }
1670
0
}
1671
1672
/* ### ID parsing API **************************************************/
1673
1674
/**
 * Writes the parent of a locale ID: the text before its last '_'.
 * A leading "und" (matched case-insensitively as "und_") is dropped first,
 * so the parent of such an ID starts at the underscore that followed it.
 *
 * @param localeID        locale ID; NULL means the default locale
 * @param parent          destination buffer; may alias localeID
 * @param parentCapacity  capacity of `parent`
 * @param err             must not indicate a failure on entry; termination
 *                        status is reported through u_terminateChars
 * @return the full length of the parent, even when it did not fit
 */
U_CAPI int32_t  U_EXPORT2
uloc_getParent(const char*    localeID,
               char* parent,
               int32_t parentCapacity,
               UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    if (localeID == NULL) {
        localeID = uloc_getDefault();
    }

    int32_t parentLength = 0;
    const char *lastUnderscore = uprv_strrchr(localeID, '_');
    if (lastUnderscore != NULL) {
        parentLength = (int32_t)(lastUnderscore - localeID);
    }

    if (parentLength > 0) {
        if (uprv_strnicmp(localeID, "und_", 4) == 0) {
            localeID += 3;
            parentLength -= 3;
        }

        /* Copy only when there is something to copy.  This keeps a preflight
           call (parent == NULL, parentCapacity == 0) from handing a null
           pointer to the copy routine, and keeps a negative capacity from
           turning into a huge unsigned length.  memmove is used because
           `parent` may overlap `localeID`. */
        const int32_t copyLength = uprv_min(parentLength, parentCapacity);
        if (copyLength > 0 && parent != localeID) {
            uprv_memmove(parent, localeID, copyLength);
        }
    }

    return u_terminateChars(parent, parentCapacity, parentLength, err);
}
1708
1709
/**
 * Extracts the language subtag of a locale ID into `language`.
 *
 * @param localeID  locale ID; NULL means the default locale
 * @param err       may be NULL, in which case 0 is returned
 * @return the value returned by CharString::extract for the language
 */
U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char*    localeID,
         char* language,
         int32_t languageCapacity,
         UErrorCode* err)
{
    /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/

    if (err == nullptr || U_FAILURE(*err)) {
        return 0;
    }

    const char* id = (localeID != nullptr) ? localeID : uloc_getDefault();
    CharString parsed = ulocimp_getLanguage(id, NULL, *err);
    return parsed.extract(language, languageCapacity, *err);
}
1727
1728
/**
 * Extracts the script subtag of a locale ID into `script`.
 * When the language is not followed by an ID separator the result is the
 * empty string.
 *
 * @param localeID  locale ID; NULL means the default locale
 * @param err       may be NULL, in which case 0 is returned
 */
U_CAPI int32_t U_EXPORT2
uloc_getScript(const char*    localeID,
         char* script,
         int32_t scriptCapacity,
         UErrorCode* err)
{
    if (err == nullptr || U_FAILURE(*err)) {
        return 0;
    }

    const char* cursor = (localeID != nullptr) ? localeID : uloc_getDefault();

    /* skip the language */
    ulocimp_getLanguage(cursor, &cursor, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }

    if (!_isIDSeparator(*cursor)) {
        return u_terminateChars(script, scriptCapacity, 0, err);
    }
    CharString parsed = ulocimp_getScript(cursor+1, NULL, *err);
    return parsed.extract(script, scriptCapacity, *err);
}
1753
1754
/**
 * Extracts the country subtag of a locale ID into `country`.
 * The language and the optional script are parsed only to be skipped; if
 * no ID separator follows them the result is the empty string.
 *
 * @param localeID  locale ID; NULL means the default locale
 * @param err       may be NULL, in which case 0 is returned
 */
U_CAPI int32_t  U_EXPORT2
uloc_getCountry(const char* localeID,
            char* country,
            int32_t countryCapacity,
            UErrorCode* err)
{
    if (err == nullptr || U_FAILURE(*err)) {
        return 0;
    }

    const char* cursor = (localeID != nullptr) ? localeID : uloc_getDefault();

    /* Skip the language */
    ulocimp_getLanguage(cursor, &cursor, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }

    if (!_isIDSeparator(*cursor)) {
        return u_terminateChars(country, countryCapacity, 0, err);
    }

    /* Skip the script if available */
    const char *afterScript;
    ulocimp_getScript(cursor+1, &afterScript, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }
    if (afterScript != cursor+1) {
        /* Found optional script */
        cursor = afterScript;
    }

    if (!_isIDSeparator(*cursor)) {
        return u_terminateChars(country, countryCapacity, 0, err);
    }
    CharString parsed = ulocimp_getCountry(cursor+1, NULL, *err);
    return parsed.extract(country, countryCapacity, *err);
}
1791
1792
/**
 * Extracts the variant of a locale ID into `variant`.
 * The language, optional script and optional country are parsed only to be
 * skipped.  Whenever the expected ID separator is missing along the way the
 * result is the empty string.
 *
 * @param localeID  locale ID; NULL means the default locale
 * @param err       may be NULL, in which case 0 is returned; set to
 *                  U_BUFFER_OVERFLOW_ERROR when the variant does not fit
 * @return the number of bytes the variant sink reports as appended
 */
U_CAPI int32_t  U_EXPORT2
uloc_getVariant(const char* localeID,
                char* variant,
                int32_t variantCapacity,
                UErrorCode* err)
{
    if (err == nullptr || U_FAILURE(*err)) {
        return 0;
    }

    char converted[ULOC_FULLNAME_CAPACITY];
    const char* cursor;
    if (_hasBCP47Extension(localeID)) {
        cursor = _ConvertBCP47(localeID, converted, sizeof(converted), err, nullptr);
    } else {
        if (localeID == nullptr) {
            localeID = uloc_getDefault();
        }
        cursor = localeID;
    }

    /* Skip the language */
    ulocimp_getLanguage(cursor, &cursor, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }
    if (!_isIDSeparator(*cursor)) {
        return u_terminateChars(variant, variantCapacity, 0, err);
    }

    /* Skip the script if available */
    const char *afterScript;
    ulocimp_getScript(cursor+1, &afterScript, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }
    if (afterScript != cursor+1) {
        /* Found optional script */
        cursor = afterScript;
    }
    if (!_isIDSeparator(*cursor)) {
        return u_terminateChars(variant, variantCapacity, 0, err);
    }

    /* Skip the Country */
    const char *afterCountry;
    ulocimp_getCountry(cursor+1, &afterCountry, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }
    if (afterCountry != cursor+1) {
        /* Found optional country */
        cursor = afterCountry;
    }
    if (!_isIDSeparator(*cursor)) {
        return u_terminateChars(variant, variantCapacity, 0, err);
    }

    /* If there was no country ID, skip a possible extra IDSeparator */
    if (cursor != afterCountry && _isIDSeparator(cursor[1])) {
        cursor++;
    }

    CheckedArrayByteSink sink(variant, variantCapacity);
    _getVariant(cursor+1, *cursor, sink, FALSE);

    const int32_t length = sink.NumberOfBytesAppended();

    if (U_FAILURE(*err)) {
        return length;
    }
    if (sink.Overflowed()) {
        *err = U_BUFFER_OVERFLOW_ERROR;
        return length;
    }
    return u_terminateChars(variant, variantCapacity, length, err);
}
1868
1869
/**
 * Writes the result of ulocimp_getName for `localeID` into `name`.
 *
 * @param err  must not indicate a failure on entry; set to
 *             U_BUFFER_OVERFLOW_ERROR when the result does not fit
 * @return the number of bytes the sink reports as appended (0 if *err was
 *         already failing on entry)
 */
U_CAPI int32_t  U_EXPORT2
uloc_getName(const char* localeID,
             char* name,
             int32_t nameCapacity,
             UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    CheckedArrayByteSink sink(name, nameCapacity);
    ulocimp_getName(localeID, sink, err);

    const int32_t length = sink.NumberOfBytesAppended();

    if (U_SUCCESS(*err)) {
        if (sink.Overflowed()) {
            *err = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(name, nameCapacity, length, err);
        }
    }
    return length;
}
1896
1897
/**
 * ByteSink flavour of uloc_getName: runs _canonicalize with no option bits
 * set and appends the result to `sink`.
 */
U_CAPI void U_EXPORT2
ulocimp_getName(const char* localeID,
                ByteSink& sink,
                UErrorCode* err)
{
    const uint32_t noOptions = 0;
    _canonicalize(localeID, sink, noOptions, err);
}
1904
1905
/**
 * Writes the result of ulocimp_getBaseName for `localeID` into `name`.
 *
 * @param err  must not indicate a failure on entry; set to
 *             U_BUFFER_OVERFLOW_ERROR when the result does not fit
 * @return the number of bytes the sink reports as appended (0 if *err was
 *         already failing on entry)
 */
U_CAPI int32_t  U_EXPORT2
uloc_getBaseName(const char* localeID,
                 char* name,
                 int32_t nameCapacity,
                 UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    CheckedArrayByteSink sink(name, nameCapacity);
    ulocimp_getBaseName(localeID, sink, err);

    const int32_t length = sink.NumberOfBytesAppended();

    if (U_SUCCESS(*err)) {
        if (sink.Overflowed()) {
            *err = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(name, nameCapacity, length, err);
        }
    }
    return length;
}
1932
1933
/**
 * ByteSink flavour of uloc_getBaseName: runs _canonicalize with
 * _ULOC_STRIP_KEYWORDS so that no keyword list is appended to `sink`.
 */
U_CAPI void U_EXPORT2
ulocimp_getBaseName(const char* localeID,
                    ByteSink& sink,
                    UErrorCode* err)
{
    const uint32_t options = _ULOC_STRIP_KEYWORDS;
    _canonicalize(localeID, sink, options, err);
}
1940
1941
/**
 * Writes the result of ulocimp_canonicalize for `localeID` into `name`.
 *
 * @param err  must not indicate a failure on entry; set to
 *             U_BUFFER_OVERFLOW_ERROR when the result does not fit
 * @return the number of bytes the sink reports as appended (0 if *err was
 *         already failing on entry)
 */
U_CAPI int32_t  U_EXPORT2
uloc_canonicalize(const char* localeID,
                  char* name,
                  int32_t nameCapacity,
                  UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    CheckedArrayByteSink sink(name, nameCapacity);
    ulocimp_canonicalize(localeID, sink, err);

    const int32_t length = sink.NumberOfBytesAppended();

    if (U_SUCCESS(*err)) {
        if (sink.Overflowed()) {
            *err = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(name, nameCapacity, length, err);
        }
    }
    return length;
}
1968
1969
/**
 * ByteSink flavour of uloc_canonicalize: runs _canonicalize with
 * _ULOC_CANONICALIZE (level 2) and appends the result to `sink`.
 */
U_CAPI void U_EXPORT2
ulocimp_canonicalize(const char* localeID,
                     ByteSink& sink,
                     UErrorCode* err)
{
    const uint32_t options = _ULOC_CANONICALIZE;
    _canonicalize(localeID, sink, options, err);
}
1976
1977
/**
 * Returns the LANGUAGES_3 entry that corresponds to the language of
 * `localeID`, located by looking the language up in LANGUAGES.
 *
 * @param localeID  locale ID; NULL means the default locale
 * @return the matching entry, or "" when the language cannot be extracted
 *         into a terminated buffer or is not listed in LANGUAGES
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Language(const char* localeID)
{
    char lang[ULOC_LANG_CAPACITY];
    UErrorCode err = U_ZERO_ERROR;

    if (localeID == NULL)
    {
        localeID = uloc_getDefault();
    }
    uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
    /* U_STRING_NOT_TERMINATED_WARNING is not a failure code, but it means
       `lang` has no terminating NUL and must not be used as a C string
       (uloc_getLCID applies the same check). */
    if (U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
        return "";
    }
    const int16_t offset = _findIndex(LANGUAGES, lang);
    if (offset < 0) {
        return "";
    }
    return LANGUAGES_3[offset];
}
1996
1997
/**
 * Returns the COUNTRIES_3 entry that corresponds to the country of
 * `localeID`, located by looking the country up in COUNTRIES.
 *
 * @param localeID  locale ID; NULL means the default locale
 * @return the matching entry, or "" when the country cannot be extracted
 *         into a terminated buffer or is not listed in COUNTRIES
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Country(const char* localeID)
{
    char cntry[ULOC_LANG_CAPACITY];
    UErrorCode err = U_ZERO_ERROR;

    if (localeID == NULL)
    {
        localeID = uloc_getDefault();
    }
    uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
    /* U_STRING_NOT_TERMINATED_WARNING is not a failure code, but it means
       `cntry` has no terminating NUL and must not be used as a C string
       (uloc_getLCID applies the same check). */
    if (U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
        return "";
    }
    const int16_t offset = _findIndex(COUNTRIES, cntry);
    if (offset < 0) {
        return "";
    }

    return COUNTRIES_3[offset];
}
2017
2018
/**
 * Maps a locale ID to an LCID.
 *
 * The platform lookup (uprv_convertToLCIDPlatform) is tried first and its
 * non-zero answer wins.  Otherwise uprv_convertToLCID is used; when the ID
 * has an '@' section, only a non-empty "collation" keyword is carried over
 * onto the base name and every other keyword is ignored.
 *
 * @param localeID  locale ID; NULL or shorter than two characters yields 0
 * @return the LCID, or 0 when the ID is incomplete or a lookup step fails
 */
U_CAPI uint32_t  U_EXPORT2
uloc_getLCID(const char* localeID)
{
    /* Check for incomplete id. */
    if (localeID == nullptr || uprv_strlen(localeID) < 2) {
        return 0;
    }

    UErrorCode status = U_ZERO_ERROR;

    // First, attempt Windows platform lookup if available, but fall
    // through to catch any special cases (ICU vs Windows name differences).
    const uint32_t platformLcid = uprv_convertToLCIDPlatform(localeID, &status);
    if (U_FAILURE(status)) {
        return 0;
    }
    if (platformLcid > 0) {
        // Windows found an LCID, return that
        return platformLcid;
    }

    char langID[ULOC_FULLNAME_CAPACITY];
    uloc_getLanguage(localeID, langID, sizeof(langID), &status);
    if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
        return 0;
    }

    if (uprv_strchr(localeID, '@') != nullptr) {
        // uprv_convertToLCID does not support keywords other than collation.
        // Remove all keywords except collation.
        char collationOnlyID[ULOC_FULLNAME_CAPACITY];

        CharString collVal;
        {
            CharStringByteSink sink(&collVal);
            ulocimp_getKeywordValue(localeID, "collation", sink, &status);
        }

        if (U_SUCCESS(status) && !collVal.isEmpty()) {
            int32_t len = uloc_getBaseName(localeID, collationOnlyID,
                UPRV_LENGTHOF(collationOnlyID) - 1, &status);

            if (U_SUCCESS(status) && len > 0) {
                collationOnlyID[len] = 0;

                len = uloc_setKeywordValue("collation", collVal.data(), collationOnlyID,
                    UPRV_LENGTHOF(collationOnlyID) - len - 1, &status);

                if (U_SUCCESS(status) && len > 0) {
                    collationOnlyID[len] = 0;
                    return uprv_convertToLCID(langID, collationOnlyID, &status);
                }
            }
        }

        // fall through - all keywords are simply ignored
        status = U_ZERO_ERROR;
    }

    return uprv_convertToLCID(langID, localeID, &status);
}
2081
2082
/**
 * Converts an LCID to a locale ID by delegating to uprv_convertToPosix;
 * all arguments and the return value are passed through unchanged.
 */
U_CAPI int32_t U_EXPORT2
uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
                UErrorCode *status)
{
    const int32_t length = uprv_convertToPosix(hostid, locale, localeCapacity, status);
    return length;
}
2088
2089
/* ### Default locale **************************************************/
2090
2091
/**
 * Returns the default locale ID as reported by locale_get_default().
 */
U_CAPI const char*  U_EXPORT2
uloc_getDefault()
{
    const char* defaultID = locale_get_default();
    return defaultID;
}
2096
2097
/**
 * Sets the default locale by forwarding to locale_set_default().
 * Nothing happens when *err already indicates a failure; apart from that
 * entry check the error code is not used by this function.
 */
U_CAPI void  U_EXPORT2
uloc_setDefault(const char*   newDefaultLocale,
             UErrorCode* err)
{
    if (U_SUCCESS(*err)) {
        /* propagate change to C++ */
        locale_set_default(newDefaultLocale);
    }
}
2108
2109
/**
2110
 * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
2111
 * to an array of pointers to arrays of char.  All of these pointers are owned
2112
 * by ICU-- do not delete them, and do not write through them.  The array is
2113
 * terminated with a null pointer.
2114
 */
2115
U_CAPI const char* const*  U_EXPORT2
2116
uloc_getISOLanguages()
2117
0
{
2118
0
    return LANGUAGES;
2119
0
}
2120
2121
/**
2122
 * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2123
 * pointer to an array of pointers to arrays of char.  All of these pointers are
2124
 * owned by ICU-- do not delete them, and do not write through them.  The array is
2125
 * terminated with a null pointer.
2126
 */
2127
U_CAPI const char* const*  U_EXPORT2
2128
uloc_getISOCountries()
2129
0
{
2130
0
    return COUNTRIES;
2131
0
}
2132
2133
/**
 * Maps a keyword to its BCP 47 key.
 *
 * @return the mapping from ulocimp_toBcpKey when there is one; otherwise
 *         `keyword` itself if ultag_isUnicodeLocaleKey accepts it, and
 *         NULL if it does not
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char* keyword)
{
    const char* bcpKey = ulocimp_toBcpKey(keyword);
    if (bcpKey != nullptr) {
        return bcpKey;
    }
    // unknown keyword, but syntax may be fine..
    return ultag_isUnicodeLocaleKey(keyword, -1) ? keyword : nullptr;
}
2143
2144
/**
 * Maps a keyword value to its BCP 47 type.
 *
 * @return the mapping from ulocimp_toBcpType when there is one; otherwise
 *         `value` itself if ultag_isUnicodeLocaleType accepts it, and
 *         NULL if it does not
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char* keyword, const char* value)
{
    const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
    if (bcpType != nullptr) {
        return bcpType;
    }
    // unknown type, but syntax may be fine..
    return ultag_isUnicodeLocaleType(value, -1) ? value : nullptr;
}
2154
2155
/**
 * Returns TRUE when every character of `legacyKey` satisfies
 * UPRV_ISALPHANUM; an empty string therefore also yields TRUE.
 */
static UBool
isWellFormedLegacyKey(const char* legacyKey)
{
    for (const char* p = legacyKey; *p != 0; ++p) {
        if (!UPRV_ISALPHANUM(*p)) {
            return FALSE;
        }
    }
    return TRUE;
}
2167
2168
/**
 * Returns TRUE when `legacyType` consists of one or more non-empty runs of
 * UPRV_ISALPHANUM characters joined by single '_', '/' or '-' characters.
 * A leading, trailing or doubled separator, any other character, and the
 * empty string all yield FALSE.
 */
static UBool
isWellFormedLegacyType(const char* legacyType)
{
    int32_t runLength = 0;  /* alphanumerics seen since the last separator */
    for (const char* p = legacyType; *p != 0; ++p) {
        if (*p == '_' || *p == '/' || *p == '-') {
            if (runLength == 0) {
                return FALSE;
            }
            runLength = 0;
        } else if (UPRV_ISALPHANUM(*p)) {
            runLength++;
        } else {
            return FALSE;
        }
    }
    return (runLength != 0);
}
2188
2189
/**
 * Maps a keyword to its legacy key.
 *
 * @return the mapping from ulocimp_toLegacyKey when there is one; otherwise
 *         `keyword` itself if it is well-formed in the legacy syntax, and
 *         NULL if it is not
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword)
{
    const char* legacyKey = ulocimp_toLegacyKey(keyword);
    if (legacyKey != nullptr) {
        return legacyKey;
    }
    // Checks if the specified locale key is well-formed with the legacy locale syntax.
    //
    // Note:
    //  LDML/CLDR provides some definition of keyword syntax in
    //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
    //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
    //  Keys can only consist of [0-9a-zA-Z].
    return isWellFormedLegacyKey(keyword) ? keyword : nullptr;
}
2207
2208
/**
 * Maps a keyword value to its legacy type.
 *
 * @return the mapping from ulocimp_toLegacyType when there is one;
 *         otherwise `value` itself if it is well-formed in the legacy
 *         syntax, and NULL if it is not
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char* keyword, const char* value)
{
    const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
    if (legacyType != nullptr) {
        return legacyType;
    }
    // Checks if the specified locale type is well-formed with the legacy locale syntax.
    //
    // Note:
    //  LDML/CLDR provides some definition of keyword syntax in
    //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
    //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
    //  Values (types) consist of [0-9a-zA-Z]; for legacy values the checker
    //  also accepts '/', '_' and '-' between alphanumeric runs
    //  (e.g. "Asia/Tel_Aviv").
    //  NOTE(review): '+' (as in "Etc/GMT+1") is rejected by
    //  isWellFormedLegacyType, so such a value is returned only when
    //  ulocimp_toLegacyType maps it -- confirm this is intended.
    return isWellFormedLegacyType(value) ? value : nullptr;
}
2227
2228
/*eof*/