Coverage Report

Created: 2022-11-20 06:13

/src/icu/icu4c/source/common/uloc.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (C) 1997-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*
9
* File ULOC.CPP
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   04/01/97    aliu        Creation.
15
*   08/21/98    stephen     JDK 1.2 sync
16
*   12/08/98    rtg         New Locale implementation and C API
17
*   03/15/99    damiba      overhaul.
18
*   04/06/99    stephen     changed setDefault() to realloc and copy
19
*   06/14/99    stephen     Changed calls to ures_open for new params
20
*   07/21/99    stephen     Modified setDefault() to propagate to C++
21
*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
22
*                           brought canonicalization code into line with spec
23
*****************************************************************************/
24
25
/*
26
   POSIX's locale format, from putil.c: [no spaces]
27
28
     ll [ _CC ] [ . MM ] [ @ VV]
29
30
     l = lang, C = ctry, M = charmap, V = variant
31
*/
32
33
#include "unicode/bytestream.h"
34
#include "unicode/errorcode.h"
35
#include "unicode/stringpiece.h"
36
#include "unicode/utypes.h"
37
#include "unicode/ustring.h"
38
#include "unicode/uloc.h"
39
40
#include "bytesinkutil.h"
41
#include "putilimp.h"
42
#include "ustr_imp.h"
43
#include "ulocimp.h"
44
#include "umutex.h"
45
#include "cstring.h"
46
#include "cmemory.h"
47
#include "locmap.h"
48
#include "uarrsort.h"
49
#include "uenumimp.h"
50
#include "uassert.h"
51
#include "charstr.h"
52
53
U_NAMESPACE_USE
54
55
/* ### Declarations **************************************************/
56
57
/* Locale stuff from locid.cpp */
58
U_CFUNC void locale_set_default(const char *id);
59
U_CFUNC const char *locale_get_default(void);
60
61
/* ### Data tables **************************************************/
62
63
/**
64
 * Table of language codes, both 2- and 3-letter, with preference
65
 * given to 2-letter codes where possible.  Includes 3-letter codes
66
 * that lack a 2-letter equivalent.
67
 *
68
 * This list must be in sorted order.  This list is returned directly
69
 * to the user by some API.
70
 *
71
 * This list must be kept in sync with LANGUAGES_3, with corresponding
72
 * entries matched.
73
 *
74
 * This table should be terminated with a NULL entry, followed by a
75
 * second list, and another NULL entry.  The first list is visible to
76
 * user code when this array is returned by API.  The second list
77
 * contains codes we support, but do not expose through user API.
78
 *
79
 * Notes
80
 *
81
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
82
 * include the revisions up to 2001/7/27 *CWB*
83
 *
84
 * The 3 character codes are the terminology codes like RFC 3066.  This
85
 * is compatible with prior ICU codes
86
 *
87
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are kept in the
88
 * table, now in the second list at the end, because their 3-character
89
 * codes duplicate entries in the first list.  This avoids bad searches
90
 * going from 3 to 2 character codes.
91
 *
92
 * The range qaa-qtz is reserved for local use
93
 */
94
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
95
/* ISO639 table version is 20150505 */
96
/* Subsequent hand addition of selected languages */
97
/*
 * Layout: first list, NULL, second list, NULL.  Entry i here must name the
 * same language as LANGUAGES_3[i], so any insertion or removal has to be
 * mirrored at the same position in LANGUAGES_3.
 */
static const char * const LANGUAGES[] = {
98
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
99
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
100
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
101
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
102
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
103
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
104
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
105
    "bgc", "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
106
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
107
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
108
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
109
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
110
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
111
    "cs",  "csb", "cu",  "cv",  "cy",
112
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
113
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
114
    "dyo", "dyu", "dz",  "dzg",
115
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
116
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
117
    "ext",
118
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
119
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
120
    "frs", "fur", "fy",
121
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
122
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
123
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
124
    "gur", "guz", "gv",  "gwi",
125
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
126
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
127
    "hup", "hy",  "hz",
128
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
129
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
130
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
131
    "jv",
132
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
133
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
134
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
135
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
136
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
137
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
138
    "kv",  "kw",  "ky",
139
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
140
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
141
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
142
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
143
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
144
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
145
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
146
    "ml",  "mn",  "mnc", "mni",
147
    "moh", "mos", "mr",  "mrj",
148
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
149
    "my",  "mye", "myv", "mzn",
150
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
151
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
152
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
153
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
154
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
155
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
156
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
157
    "pon", "prg", "pro", "ps",  "pt",
158
    "qu",  "quc", "qug",
159
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
160
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
161
    "rw",  "rwk",
162
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
163
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
164
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
165
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
166
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
167
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
168
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
169
    "sv",  "sw",  "swb", "syc", "syr", "szl",
170
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
171
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr",
172
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
173
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
174
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
175
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
176
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
177
    "vot", "vro", "vun",
178
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
179
    "xal", "xh",  "xmf", "xog",
180
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
181
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
182
    "zun", "zxx", "zza",
183
/* This NULL ends the first list, the part that is visible to user code. */
NULL,
184
    "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  /* obsolete language codes */
185
/* This NULL ends the second list, which is supported but not exposed. */
NULL
186
};
187
188
/*
 * Deprecated 2-letter language codes, followed by two NULL entries.
 * NOTE(review): presumably index-parallel with REPLACEMENT_LANGUAGES
 * (entry i there replaces entry i here) -- confirm against the lookup code.
 */
static const char* const DEPRECATED_LANGUAGES[] = {
    "in",
    "iw",
    "ji",
    "jw",
    "mo",
    NULL,
    NULL
};
191
/*
 * Current 2-letter language codes, followed by two NULL entries.
 * NOTE(review): presumably index-parallel with DEPRECATED_LANGUAGES
 * (entry i here replaces entry i there) -- confirm against the lookup code.
 */
static const char* const REPLACEMENT_LANGUAGES[] = {
    "id",
    "he",
    "yi",
    "jv",
    "ro",
    NULL,
    NULL
};
194
195
/**
196
 * Table of 3-letter language codes.
197
 *
198
 * This is a lookup table used to convert 3-letter language codes to
199
 * their 2-letter equivalent, where possible.  It must be kept in sync
200
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
201
 * same language as LANGUAGES_3[i].  The commented-out lines are
202
 * copied from LANGUAGES to make eyeballing this baby easier.
203
 *
204
 * Where a 3-letter language code has no 2-letter equivalent, the
205
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
206
 *
207
 * This table should be terminated with a NULL entry, followed by a
208
 * second list, and another NULL entry.  The two lists correspond to
209
 * the two lists in LANGUAGES.
210
 */
211
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
212
/* ISO639 table version is 20150505 */
213
/* Subsequent hand addition of selected languages */
214
/*
 * Layout: first list, NULL, second list, NULL.  Entry i here must name the
 * same language as LANGUAGES[i], so any insertion or removal has to be
 * mirrored at the same position in LANGUAGES.
 */
static const char * const LANGUAGES_3[] = {
215
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
216
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
217
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
218
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
219
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
220
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
221
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
222
    "bgc", "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
223
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
224
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
225
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
226
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
227
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
228
    "ces", "csb", "chu", "chv", "cym",
229
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
230
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
231
    "dyo", "dyu", "dzo", "dzg",
232
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
233
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
234
    "ext",
235
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
236
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
237
    "frs", "fur", "fry",
238
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
239
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
240
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
241
    "gur", "guz", "glv", "gwi",
242
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
243
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
244
    "hup", "hye", "her",
245
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
246
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
247
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
248
    "jav",
249
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
250
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
251
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
252
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
253
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
254
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
255
    "kom", "cor", "kir",
256
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
257
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
258
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
259
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
260
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
261
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
262
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
263
    "mal", "mon", "mnc", "mni",
264
    "moh", "mos", "mar", "mrj",
265
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
266
    "mya", "mye", "myv", "mzn",
267
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
268
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
269
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
270
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
271
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
272
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
273
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
274
    "pon", "prg", "pro", "pus", "por",
275
    "que", "quc", "qug",
276
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
277
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
278
    "kin", "rwk",
279
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
280
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
281
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
282
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
283
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
284
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
285
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
286
    "swe", "swa", "swb", "syc", "syr", "szl",
287
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
288
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",
289
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
290
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
291
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
292
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
293
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
294
    "vot", "vro", "vun",
295
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
296
    "xal", "xho", "xmf", "xog",
297
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
298
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
299
    "zun", "zxx", "zza",
300
/* This NULL ends the first list; the second list below pairs with the second list in LANGUAGES. */
NULL,
301
/*  "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  */
302
    "ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl",
303
/* This NULL ends the second list. */
NULL
304
};
305
306
/**
307
 * Table of 2-letter country codes.
308
 *
309
 * This list must be in sorted order.  This list is returned directly
310
 * to the user by some API.
311
 *
312
 * This list must be kept in sync with COUNTRIES_3, with corresponding
313
 * entries matched.
314
 *
315
 * This table should be terminated with a NULL entry, followed by a
316
 * second list, and another NULL entry.  The first list is visible to
317
 * user code when this array is returned by API.  The second list
318
 * contains codes we support, but do not expose through user API.
319
 *
320
 * Notes:
321
 *
322
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
323
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
324
 * new codes keeping the old ones for compatibility updated to include
325
 * 1999/12/03 revisions *CWB*
326
 *
327
 * RO(ROM) is now RO(ROU) according to
328
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
329
 */
330
/*
 * Layout: first list, NULL, second list, NULL.  Entry i here must name the
 * same country as COUNTRIES_3[i], so any insertion or removal has to be
 * mirrored at the same position in COUNTRIES_3.
 */
static const char * const COUNTRIES[] = {
331
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
332
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
333
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
334
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
335
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
336
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
337
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",
338
    "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",
339
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
340
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
341
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
342
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
343
    "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
344
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
345
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
346
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
347
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
348
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
349
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
350
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
351
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
352
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
353
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
354
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
355
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
356
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
357
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
358
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
359
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
360
    "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
361
/* This NULL ends the first list, the part that is visible to user code. */
NULL,
362
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
363
/* This NULL ends the second list, which is supported but not exposed. */
NULL
364
};
365
366
/*
 * Deprecated 2-letter country codes, followed by two NULL entries.
 * The order matches the commented-out key row inside REPLACEMENT_COUNTRIES,
 * so the two tables have to be edited together.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD",
    "DY", "FX", "HV", "NH",
    "RH", "SU", "TP", "UK",
    "VD", "YD", "YU", "ZR",
    NULL,
    NULL
};
369
/*
 * Replacement country codes, followed by two NULL entries.  Each entry is
 * annotated with the deprecated code listed at the same position in
 * DEPRECATED_COUNTRIES.
 */
static const char* const REPLACEMENT_COUNTRIES[] = {
    "CW",  /* AN */
    "MM",  /* BU */
    "RS",  /* CS */
    "DE",  /* DD */
    "BJ",  /* DY */
    "FR",  /* FX */
    "BF",  /* HV */
    "VU",  /* NH */
    "ZW",  /* RH */
    "RU",  /* SU */
    "TL",  /* TP */
    "GB",  /* UK */
    "VN",  /* VD */
    "YE",  /* YD */
    "RS",  /* YU */
    "CD",  /* ZR */
    NULL,
    NULL
};
373
374
/**
375
 * Table of 3-letter country codes.
376
 *
377
 * This is a lookup table used to convert 3-letter country codes to
378
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
379
 * For all valid i, COUNTRIES[i] must refer to the same country as
380
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
381
 * to make eyeballing this baby easier.
382
 *
383
 * This table should be terminated with a NULL entry, followed by a
384
 * second list, and another NULL entry.  The two lists correspond to
385
 * the two lists in COUNTRIES.
386
 */
387
/*
 * Layout: first list, NULL, second list, NULL.  Each row of 3-letter codes
 * sits directly under a commented-out copy of the matching COUNTRIES row;
 * entry i here must name the same country as COUNTRIES[i].
 */
static const char * const COUNTRIES_3[] = {
388
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
389
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
390
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
391
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
392
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
393
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
394
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
395
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
396
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
397
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
398
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
399
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
400
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",     */
401
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
402
/*  "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",     */
403
    "DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI",
404
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
405
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
406
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
407
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
408
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
409
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
410
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
411
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
412
/*  "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
413
    "XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
414
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
415
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
416
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
417
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
418
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
419
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
420
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
421
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
422
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
423
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
424
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
425
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
426
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
427
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
428
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
429
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
430
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
431
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
432
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
433
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
434
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
435
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
436
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
437
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
438
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
439
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
440
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
441
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
442
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
443
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
444
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
445
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
446
/*  "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
447
    "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
448
/* This NULL ends the first list; the second list below pairs with the second list in COUNTRIES. */
NULL,
449
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
450
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
451
/* This NULL ends the second list. */
NULL
452
};
453
454
/* One row of a canonicalization table: an input locale ID and its canonical form. */
struct CanonicalizationMap {
    /* input ID */
    const char *id;
    /* canonicalized output ID */
    const char *canonicalID;
};
typedef struct CanonicalizationMap CanonicalizationMap;
458
459
/**
460
 * A map to canonicalize locale IDs.  This handles a variety of
461
 * different semantic kinds of transformations.
462
 */
463
static const CanonicalizationMap CANONICALIZE_MAP[] = {
464
    { "art__LOJBAN",    "jbo" }, /* registered name */
465
    { "hy__AREVELA",    "hy" }, /* Registered IANA variant */
466
    { "hy__AREVMDA",    "hyw" }, /* Registered IANA variant */
467
    { "zh__GUOYU",      "zh" }, /* registered name */
468
    { "zh__HAKKA",      "hak" }, /* registered name */
469
    { "zh__XIANG",      "hsn" }, /* registered name */
470
    // subtags with 3 chars won't be treated as variants.
471
    { "zh_GAN",         "gan" }, /* registered name */
472
    { "zh_MIN_NAN",     "nan" }, /* registered name */
473
    { "zh_WUU",         "wuu" }, /* registered name */
474
    { "zh_YUE",         "yue" }, /* registered name */
475
};
476
477
/* ### BCP47 Conversion *******************************************/
478
/*
 * Tests whether a locale ID should be treated as a BCP47 tag with an
 * extension: true when id is non-NULL, contains no '@', and
 * getShortestSubtagLength(id) reports a shortest subtag of length 1.
 *
 * Every check is applied to the macro argument itself.  (The shortest-subtag
 * check used to read a variable named localeID from the calling scope, so it
 * only worked when the caller's variable happened to have that name.)
 * The argument is evaluated more than once; pass an expression without side
 * effects.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
480
/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
481
static const char* _ConvertBCP47(
482
        const char* id, char* buffer, int32_t length,
483
3.37k
        UErrorCode* err, int32_t* pLocaleIdSize) {
484
3.37k
    const char* finalID;
485
3.37k
    int32_t localeIDSize = uloc_forLanguageTag(id, buffer, length, NULL, err);
486
3.37k
    if (localeIDSize <= 0 || U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) {
487
1.41k
        finalID=id;
488
1.41k
        if (*err == U_STRING_NOT_TERMINATED_WARNING) {
489
267
            *err = U_BUFFER_OVERFLOW_ERROR;
490
267
        }
491
1.95k
    } else {
492
1.95k
        finalID=buffer;
493
1.95k
    }
494
3.37k
    if (pLocaleIdSize != nullptr) {
495
3.37k
        *pLocaleIdSize = localeIDSize;
496
3.37k
    }
497
3.37k
    return finalID;
498
3.37k
}
499
/*
 * Returns the length of the shortest non-empty subtag of localeID that is
 * followed by a '_' or '-' separator.  The final subtag (the text after the
 * last separator) is not considered.  When no such subtag exists, the total
 * length of localeID is returned.
 */
static int32_t getShortestSubtagLength(const char *localeID) {
    int32_t totalLength = 0;   /* characters seen so far */
    int32_t runLength = 0;     /* length of the subtag currently being read */
    int32_t shortest = -1;     /* shortest separator-terminated subtag, -1 if none */

    for (const char *p = localeID; *p != 0; ++p) {
        ++totalLength;
        if (*p == '_' || *p == '-') {
            if (runLength > 0 && (shortest < 0 || runLength < shortest)) {
                shortest = runLength;
            }
            runLength = 0;
        } else {
            ++runLength;
        }
    }

    /* A separator-terminated subtag is always shorter than the whole string. */
    return shortest >= 0 ? shortest : totalLength;
}
524
525
/* ### Keywords **************************************************/
526
20.3k
/* True when c lies in the range '0'..'9'.  The argument is evaluated twice. */
#define UPRV_ISDIGIT(c) ((c) >= '0' && (c) <= '9')
/* True when c is accepted by uprv_isASCIILetter() or by UPRV_ISDIGIT(). */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c))
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) \
    ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size of the keyword name buffer, including the terminating NUL. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Number of keyword entries that ulocimp_getKeywords() can collect. */
#define ULOC_MAX_NO_KEYWORDS 25
533
534
/*
 * Returns a pointer to the first '@' in localeID, or NULL when there is
 * none.  On EBCDIC builds, when no '@' is found, the bytes listed in
 * ebcdicSigns are tried in order and the first match is returned.
 */
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID) {
    const char *atSign = uprv_strchr(localeID, '@');
    if (atSign != NULL) {
        return atSign;
    }
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    /* We do this because the @ sign is variant, and the @ sign used on one
    EBCDIC machine won't be compiled the same way on other EBCDIC based
    machines. */
    static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
    for (const uint8_t *candidate = ebcdicSigns; *candidate != 0; ++candidate) {
        const char *match = uprv_strchr(localeID, *candidate);
        if (match != NULL) {
            return match;
        }
    }
#endif
    return NULL;
}
557
558
/**
559
 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
560
 * @param keywordName incoming name to be canonicalized
561
 * @param status return status (keyword too long)
562
 * @return length of the keyword name
563
 */
564
static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
565
0
{
566
0
  int32_t keywordNameLen = 0;
567
568
0
  for (; *keywordName != 0; keywordName++) {
569
0
    if (!UPRV_ISALPHANUM(*keywordName)) {
570
0
      *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
571
0
      return 0;
572
0
    }
573
0
    if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
574
0
      buf[keywordNameLen++] = uprv_tolower(*keywordName);
575
0
    } else {
576
      /* keyword name too long for internal buffer */
577
0
      *status = U_INTERNAL_PROGRAM_ERROR;
578
0
      return 0;
579
0
    }
580
0
  }
581
0
  if (keywordNameLen == 0) {
582
0
    *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
583
0
    return 0;
584
0
  }
585
0
  buf[keywordNameLen] = 0; /* terminate */
586
587
0
  return keywordNameLen;
588
0
}
589
590
typedef struct {
591
    char keyword[ULOC_KEYWORD_BUFFER_LEN];
592
    int32_t keywordLen;
593
    const char *valueStart;
594
    int32_t valueLen;
595
} KeywordStruct;
596
597
/**
 * Comparator passed to uprv_sortArray(): orders two KeywordStruct items by
 * the uprv_strcmp() result of their keyword names. The context is unused.
 */
static int32_t U_CALLCONV
compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
    const KeywordStruct *lhs = static_cast<const KeywordStruct *>(left);
    const KeywordStruct *rhs = static_cast<const KeywordStruct *>(right);
    return uprv_strcmp(lhs->keyword, rhs->keyword);
}
603
604
/**
 * Parses the keyword section of a locale ID and appends it to sink in
 * normalized form: keyword names lowercased with spaces removed, values
 * trimmed of leading/trailing spaces, duplicates (later occurrences of a
 * keyword already seen) dropped, and entries sorted by keyword name.
 *
 * Does nothing unless prev is '@'.
 *
 * @param localeID  text following the '@'
 * @param prev      the character that preceded localeID
 * @param sink      receives "key=value" pairs joined by ';' when valuesToo is
 *                  true; otherwise each keyword name followed by a NUL byte
 * @param valuesToo whether values are emitted along with the names
 * @param status    set to U_INTERNAL_PROGRAM_ERROR when there are more than
 *                  ULOC_MAX_NO_KEYWORDS entries or a name does not fit the
 *                  internal buffer; U_INVALID_FORMAT_ERROR when an entry has
 *                  no '=', a ';' precedes the '=', or the name or value is empty
 */
U_CFUNC void
ulocimp_getKeywords(const char *localeID,
                    char prev,
                    ByteSink& sink,
                    UBool valuesToo,
                    UErrorCode *status)
{
    if (prev != '@') {
        return; /* not the start of a keyword definition */
    }

    KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
    int32_t numKeywords = 0;
    const char *pos = localeID;

    /* grab pairs, trim spaces, lowercase keywords; sorting happens afterwards */
    do {
        /* skip leading spaces */
        while (*pos == ' ') {
            ++pos;
        }
        if (*pos == 0) { /* handle trailing "; " */
            break;
        }
        if (numKeywords == ULOC_MAX_NO_KEYWORDS) {
            *status = U_INTERNAL_PROGRAM_ERROR;
            return;
        }

        const char *equalSign = uprv_strchr(pos, '=');
        const char *semicolon = uprv_strchr(pos, ';');
        /* lack of '=' [foo@currency] is illegal */
        /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
        if (equalSign == NULL || (semicolon != NULL && semicolon < equalSign)) {
            *status = U_INVALID_FORMAT_ERROR;
            return;
        }
        if (equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
            /* keyword name too long for internal buffer */
            *status = U_INTERNAL_PROGRAM_ERROR;
            return;
        }

        /* The slot is only kept (numKeywords incremented) if it is not a duplicate. */
        KeywordStruct &entry = keywordList[numKeywords];

        /* normalize the keyword name: drop every space, lowercase the rest */
        int32_t nameLen = 0;
        for (const char *p = pos; p < equalSign; ++p) {
            if (*p != ' ') {
                entry.keyword[nameLen++] = uprv_tolower(*p);
            }
        }
        /* zero-length keyword is an error. */
        if (nameLen == 0) {
            *status = U_INVALID_FORMAT_ERROR;
            return;
        }
        entry.keyword[nameLen] = 0;
        entry.keywordLen = nameLen;

        /* now grab the value part: skip the '=' and any leading spaces */
        const char *value = equalSign + 1;
        while (*value == ' ') {
            ++value;
        }
        /* Premature end or zero-length value */
        if (*value == 0 || value == semicolon) {
            *status = U_INVALID_FORMAT_ERROR;
            return;
        }
        entry.valueStart = value;

        pos = semicolon;
        if (pos != NULL) {
            /* value ends at the ';' -- back up over trailing spaces */
            int32_t trailingSpaces = 0;
            while (*(pos - trailingSpaces - 1) == ' ') {
                ++trailingSpaces;
            }
            entry.valueLen = (int32_t)(pos - value - trailingSpaces);
            ++pos; /* continue after the ';' */
        } else {
            /* last entry: value runs to the end of the string, minus trailing spaces */
            int32_t len = (int32_t)uprv_strlen(value);
            while (len != 0 && value[len - 1] == ' ') {
                --len;
            }
            entry.valueLen = len;
        }

        /* If this is a duplicate keyword, then ignore it */
        UBool duplicate = false;
        for (int32_t j = 0; j < numKeywords; ++j) {
            if (uprv_strcmp(keywordList[j].keyword, entry.keyword) == 0) {
                duplicate = true;
                break;
            }
        }
        if (!duplicate) {
            ++numKeywords;
        }
    } while (pos != NULL);

    /* now we have a list of keywords; sort it by keyword name */
    uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, false, status);

    /* Now construct the keyword part */
    for (int32_t k = 0; k < numKeywords; ++k) {
        const KeywordStruct &kw = keywordList[k];
        sink.Append(kw.keyword, kw.keywordLen);
        if (!valuesToo) {
            /* names only: each name is followed by a NUL byte */
            sink.Append("\0", 1);
            continue;
        }
        sink.Append("=", 1);
        sink.Append(kw.valueStart, kw.valueLen);
        if (k < numKeywords - 1) {
            sink.Append(";", 1);
        }
    }
}
724
725
/**
 * Buffer-based wrapper around ulocimp_getKeywordValue(): collects the value
 * of keywordName from localeID into buffer.
 *
 * @return the number of bytes the value needs (as counted by the sink).
 *         Sets U_BUFFER_OVERFLOW_ERROR if the value did not fit; otherwise
 *         the result is terminated via u_terminateChars(). Returns 0
 *         immediately if status already indicates a failure on entry.
 */
U_CAPI int32_t U_EXPORT2
uloc_getKeywordValue(const char* localeID,
                     const char* keywordName,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status)
{
    if (U_FAILURE(*status)) {
        return 0;
    }

    CheckedArrayByteSink sink(buffer, bufferCapacity);
    ulocimp_getKeywordValue(localeID, keywordName, sink, status);

    const int32_t length = sink.NumberOfBytesAppended();

    if (U_SUCCESS(*status)) {
        if (sink.Overflowed()) {
            *status = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(buffer, bufferCapacity, length, status);
        }
    }

    return length;
}
752
753
/**
 * Looks up keywordName in the keyword section of localeID and appends its
 * value (as-is, spaces trimmed) to sink.
 *
 * Does nothing if status is NULL or already a failure, if localeID is NULL,
 * or if the locale has no keyword section. Entries are examined in order;
 * every entry up to and including the match must be well-formed, otherwise
 * an error is set:
 *  - U_ILLEGAL_ARGUMENT_ERROR: keywordName is NULL/empty/malformed, an entry
 *    lacks '=', has an empty or non-alphanumeric name, or the matching value
 *    is empty or contains a disallowed character
 *  - U_INTERNAL_PROGRAM_ERROR: a keyword name is too long for the internal buffer
 */
U_CAPI void U_EXPORT2
ulocimp_getKeywordValue(const char* localeID,
                        const char* keywordName,
                        icu::ByteSink& sink,
                        UErrorCode* status)
{
    if (status == NULL || U_FAILURE(*status) || localeID == NULL) {
        return;
    }
    if (keywordName == NULL || keywordName[0] == 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    if (U_FAILURE(*status)) {
        return;
    }

    /* A BCP47-style ID is first converted into tempBuffer. */
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char* tmpLocaleID;
    if (_hasBCP47Extension(localeID)) {
        tmpLocaleID = _ConvertBCP47(localeID, tempBuffer,
                                    sizeof(tempBuffer), status, nullptr);
    } else {
        tmpLocaleID = localeID;
    }

    /* entry points at the '@' or ';' that introduces the current key=value pair;
       NULL means there are no (more) keywords */
    const char* entry = locale_getKeywordsStart(tmpLocaleID);
    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

    while (entry != NULL) {
        const char* keyStart = entry + 1; /* skip @ or ; */
        const char* equals = uprv_strchr(keyStart, '=');
        if (equals == NULL) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
            return;
        }

        /* strip leading & trailing spaces (TC decided to tolerate these) */
        while (*keyStart == ' ') {
            ++keyStart;
        }
        const char* keyEnd = equals;
        while (keyEnd > keyStart && keyEnd[-1] == ' ') {
            --keyEnd;
        }
        /* now keyEnd points to the first char after the key name */
        if (keyStart == keyEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
            return;
        }

        /* copy & normalize the key name from the locale */
        int32_t keyLen = 0;
        for (const char* p = keyStart; p < keyEnd; ++p) {
            if (!UPRV_ISALPHANUM(*p)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
                return;
            }
            if (keyLen >= ULOC_KEYWORD_BUFFER_LEN - 1) {
                /* keyword name too long for internal buffer */
                *status = U_INTERNAL_PROGRAM_ERROR;
                return;
            }
            localeKeywordNameBuffer[keyLen++] = uprv_tolower(*p);
        }
        localeKeywordNameBuffer[keyLen] = 0; /* terminate */

        /* advance to the next pair (or NULL) before deciding about this one */
        entry = uprv_strchr(equals, ';');

        if (uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) != 0) {
            continue; /* not the requested keyword */
        }

        /* current entry matches the keyword. */
        const char* valueStart = equals + 1; /* skip '=' */
        /* First strip leading & trailing spaces (TC decided to tolerate these) */
        while (*valueStart == ' ') {
            ++valueStart;
        }
        const char* valueEnd =
            (entry != NULL) ? entry : valueStart + uprv_strlen(valueStart);
        while (valueEnd > valueStart && valueEnd[-1] == ' ') {
            --valueEnd;
        }

        /* Now copy the value, but check well-formedness */
        if (valueStart == valueEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
            return;
        }
        for (; valueStart < valueEnd; ++valueStart) {
            if (!UPRV_ISALPHANUM(*valueStart) && !UPRV_OK_VALUE_PUNCTUATION(*valueStart)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                return;
            }
            /* Should we lowercase value to return here? Tests expect as-is. */
            sink.Append(valueStart, 1);
        }
        return;
    }
}
863
864
/**
 * Sets, replaces or removes a keyword in the NUL-terminated locale ID held
 * in buffer, rewriting the keyword section in place.
 *
 * A NULL or empty keywordValue requests removal. When the locale already has
 * keywords, the section is rebuilt entry by entry: a matching entry is
 * replaced (or dropped), and a new entry is inserted before the first
 * existing keyword that compares greater, or appended at the end.
 *
 * @return -1 if status is a failure on entry; 0 for invalid arguments or a
 *         malformed locale/keyword; the required length together with
 *         U_BUFFER_OVERFLOW_ERROR when the result (including its NUL) does
 *         not fit, in which case buffer is not modified; otherwise the new
 *         length of the locale ID. If nothing needed to change, or an
 *         internal append failed, the original length is returned.
 */
U_CAPI int32_t U_EXPORT2
uloc_setKeywordValue(const char* keywordName,
                     const char* keywordValue,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status)
{
    /* TODO: sorting. removal. */
    if (U_FAILURE(*status)) {
        return -1;
    }
    if (*status == U_STRING_NOT_TERMINATED_WARNING) {
        *status = U_ZERO_ERROR;
    }
    if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const int32_t bufLen = (int32_t)uprv_strlen(buffer);
    if (bufferCapacity < bufLen) {
        /* The capacity is less than the length?! Is this NULL terminated? */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    const int32_t keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    /* validate and copy the new value; a length of 0 means "remove" */
    char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
    int32_t keywordValueLen = 0;
    if (keywordValue != NULL) {
        for (const char *v = keywordValue; *v != 0; ++v) {
            if (!UPRV_ISALPHANUM(*v) && !UPRV_OK_VALUE_PUNCTUATION(*v)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                return 0;
            }
            if (keywordValueLen >= ULOC_KEYWORDS_CAPACITY) {
                /* keywordValue too long for internal buffer */
                *status = U_INTERNAL_PROGRAM_ERROR;
                return 0;
            }
            /* Should we force lowercase in value to set? */
            keywordValueBuffer[keywordValueLen++] = *v;
        }
    }
    keywordValueBuffer[keywordValueLen] = 0; /* terminate */

    char *startSearchHere = (char*)locale_getKeywordsStart(buffer);

    /* shortcut: no '@' at all, or a lone trailing '@' */
    if (startSearchHere == NULL || startSearchHere[1] == 0) {
        if (keywordValueLen == 0) { /* no keywords = nothing to remove */
            U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
            return bufLen;
        }

        int32_t needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
        if (startSearchHere != NULL) { /* had a single @ */
            --needLen; /* already had the @; startSearchHere points at it */
        } else {
            startSearchHere = buffer + bufLen;
        }
        if (needLen >= bufferCapacity) {
            *status = U_BUFFER_OVERFLOW_ERROR;
            return needLen; /* no change */
        }
        *startSearchHere++ = '@';
        uprv_strcpy(startSearchHere, keywordNameBuffer);
        startSearchHere += keywordNameLen;
        *startSearchHere++ = '=';
        uprv_strcpy(startSearchHere, keywordValueBuffer);
        U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
        return needLen;
    } /* end shortcut - no @ */

    CharString updatedKeysAndValues;
    UBool handledInputKeyAndValue = false;
    char keyValuePrefix = '@';
    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];

    /* Appends <prefix>name=value to the rebuilt keyword section. The prefix is
     * '@' for the first pair and ';' for every later one. */
    auto appendPair = [&](const char *name, int32_t nameLen,
                          const char *value, int32_t valueLen) {
        updatedKeysAndValues.append(keyValuePrefix, *status);
        keyValuePrefix = ';'; /* for any subsequent key-value pair */
        updatedKeysAndValues.append(name, nameLen, *status);
        updatedKeysAndValues.append('=', *status);
        updatedKeysAndValues.append(value, valueLen, *status);
    };

    /* walk the existing entries; keywordStart points at the '@' or ';' of each */
    for (char *keywordStart = startSearchHere; keywordStart != NULL; ) {
        ++keywordStart; /* skip @ or ; */
        char *nextEqualsign = uprv_strchr(keywordStart, '=');
        if (nextEqualsign == NULL) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
            return 0;
        }

        /* strip leading & trailing spaces (TC decided to tolerate these) */
        while (*keywordStart == ' ') {
            ++keywordStart;
        }
        const char *nameEnd = nextEqualsign;
        while (nameEnd > keywordStart && nameEnd[-1] == ' ') {
            --nameEnd;
        }
        /* now nameEnd points to the first char after the key name */
        if (keywordStart == nameEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
            return 0;
        }

        /* copy & normalize the key name from the locale */
        int32_t localeNameLen = 0;
        for (; keywordStart < nameEnd; ++keywordStart) {
            if (!UPRV_ISALPHANUM(*keywordStart)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
                return 0;
            }
            if (localeNameLen >= ULOC_KEYWORD_BUFFER_LEN - 1) {
                /* keyword name too long for internal buffer */
                *status = U_INTERNAL_PROGRAM_ERROR;
                return 0;
            }
            localeKeywordNameBuffer[localeNameLen++] = uprv_tolower(*keywordStart);
        }
        localeKeywordNameBuffer[localeNameLen] = 0; /* terminate */

        char *nextSeparator = uprv_strchr(nextEqualsign, ';');

        /* start processing the value part */
        ++nextEqualsign; /* skip '=' */
        /* First strip leading & trailing spaces (TC decided to tolerate these) */
        while (*nextEqualsign == ' ') {
            ++nextEqualsign;
        }
        const char *valueEnd = (nextSeparator != NULL)
                ? nextSeparator
                : nextEqualsign + uprv_strlen(nextEqualsign);
        while (valueEnd > nextEqualsign && valueEnd[-1] == ' ') {
            --valueEnd;
        }
        if (nextEqualsign == valueEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
            return 0;
        }

        const int32_t rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
        if (rc == 0) {
            /* Current entry matches the input keyword. Update the entry */
            if (keywordValueLen > 0) { /* updating a value */
                appendPair(keywordNameBuffer, keywordNameLen,
                           keywordValueBuffer, keywordValueLen);
            } /* else removing this entry, don't emit anything */
            handledInputKeyAndValue = true;
        } else {
            /* input keyword sorts earlier than current entry, add before current entry */
            if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
                /* insert new entry at this location */
                appendPair(keywordNameBuffer, keywordNameLen,
                           keywordValueBuffer, keywordValueLen);
                handledInputKeyAndValue = true;
            }
            /* copy the current entry */
            appendPair(localeKeywordNameBuffer, localeNameLen,
                       nextEqualsign, static_cast<int32_t>(valueEnd - nextEqualsign));
        }
        if (nextSeparator == NULL && keywordValueLen > 0 && !handledInputKeyAndValue) {
            /* append new entry at the end, it sorts later than existing entries.
             * (The prefix update done by appendPair is moot: the loop ends here.) */
            appendPair(keywordNameBuffer, keywordNameLen,
                       keywordValueBuffer, keywordValueLen);
            handledInputKeyAndValue = true;
        }
        keywordStart = nextSeparator;
    } /* end loop searching */

    /* Any error from updatedKeysAndValues.append above would be internal and not due to
     * problems with the passed-in locale. So if we did encounter problems with the
     * passed-in locale above, those errors took precedence and overrode any error
     * status from updatedKeysAndValues.append, and also caused a return of 0. If there
     * are errors here they are from updatedKeysAndValues.append; they do cause an
     * error return but the passed-in locale is unmodified and the original bufLen is
     * returned.
     */
    if (!handledInputKeyAndValue || U_FAILURE(*status)) {
        /* if input key/value specified removal of a keyword not present in locale, or
         * there was an error in CharString.append, leave original locale alone. */
        U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
        return bufLen;
    }

    // needLen = length of the part before '@'
    int32_t needLen = (int32_t)(startSearchHere - buffer);
    // Check whether the rebuilt keyword section fits at startSearchHere; if not,
    // return U_BUFFER_OVERFLOW_ERROR without copying updatedKeysAndValues into it.
    // We do this because this API function does not behave like most others:
    // It promises never to set a U_STRING_NOT_TERMINATED_WARNING.
    // When the contents fit but the terminating NUL does not, we must leave
    // the buffer contents unchanged and return with a buffer overflow error.
    const int32_t appendLength = updatedKeysAndValues.length();
    if (appendLength >= bufferCapacity - needLen) {
        *status = U_BUFFER_OVERFLOW_ERROR;
        return needLen + appendLength;
    }
    needLen += updatedKeysAndValues.extract(
                         startSearchHere, bufferCapacity - needLen, *status);
    U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
    return needLen;
}
1087
1088
/* ### ID parsing implementation **************************************************/
1089
1090
7.10k
/* true if a is 'x', 'X', 'i' or 'I'. The argument is parenthesized so that
   expression arguments parse correctly; it is still evaluated several times,
   so it must not have side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))
1091
1092
/* returns true if one of the special prefixes is here (s=string)
   'x-' or 'i-'.
   s is parenthesized so that pointer expressions such as p+1 index correctly;
   it is evaluated more than once, so it must not have side effects. */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))
1095
1096
/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant.
 * True for NUL, '.' and '@'. The argument is parenthesized so that expression
 * arguments parse correctly; it is evaluated up to three times, so it must
 * not have side effects.
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1100
1101
/**
1102
 * Lookup 'key' in the array 'list'.  The array 'list' should contain
1103
 * a NULL entry, followed by more entries, and a second NULL entry.
1104
 *
1105
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1106
 * COUNTRIES_3.
1107
 */
1108
static int16_t _findIndex(const char* const* list, const char* key)
1109
250
{
1110
250
    const char* const* anchor = list;
1111
250
    int32_t pass = 0;
1112
1113
    /* Make two passes through two NULL-terminated arrays at 'list' */
1114
549
    while (pass++ < 2) {
1115
113k
        while (*list) {
1116
113k
            if (uprv_strcmp(key, *list) == 0) {
1117
106
                return (int16_t)(list - anchor);
1118
106
            }
1119
113k
            list++;
1120
113k
        }
1121
299
        ++list;     /* skip final NULL *CWB*/
1122
299
    }
1123
144
    return -1;
1124
250
}
1125
1126
/**
 * Maps a country ID listed in DEPRECATED_COUNTRIES to the entry at the same
 * index in REPLACEMENT_COUNTRIES; any other ID is returned unchanged.
 */
U_CFUNC const char*
uloc_getCurrentCountryID(const char* oldID){
    const int32_t index = _findIndex(DEPRECATED_COUNTRIES, oldID);
    return (index >= 0) ? REPLACEMENT_COUNTRIES[index] : oldID;
}
1134
/**
 * Maps a language ID listed in DEPRECATED_LANGUAGES to the entry at the same
 * index in REPLACEMENT_LANGUAGES; any other ID is returned unchanged.
 */
U_CFUNC const char*
uloc_getCurrentLanguageID(const char* oldID){
    const int32_t index = _findIndex(DEPRECATED_LANGUAGES, oldID);
    return (index >= 0) ? REPLACEMENT_LANGUAGES[index] : oldID;
}
1142
/*
1143
 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1144
 * avoid duplicating code to handle the earlier locale ID pieces
1145
 * in the functions for the later ones by
1146
 * setting the *pEnd pointer to where they stopped parsing
1147
 *
1148
 * TODO try to use this in Locale
1149
 */
1150
CharString U_EXPORT2
1151
ulocimp_getLanguage(const char *localeID,
1152
                    const char **pEnd,
1153
3.55k
                    UErrorCode &status) {
1154
3.55k
    CharString result;
1155
1156
3.55k
    if (uprv_stricmp(localeID, "root") == 0) {
1157
2
        localeID += 4;
1158
3.55k
    } else if (uprv_strnicmp(localeID, "und", 3) == 0 &&
1159
3.55k
               (localeID[3] == '\0' ||
1160
71
                localeID[3] == '-' ||
1161
71
                localeID[3] == '_' ||
1162
71
                localeID[3] == '@')) {
1163
44
        localeID += 3;
1164
44
    }
1165
1166
    /* if it starts with i- or x- then copy that prefix */
1167
3.55k
    if(_isIDPrefix(localeID)) {
1168
227
        result.append((char)uprv_tolower(*localeID), status);
1169
227
        result.append('-', status);
1170
227
        localeID+=2;
1171
227
    }
1172
1173
    /* copy the language as far as possible and count its length */
1174
20.5k
    while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1175
17.0k
        result.append((char)uprv_tolower(*localeID), status);
1176
17.0k
        localeID++;
1177
17.0k
    }
1178
1179
3.55k
    if(result.length()==3) {
1180
        /* convert 3 character code to 2 character code if possible *CWB*/
1181
198
        int32_t offset = _findIndex(LANGUAGES_3, result.data());
1182
198
        if(offset>=0) {
1183
87
            result.clear();
1184
87
            result.append(LANGUAGES[offset], status);
1185
87
        }
1186
198
    }
1187
1188
3.55k
    if(pEnd!=NULL) {
1189
3.55k
        *pEnd=localeID;
1190
3.55k
    }
1191
1192
3.55k
    return result;
1193
3.55k
}
1194
1195
/**
 * Extracts a script subtag from the start of localeID.
 *
 * The subtag is accepted only if it consists of exactly four ASCII letters;
 * it is returned title-cased (first letter upper, the rest lower). Otherwise
 * the result is empty and nothing is consumed.
 *
 * @param localeID the text to parse
 * @param pEnd     if not NULL, receives localeID+4 when a script was found,
 *                 else localeID
 * @param status   passed to the CharString append calls
 */
CharString U_EXPORT2
ulocimp_getScript(const char *localeID,
                  const char **pEnd,
                  UErrorCode &status) {
    CharString result;

    /* assume "no script" until proven otherwise */
    if (pEnd != NULL) {
        *pEnd = localeID;
    }

    /* measure the run of letters that makes up the second item */
    int32_t idLen = 0;
    while (!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
            && uprv_isASCIILetter(localeID[idLen])) {
        ++idLen;
    }

    /* If it's exactly 4 characters long, then it's a script and not a country. */
    if (idLen != 4) {
        return result;
    }

    if (pEnd != NULL) {
        *pEnd = localeID + idLen;
    }
    result.append((char)uprv_toupper(localeID[0]), status);
    for (int32_t i = 1; i < idLen; ++i) {
        result.append((char)uprv_tolower(localeID[i]), status);
    }

    return result;
}
1228
1229
/**
 * Extracts the uppercased country subtag from the start of localeID.
 *
 * The subtag runs up to the next terminator or ID separator and is accepted
 * only if it is 2 or 3 characters long. A 3-character code found in
 * COUNTRIES_3 is replaced by the COUNTRIES entry at the same index.
 * Otherwise the result is empty and nothing is consumed.
 *
 * The subtag is measured before anything is copied, so an over-long subtag
 * (for instance a variant in this position) is never copied into the result
 * only to be thrown away.
 *
 * @param localeID the text to parse
 * @param pEnd     if not NULL, receives the position after the country when
 *                 one was accepted, else localeID
 * @param status   passed to the CharString append calls
 */
CharString U_EXPORT2
ulocimp_getCountry(const char *localeID,
                   const char **pEnd,
                   UErrorCode &status) {
    CharString result;

    /* measure the candidate subtag first */
    int32_t idLen = 0;
    while (!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
        ++idLen;
    }

    /* the country should be either length 2 or 3 */
    if (idLen == 2 || idLen == 3) {
        for (int32_t i = 0; i < idLen; ++i) {
            result.append((char)uprv_toupper(localeID[i]), status);
        }
        /* convert 3 character code to 2 character code if possible *CWB*/
        if (idLen == 3) {
            const int32_t offset = _findIndex(COUNTRIES_3, result.data());
            if (offset >= 0) {
                result.clear();
                result.append(COUNTRIES[offset], status);
            }
        }
        localeID += idLen;
    }

    if (pEnd != NULL) {
        *pEnd = localeID;
    }

    return result;
}
1263
1264
/**
1265
 * @param needSeparator if true, then add leading '_' if any variants
1266
 * are added to 'variant'
1267
 */
1268
static void
1269
_getVariant(const char *localeID,
1270
            char prev,
1271
            ByteSink& sink,
1272
977
            UBool needSeparator) {
1273
977
    UBool hasVariant = false;
1274
1275
    /* get one or more variant tags and separate them with '_' */
1276
977
    if(_isIDSeparator(prev)) {
1277
        /* get a variant string after a '-' or '_' */
1278
69.6k
        while(!_isTerminator(*localeID)) {
1279
68.6k
            if (needSeparator) {
1280
0
                sink.Append("_", 1);
1281
0
                needSeparator = false;
1282
0
            }
1283
68.6k
            char c = (char)uprv_toupper(*localeID);
1284
68.6k
            if (c == '-') c = '_';
1285
68.6k
            sink.Append(&c, 1);
1286
68.6k
            hasVariant = true;
1287
68.6k
            localeID++;
1288
68.6k
        }
1289
977
    }
1290
1291
    /* if there is no variant tag after a '-' or '_' then look for '@' */
1292
977
    if(!hasVariant) {
1293
117
        if(prev=='@') {
1294
            /* keep localeID */
1295
117
        } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1296
63
            ++localeID; /* point after the '@' */
1297
63
        } else {
1298
54
            return;
1299
54
        }
1300
2.18k
        while(!_isTerminator(*localeID)) {
1301
2.12k
            if (needSeparator) {
1302
0
                sink.Append("_", 1);
1303
0
                needSeparator = false;
1304
0
            }
1305
2.12k
            char c = (char)uprv_toupper(*localeID);
1306
2.12k
            if (c == '-' || c == ',') c = '_';
1307
2.12k
            sink.Append(&c, 1);
1308
2.12k
            localeID++;
1309
2.12k
        }
1310
63
    }
1311
977
}
1312
1313
/* Keyword enumeration */
1314
1315
/** Iteration state stored in the context of a keyword UEnumeration. */
struct UKeywordsContext {
    /* heap copy of the keyword list: consecutive NUL-terminated names,
       ended by an empty string; released with uprv_free() on close */
    char *keywords;
    /* the name that the next call to the "next" callback will return */
    char *current;
};
1319
1320
U_CDECL_BEGIN
1321
1322
static void U_CALLCONV
1323
0
uloc_kw_closeKeywords(UEnumeration *enumerator) {
1324
0
    uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1325
0
    uprv_free(enumerator->context);
1326
0
    uprv_free(enumerator);
1327
0
}
1328
1329
/**
 * Count callback: walks the whole keyword buffer from its start and counts
 * the NUL-terminated names up to the terminating empty string. The current
 * iteration position is not consulted or changed.
 */
static int32_t U_CALLCONV
uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
    const char *cursor = static_cast<UKeywordsContext *>(en->context)->keywords;
    int32_t count = 0;
    for (; *cursor != 0; cursor += uprv_strlen(cursor) + 1) {
        ++count;
    }
    return count;
}
1339
1340
static const char * U_CALLCONV
1341
uloc_kw_nextKeyword(UEnumeration* en,
1342
                    int32_t* resultLength,
1343
0
                    UErrorCode* /*status*/) {
1344
0
    const char* result = ((UKeywordsContext *)en->context)->current;
1345
0
    int32_t len = 0;
1346
0
    if(*result) {
1347
0
        len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1348
0
        ((UKeywordsContext *)en->context)->current += len+1;
1349
0
    } else {
1350
0
        result = NULL;
1351
0
    }
1352
0
    if (resultLength) {
1353
0
        *resultLength = len;
1354
0
    }
1355
0
    return result;
1356
0
}
1357
1358
static void U_CALLCONV
1359
uloc_kw_resetKeywords(UEnumeration* en,
1360
0
                      UErrorCode* /*status*/) {
1361
0
    ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1362
0
}
1363
1364
U_CDECL_END
1365
1366
1367
/*
 * Template that uloc_openKeywordList copies into every enumeration it
 * creates; the `context` member is filled in there afterwards.
 * NOTE(review): the meaning of the two leading pointer slots is taken from
 * the UEnumeration declaration (uenumimp.h), which is not visible here.
 */
static const UEnumeration gKeywordsEnum = {
    nullptr,                /* presumably baseContext - unused */
    nullptr,                /* presumably context - set per enumeration */
    uloc_kw_closeKeywords,  /* close */
    uloc_kw_countKeywords,  /* count */
    uenum_unextDefault,     /* UChar next: library default */
    uloc_kw_nextKeyword,    /* char next */
    uloc_kw_resetKeywords   /* reset */
};
1376
1377
/**
 * Creates a UEnumeration over a private copy of a keyword list.
 *
 * @param keywordList     keywordListSize bytes holding consecutive
 *                        NUL-terminated keywords; may be NULL only when
 *                        keywordListSize is 0.
 * @param keywordListSize number of bytes to copy; must not be negative.
 * @param status          in/out error code. Set to U_ILLEGAL_ARGUMENT_ERROR
 *                        for a negative size or a NULL list with a positive
 *                        size, and to U_MEMORY_ALLOCATION_ERROR when an
 *                        allocation fails.
 * @return the new enumeration (its close callback frees everything
 *         allocated here), or nullptr on failure or if status is NULL.
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
{
    if (status == nullptr || U_FAILURE(*status)) {
        return nullptr;
    }
    /* Reject sizes that would otherwise reach uprv_malloc/uprv_memcpy as huge unsigned values. */
    if (keywordListSize < 0 || (keywordList == nullptr && keywordListSize > 0)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }

    /* LocalMemory releases both allocations automatically on every early return. */
    LocalMemory<UKeywordsContext> myContext;
    LocalMemory<UEnumeration> result;
    myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
    result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
    if (myContext.isNull() || result.isNull()) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));

    /* One extra byte for the NUL that terminates the whole list. */
    myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1));
    if (myContext->keywords == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    if (keywordListSize > 0) {
        uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
    }
    myContext->keywords[keywordListSize] = 0;
    myContext->current = myContext->keywords;

    /* Ownership of the context moves to the enumeration, and of the enumeration to the caller. */
    result->context = myContext.orphan();
    return result.orphan();
}
1404
1405
/**
 * Opens an enumeration over the keywords of a locale ID.
 *
 * The language, optional script and optional country subtags are skipped,
 * then everything after the keyword start reported by
 * locale_getKeywordsStart is parsed by ulocimp_getKeywords and handed to
 * uloc_openKeywordList.
 *
 * @param localeID locale to inspect; NULL selects uloc_getDefault() when the
 *                 ID has no BCP47 extension.
 * @param status   in/out error code; NULL or a failure makes this a no-op.
 * @return the enumeration, or NULL when there are no keywords or on error.
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char* localeID,
                        UErrorCode* status)
{
    if (status == nullptr || U_FAILURE(*status)) {
        return nullptr;
    }

    char bcp47Buffer[ULOC_FULLNAME_CAPACITY];
    const char* cursor;
    if (_hasBCP47Extension(localeID)) {
        cursor = _ConvertBCP47(localeID, bcp47Buffer,
                               sizeof(bcp47Buffer), status, nullptr);
    } else {
        if (localeID == nullptr) {
            localeID = uloc_getDefault();
        }
        cursor = localeID;
    }

    /* Step over the language subtag. */
    ulocimp_getLanguage(cursor, &cursor, *status);
    if (U_FAILURE(*status)) {
        return nullptr;
    }

    if (_isIDSeparator(*cursor)) {
        /* Step over the script subtag, if one was recognized. */
        const char *afterScript;
        ulocimp_getScript(cursor + 1, &afterScript, *status);
        if (U_FAILURE(*status)) {
            return nullptr;
        }
        if (afterScript != cursor + 1) {
            cursor = afterScript;
        }

        /* Step over the country subtag. */
        if (_isIDSeparator(*cursor)) {
            ulocimp_getCountry(cursor + 1, &cursor, *status);
            if (U_FAILURE(*status)) {
                return nullptr;
            }
        }
    }

    /* Keywords are located after '@'. */
    cursor = locale_getKeywordsStart(cursor);
    if (cursor == nullptr) {
        return nullptr;
    }

    CharString keywords;
    CharStringByteSink sink(&keywords);
    ulocimp_getKeywords(cursor + 1, '@', sink, false, status);
    if (U_FAILURE(*status)) {
        return nullptr;
    }
    return uloc_openKeywordList(keywords.data(), keywords.length(), status);
}
1464
1465
1466
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * True when at least one bit of `mask` is set in `options`.
 * Both arguments are parenthesized so that compound expressions such as
 * OPTION_SET(flags, A | B) are evaluated as intended.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The grandfathered tag "i-default"; deliberately not NUL-terminated, so always compare with I_DEFAULT_LENGTH. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1474
1475
/**
1476
 * Canonicalize the given localeID, to level 1 or to level 2,
1477
 * depending on the options.  To specify level 1, pass in options=0.
1478
 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1479
 *
1480
 * This is the code underlying uloc_getName and uloc_canonicalize.
1481
 */
1482
static void
1483
_canonicalize(const char* localeID,
1484
              ByteSink& sink,
1485
              uint32_t options,
1486
3.55k
              UErrorCode* err) {
1487
3.55k
    if (U_FAILURE(*err)) {
1488
0
        return;
1489
0
    }
1490
1491
3.55k
    int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
1492
3.55k
    PreflightingLocaleIDBuffer tempBuffer;  // if localeID has a BCP47 extension, tmpLocaleID points to this
1493
3.55k
    CharString localeIDWithHyphens;  // if localeID has a BPC47 extension and have _, tmpLocaleID points to this
1494
3.55k
    const char* origLocaleID;
1495
3.55k
    const char* tmpLocaleID;
1496
3.55k
    const char* keywordAssign = NULL;
1497
3.55k
    const char* separatorIndicator = NULL;
1498
1499
3.55k
    if (_hasBCP47Extension(localeID)) {
1500
2.50k
        const char* localeIDPtr = localeID;
1501
1502
        // convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string
1503
2.50k
        if (uprv_strchr(localeID, '_') != nullptr && localeID[1] != '-' && localeID[1] != '_') {
1504
300
            localeIDWithHyphens.append(localeID, -1, *err);
1505
300
            if (U_SUCCESS(*err)) {
1506
60.7k
                for (char* p = localeIDWithHyphens.data(); *p != '\0'; ++p) {
1507
60.4k
                    if (*p == '_') {
1508
947
                        *p = '-';
1509
947
                    }
1510
60.4k
                }
1511
300
                localeIDPtr = localeIDWithHyphens.data();
1512
300
            }
1513
300
        }
1514
1515
3.37k
        do {
1516
            // After this call tmpLocaleID may point to localeIDPtr which may
1517
            // point to either localeID or localeIDWithHyphens.data().
1518
3.37k
            tmpLocaleID = _ConvertBCP47(localeIDPtr, tempBuffer.getBuffer(),
1519
3.37k
                                        tempBuffer.getCapacity(), err,
1520
3.37k
                                        &(tempBuffer.requestedCapacity));
1521
3.37k
        } while (tempBuffer.needToTryAgain(err));
1522
2.50k
    } else {
1523
1.04k
        if (localeID==NULL) {
1524
0
           localeID=uloc_getDefault();
1525
0
        }
1526
1.04k
        tmpLocaleID=localeID;
1527
1.04k
    }
1528
1529
3.55k
    origLocaleID=tmpLocaleID;
1530
1531
    /* get all pieces, one after another, and separate with '_' */
1532
3.55k
    CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
1533
1534
3.55k
    if (tag.length() == I_DEFAULT_LENGTH &&
1535
3.55k
            uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
1536
4
        tag.clear();
1537
4
        tag.append(uloc_getDefault(), *err);
1538
3.55k
    } else if(_isIDSeparator(*tmpLocaleID)) {
1539
1.06k
        const char *scriptID;
1540
1541
1.06k
        ++fieldCount;
1542
1.06k
        tag.append('_', *err);
1543
1544
1.06k
        CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1545
1.06k
        tag.append(script, *err);
1546
1.06k
        scriptSize = script.length();
1547
1.06k
        if(scriptSize > 0) {
1548
            /* Found optional script */
1549
45
            tmpLocaleID = scriptID;
1550
45
            ++fieldCount;
1551
45
            if (_isIDSeparator(*tmpLocaleID)) {
1552
                /* If there is something else, then we add the _ */
1553
16
                tag.append('_', *err);
1554
16
            }
1555
45
        }
1556
1557
1.06k
        if (_isIDSeparator(*tmpLocaleID)) {
1558
1.03k
            const char *cntryID;
1559
1560
1.03k
            CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1561
1.03k
            tag.append(country, *err);
1562
1.03k
            if (!country.isEmpty()) {
1563
                /* Found optional country */
1564
86
                tmpLocaleID = cntryID;
1565
86
            }
1566
1.03k
            if(_isIDSeparator(*tmpLocaleID)) {
1567
                /* If there is something else, then we add the _  if we found country before. */
1568
977
                if (!_isIDSeparator(*(tmpLocaleID+1))) {
1569
527
                    ++fieldCount;
1570
527
                    tag.append('_', *err);
1571
527
                }
1572
1573
977
                variantSize = -tag.length();
1574
977
                {
1575
977
                    CharStringByteSink s(&tag);
1576
977
                    _getVariant(tmpLocaleID+1, *tmpLocaleID, s, false);
1577
977
                }
1578
977
                variantSize += tag.length();
1579
977
                if (variantSize > 0) {
1580
914
                    tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1581
914
                }
1582
977
            }
1583
1.03k
        }
1584
1.06k
    }
1585
1586
    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1587
3.55k
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1588
96
        UBool done = false;
1589
10.7k
        do {
1590
10.7k
            char c = *tmpLocaleID;
1591
10.7k
            switch (c) {
1592
79
            case 0:
1593
96
            case '@':
1594
96
                done = true;
1595
96
                break;
1596
10.6k
            default:
1597
10.6k
                tag.append(c, *err);
1598
10.6k
                ++tmpLocaleID;
1599
10.6k
                break;
1600
10.7k
            }
1601
10.7k
        } while (!done);
1602
96
    }
1603
1604
    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1605
       After this, tmpLocaleID either points to '@' or is NULL */
1606
3.55k
    if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1607
2.14k
        keywordAssign = uprv_strchr(tmpLocaleID, '=');
1608
2.14k
        separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1609
2.14k
    }
1610
1611
    /* Copy POSIX-style variant, if any [mr@FOO] */
1612
3.55k
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1613
3.55k
        tmpLocaleID != NULL && keywordAssign == NULL) {
1614
4.14k
        for (;;) {
1615
4.14k
            char c = *tmpLocaleID;
1616
4.14k
            if (c == 0) {
1617
51
                break;
1618
51
            }
1619
4.08k
            tag.append(c, *err);
1620
4.08k
            ++tmpLocaleID;
1621
4.08k
        }
1622
51
    }
1623
1624
3.55k
    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1625
        /* Handle @FOO variant if @ is present and not followed by = */
1626
1
        if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1627
            /* Add missing '_' if needed */
1628
0
            if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1629
0
                do {
1630
0
                    tag.append('_', *err);
1631
0
                    ++fieldCount;
1632
0
                } while(fieldCount<2);
1633
0
            }
1634
1635
0
            int32_t posixVariantSize = -tag.length();
1636
0
            {
1637
0
                CharStringByteSink s(&tag);
1638
0
                _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
1639
0
            }
1640
0
            posixVariantSize += tag.length();
1641
0
            if (posixVariantSize > 0) {
1642
0
                variantSize += posixVariantSize;
1643
0
            }
1644
0
        }
1645
1646
        /* Look up the ID in the canonicalization map */
1647
11
        for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1648
10
            StringPiece id(CANONICALIZE_MAP[j].id);
1649
10
            if (tag == id) {
1650
0
                if (id.empty() && tmpLocaleID != NULL) {
1651
0
                    break; /* Don't remap "" if keywords present */
1652
0
                }
1653
0
                tag.clear();
1654
0
                tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
1655
0
                break;
1656
0
            }
1657
10
        }
1658
1
    }
1659
1660
3.55k
    sink.Append(tag.data(), tag.length());
1661
1662
3.55k
    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1663
3.55k
        if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1664
3.55k
            (!separatorIndicator || separatorIndicator > keywordAssign)) {
1665
2.08k
            sink.Append("@", 1);
1666
2.08k
            ++fieldCount;
1667
2.08k
            ulocimp_getKeywords(tmpLocaleID+1, '@', sink, true, err);
1668
2.08k
        }
1669
3.55k
    }
1670
3.55k
}
1671
1672
/* ### ID parsing API **************************************************/
1673
1674
/**
 * Truncates a locale ID at its last '_' to form the parent ID.
 *
 * A leading "und_" (compared case-insensitively) is dropped as well, so the
 * copy then starts at the '_' that followed "und". When there is no '_',
 * or it is the first character, the parent is the empty string.
 *
 * @param localeID       source ID; NULL selects uloc_getDefault().
 * @param parent         output buffer; may be the same pointer as localeID,
 *                       and may be NULL when parentCapacity is 0
 *                       (preflighting).
 * @param parentCapacity capacity of parent in bytes.
 * @param err            in/out error code; NULL or a failure makes this a
 *                       no-op returning 0.
 * @return the length of the parent ID, after u_terminateChars has
 *         NUL-terminated the buffer or set the error code.
 */
U_CAPI int32_t  U_EXPORT2
uloc_getParent(const char*    localeID,
               char* parent,
               int32_t parentCapacity,
               UErrorCode* err)
{
    if (err == NULL || U_FAILURE(*err)) {
        return 0;
    }

    if (localeID == NULL) {
        localeID = uloc_getDefault();
    }

    const char *lastUnderscore = uprv_strrchr(localeID, '_');
    int32_t i = (lastUnderscore != NULL) ? (int32_t)(lastUnderscore - localeID) : 0;

    if (i > 0) {
        const bool hasUndPrefix = uprv_strnicmp(localeID, "und_", 4) == 0;
        if (hasUndPrefix) {
            localeID += 3;
            i -= 3;
        }

        /*
         * Copy only when there is something to copy: this keeps a NULL
         * buffer (capacity 0) and a negative capacity away from
         * memcpy/memmove. u_terminateChars still reports the overflow.
         */
        const int32_t copyLength = uprv_min(i, parentCapacity);
        if (copyLength > 0) {
            if (hasUndPrefix) {
                /* source and destination may overlap when parent aliases localeID */
                uprv_memmove(parent, localeID, copyLength);
            } else if (parent != localeID) {
                uprv_memcpy(parent, localeID, copyLength);
            }
        }
    }

    return u_terminateChars(parent, parentCapacity, i, err);
}
1708
1709
/**
 * Extracts the language subtag of a locale ID into a caller buffer.
 *
 * @param localeID         source ID; NULL selects uloc_getDefault().
 * @param language         output buffer.
 * @param languageCapacity capacity of language in bytes.
 * @param err              in/out error code; NULL or a failure makes this a
 *                         no-op returning 0.
 * @return the value returned by CharString::extract for the language.
 */
U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char*    localeID,
         char* language,
         int32_t languageCapacity,
         UErrorCode* err)
{
    /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/

    if (err == nullptr || U_FAILURE(*err)) {
        return 0;
    }

    const char* source = (localeID != nullptr) ? localeID : uloc_getDefault();
    CharString parsed = ulocimp_getLanguage(source, nullptr, *err);
    return parsed.extract(language, languageCapacity, *err);
}
1727
1728
/**
 * Extracts the script subtag of a locale ID into a caller buffer.
 *
 * @param localeID       source ID; NULL selects uloc_getDefault().
 * @param script         output buffer.
 * @param scriptCapacity capacity of script in bytes.
 * @param err            in/out error code; NULL or a failure makes this a
 *                       no-op returning 0.
 * @return the script length, or the result of terminating an empty string
 *         when nothing follows the language.
 */
U_CAPI int32_t U_EXPORT2
uloc_getScript(const char*    localeID,
         char* script,
         int32_t scriptCapacity,
         UErrorCode* err)
{
    if (err == nullptr || U_FAILURE(*err)) {
        return 0;
    }

    if (localeID == nullptr) {
        localeID = uloc_getDefault();
    }

    /* skip the language */
    ulocimp_getLanguage(localeID, &localeID, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }

    if (!_isIDSeparator(*localeID)) {
        /* nothing after the language: the script is empty */
        return u_terminateChars(script, scriptCapacity, 0, err);
    }
    return ulocimp_getScript(localeID + 1, nullptr, *err).extract(script, scriptCapacity, *err);
}
1753
1754
/**
 * Extracts the country subtag of a locale ID into a caller buffer.
 *
 * @param localeID        source ID; NULL selects uloc_getDefault().
 * @param country         output buffer.
 * @param countryCapacity capacity of country in bytes.
 * @param err             in/out error code; NULL or a failure makes this a
 *                        no-op returning 0.
 * @return the country length, or the result of terminating an empty string
 *         when no country position exists.
 */
U_CAPI int32_t  U_EXPORT2
uloc_getCountry(const char* localeID,
            char* country,
            int32_t countryCapacity,
            UErrorCode* err)
{
    if (err == nullptr || U_FAILURE(*err)) {
        return 0;
    }

    if (localeID == nullptr) {
        localeID = uloc_getDefault();
    }

    /* Skip the language */
    ulocimp_getLanguage(localeID, &localeID, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }

    if (_isIDSeparator(*localeID)) {
        /* Skip the script if available */
        const char *afterScript;
        ulocimp_getScript(localeID + 1, &afterScript, *err);
        if (U_FAILURE(*err)) {
            return 0;
        }
        if (afterScript != localeID + 1) {
            /* Found optional script */
            localeID = afterScript;
        }

        if (_isIDSeparator(*localeID)) {
            return ulocimp_getCountry(localeID + 1, nullptr, *err).extract(country, countryCapacity, *err);
        }
    }

    return u_terminateChars(country, countryCapacity, 0, err);
}
1791
1792
/**
 * Extracts the variant part of a locale ID into a caller buffer.
 *
 * The language, optional script and optional country are skipped first;
 * whatever _getVariant produces from the remainder is the variant.
 *
 * @param localeID        source ID; NULL selects uloc_getDefault() when the
 *                        ID has no BCP47 extension.
 * @param variant         output buffer.
 * @param variantCapacity capacity of variant in bytes.
 * @param err             in/out error code; NULL or a failure makes this a
 *                        no-op returning 0. Set to U_BUFFER_OVERFLOW_ERROR
 *                        when the variant does not fit.
 * @return the number of bytes the variant needs.
 */
U_CAPI int32_t  U_EXPORT2
uloc_getVariant(const char* localeID,
                char* variant,
                int32_t variantCapacity,
                UErrorCode* err)
{
    if (err == nullptr || U_FAILURE(*err)) {
        return 0;
    }

    char bcp47Buffer[ULOC_FULLNAME_CAPACITY];
    const char* cursor;
    int32_t variantLength = 0;

    if (_hasBCP47Extension(localeID)) {
        cursor = _ConvertBCP47(localeID, bcp47Buffer, sizeof(bcp47Buffer), err, nullptr);
    } else {
        if (localeID == nullptr) {
            localeID = uloc_getDefault();
        }
        cursor = localeID;
    }

    /* Skip the language */
    ulocimp_getLanguage(cursor, &cursor, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }

    if (_isIDSeparator(*cursor)) {
        /* Skip the script if available */
        const char *afterScript;
        ulocimp_getScript(cursor + 1, &afterScript, *err);
        if (U_FAILURE(*err)) {
            return 0;
        }
        if (afterScript != cursor + 1) {
            /* Found optional script */
            cursor = afterScript;
        }

        /* Skip the Country */
        if (_isIDSeparator(*cursor)) {
            const char *afterCountry;
            ulocimp_getCountry(cursor + 1, &afterCountry, *err);
            if (U_FAILURE(*err)) {
                return 0;
            }
            if (afterCountry != cursor + 1) {
                /* Found optional country */
                cursor = afterCountry;
            }

            if (_isIDSeparator(*cursor)) {
                /* If there was no country ID, skip a possible extra IDSeparator */
                if (cursor != afterCountry && _isIDSeparator(cursor[1])) {
                    cursor++;
                }

                CheckedArrayByteSink sink(variant, variantCapacity);
                _getVariant(cursor + 1, *cursor, sink, false);

                variantLength = sink.NumberOfBytesAppended();

                if (U_FAILURE(*err)) {
                    return variantLength;
                }

                if (sink.Overflowed()) {
                    *err = U_BUFFER_OVERFLOW_ERROR;
                    return variantLength;
                }
            }
        }
    }

    return u_terminateChars(variant, variantCapacity, variantLength, err);
}
1868
1869
/**
 * Writes the level-1 normalized form of a locale ID (keywords included)
 * into a caller buffer; buffer-based wrapper around ulocimp_getName.
 *
 * @param localeID     source ID, passed through to ulocimp_getName.
 * @param name         output buffer.
 * @param nameCapacity capacity of name in bytes.
 * @param err          in/out error code; must not be NULL. Set to
 *                     U_BUFFER_OVERFLOW_ERROR when the result does not fit.
 * @return the number of bytes the full result needs.
 */
U_CAPI int32_t  U_EXPORT2
uloc_getName(const char* localeID,
             char* name,
             int32_t nameCapacity,
             UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    CheckedArrayByteSink sink(name, nameCapacity);
    ulocimp_getName(localeID, sink, err);

    const int32_t length = sink.NumberOfBytesAppended();

    if (U_SUCCESS(*err)) {
        if (sink.Overflowed()) {
            *err = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(name, nameCapacity, length, err);
        }
    }

    return length;
}
1896
1897
/**
 * ByteSink flavor of uloc_getName: runs _canonicalize with no option bits
 * set (level 1, keywords kept).
 */
U_CAPI void U_EXPORT2
ulocimp_getName(const char* localeID,
                ByteSink& sink,
                UErrorCode* err)
{
    const uint32_t noOptions = 0;
    _canonicalize(localeID, sink, noOptions, err);
}
1904
1905
/**
 * Writes the level-1 normalized form of a locale ID without its keywords
 * into a caller buffer; buffer-based wrapper around ulocimp_getBaseName.
 *
 * @param localeID     source ID, passed through to ulocimp_getBaseName.
 * @param name         output buffer.
 * @param nameCapacity capacity of name in bytes.
 * @param err          in/out error code; must not be NULL. Set to
 *                     U_BUFFER_OVERFLOW_ERROR when the result does not fit.
 * @return the number of bytes the full result needs.
 */
U_CAPI int32_t  U_EXPORT2
uloc_getBaseName(const char* localeID,
                 char* name,
                 int32_t nameCapacity,
                 UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    CheckedArrayByteSink sink(name, nameCapacity);
    ulocimp_getBaseName(localeID, sink, err);

    const int32_t length = sink.NumberOfBytesAppended();

    if (U_SUCCESS(*err)) {
        if (sink.Overflowed()) {
            *err = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(name, nameCapacity, length, err);
        }
    }

    return length;
}
1932
1933
/**
 * ByteSink flavor of uloc_getBaseName: runs _canonicalize with only
 * _ULOC_STRIP_KEYWORDS set (level 1, keywords dropped).
 */
U_CAPI void U_EXPORT2
ulocimp_getBaseName(const char* localeID,
                    ByteSink& sink,
                    UErrorCode* err)
{
    const uint32_t options = _ULOC_STRIP_KEYWORDS;
    _canonicalize(localeID, sink, options, err);
}
1940
1941
/**
 * Writes the level-2 canonical form of a locale ID into a caller buffer;
 * buffer-based wrapper around ulocimp_canonicalize.
 *
 * @param localeID     source ID, passed through to ulocimp_canonicalize.
 * @param name         output buffer.
 * @param nameCapacity capacity of name in bytes.
 * @param err          in/out error code; must not be NULL. Set to
 *                     U_BUFFER_OVERFLOW_ERROR when the result does not fit.
 * @return the number of bytes the full result needs.
 */
U_CAPI int32_t  U_EXPORT2
uloc_canonicalize(const char* localeID,
                  char* name,
                  int32_t nameCapacity,
                  UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    CheckedArrayByteSink sink(name, nameCapacity);
    ulocimp_canonicalize(localeID, sink, err);

    const int32_t length = sink.NumberOfBytesAppended();

    if (U_SUCCESS(*err)) {
        if (sink.Overflowed()) {
            *err = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(name, nameCapacity, length, err);
        }
    }

    return length;
}
1968
1969
/**
 * ByteSink flavor of uloc_canonicalize: runs _canonicalize with only
 * _ULOC_CANONICALIZE set (level 2, keywords kept).
 */
U_CAPI void U_EXPORT2
ulocimp_canonicalize(const char* localeID,
                     ByteSink& sink,
                     UErrorCode* err)
{
    const uint32_t options = _ULOC_CANONICALIZE;
    _canonicalize(localeID, sink, options, err);
}
1976
1977
/**
 * Maps the language of a locale ID to the entry at the same index in
 * LANGUAGES_3.
 *
 * @param localeID source ID; NULL selects uloc_getDefault().
 * @return the LANGUAGES_3 entry, or "" when the language cannot be
 *         extracted or is not found in LANGUAGES.
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Language(const char* localeID)
{
    char lang[ULOC_LANG_CAPACITY];
    UErrorCode err = U_ZERO_ERROR;

    if (localeID == nullptr) {
        localeID = uloc_getDefault();
    }

    uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
    if (U_FAILURE(err)) {
        return "";
    }

    const int16_t index = _findIndex(LANGUAGES, lang);
    return (index < 0) ? "" : LANGUAGES_3[index];
}
1996
1997
/**
 * Maps the country of a locale ID to the entry at the same index in
 * COUNTRIES_3.
 *
 * @param localeID source ID; NULL selects uloc_getDefault().
 * @return the COUNTRIES_3 entry, or "" when the country cannot be
 *         extracted or is not found in COUNTRIES.
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Country(const char* localeID)
{
    /* The buffer is sized with the language capacity constant, as in the original code. */
    char cntry[ULOC_LANG_CAPACITY];
    UErrorCode err = U_ZERO_ERROR;

    if (localeID == nullptr) {
        localeID = uloc_getDefault();
    }

    uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
    if (U_FAILURE(err)) {
        return "";
    }

    const int16_t index = _findIndex(COUNTRIES, cntry);
    return (index < 0) ? "" : COUNTRIES_3[index];
}
2017
2018
/**
 * Maps a locale ID to a Windows LCID.
 *
 * The platform lookup is tried first. If it yields nothing, the ICU tables
 * are used (uprv_convertToLCID). Every keyword other than "collation" is
 * removed before that lookup when the ID can be rebuilt as
 * "<base name>@collation=<value>"; otherwise the ID is passed unchanged.
 *
 * @param localeID source ID.
 * @return the LCID, or 0 for a NULL or shorter-than-2-character ID or when
 *         an intermediate step fails.
 */
U_CAPI uint32_t  U_EXPORT2
uloc_getLCID(const char* localeID)
{
    UErrorCode status = U_ZERO_ERROR;
    char       langID[ULOC_FULLNAME_CAPACITY];

    /* Check for incomplete id. */
    if (localeID == nullptr || uprv_strlen(localeID) < 2) {
        return 0;
    }

    // First, attempt Windows platform lookup if available, but fall
    // through to catch any special cases (ICU vs Windows name differences).
    const uint32_t platformLcid = uprv_convertToLCIDPlatform(localeID, &status);
    if (U_FAILURE(status)) {
        return 0;
    }
    if (platformLcid != 0) {
        // Windows found an LCID, return that
        return platformLcid;
    }

    uloc_getLanguage(localeID, langID, sizeof(langID), &status);
    if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
        return 0;
    }

    if (uprv_strchr(localeID, '@') != nullptr) {
        // uprv_convertToLCID does not support keywords other than collation.
        // Remove all keywords except collation.
        char collationOnlyID[ULOC_FULLNAME_CAPACITY];

        CharString collVal;
        {
            CharStringByteSink sink(&collVal);
            ulocimp_getKeywordValue(localeID, "collation", sink, &status);
        }

        if (U_SUCCESS(status) && !collVal.isEmpty()) {
            const int32_t baseLength = uloc_getBaseName(localeID, collationOnlyID,
                UPRV_LENGTHOF(collationOnlyID) - 1, &status);

            if (U_SUCCESS(status) && baseLength > 0) {
                collationOnlyID[baseLength] = 0;

                const int32_t fullLength = uloc_setKeywordValue("collation", collVal.data(), collationOnlyID,
                    UPRV_LENGTHOF(collationOnlyID) - baseLength - 1, &status);

                if (U_SUCCESS(status) && fullLength > 0) {
                    collationOnlyID[fullLength] = 0;
                    return uprv_convertToLCID(langID, collationOnlyID, &status);
                }
            }
        }

        // fall through - all keywords are simply ignored
        status = U_ZERO_ERROR;
    }

    return uprv_convertToLCID(langID, localeID, &status);
}
2081
2082
/**
 * Converts a Windows LCID to a locale ID by delegating to
 * uprv_convertToPosix; all arguments and the result are passed through
 * unchanged.
 */
U_CAPI int32_t U_EXPORT2
uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
                UErrorCode *status)
{
    const int32_t length = uprv_convertToPosix(hostid, locale, localeCapacity, status);
    return length;
}
2088
2089
/* ### Default locale **************************************************/
2090
2091
/** C API accessor for the default locale ID; forwards to locale_get_default(). */
U_CAPI const char*  U_EXPORT2
uloc_getDefault()
{
    const char* defaultID = locale_get_default();
    return defaultID;
}
2096
2097
/**
 * Sets the default locale by forwarding to locale_set_default().
 *
 * @param newDefaultLocale ID handed to locale_set_default unchanged.
 * @param err              must not be NULL; the call is skipped when it
 *                         already holds a failure. It is never written:
 *                         the error code isn't currently used for anything
 *                         else by this function.
 */
U_CAPI void  U_EXPORT2
uloc_setDefault(const char*   newDefaultLocale,
             UErrorCode* err)
{
    if (U_SUCCESS(*err)) {
        /* propagate change to C++ */
        locale_set_default(newDefaultLocale);
    }
}
2108
2109
/**
2110
 * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
2111
 * to an array of pointers to arrays of char.  All of these pointers are owned
2112
 * by ICU-- do not delete them, and do not write through them.  The array is
2113
 * terminated with a null pointer.
2114
 */
2115
U_CAPI const char* const*  U_EXPORT2
2116
uloc_getISOLanguages()
2117
0
{
2118
0
    return LANGUAGES;
2119
0
}
2120
2121
/**
2122
 * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2123
 * pointer to an array of pointers to arrays of char.  All of these pointers are
2124
 * owned by ICU-- do not delete them, and do not write through them.  The array is
2125
 * terminated with a null pointer.
2126
 */
2127
U_CAPI const char* const*  U_EXPORT2
2128
uloc_getISOCountries()
2129
0
{
2130
0
    return COUNTRIES;
2131
0
}
2132
2133
/**
 * Converts a keyword to its BCP47 Unicode locale key.
 *
 * @return the key from ulocimp_toBcpKey when it has one; otherwise the
 *         input pointer itself if ultag_isUnicodeLocaleKey accepts it;
 *         otherwise NULL.
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char* keyword)
{
    const char* mapped = ulocimp_toBcpKey(keyword);
    if (mapped != nullptr) {
        return mapped;
    }
    // unknown keyword, but it is returned as-is when its syntax is fine
    return ultag_isUnicodeLocaleKey(keyword, -1) ? keyword : nullptr;
}
2143
2144
/**
 * Converts a keyword value to its BCP47 Unicode locale type.
 *
 * @return the type from ulocimp_toBcpType when it has one; otherwise the
 *         input value pointer itself if ultag_isUnicodeLocaleType accepts
 *         it; otherwise NULL.
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char* keyword, const char* value)
{
    const char* mapped = ulocimp_toBcpType(keyword, value, nullptr, nullptr);
    if (mapped != nullptr) {
        return mapped;
    }
    // unknown type, but it is returned as-is when its syntax is fine
    return ultag_isUnicodeLocaleType(value, -1) ? value : nullptr;
}
2154
2155
/*
 * Returns true when every character of legacyKey satisfies
 * UPRV_ISALPHANUM. An empty string is accepted.
 */
static UBool
isWellFormedLegacyKey(const char* legacyKey)
{
    for (const char* ch = legacyKey; *ch != 0; ++ch) {
        if (!UPRV_ISALPHANUM(*ch)) {
            return false;
        }
    }
    return true;
}
2167
2168
/*
 * Returns true when legacyType is one or more non-empty alphanumeric runs
 * separated by single '_', '/' or '-' characters. Leading, trailing or
 * doubled separators, any other character, and the empty string are all
 * rejected.
 */
static UBool
isWellFormedLegacyType(const char* legacyType)
{
    int32_t runLength = 0;  /* alphanumerics seen since the last separator */
    for (const char* ch = legacyType; *ch != 0; ++ch) {
        const char c = *ch;
        if (c == '_' || c == '/' || c == '-') {
            if (runLength == 0) {
                return false;
            }
            runLength = 0;
        } else if (UPRV_ISALPHANUM(c)) {
            ++runLength;
        } else {
            return false;
        }
    }
    return (runLength != 0);
}
2188
2189
/**
 * Converts a keyword to its legacy (old-syntax) locale key.
 *
 * @return the key from ulocimp_toLegacyKey when it has one; otherwise the
 *         input pointer itself if it is well-formed in the legacy syntax;
 *         otherwise NULL.
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword)
{
    const char* mapped = ulocimp_toLegacyKey(keyword);
    if (mapped != nullptr) {
        return mapped;
    }

    // Unknown key: check whether it is well-formed with the legacy locale syntax.
    //
    // Note:
    //  LDML/CLDR provides some definition of keyword syntax in
    //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
    //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
    //  Keys can only consist of [0-9a-zA-Z].
    return isWellFormedLegacyKey(keyword) ? keyword : nullptr;
}
2207
2208
/**
 * Converts a keyword value to its legacy (old-syntax) locale type.
 *
 * @return the type from ulocimp_toLegacyType when it has one; otherwise the
 *         input value pointer itself if it is well-formed in the legacy
 *         syntax; otherwise NULL.
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char* keyword, const char* value)
{
    const char* mapped = ulocimp_toLegacyType(keyword, value, nullptr, nullptr);
    if (mapped != nullptr) {
        return mapped;
    }

    // Unknown type: check whether it is well-formed with the legacy locale syntax.
    //
    // Note:
    //  LDML/CLDR provides some definition of keyword syntax in
    //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
    //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
    //  Values (types) consist of [0-9a-zA-Z]; isWellFormedLegacyType also
    //  accepts '/', '_' and '-' in the middle (e.g. "Asia/Tel_Aviv").
    return isWellFormedLegacyType(value) ? value : nullptr;
}
2227
2228
/*eof*/