Coverage Report

Created: 2022-11-20 06:20

/src/icu/icu4c/source/common/uloc.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (C) 1997-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*
9
* File ULOC.CPP
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   04/01/97    aliu        Creation.
15
*   08/21/98    stephen     JDK 1.2 sync
16
*   12/08/98    rtg         New Locale implementation and C API
17
*   03/15/99    damiba      overhaul.
18
*   04/06/99    stephen     changed setDefault() to realloc and copy
19
*   06/14/99    stephen     Changed calls to ures_open for new params
20
*   07/21/99    stephen     Modified setDefault() to propagate to C++
21
*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
22
*                           brought canonicalization code into line with spec
23
*****************************************************************************/
24
25
/*
26
   POSIX's locale format, from putil.c: [no spaces]
27
28
     ll [ _CC ] [ . MM ] [ @ VV]
29
30
     l = lang, C = ctry, M = charmap, V = variant
31
*/
32
33
#include "unicode/bytestream.h"
34
#include "unicode/errorcode.h"
35
#include "unicode/stringpiece.h"
36
#include "unicode/utypes.h"
37
#include "unicode/ustring.h"
38
#include "unicode/uloc.h"
39
40
#include "bytesinkutil.h"
41
#include "putilimp.h"
42
#include "ustr_imp.h"
43
#include "ulocimp.h"
44
#include "umutex.h"
45
#include "cstring.h"
46
#include "cmemory.h"
47
#include "locmap.h"
48
#include "uarrsort.h"
49
#include "uenumimp.h"
50
#include "uassert.h"
51
#include "charstr.h"
52
53
U_NAMESPACE_USE
54
55
/* ### Declarations **************************************************/
56
57
/* Locale stuff from locid.cpp */
58
U_CFUNC void locale_set_default(const char *id);
59
U_CFUNC const char *locale_get_default(void);
60
61
/* ### Data tables **************************************************/
62
63
/**
 * Language codes known to this file: the 2-letter code where one exists,
 * otherwise the 3-letter code.
 *
 * Layout: a first list, a NULL entry, a second list, and a final NULL entry.
 * The first list is what user code sees when this array is returned by API;
 * the second list holds codes that are supported but not exposed.
 *
 * Invariants:
 *  - The first list must stay in sorted order.
 *  - LANGUAGES[i] and LANGUAGES_3[i] must describe the same language for
 *    every i, so any edit here needs a matching edit in LANGUAGES_3.
 *
 * Notes:
 *  - Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to include
 *    the revisions up to 2001/7/27 *CWB*.
 *  - The 3-character codes are the terminology codes like RFC 3066, which is
 *    compatible with prior ICU codes.
 *  - "in", "iw", "ji", "jw" and "sh" have been withdrawn.  They are kept, but
 *    in the second list, because their 3-character codes are duplicates;
 *    this avoids bad searches going from 3- to 2-character codes.
 *  - The range qaa-qtz is reserved for local use.
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES[] = {
    /* a */
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
    /* b */
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgc", "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
    /* c */
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
    "cs",  "csb", "cu",  "cv",  "cy",
    /* d */
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz",  "dzg",
    /* e */
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
    "ext",
    /* f */
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    /* g */
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
    "gur", "guz", "gv",  "gwi",
    /* h */
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
    "hup", "hy",  "hz",
    /* i */
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
    /* j */
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    /* k */
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
    "kv",  "kw",  "ky",
    /* l */
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
    /* m */
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "ml",  "mn",  "mnc", "mni",
    "moh", "mos", "mr",  "mrj",
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my",  "mye", "myv", "mzn",
    /* n */
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    /* o */
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
    /* p */
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
    "pon", "prg", "pro", "ps",  "pt",
    /* q */
    "qu",  "quc", "qug",
    /* r */
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
    "rw",  "rwk",
    /* s */
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
    "sv",  "sw",  "swb", "syc", "syr", "szl",
    /* t */
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr",
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
    /* u */
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    /* v */
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
    "vot", "vro", "vun",
    /* w */
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
    /* x */
    "xal", "xh",  "xmf", "xog",
    /* y */
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
    /* z */
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
    "zun", "zxx", "zza",
    NULL, /* end of the list visible through API */
    /* obsolete language codes */
    "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",
    NULL  /* end of table */
};
187
188
/*
 * Deprecated 2-letter language codes, followed by two NULL entries.
 * The entries line up one-to-one with REPLACEMENT_LANGUAGES (same count and
 * order); presumably entry i here is replaced by entry i there -- confirm
 * against the lookup code, which is not part of this table.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in",
    "iw",
    "ji",
    "jw",
    "mo",
    NULL,
    NULL
};
191
/*
 * Current language codes, followed by two NULL entries.  The entries line up
 * one-to-one with DEPRECATED_LANGUAGES (same count and order); presumably
 * entry i here is the replacement for deprecated entry i -- confirm against
 * the lookup code, which is not part of this table.
 */
static const char* const REPLACEMENT_LANGUAGES[]={
    "id",
    "he",
    "yi",
    "jv",
    "ro",
    NULL,
    NULL
};
194
195
/**
 * 3-letter language codes, index-aligned with LANGUAGES.
 *
 * This lookup table is used to convert 3-letter language codes to their
 * 2-letter equivalent, where one exists.  For every valid i, LANGUAGES[i]
 * and LANGUAGES_3[i] must refer to the same language, so the two tables have
 * to be edited together.  A language with no 2-letter code has its 3-letter
 * code in both tables at the same index.
 *
 * Layout mirrors LANGUAGES: a first list, a NULL entry, a second list, and a
 * final NULL entry; the two lists correspond to the two lists in LANGUAGES.
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES_3[] = {
    /* a */
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    /* b */
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgc", "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    /* c */
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    /* d */
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    /* e */
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    /* f */
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    /* g */
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    /* h */
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    /* i */
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    /* j */
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    /* k */
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    /* l */
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    /* m */
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni",
    "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    /* n */
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    /* o */
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    /* p */
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    /* q */
    "que", "quc", "qug",
    /* r */
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    /* s */
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "syc", "syr", "szl",
    /* t */
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    /* u */
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    /* v */
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    /* w */
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    /* x */
    "xal", "xho", "xmf", "xog",
    /* y */
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    /* z */
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
    NULL, /* end of the first list */
/*  "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  */
    "ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl",
    NULL  /* end of table */
};
305
306
/**
 * 2-letter country codes.
 *
 * Layout: a first list, a NULL entry, a second list, and a final NULL entry.
 * The first list is what user code sees when this array is returned by API;
 * the second list holds codes that are supported but not exposed.
 *
 * Invariants:
 *  - The first list must stay in sorted order.
 *  - COUNTRIES[i] and COUNTRIES_3[i] must describe the same country for
 *    every i, so any edit here needs a matching edit in COUNTRIES_3.
 *
 * Notes:
 *  - ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 *    http://www.evertype.com/standards/iso3166/iso3166-1-en.html ; the new
 *    codes were added and the old ones kept for compatibility.  Updated to
 *    include the 1999/12/03 revisions *CWB*.
 *  - RO(ROM) is now RO(ROU) according to
 *    http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 */
static const char * const COUNTRIES[] = {
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
    NULL, /* end of the list visible through API */
    /* obsolete country codes */
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",
    NULL  /* end of table */
};
365
366
/*
 * Deprecated country list: sixteen 2-letter codes followed by two NULL
 * entries.  REPLACEMENT_COUNTRIES lists the corresponding current codes in
 * the same order.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH",
    "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR",
    NULL,
    NULL
};
369
/*
 * Replacement country codes: sixteen current 2-letter codes followed by two
 * NULL entries.  Each row below is preceded by a comment showing the
 * deprecated codes (from DEPRECATED_COUNTRIES) that occupy the same
 * positions.
 */
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU",
/*  "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR"  */
    "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD",
    NULL,
    NULL
};
373
374
/**
 * 3-letter country codes, index-aligned with COUNTRIES.
 *
 * This lookup table is used to convert 3-letter country codes to their
 * 2-letter equivalent.  For every valid i, COUNTRIES[i] and COUNTRIES_3[i]
 * must refer to the same country, so the two tables have to be edited
 * together.  Each data row is preceded by a comment holding the matching row
 * of COUNTRIES, which makes the pairing easy to check by eye.
 *
 * Layout mirrors COUNTRIES: a first list, a NULL entry, a second list, and a
 * final NULL entry; the two lists correspond to the two lists in COUNTRIES.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM", */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ", */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI", */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV", */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG", */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR", */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK", */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER", */
    "DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR", */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL", */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU", */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU", */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS", */
    "XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI", */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA", */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU", */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK", */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS", */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA", */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP", */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG", */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT", */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA", */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ", */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV", */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ", */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV", */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ", */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF", */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW", */
    "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
    NULL, /* end of the first list */
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
    NULL  /* end of table */
};
453
454
/* One canonicalization rule: a locale ID and the ID it is rewritten to. */
typedef struct CanonicalizationMap {
    /* input ID */
    const char *id;
    /* canonicalized output ID */
    const char *canonicalID;
} CanonicalizationMap;
458
459
/**
460
 * A map to canonicalize locale IDs.  This handles a variety of
461
 * different semantic kinds of transformations.
462
 */
463
static const CanonicalizationMap CANONICALIZE_MAP[] = {
464
    { "art__LOJBAN",    "jbo" }, /* registered name */
465
    { "hy__AREVELA",    "hy" }, /* Registered IANA variant */
466
    { "hy__AREVMDA",    "hyw" }, /* Registered IANA variant */
467
    { "zh__GUOYU",      "zh" }, /* registered name */
468
    { "zh__HAKKA",      "hak" }, /* registered name */
469
    { "zh__XIANG",      "hsn" }, /* registered name */
470
    // subtags with 3 chars won't be treated as variants.
471
    { "zh_GAN",         "gan" }, /* registered name */
472
    { "zh_MIN_NAN",     "nan" }, /* registered name */
473
    { "zh_WUU",         "wuu" }, /* registered name */
474
    { "zh_YUE",         "yue" }, /* registered name */
475
};
476
477
/* ### BCP47 Conversion *******************************************/
478
/*
 * Test whether a locale ID looks like a BCP47 tag carrying an extension:
 * id is non-null, contains no '@', and the value reported by
 * getShortestSubtagLength() for it is exactly 1.
 *
 * The argument is evaluated more than once, so pass a plain expression with
 * no side effects.  The macro uses its own parameter throughout and does not
 * depend on a variable named localeID existing at the call site.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
480
/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
481
static const char* _ConvertBCP47(
482
        const char* id, char* buffer, int32_t length,
483
15.0k
        UErrorCode* err, int32_t* pLocaleIdSize) {
484
15.0k
    const char* finalID;
485
15.0k
    int32_t localeIDSize = uloc_forLanguageTag(id, buffer, length, NULL, err);
486
15.0k
    if (localeIDSize <= 0 || U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) {
487
5.82k
        finalID=id;
488
5.82k
        if (*err == U_STRING_NOT_TERMINATED_WARNING) {
489
820
            *err = U_BUFFER_OVERFLOW_ERROR;
490
820
        }
491
9.19k
    } else {
492
9.19k
        finalID=buffer;
493
9.19k
    }
494
15.0k
    if (pLocaleIdSize != nullptr) {
495
12.8k
        *pLocaleIdSize = localeIDSize;
496
12.8k
    }
497
15.0k
    return finalID;
498
15.0k
}
499
/* Gets the size of the shortest subtag in the given localeID. */
500
187k
static int32_t getShortestSubtagLength(const char *localeID) {
501
187k
    int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
502
187k
    int32_t length = localeIDLength;
503
187k
    int32_t tmpLength = 0;
504
187k
    int32_t i;
505
187k
    UBool reset = true;
506
507
401M
    for (i = 0; i < localeIDLength; i++) {
508
401M
        if (localeID[i] != '_' && localeID[i] != '-') {
509
368M
            if (reset) {
510
33.4M
                tmpLength = 0;
511
33.4M
                reset = false;
512
33.4M
            }
513
368M
            tmpLength++;
514
368M
        } else {
515
33.4M
            if (tmpLength != 0 && tmpLength < length) {
516
68.1k
                length = tmpLength;
517
68.1k
            }
518
33.4M
            reset = true;
519
33.4M
        }
520
401M
    }
521
522
187k
    return length;
523
187k
}
524
525
/* ### Keywords **************************************************/
526
367k
/* True when c is an ASCII decimal digit, '0' through '9'. */
#define UPRV_ISDIGIT(c) ((c) >= '0' && (c) <= '9')
/* True when c is an ASCII letter (per uprv_isASCIILetter) or an ASCII digit. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c))
/* Punctuation/symbols allowed in legacy key values */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size in bytes of the buffer that holds one canonicalized keyword name. */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Capacity of the keyword list that is built while parsing a locale ID. */
#define ULOC_MAX_NO_KEYWORDS 25
533
534
/*
 * Finds where the keyword section of a locale ID begins.
 *
 * Returns a pointer to the first '@' in localeID, or NULL when there is
 * none.  On EBCDIC builds, when no '@' is found, each of the alternative
 * code points listed below is searched for in turn and the first hit is
 * returned.
 */
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID) {
    const char *atSign = uprv_strchr(localeID, '@');
    if (atSign != NULL) {
        return atSign;
    }
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    {
        /* We do this because the @ sign is variant, and the @ sign used on one
        EBCDIC machine won't be compiled the same way on other EBCDIC based
        machines. */
        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
        for (const uint8_t *candidate = ebcdicSigns; *candidate != 0; ++candidate) {
            const char *found = uprv_strchr(localeID, *candidate);
            if (found != NULL) {
                return found;
            }
        }
    }
#endif
    return NULL;
}
557
558
/**
559
 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
560
 * @param keywordName incoming name to be canonicalized
561
 * @param status return status (keyword too long)
562
 * @return length of the keyword name
563
 */
564
static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
565
19.9k
{
566
19.9k
  int32_t keywordNameLen = 0;
567
568
129k
  for (; *keywordName != 0; keywordName++) {
569
109k
    if (!UPRV_ISALPHANUM(*keywordName)) {
570
0
      *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
571
0
      return 0;
572
0
    }
573
109k
    if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
574
109k
      buf[keywordNameLen++] = uprv_tolower(*keywordName);
575
109k
    } else {
576
      /* keyword name too long for internal buffer */
577
0
      *status = U_INTERNAL_PROGRAM_ERROR;
578
0
      return 0;
579
0
    }
580
109k
  }
581
19.9k
  if (keywordNameLen == 0) {
582
0
    *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
583
0
    return 0;
584
0
  }
585
19.9k
  buf[keywordNameLen] = 0; /* terminate */
586
587
19.9k
  return keywordNameLen;
588
19.9k
}
589
590
typedef struct {
591
    char keyword[ULOC_KEYWORD_BUFFER_LEN];
592
    int32_t keywordLen;
593
    const char *valueStart;
594
    int32_t valueLen;
595
} KeywordStruct;
596
597
static int32_t U_CALLCONV
598
139k
compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
599
139k
    const char* leftString = ((const KeywordStruct *)left)->keyword;
600
139k
    const char* rightString = ((const KeywordStruct *)right)->keyword;
601
139k
    return uprv_strcmp(leftString, rightString);
602
139k
}
603
604
/*
 * Parse the keyword section of a locale ID and write it to sink in sorted
 * order with lower-cased keyword names.
 *
 * localeID   points just past the '@' that starts the keyword section
 * prev       the character preceding localeID; nothing is done unless it is '@'
 * sink       receives "key=value;key=value" when valuesToo is true, otherwise
 *            each key followed by a NUL byte
 * valuesToo  whether values are emitted along with the keys
 * status     set to U_INVALID_FORMAT_ERROR for a pair without '=', an empty
 *            key or an empty value, and to U_INTERNAL_PROGRAM_ERROR when a key
 *            is too long or there are more than ULOC_MAX_NO_KEYWORDS pairs;
 *            nothing is written to sink in those cases
 *
 * Spaces around keys and values are tolerated and trimmed. A keyword that
 * repeats an earlier one is ignored.
 */
U_CFUNC void
ulocimp_getKeywords(const char *localeID,
                    char prev,
                    ByteSink& sink,
                    UBool valuesToo,
                    UErrorCode *status)
{
    if (prev != '@') {
        return; /* not at the start of a keyword definition */
    }

    KeywordStruct entries[ULOC_MAX_NO_KEYWORDS];
    const int32_t capacity = ULOC_MAX_NO_KEYWORDS;
    int32_t entryCount = 0;
    const char *cursor = localeID;

    /* Phase 1: grab the pairs, trimming spaces and lower-casing the keys. */
    do {
        while (*cursor == ' ') {
            ++cursor;
        }
        if (*cursor == 0) {
            break; /* trailing "; " */
        }
        if (entryCount == capacity) {
            *status = U_INTERNAL_PROGRAM_ERROR;
            return;
        }

        const char *equals = uprv_strchr(cursor, '=');
        const char *separator = uprv_strchr(cursor, ';');
        /* "foo@currency" (no '=') and "foo@currency;collation=pinyin"
         * (';' ahead of the first '=') are both illegal */
        if (equals == nullptr || (separator != nullptr && separator < equals)) {
            *status = U_INVALID_FORMAT_ERROR;
            return;
        }
        if (equals - cursor >= ULOC_KEYWORD_BUFFER_LEN) {
            /* keyword name too long for internal buffer */
            *status = U_INTERNAL_PROGRAM_ERROR;
            return;
        }

        KeywordStruct &entry = entries[entryCount];

        /* key: drop every space, lower-case the rest */
        int32_t nameLen = 0;
        for (const char *p = cursor; p < equals; ++p) {
            if (*p != ' ') {
                entry.keyword[nameLen++] = uprv_tolower(*p);
            }
        }
        if (nameLen == 0) {
            *status = U_INVALID_FORMAT_ERROR; /* zero-length keyword */
            return;
        }
        entry.keyword[nameLen] = 0;
        entry.keywordLen = nameLen;

        /* value: starts after '=' and any spaces that follow it */
        const char *value = equals + 1;
        while (*value == ' ') {
            ++value;
        }
        if (*value == 0 || value == separator) {
            *status = U_INVALID_FORMAT_ERROR; /* premature end or empty value */
            return;
        }
        entry.valueStart = value;

        if (separator != nullptr) {
            /* value runs up to the ';', minus trailing spaces; the first
             * value character is not a space, so this scan stops there */
            const char *valueEnd = separator;
            while (*(valueEnd - 1) == ' ') {
                --valueEnd;
            }
            entry.valueLen = (int32_t)(valueEnd - value);
            cursor = separator + 1;
        } else {
            /* last pair: value runs to the end of the string */
            int32_t len = (int32_t)uprv_strlen(value);
            while (len != 0 && value[len - 1] == ' ') {
                --len;
            }
            entry.valueLen = len;
            cursor = nullptr;
        }

        /* keep the entry only if its key has not been seen before */
        UBool duplicate = false;
        for (int32_t k = 0; k < entryCount; ++k) {
            if (uprv_strcmp(entries[k].keyword, entry.keyword) == 0) {
                duplicate = true;
                break;
            }
        }
        if (!duplicate) {
            ++entryCount;
        }
    } while (cursor != nullptr);

    /* Phase 2: sort by key. */
    uprv_sortArray(entries, entryCount, sizeof(KeywordStruct), compareKeywordStructs, nullptr, false, status);

    /* Phase 3: emit. */
    for (int32_t k = 0; k < entryCount; ++k) {
        const KeywordStruct &kw = entries[k];
        sink.Append(kw.keyword, kw.keywordLen);
        if (!valuesToo) {
            sink.Append("\0", 1);
            continue;
        }
        sink.Append("=", 1);
        sink.Append(kw.valueStart, kw.valueLen);
        if (k + 1 < entryCount) {
            sink.Append(";", 1);
        }
    }
}
724
725
/*
 * C-buffer front end for ulocimp_getKeywordValue(): writes the value of
 * keywordName in localeID into buffer.
 *
 * Returns 0 immediately when *status already indicates failure. Otherwise
 * returns the number of bytes the lookup appended. If the lookup succeeded
 * but the value did not fit, *status becomes U_BUFFER_OVERFLOW_ERROR;
 * if it fit, termination is delegated to u_terminateChars().
 */
U_CAPI int32_t U_EXPORT2
uloc_getKeywordValue(const char* localeID,
                     const char* keywordName,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status)
{
    if (U_FAILURE(*status)) {
        return 0;
    }

    CheckedArrayByteSink sink(buffer, bufferCapacity);
    ulocimp_getKeywordValue(localeID, keywordName, sink, status);

    const int32_t length = sink.NumberOfBytesAppended();

    if (U_SUCCESS(*status)) {
        if (sink.Overflowed()) {
            *status = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(buffer, bufferCapacity, length, status);
        }
    }

    return length;
}
752
753
/*
 * Look up keywordName in the keyword section of localeID and append its value,
 * unchanged in case, to sink.
 *
 * Does nothing when status is null, *status already indicates failure, or
 * localeID is null. A locale without a keyword section, or without the
 * requested keyword, appends nothing and leaves *status untouched.
 *
 * *status is set to U_ILLEGAL_ARGUMENT_ERROR when keywordName is null, empty
 * or not alphanumeric, or when an entry examined in the locale has no '=',
 * an empty or non-alphanumeric key, or (for the matching entry) an empty or
 * malformed value; U_INTERNAL_PROGRAM_ERROR when a key is too long for the
 * internal buffer. Value characters validated before a malformed one have
 * already been appended to sink when the error is reported.
 */
U_CAPI void U_EXPORT2
ulocimp_getKeywordValue(const char* localeID,
                        const char* keywordName,
                        icu::ByteSink& sink,
                        UErrorCode* status)
{
    if (status == nullptr || U_FAILURE(*status) || localeID == nullptr) {
        return;
    }
    if (keywordName == nullptr || keywordName[0] == 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    char wantedName[ULOC_KEYWORD_BUFFER_LEN];
    locale_canonKeywordName(wantedName, keywordName, status);
    if (U_FAILURE(*status)) {
        return;
    }

    /* BCP 47 style IDs are converted into tempBuffer before searching */
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char *effectiveID = localeID;
    if (_hasBCP47Extension(localeID)) {
        effectiveID = _ConvertBCP47(localeID, tempBuffer,
                                    sizeof(tempBuffer), status, nullptr);
    }

    char entryName[ULOC_KEYWORD_BUFFER_LEN];

    /* cursor sits on the '@' or ';' in front of the next entry, or is null
     * when there are no (more) entries */
    const char *cursor = locale_getKeywordsStart(effectiveID);
    while (cursor != nullptr) {
        ++cursor; /* skip @ or ; */

        const char *equals = uprv_strchr(cursor, '=');
        if (equals == nullptr) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
            return;
        }

        /* trim spaces around the key (TC decided to tolerate these) */
        while (*cursor == ' ') {
            ++cursor;
        }
        const char *nameEnd = equals;
        while (nameEnd > cursor && *(nameEnd - 1) == ' ') {
            --nameEnd;
        }
        if (cursor == nameEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
            return;
        }

        /* copy & normalize the key from the locale */
        int32_t nameLen = 0;
        for (; cursor < nameEnd; ++cursor) {
            if (!UPRV_ISALPHANUM(*cursor)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
                return;
            }
            if (nameLen >= ULOC_KEYWORD_BUFFER_LEN - 1) {
                *status = U_INTERNAL_PROGRAM_ERROR; /* keyword name too long for internal buffer */
                return;
            }
            entryName[nameLen++] = uprv_tolower(*cursor);
        }
        entryName[nameLen] = 0;

        /* advance to the separator before the following entry, if any */
        cursor = uprv_strchr(equals, ';');

        if (uprv_strcmp(wantedName, entryName) != 0) {
            continue;
        }

        /* Found it: trim spaces around the value ... */
        const char *value = equals + 1;
        while (*value == ' ') {
            ++value;
        }
        const char *valueEnd = (cursor != nullptr) ? cursor : value + uprv_strlen(value);
        while (valueEnd > value && *(valueEnd - 1) == ' ') {
            --valueEnd;
        }
        if (value == valueEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
            return;
        }

        /* ... then copy it as-is, checking well-formedness along the way */
        for (; value < valueEnd; ++value) {
            if (!UPRV_ISALPHANUM(*value) && !UPRV_OK_VALUE_PUNCTUATION(*value)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                return;
            }
            sink.Append(value, 1);
        }
        return;
    }
}
863
864
/*
 * Set, replace or remove one keyword in the NUL-terminated locale ID held in
 * buffer, editing it in place.
 *
 * keywordName     keyword to set; must be non-empty and alphanumeric
 * keywordValue    new value; null or empty requests removal of the keyword
 * buffer          locale ID to modify
 * bufferCapacity  size of buffer; must be greater than 1 and must not be
 *                 smaller than the current string length
 * status          U_STRING_NOT_TERMINATED_WARNING on entry is reset to
 *                 U_ZERO_ERROR; this function never reports that warning
 *
 * Returns
 *   -1  if *status already indicated failure on entry;
 *    0  on an argument error or a malformed keyword section in buffer
 *       (U_ILLEGAL_ARGUMENT_ERROR / U_INTERNAL_PROGRAM_ERROR);
 *   the original length when nothing was changed (removal of an absent
 *       keyword, or an internal append failure);
 *   the required length with U_BUFFER_OVERFLOW_ERROR when the result plus its
 *       terminator does not fit, leaving buffer untouched;
 *   otherwise the length of the updated locale ID.
 *
 * When the locale already has keywords, the rewritten section lower-cases
 * the keys, strips surrounding spaces, and inserts a new keyword ahead of
 * the first existing key that compares greater.
 */
U_CAPI int32_t U_EXPORT2
uloc_setKeywordValue(const char* keywordName,
                     const char* keywordValue,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status)
{
    if (U_FAILURE(*status)) {
        return -1;
    }
    if (*status == U_STRING_NOT_TERMINATED_WARNING) {
        *status = U_ZERO_ERROR;
    }
    if (keywordName == nullptr || keywordName[0] == 0 || bufferCapacity <= 1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    const int32_t bufLen = (int32_t)uprv_strlen(buffer);
    if (bufferCapacity < bufLen) {
        /* The capacity is less than the length?! Is this NULL terminated? */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* canonical form of the key to set */
    char newName[ULOC_KEYWORD_BUFFER_LEN];
    const int32_t newNameLen = locale_canonKeywordName(newName, keywordName, status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    /* validated copy of the value to set; length 0 means "remove" */
    char newValue[ULOC_KEYWORDS_CAPACITY + 1];
    int32_t newValueLen = 0;
    if (keywordValue != nullptr) {
        for (const char *src = keywordValue; *src != 0; ++src) {
            if (!UPRV_ISALPHANUM(*src) && !UPRV_OK_VALUE_PUNCTUATION(*src)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                return 0;
            }
            if (newValueLen >= ULOC_KEYWORDS_CAPACITY) {
                *status = U_INTERNAL_PROGRAM_ERROR; /* keywordValue too long for internal buffer */
                return 0;
            }
            /* case is kept as given */
            newValue[newValueLen++] = *src;
        }
    }
    newValue[newValueLen] = 0;

    char *keywordsAt = (char *)locale_getKeywordsStart(buffer);

    /* Shortcut: no keyword section at all, or a bare trailing '@'. */
    if (keywordsAt == nullptr || keywordsAt[1] == 0) {
        if (newValueLen == 0) {
            /* no keywords = nothing to remove */
            U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
            return bufLen;
        }

        int32_t needLen = bufLen + 1 + newNameLen + 1 + newValueLen;
        char *dest;
        if (keywordsAt != nullptr) {
            --needLen;            /* the '@' is already counted in bufLen */
            dest = keywordsAt;    /* overwrite it in place */
        } else {
            dest = buffer + bufLen;
        }
        if (needLen >= bufferCapacity) {
            *status = U_BUFFER_OVERFLOW_ERROR;
            return needLen; /* no change */
        }
        *dest++ = '@';
        uprv_strcpy(dest, newName);
        dest += newNameLen;
        *dest++ = '=';
        uprv_strcpy(dest, newValue);
        U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
        return needLen;
    }

    /* General case: rebuild the whole keyword section into a scratch string. */
    CharString rebuilt;
    char pairPrefix = '@';   /* '@' before the first pair, ';' afterwards */
    UBool inputHandled = false;
    const auto emitPair = [&](const char *name, int32_t nameLen,
                              const char *value, int32_t valueLen) {
        rebuilt.append(pairPrefix, *status);
        pairPrefix = ';';
        rebuilt.append(name, nameLen, *status);
        rebuilt.append('=', *status);
        rebuilt.append(value, valueLen, *status);
    };

    char entryName[ULOC_KEYWORD_BUFFER_LEN];
    const char *entry = keywordsAt;
    while (entry != nullptr) {
        ++entry; /* skip @ or ; */

        const char *equals = uprv_strchr(entry, '=');
        if (equals == nullptr) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
            return 0;
        }

        /* trim spaces around the key (TC decided to tolerate these) */
        while (*entry == ' ') {
            ++entry;
        }
        const char *nameEnd = equals;
        while (nameEnd > entry && *(nameEnd - 1) == ' ') {
            --nameEnd;
        }
        if (entry == nameEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
            return 0;
        }

        /* copy & normalize the key from the locale */
        int32_t entryNameLen = 0;
        for (; entry < nameEnd; ++entry) {
            if (!UPRV_ISALPHANUM(*entry)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
                return 0;
            }
            if (entryNameLen >= ULOC_KEYWORD_BUFFER_LEN - 1) {
                *status = U_INTERNAL_PROGRAM_ERROR; /* keyword name too long for internal buffer */
                return 0;
            }
            entryName[entryNameLen++] = uprv_tolower(*entry);
        }
        entryName[entryNameLen] = 0;

        const char *nextEntry = uprv_strchr(equals, ';');

        /* trim spaces around the existing value */
        const char *value = equals + 1;
        while (*value == ' ') {
            ++value;
        }
        const char *valueEnd = (nextEntry != nullptr) ? nextEntry : value + uprv_strlen(value);
        while (valueEnd > value && *(valueEnd - 1) == ' ') {
            --valueEnd;
        }
        if (value == valueEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
            return 0;
        }

        const int32_t order = uprv_strcmp(newName, entryName);
        if (order == 0) {
            /* same key: replace the value, or emit nothing to remove it */
            if (newValueLen > 0) {
                emitPair(newName, newNameLen, newValue, newValueLen);
            }
            inputHandled = true;
        } else {
            if (order < 0 && newValueLen > 0 && !inputHandled) {
                /* input key sorts earlier than this entry: insert it here */
                emitPair(newName, newNameLen, newValue, newValueLen);
                inputHandled = true;
            }
            /* carry the existing entry over */
            emitPair(entryName, entryNameLen, value, static_cast<int32_t>(valueEnd - value));
        }
        if (nextEntry == nullptr && newValueLen > 0 && !inputHandled) {
            /* input key sorts after every existing entry: append it */
            emitPair(newName, newNameLen, newValue, newValueLen);
            inputHandled = true;
        }

        entry = nextEntry;
    }

    /* Errors caused by the passed-in locale have already returned 0 above.
     * A failure at this point can only come from the scratch string, and is
     * reported while leaving the locale as it was. The same path handles a
     * request to remove a keyword that is not present. */
    if (!inputHandled || U_FAILURE(*status)) {
        U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
        return bufLen;
    }

    /* length of the part before '@' */
    int32_t needLen = (int32_t)(keywordsAt - buffer);
    /* The result must fit including its terminating NUL: this function
     * promises never to set U_STRING_NOT_TERMINATED_WARNING, so a result that
     * would fit only without the NUL is reported as an overflow and the
     * buffer is not modified. */
    const int32_t appendLength = rebuilt.length();
    if (appendLength >= bufferCapacity - needLen) {
        *status = U_BUFFER_OVERFLOW_ERROR;
        return needLen + appendLength;
    }
    needLen += rebuilt.extract(keywordsAt, bufferCapacity - needLen, *status);
    U_ASSERT(*status != U_STRING_NOT_TERMINATED_WARNING);
    return needLen;
}
1087
1088
/* ### ID parsing implementation **************************************************/
1089
1090
443k
/* True if 'a' is one of the letters that can start a special 'x-' / 'i-'
 * prefix. Note: the argument is evaluated more than once. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/* returns true if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 *
 * Note: the argument is evaluated more than once.
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1100
1101
/**
1102
 * Lookup 'key' in the array 'list'.  The array 'list' should contain
1103
 * a NULL entry, followed by more entries, and a second NULL entry.
1104
 *
1105
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1106
 * COUNTRIES_3.
1107
 */
1108
static int16_t _findIndex(const char* const* list, const char* key)
1109
20.9k
{
1110
20.9k
    const char* const* anchor = list;
1111
20.9k
    int32_t pass = 0;
1112
1113
    /* Make two passes through two NULL-terminated arrays at 'list' */
1114
27.3k
    while (pass++ < 2) {
1115
7.00M
        while (*list) {
1116
6.99M
            if (uprv_strcmp(key, *list) == 0) {
1117
17.7k
                return (int16_t)(list - anchor);
1118
17.7k
            }
1119
6.97M
            list++;
1120
6.97M
        }
1121
6.39k
        ++list;     /* skip final NULL *CWB*/
1122
6.39k
    }
1123
3.17k
    return -1;
1124
20.9k
}
1125
1126
/*
 * Map a deprecated country code to its replacement: if oldID is found in
 * DEPRECATED_COUNTRIES, the entry at the same index of REPLACEMENT_COUNTRIES
 * is returned; otherwise oldID itself is returned.
 */
U_CFUNC const char*
uloc_getCurrentCountryID(const char* oldID){
    const int32_t index = _findIndex(DEPRECATED_COUNTRIES, oldID);
    return (index >= 0) ? REPLACEMENT_COUNTRIES[index] : oldID;
}
1134
/*
 * Map a deprecated language code to its replacement: if oldID is found in
 * DEPRECATED_LANGUAGES, the entry at the same index of REPLACEMENT_LANGUAGES
 * is returned; otherwise oldID itself is returned.
 */
U_CFUNC const char*
uloc_getCurrentLanguageID(const char* oldID){
    const int32_t index = _findIndex(DEPRECATED_LANGUAGES, oldID);
    return (index >= 0) ? REPLACEMENT_LANGUAGES[index] : oldID;
}
1142
/*
1143
 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1144
 * avoid duplicating code to handle the earlier locale ID pieces
1145
 * in the functions for the later ones by
1146
 * setting the *pEnd pointer to where they stopped parsing
1147
 *
1148
 * TODO try to use this in Locale
1149
 */
1150
/*
 * Extract the language subtag at the start of localeID, lower-cased.
 *
 *  - A locale ID that is exactly "root" (any case), or "und" (any case)
 *    followed by end of string, '-', '_' or '@', is skipped and yields an
 *    empty language.
 *  - A leading "x-" / "i-" style prefix is copied as "<letter>-".
 *  - A result of exactly three characters is replaced by the entry of
 *    LANGUAGES at the same index when it is found in LANGUAGES_3.
 *
 * If pEnd is not null, *pEnd receives the position where parsing stopped.
 */
CharString U_EXPORT2
ulocimp_getLanguage(const char *localeID,
                    const char **pEnd,
                    UErrorCode &status) {
    CharString result;
    const char *cursor = localeID;

    if (uprv_stricmp(cursor, "root") == 0) {
        cursor += 4;
    } else if (uprv_strnicmp(cursor, "und", 3) == 0) {
        /* only "und" as a complete subtag counts */
        switch (cursor[3]) {
        case '\0':
        case '-':
        case '_':
        case '@':
            cursor += 3;
            break;
        default:
            break;
        }
    }

    /* if it starts with i- or x- then copy that prefix */
    if (_isIDPrefix(cursor)) {
        result.append((char)uprv_tolower(*cursor), status);
        result.append('-', status);
        cursor += 2;
    }

    /* copy the language up to the next separator or terminator */
    for (; !_isTerminator(*cursor) && !_isIDSeparator(*cursor); ++cursor) {
        result.append((char)uprv_tolower(*cursor), status);
    }

    if (result.length() == 3) {
        /* convert 3 character code to 2 character code if possible */
        const int32_t index = _findIndex(LANGUAGES_3, result.data());
        if (index >= 0) {
            result.clear();
            result.append(LANGUAGES[index], status);
        }
    }

    if (pEnd != nullptr) {
        *pEnd = cursor;
    }

    return result;
}
1194
1195
/*
 * Extract a script subtag at localeID, title-cased (first letter upper case,
 * the rest lower case).
 *
 * The subtag is treated as a script only if it consists of exactly four
 * ASCII letters; anything else yields an empty result. If pEnd is not null,
 * *pEnd receives the position just past the script, or localeID itself when
 * no script was found.
 */
CharString U_EXPORT2
ulocimp_getScript(const char *localeID,
                  const char **pEnd,
                  UErrorCode &status) {
    CharString result;

    /* count the leading run of ASCII letters */
    int32_t letters = 0;
    while (!_isTerminator(localeID[letters]) && !_isIDSeparator(localeID[letters])
            && uprv_isASCIILetter(localeID[letters])) {
        ++letters;
    }

    /* exactly 4 letters: it is a script and not a country */
    const bool isScript = (letters == 4);

    if (pEnd != nullptr) {
        *pEnd = isScript ? localeID + letters : localeID;
    }

    if (isScript) {
        result.append((char)uprv_toupper(localeID[0]), status);
        for (int32_t i = 1; i < letters; ++i) {
            result.append((char)uprv_tolower(localeID[i]), status);
        }
    }

    return result;
}
1228
1229
/*
 * Extract the country/region subtag at localeID, upper-cased.
 *
 * The subtag is accepted only if it is two or three characters long; any
 * other length yields an empty result and consumes nothing. A three-character
 * code is replaced by the entry of COUNTRIES at the same index when it is
 * found in COUNTRIES_3.
 *
 * If pEnd is not null, *pEnd receives the position just past an accepted
 * subtag, or localeID itself when the subtag was rejected.
 */
CharString U_EXPORT2
ulocimp_getCountry(const char *localeID,
                   const char **pEnd,
                   UErrorCode &status) {
    CharString result;
    int32_t idLen=0;

    /* Measure the subtag before copying anything: a subtag of the wrong
     * length is discarded anyway, so there is no point in upper-casing and
     * appending an arbitrarily long run of characters first. */
    while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
        idLen++;
    }

    /* the country should be either length 2 or 3 */
    if (idLen == 2 || idLen == 3) {
        for (int32_t i = 0; i < idLen; ++i) {
            result.append((char)uprv_toupper(localeID[i]), status);
        }
        /* convert 3 character code to 2 character code if possible *CWB*/
        if(idLen==3) {
            int32_t offset = _findIndex(COUNTRIES_3, result.data());
            if(offset>=0) {
                result.clear();
                result.append(COUNTRIES[offset], status);
            }
        }
        localeID+=idLen;
    }

    if(pEnd!=NULL) {
        *pEnd=localeID;
    }

    return result;
}
1263
1264
/**
1265
 * @param needSeparator if true, then add leading '_' if any variants
1266
 * are added to 'variant'
1267
 */
1268
static void
1269
_getVariant(const char *localeID,
1270
            char prev,
1271
            ByteSink& sink,
1272
4.78k
            UBool needSeparator) {
1273
4.78k
    UBool hasVariant = false;
1274
1275
    /* get one or more variant tags and separate them with '_' */
1276
4.78k
    if(_isIDSeparator(prev)) {
1277
        /* get a variant string after a '-' or '_' */
1278
131M
        while(!_isTerminator(*localeID)) {
1279
131M
            if (needSeparator) {
1280
0
                sink.Append("_", 1);
1281
0
                needSeparator = false;
1282
0
            }
1283
131M
            char c = (char)uprv_toupper(*localeID);
1284
131M
            if (c == '-') c = '_';
1285
131M
            sink.Append(&c, 1);
1286
131M
            hasVariant = true;
1287
131M
            localeID++;
1288
131M
        }
1289
4.49k
    }
1290
1291
    /* if there is no variant tag after a '-' or '_' then look for '@' */
1292
4.78k
    if(!hasVariant) {
1293
792
        if(prev=='@') {
1294
            /* keep localeID */
1295
502
        } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1296
220
            ++localeID; /* point after the '@' */
1297
282
        } else {
1298
282
            return;
1299
282
        }
1300
4.40M
        while(!_isTerminator(*localeID)) {
1301
4.40M
            if (needSeparator) {
1302
70
                sink.Append("_", 1);
1303
70
                needSeparator = false;
1304
70
            }
1305
4.40M
            char c = (char)uprv_toupper(*localeID);
1306
4.40M
            if (c == '-' || c == ',') c = '_';
1307
4.40M
            sink.Append(&c, 1);
1308
4.40M
            localeID++;
1309
4.40M
        }
1310
510
    }
1311
4.78k
}
1312
1313
/* Keyword enumeration */
1314
1315
/*
 * Iteration state stored in the context pointer of a keyword UEnumeration.
 *
 *   keywords  start of the keyword list: consecutive NUL-terminated strings,
 *             ended by an empty string (see uloc_kw_countKeywords); released
 *             with uprv_free() by uloc_kw_closeKeywords
 *   current   the string that uloc_kw_nextKeyword will return next;
 *             uloc_kw_resetKeywords sets it back to keywords
 */
typedef struct UKeywordsContext {
1316
    char* keywords;
1317
    char* current;
1318
} UKeywordsContext;
1319
1320
U_CDECL_BEGIN
1321
1322
static void U_CALLCONV
1323
1.42k
uloc_kw_closeKeywords(UEnumeration *enumerator) {
1324
1.42k
    uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1325
1.42k
    uprv_free(enumerator->context);
1326
1.42k
    uprv_free(enumerator);
1327
1.42k
}
1328
1329
static int32_t U_CALLCONV
1330
0
uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1331
0
    char *kw = ((UKeywordsContext *)en->context)->keywords;
1332
0
    int32_t result = 0;
1333
0
    while(*kw) {
1334
0
        result++;
1335
0
        kw += uprv_strlen(kw)+1;
1336
0
    }
1337
0
    return result;
1338
0
}
1339
1340
static const char * U_CALLCONV
1341
uloc_kw_nextKeyword(UEnumeration* en,
1342
                    int32_t* resultLength,
1343
0
                    UErrorCode* /*status*/) {
1344
0
    const char* result = ((UKeywordsContext *)en->context)->current;
1345
0
    int32_t len = 0;
1346
0
    if(*result) {
1347
0
        len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1348
0
        ((UKeywordsContext *)en->context)->current += len+1;
1349
0
    } else {
1350
0
        result = NULL;
1351
0
    }
1352
0
    if (resultLength) {
1353
0
        *resultLength = len;
1354
0
    }
1355
0
    return result;
1356
0
}
1357
1358
static void U_CALLCONV
1359
uloc_kw_resetKeywords(UEnumeration* en,
1360
0
                      UErrorCode* /*status*/) {
1361
0
    ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1362
0
}
1363
1364
U_CDECL_END
1365
1366
1367
/*
 * Template enumeration that uloc_openKeywordList copies into each new
 * UEnumeration.  The two leading pointer slots are left empty here; the
 * per-instance context is filled in after the copy.
 * NOTE(review): slot order must match the UEnumeration declaration - confirm
 * against uenumimp.h when editing.
 */
static const UEnumeration gKeywordsEnum = {
    nullptr,
    nullptr,
    uloc_kw_closeKeywords,   /* close */
    uloc_kw_countKeywords,   /* count */
    uenum_unextDefault,      /* UChar-returning next (library default) */
    uloc_kw_nextKeyword,     /* char-returning next */
    uloc_kw_resetKeywords    /* reset */
};
1376
1377
/*
 * Creates an enumeration over a private copy of keywordList.
 *
 * keywordList      buffer of keywordListSize bytes to copy; the copy gets one
 *                  extra trailing NUL appended.
 * status           must not be a failure on entry; set to
 *                  U_MEMORY_ALLOCATION_ERROR if any allocation fails.
 * Returns the new enumeration (released through its close callback), or a
 * null pointer on failure.
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
{
    if (U_FAILURE(*status)) {
        return nullptr;
    }

    /* Both holders free their allocation automatically on every early return. */
    LocalMemory<UKeywordsContext> context;
    LocalMemory<UEnumeration> enumeration;
    context.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
    enumeration.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
    if (context.isNull() || enumeration.isNull()) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    uprv_memcpy(enumeration.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));

    char *listCopy = static_cast<char *>(uprv_malloc(keywordListSize+1));
    context->keywords = listCopy;
    if (listCopy == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    uprv_memcpy(listCopy, keywordList, keywordListSize);
    listCopy[keywordListSize] = 0;
    context->current = listCopy;

    /* Ownership moves to the enumeration; uloc_kw_closeKeywords frees all three pieces. */
    enumeration->context = context.orphan();
    return enumeration.orphan();
}
1404
1405
/*
 * Opens an enumeration over the keywords of localeID (the part after '@').
 *
 * A null localeID means the default locale.  IDs with a BCP 47 extension are
 * first passed through _ConvertBCP47 using a fixed-size stack buffer.
 * Returns a null pointer when status is null or already a failure, when
 * parsing fails, or when the ID has no keyword section.
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char* localeID,
                        UErrorCode* status)
{
    if (status == nullptr || U_FAILURE(*status)) {
        return nullptr;
    }

    char bcp47Buffer[ULOC_FULLNAME_CAPACITY];
    const char* cursor;
    if (_hasBCP47Extension(localeID)) {
        cursor = _ConvertBCP47(localeID, bcp47Buffer,
                               sizeof(bcp47Buffer), status, nullptr);
    } else {
        if (localeID == nullptr) {
            localeID = uloc_getDefault();
        }
        cursor = localeID;
    }

    /* Step over the language subtag. */
    ulocimp_getLanguage(cursor, &cursor, *status);
    if (U_FAILURE(*status)) {
        return nullptr;
    }

    if (_isIDSeparator(*cursor)) {
        /* Step over the script, if one was actually consumed. */
        const char *afterScript;
        ulocimp_getScript(cursor+1, &afterScript, *status);
        if (U_FAILURE(*status)) {
            return nullptr;
        }
        if (afterScript != cursor+1) {
            cursor = afterScript;
        }
        /* Step over the country. */
        if (_isIDSeparator(*cursor)) {
            ulocimp_getCountry(cursor+1, &cursor, *status);
            if (U_FAILURE(*status)) {
                return nullptr;
            }
        }
    }

    /* Keywords are located after '@'. */
    cursor = locale_getKeywordsStart(cursor);
    if (cursor == nullptr) {
        return nullptr;
    }

    CharString keywords;
    CharStringByteSink sink(&keywords);
    ulocimp_getKeywords(cursor+1, '@', sink, false, status);
    if (U_FAILURE(*status)) {
        return nullptr;
    }
    return uloc_openKeywordList(keywords.data(), keywords.length(), status);
}
1464
1465
1466
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * True when any bit of 'mask' is set in 'options'.  Both arguments are
 * parenthesized so that compound arguments such as (A | B) are tested as a
 * whole; without that, '&' would bind to only the first operand of the mask.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The special "i-default" language tag, deliberately stored without a trailing NUL. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1474
1475
/**
1476
 * Canonicalize the given localeID, to level 1 or to level 2,
1477
 * depending on the options.  To specify level 1, pass in options=0.
1478
 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1479
 *
1480
 * This is the code underlying uloc_getName and uloc_canonicalize.
1481
 */
1482
static void
1483
_canonicalize(const char* localeID,
1484
              ByteSink& sink,
1485
              uint32_t options,
1486
148k
              UErrorCode* err) {
1487
148k
    if (U_FAILURE(*err)) {
1488
0
        return;
1489
0
    }
1490
1491
148k
    int32_t j, fieldCount=0, scriptSize=0, variantSize=0;
1492
148k
    PreflightingLocaleIDBuffer tempBuffer;  // if localeID has a BCP47 extension, tmpLocaleID points to this
1493
148k
    CharString localeIDWithHyphens;  // if localeID has a BPC47 extension and have _, tmpLocaleID points to this
1494
148k
    const char* origLocaleID;
1495
148k
    const char* tmpLocaleID;
1496
148k
    const char* keywordAssign = NULL;
1497
148k
    const char* separatorIndicator = NULL;
1498
1499
148k
    if (_hasBCP47Extension(localeID)) {
1500
10.0k
        const char* localeIDPtr = localeID;
1501
1502
        // convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string
1503
10.0k
        if (uprv_strchr(localeID, '_') != nullptr && localeID[1] != '-' && localeID[1] != '_') {
1504
1.41k
            localeIDWithHyphens.append(localeID, -1, *err);
1505
1.41k
            if (U_SUCCESS(*err)) {
1506
54.4M
                for (char* p = localeIDWithHyphens.data(); *p != '\0'; ++p) {
1507
54.4M
                    if (*p == '_') {
1508
30.7k
                        *p = '-';
1509
30.7k
                    }
1510
54.4M
                }
1511
1.41k
                localeIDPtr = localeIDWithHyphens.data();
1512
1.41k
            }
1513
1.41k
        }
1514
1515
12.8k
        do {
1516
            // After this call tmpLocaleID may point to localeIDPtr which may
1517
            // point to either localeID or localeIDWithHyphens.data().
1518
12.8k
            tmpLocaleID = _ConvertBCP47(localeIDPtr, tempBuffer.getBuffer(),
1519
12.8k
                                        tempBuffer.getCapacity(), err,
1520
12.8k
                                        &(tempBuffer.requestedCapacity));
1521
12.8k
        } while (tempBuffer.needToTryAgain(err));
1522
137k
    } else {
1523
137k
        if (localeID==NULL) {
1524
0
           localeID=uloc_getDefault();
1525
0
        }
1526
137k
        tmpLocaleID=localeID;
1527
137k
    }
1528
1529
148k
    origLocaleID=tmpLocaleID;
1530
1531
    /* get all pieces, one after another, and separate with '_' */
1532
148k
    CharString tag = ulocimp_getLanguage(tmpLocaleID, &tmpLocaleID, *err);
1533
1534
148k
    if (tag.length() == I_DEFAULT_LENGTH &&
1535
148k
            uprv_strncmp(origLocaleID, i_default, I_DEFAULT_LENGTH) == 0) {
1536
8
        tag.clear();
1537
8
        tag.append(uloc_getDefault(), *err);
1538
148k
    } else if(_isIDSeparator(*tmpLocaleID)) {
1539
25.4k
        const char *scriptID;
1540
1541
25.4k
        ++fieldCount;
1542
25.4k
        tag.append('_', *err);
1543
1544
25.4k
        CharString script = ulocimp_getScript(tmpLocaleID+1, &scriptID, *err);
1545
25.4k
        tag.append(script, *err);
1546
25.4k
        scriptSize = script.length();
1547
25.4k
        if(scriptSize > 0) {
1548
            /* Found optional script */
1549
3.92k
            tmpLocaleID = scriptID;
1550
3.92k
            ++fieldCount;
1551
3.92k
            if (_isIDSeparator(*tmpLocaleID)) {
1552
                /* If there is something else, then we add the _ */
1553
2.60k
                tag.append('_', *err);
1554
2.60k
            }
1555
3.92k
        }
1556
1557
25.4k
        if (_isIDSeparator(*tmpLocaleID)) {
1558
24.1k
            const char *cntryID;
1559
1560
24.1k
            CharString country = ulocimp_getCountry(tmpLocaleID+1, &cntryID, *err);
1561
24.1k
            tag.append(country, *err);
1562
24.1k
            if (!country.isEmpty()) {
1563
                /* Found optional country */
1564
20.0k
                tmpLocaleID = cntryID;
1565
20.0k
            }
1566
24.1k
            if(_isIDSeparator(*tmpLocaleID)) {
1567
                /* If there is something else, then we add the _  if we found country before. */
1568
4.41k
                if (!_isIDSeparator(*(tmpLocaleID+1))) {
1569
3.03k
                    ++fieldCount;
1570
3.03k
                    tag.append('_', *err);
1571
3.03k
                }
1572
1573
4.41k
                variantSize = -tag.length();
1574
4.41k
                {
1575
4.41k
                    CharStringByteSink s(&tag);
1576
4.41k
                    _getVariant(tmpLocaleID+1, *tmpLocaleID, s, false);
1577
4.41k
                }
1578
4.41k
                variantSize += tag.length();
1579
4.41k
                if (variantSize > 0) {
1580
4.10k
                    tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1581
4.10k
                }
1582
4.41k
            }
1583
24.1k
        }
1584
25.4k
    }
1585
1586
    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1587
148k
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1588
158
        UBool done = false;
1589
5.81M
        do {
1590
5.81M
            char c = *tmpLocaleID;
1591
5.81M
            switch (c) {
1592
140
            case 0:
1593
158
            case '@':
1594
158
                done = true;
1595
158
                break;
1596
5.81M
            default:
1597
5.81M
                tag.append(c, *err);
1598
5.81M
                ++tmpLocaleID;
1599
5.81M
                break;
1600
5.81M
            }
1601
5.81M
        } while (!done);
1602
158
    }
1603
1604
    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1605
       After this, tmpLocaleID either points to '@' or is NULL */
1606
148k
    if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1607
8.35k
        keywordAssign = uprv_strchr(tmpLocaleID, '=');
1608
8.35k
        separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1609
8.35k
    }
1610
1611
    /* Copy POSIX-style variant, if any [mr@FOO] */
1612
148k
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1613
148k
        tmpLocaleID != NULL && keywordAssign == NULL) {
1614
1.13M
        for (;;) {
1615
1.13M
            char c = *tmpLocaleID;
1616
1.13M
            if (c == 0) {
1617
90
                break;
1618
90
            }
1619
1.13M
            tag.append(c, *err);
1620
1.13M
            ++tmpLocaleID;
1621
1.13M
        }
1622
90
    }
1623
1624
148k
    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1625
        /* Handle @FOO variant if @ is present and not followed by = */
1626
9.98k
        if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1627
            /* Add missing '_' if needed */
1628
290
            if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1629
385
                do {
1630
385
                    tag.append('_', *err);
1631
385
                    ++fieldCount;
1632
385
                } while(fieldCount<2);
1633
201
            }
1634
1635
290
            int32_t posixVariantSize = -tag.length();
1636
290
            {
1637
290
                CharStringByteSink s(&tag);
1638
290
                _getVariant(tmpLocaleID+1, '@', s, (UBool)(variantSize > 0));
1639
290
            }
1640
290
            posixVariantSize += tag.length();
1641
290
            if (posixVariantSize > 0) {
1642
241
                variantSize += posixVariantSize;
1643
241
            }
1644
290
        }
1645
1646
        /* Look up the ID in the canonicalization map */
1647
109k
        for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1648
99.7k
            StringPiece id(CANONICALIZE_MAP[j].id);
1649
99.7k
            if (tag == id) {
1650
16
                if (id.empty() && tmpLocaleID != NULL) {
1651
0
                    break; /* Don't remap "" if keywords present */
1652
0
                }
1653
16
                tag.clear();
1654
16
                tag.append(CANONICALIZE_MAP[j].canonicalID, *err);
1655
16
                break;
1656
16
            }
1657
99.7k
        }
1658
9.98k
    }
1659
1660
148k
    sink.Append(tag.data(), tag.length());
1661
1662
148k
    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1663
34.0k
        if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1664
34.0k
            (!separatorIndicator || separatorIndicator > keywordAssign)) {
1665
7.71k
            sink.Append("@", 1);
1666
7.71k
            ++fieldCount;
1667
7.71k
            ulocimp_getKeywords(tmpLocaleID+1, '@', sink, true, err);
1668
7.71k
        }
1669
34.0k
    }
1670
148k
}
1671
1672
/* ### ID parsing API **************************************************/
1673
1674
/*
 * Writes the parent of localeID (everything before its last '_') to parent.
 *
 * localeID        locale to truncate; null means the default locale.
 * parent          destination buffer; may be null when parentCapacity is 0
 *                 (preflighting).
 * parentCapacity  size of parent in bytes.
 * err             in/out error code; nothing is done when it is null or
 *                 already a failure.
 * Returns the full length of the parent as reported by u_terminateChars,
 * which also handles termination of the output.
 *
 * A leading "und_" (any case) is reduced to its "_..." remainder, so the
 * parent of "und_Latn_US" is "_Latn".
 */
U_CAPI int32_t  U_EXPORT2
uloc_getParent(const char*    localeID,
               char* parent,
               int32_t parentCapacity,
               UErrorCode* err)
{
    /* Same entry check as the other uloc_get* accessors in this file. */
    if (err == NULL || U_FAILURE(*err)) {
        return 0;
    }

    if (localeID == NULL) {
        localeID = uloc_getDefault();
    }

    const char *lastUnderscore = uprv_strrchr(localeID, '_');
    int32_t i = (lastUnderscore != NULL) ? (int32_t)(lastUnderscore - localeID) : 0;

    if (i > 0) {
        if (uprv_strnicmp(localeID, "und_", 4) == 0) {
            localeID += 3;
            i -= 3;
        }
        /*
         * Copy only when there is room: with a zero capacity (and possibly a
         * null buffer) or a negative capacity there is nothing to write, and
         * the length must never reach the copy routine as a negative value.
         * memmove is used because parent may alias localeID.
         */
        const int32_t copyLength = uprv_min(i, parentCapacity);
        if (copyLength > 0 && parent != localeID) {
            uprv_memmove(parent, localeID, copyLength);
        }
    }

    return u_terminateChars(parent, parentCapacity, i, err);
}
1708
1709
/*
 * Extracts the language subtag of localeID into language.
 * A null localeID means the default locale.  Returns 0 when err is null or
 * already a failure; otherwise returns whatever CharString::extract reports.
 */
U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char*    localeID,
         char* language,
         int32_t languageCapacity,
         UErrorCode* err)
{
    /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/

    if (err == nullptr || U_FAILURE(*err)) {
        return 0;
    }

    const char* source = (localeID != nullptr) ? localeID : uloc_getDefault();
    CharString code = ulocimp_getLanguage(source, nullptr, *err);
    return code.extract(language, languageCapacity, *err);
}
1727
1728
/*
 * Extracts the script subtag of localeID into script.
 * A null localeID means the default locale.  When nothing follows the
 * language, an empty (length 0) result is terminated and returned.
 */
U_CAPI int32_t U_EXPORT2
uloc_getScript(const char*    localeID,
         char* script,
         int32_t scriptCapacity,
         UErrorCode* err)
{
    if (err == nullptr || U_FAILURE(*err)) {
        return 0;
    }

    if (localeID == nullptr) {
        localeID = uloc_getDefault();
    }

    /* skip the language */
    ulocimp_getLanguage(localeID, &localeID, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }

    if (!_isIDSeparator(*localeID)) {
        return u_terminateChars(script, scriptCapacity, 0, err);
    }
    CharString found = ulocimp_getScript(localeID+1, nullptr, *err);
    return found.extract(script, scriptCapacity, *err);
}
1753
1754
/*
 * Extracts the country subtag of localeID into country.
 * A null localeID means the default locale.  The language and an optional
 * script are skipped first; when no country position is reached, an empty
 * (length 0) result is terminated and returned.
 */
U_CAPI int32_t  U_EXPORT2
uloc_getCountry(const char* localeID,
            char* country,
            int32_t countryCapacity,
            UErrorCode* err)
{
    if (err == nullptr || U_FAILURE(*err)) {
        return 0;
    }

    if (localeID == nullptr) {
        localeID = uloc_getDefault();
    }

    /* Skip the language */
    ulocimp_getLanguage(localeID, &localeID, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }

    if (_isIDSeparator(*localeID)) {
        /* Skip the script if available */
        const char *afterScript;
        ulocimp_getScript(localeID+1, &afterScript, *err);
        if (U_FAILURE(*err)) {
            return 0;
        }
        if (afterScript != localeID+1) {
            /* Found optional script */
            localeID = afterScript;
        }
        if (_isIDSeparator(*localeID)) {
            CharString found = ulocimp_getCountry(localeID+1, nullptr, *err);
            return found.extract(country, countryCapacity, *err);
        }
    }
    return u_terminateChars(country, countryCapacity, 0, err);
}
1791
1792
/*
 * Extracts the variant of localeID into variant.
 *
 * A null localeID means the default locale; IDs with a BCP 47 extension are
 * first passed through _ConvertBCP47 using a fixed-size stack buffer.
 * Language, optional script and country are skipped before the variant is
 * read.  If the variant does not fit, *err becomes U_BUFFER_OVERFLOW_ERROR and
 * the number of bytes appended is returned without terminating the output.
 */
U_CAPI int32_t  U_EXPORT2
uloc_getVariant(const char* localeID,
                char* variant,
                int32_t variantCapacity,
                UErrorCode* err)
{
    if (err == nullptr || U_FAILURE(*err)) {
        return 0;
    }

    char bcp47Buffer[ULOC_FULLNAME_CAPACITY];
    const char* cursor;
    if (_hasBCP47Extension(localeID)) {
        cursor = _ConvertBCP47(localeID, bcp47Buffer, sizeof(bcp47Buffer), err, nullptr);
    } else {
        if (localeID == nullptr) {
            localeID = uloc_getDefault();
        }
        cursor = localeID;
    }

    /* Skip the language */
    ulocimp_getLanguage(cursor, &cursor, *err);
    if (U_FAILURE(*err)) {
        return 0;
    }

    int32_t written = 0;
    if (_isIDSeparator(*cursor)) {
        /* Skip the script if available */
        const char *afterScript;
        ulocimp_getScript(cursor+1, &afterScript, *err);
        if (U_FAILURE(*err)) {
            return 0;
        }
        if (afterScript != cursor+1) {
            /* Found optional script */
            cursor = afterScript;
        }

        /* Skip the Country */
        if (_isIDSeparator(*cursor)) {
            const char *afterCountry;
            ulocimp_getCountry(cursor+1, &afterCountry, *err);
            if (U_FAILURE(*err)) {
                return 0;
            }
            if (afterCountry != cursor+1) {
                /* Found optional country */
                cursor = afterCountry;
            }

            if (_isIDSeparator(*cursor)) {
                /* If there was no country ID, skip a possible extra IDSeparator */
                if (cursor != afterCountry && _isIDSeparator(cursor[1])) {
                    cursor++;
                }

                CheckedArrayByteSink sink(variant, variantCapacity);
                _getVariant(cursor+1, *cursor, sink, false);
                written = sink.NumberOfBytesAppended();

                if (U_FAILURE(*err)) {
                    return written;
                }
                if (sink.Overflowed()) {
                    *err = U_BUFFER_OVERFLOW_ERROR;
                    return written;
                }
            }
        }
    }

    return u_terminateChars(variant, variantCapacity, written, err);
}
1868
1869
/*
 * Buffer-based front end for ulocimp_getName.
 * Returns the number of bytes the sink reports as appended.  On overflow *err
 * is set to U_BUFFER_OVERFLOW_ERROR; otherwise the output is terminated via
 * u_terminateChars.  Nothing is terminated when the conversion itself failed.
 */
U_CAPI int32_t  U_EXPORT2
uloc_getName(const char* localeID,
             char* name,
             int32_t nameCapacity,
             UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    CheckedArrayByteSink sink(name, nameCapacity);
    ulocimp_getName(localeID, sink, err);

    const int32_t length = sink.NumberOfBytesAppended();
    if (U_SUCCESS(*err)) {
        if (sink.Overflowed()) {
            *err = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(name, nameCapacity, length, err);
        }
    }
    return length;
}
1896
1897
/* Sink-based uloc_getName: level 1 canonicalization (no option bits), keywords kept. */
U_CAPI void U_EXPORT2
ulocimp_getName(const char* localeID,
                ByteSink& sink,
                UErrorCode* err)
{
    const uint32_t noOptions = 0;
    _canonicalize(localeID, sink, noOptions, err);
}
1904
1905
/*
 * Buffer-based front end for ulocimp_getBaseName.
 * Returns the number of bytes the sink reports as appended.  On overflow *err
 * is set to U_BUFFER_OVERFLOW_ERROR; otherwise the output is terminated via
 * u_terminateChars.  Nothing is terminated when the conversion itself failed.
 */
U_CAPI int32_t  U_EXPORT2
uloc_getBaseName(const char* localeID,
                 char* name,
                 int32_t nameCapacity,
                 UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    CheckedArrayByteSink sink(name, nameCapacity);
    ulocimp_getBaseName(localeID, sink, err);

    const int32_t length = sink.NumberOfBytesAppended();
    if (U_SUCCESS(*err)) {
        if (sink.Overflowed()) {
            *err = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(name, nameCapacity, length, err);
        }
    }
    return length;
}
1932
1933
/* Sink-based uloc_getBaseName: level 1 canonicalization with the keyword section stripped. */
U_CAPI void U_EXPORT2
ulocimp_getBaseName(const char* localeID,
                    ByteSink& sink,
                    UErrorCode* err)
{
    const uint32_t stripKeywords = _ULOC_STRIP_KEYWORDS;
    _canonicalize(localeID, sink, stripKeywords, err);
}
1940
1941
/*
 * Buffer-based front end for ulocimp_canonicalize.
 * Returns the number of bytes the sink reports as appended.  On overflow *err
 * is set to U_BUFFER_OVERFLOW_ERROR; otherwise the output is terminated via
 * u_terminateChars.  Nothing is terminated when the conversion itself failed.
 */
U_CAPI int32_t  U_EXPORT2
uloc_canonicalize(const char* localeID,
                  char* name,
                  int32_t nameCapacity,
                  UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    CheckedArrayByteSink sink(name, nameCapacity);
    ulocimp_canonicalize(localeID, sink, err);

    const int32_t length = sink.NumberOfBytesAppended();
    if (U_SUCCESS(*err)) {
        if (sink.Overflowed()) {
            *err = U_BUFFER_OVERFLOW_ERROR;
        } else {
            u_terminateChars(name, nameCapacity, length, err);
        }
    }
    return length;
}
1968
1969
/* Sink-based uloc_canonicalize: level 2 canonicalization, keywords kept. */
U_CAPI void U_EXPORT2
ulocimp_canonicalize(const char* localeID,
                     ByteSink& sink,
                     UErrorCode* err)
{
    const uint32_t level2 = _ULOC_CANONICALIZE;
    _canonicalize(localeID, sink, level2, err);
}
1976
1977
/*
 * Returns the LANGUAGES_3 entry that corresponds to the language of localeID,
 * or "" when the language cannot be extracted or is not found in LANGUAGES.
 * A null localeID means the default locale.
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Language(const char* localeID)
{
    char lang[ULOC_LANG_CAPACITY];
    UErrorCode err = U_ZERO_ERROR;

    if (localeID == NULL)
    {
        localeID = uloc_getDefault();
    }
    uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
    /*
     * A language that exactly fills the buffer is reported only as
     * U_STRING_NOT_TERMINATED_WARNING, which U_FAILURE does not catch; lang is
     * then not a NUL-terminated string and must not be used as a lookup key
     * (uloc_getLCID applies the same check).  Such a language cannot match a
     * table entry anyway, so it is reported as not found.
     */
    if (U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
        return "";
    }

    const int16_t offset = _findIndex(LANGUAGES, lang);
    if (offset < 0) {
        return "";
    }
    return LANGUAGES_3[offset];
}
1996
1997
/*
 * Returns the COUNTRIES_3 entry that corresponds to the country of localeID,
 * or "" when the country cannot be extracted or is not found in COUNTRIES.
 * A null localeID means the default locale.
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Country(const char* localeID)
{
    char cntry[ULOC_LANG_CAPACITY];
    UErrorCode err = U_ZERO_ERROR;

    const char* source = (localeID != nullptr) ? localeID : uloc_getDefault();
    uloc_getCountry(source, cntry, ULOC_LANG_CAPACITY, &err);
    if (U_FAILURE(err)) {
        return "";
    }

    const int16_t offset = _findIndex(COUNTRIES, cntry);
    return (offset < 0) ? "" : COUNTRIES_3[offset];
}
2017
2018
/*
 * Maps localeID to a Windows LCID.
 *
 * Returns 0 for a null or shorter-than-2-character ID, when the platform
 * lookup fails, or when the language cannot be extracted into a terminated
 * buffer.  A platform-provided LCID wins; otherwise the ICU mapping
 * (uprv_convertToLCID) is used.  For IDs with keywords, only a non-empty
 * "collation" keyword is kept and re-attached to the base name; if that
 * rebuild does not succeed, the original ID is passed on unchanged.
 */
U_CAPI uint32_t  U_EXPORT2
uloc_getLCID(const char* localeID)
{
    UErrorCode status = U_ZERO_ERROR;
    char       langID[ULOC_FULLNAME_CAPACITY];
    uint32_t   lcid = 0;

    /* Check for incomplete id. */
    if (!localeID || uprv_strlen(localeID) < 2) {
        return 0;
    }

    // First, attempt Windows platform lookup if available, but fall
    // through to catch any special cases (ICU vs Windows name differences).
    lcid = uprv_convertToLCIDPlatform(localeID, &status);
    if (U_FAILURE(status)) {
        return 0;
    }
    if (lcid > 0) {
        // Windows found an LCID, return that
        return lcid;
    }

    uloc_getLanguage(localeID, langID, sizeof(langID), &status);
    if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING) {
        return 0;
    }

    if (uprv_strchr(localeID, '@')) {
        // uprv_convertToLCID does not support keywords other than collation.
        // Remove all keywords except collation.
        int32_t len;
        char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
        // One byte is always held back so that tmpLocaleID[len] = 0 stays in bounds.
        const int32_t usableCapacity = UPRV_LENGTHOF(tmpLocaleID) - 1;

        CharString collVal;
        {
            CharStringByteSink sink(&collVal);
            ulocimp_getKeywordValue(localeID, "collation", sink, &status);
        }

        if (U_SUCCESS(status) && !collVal.isEmpty()) {
            len = uloc_getBaseName(localeID, tmpLocaleID, usableCapacity, &status);

            if (U_SUCCESS(status) && len > 0) {
                tmpLocaleID[len] = 0;

                // uloc_setKeywordValue takes the capacity of the whole buffer
                // (the base name already in it included), not the space left
                // after the base name; passing the remainder rejected long
                // base names and silently dropped the collation keyword.
                len = uloc_setKeywordValue("collation", collVal.data(), tmpLocaleID,
                    usableCapacity, &status);

                if (U_SUCCESS(status) && len > 0) {
                    tmpLocaleID[len] = 0;
                    return uprv_convertToLCID(langID, tmpLocaleID, &status);
                }
            }
        }

        // fall through - all keywords are simply ignored
        status = U_ZERO_ERROR;
    }

    return uprv_convertToLCID(langID, localeID, &status);
}
2081
2082
/* Public wrapper: forwards unchanged to uprv_convertToPosix and returns its result. */
U_CAPI int32_t U_EXPORT2
uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
                UErrorCode *status)
{
    const int32_t length = uprv_convertToPosix(hostid, locale, localeCapacity, status);
    return length;
}
2088
2089
/* ### Default locale **************************************************/
2090
2091
/* C API accessor for the default locale ID; forwards to locale_get_default(). */
U_CAPI const char*  U_EXPORT2
uloc_getDefault()
{
    const char* defaultID = locale_get_default();
    return defaultID;
}
2096
2097
/*
 * Sets the default locale by forwarding to locale_set_default().
 * Does nothing when *err is already a failure; the error code is otherwise
 * not used by this function.
 */
U_CAPI void  U_EXPORT2
uloc_setDefault(const char*   newDefaultLocale,
             UErrorCode* err)
{
    if (U_SUCCESS(*err)) {
        /* propagate change to C++ */
        locale_set_default(newDefaultLocale);
    }
}
2108
2109
/**
2110
 * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
2111
 * to an array of pointers to arrays of char.  All of these pointers are owned
2112
 * by ICU-- do not delete them, and do not write through them.  The array is
2113
 * terminated with a null pointer.
2114
 */
2115
U_CAPI const char* const*  U_EXPORT2
2116
uloc_getISOLanguages()
2117
0
{
2118
0
    return LANGUAGES;
2119
0
}
2120
2121
/**
2122
 * Returns a list of all 2-letter country codes defined in ISO 3166.  This is a
2123
 * pointer to an array of pointers to arrays of char.  All of these pointers are
2124
 * owned by ICU-- do not delete them, and do not write through them.  The array is
2125
 * terminated with a null pointer.
2126
 */
2127
U_CAPI const char* const*  U_EXPORT2
uloc_getISOCountries()
{
    /* Hands out the file's static COUNTRIES table directly; no copy is made. */
    const char* const* countryCodes = COUNTRIES;
    return countryCodes;
}
2132
2133
/*
 * Converts a keyword to its BCP 47 key via ulocimp_toBcpKey.  When no mapping
 * exists, the keyword itself is returned if ultag_isUnicodeLocaleKey accepts
 * it; otherwise the result is a null pointer.
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char* keyword)
{
    const char* mapped = ulocimp_toBcpKey(keyword);
    if (mapped != nullptr) {
        return mapped;
    }
    // unknown keyword, but syntax may still be fine
    return ultag_isUnicodeLocaleKey(keyword, -1) ? keyword : nullptr;
}
2143
2144
/*
 * Converts a keyword value to its BCP 47 type via ulocimp_toBcpType.  When no
 * mapping exists, the value itself is returned if ultag_isUnicodeLocaleType
 * accepts it; otherwise the result is a null pointer.
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char* keyword, const char* value)
{
    const char* mapped = ulocimp_toBcpType(keyword, value, nullptr, nullptr);
    if (mapped != nullptr) {
        return mapped;
    }
    // unknown type, but syntax may still be fine
    return ultag_isUnicodeLocaleType(value, -1) ? value : nullptr;
}
2154
2155
/*
 * Returns true when every character of legacyKey satisfies UPRV_ISALPHANUM.
 * An empty string is therefore accepted.
 */
static UBool
isWellFormedLegacyKey(const char* legacyKey)
{
    for (const char* ch = legacyKey; *ch != 0; ++ch) {
        if (!UPRV_ISALPHANUM(*ch)) {
            return false;
        }
    }
    return true;
}
2167
2168
/**
 * Tests whether a NUL-terminated legacy locale type is a sequence of
 * non-empty alphanumeric runs joined by single '_', '/' or '-' separators.
 *
 * @param legacyType The type to inspect; it is dereferenced unconditionally.
 * @return false if a separator appears with no alphanumeric character
 *         before it (at the start or directly after another separator),
 *         if any other character is encountered, or if the string is empty
 *         or ends with a separator; true otherwise.
 */
static UBool
isWellFormedLegacyType(const char* legacyType)
{
    // Number of alphanumeric characters seen since the last separator.
    int32_t runLength = 0;

    for (const char* cursor = legacyType; *cursor != 0; ++cursor) {
        switch (*cursor) {
        case '_':
        case '/':
        case '-':
            // A separator must close a non-empty alphanumeric run.
            if (runLength == 0) {
                return false;
            }
            runLength = 0;
            break;
        default:
            if (!UPRV_ISALPHANUM(*cursor)) {
                return false;
            }
            runLength++;
            break;
        }
    }

    // The final run must be non-empty as well.
    return (runLength != 0);
}
2188
2189
/**
 * Converts a locale keyword to its legacy key.
 *
 * @param keyword The keyword to convert.
 * @return The key found by ulocimp_toLegacyKey(); otherwise `keyword` itself
 *         when isWellFormedLegacyKey() accepts it; otherwise NULL.
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword)
{
    const char* mappedKey = ulocimp_toLegacyKey(keyword);
    if (mappedKey != NULL) {
        return mappedKey;
    }

    // No mapping is known, so fall back to a syntax check against the
    // legacy locale key syntax.
    //
    // Note:
    //  LDML/CLDR provides some definition of keyword syntax in
    //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
    //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
    //  Keys can only consist of [0-9a-zA-Z].
    return isWellFormedLegacyKey(keyword) ? keyword : NULL;
}
2207
2208
/**
 * Converts a locale keyword value to its legacy type.
 *
 * @param keyword The keyword the value belongs to.
 * @param value   The value to convert.
 * @return The type found by ulocimp_toLegacyType(); otherwise `value` itself
 *         when isWellFormedLegacyType() accepts it; otherwise NULL.
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char* keyword, const char* value)
{
    const char* mappedType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
    if (mappedType != NULL) {
        return mappedType;
    }

    // No mapping is known, so fall back to a syntax check against the
    // legacy locale type syntax.
    //
    // Note:
    //  LDML/CLDR provides some definition of keyword syntax in
    //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
    //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
    //  Values (types) consist of [0-9a-zA-Z]; for legacy values the check
    //  also allows '/', '_' and '-' between alphanumeric runs
    //  (e.g. "Asia/Tel_Aviv").
    //
    // NOTE(review): an earlier comment here also listed '+' (e.g.
    //  "Etc/GMT+1"), but isWellFormedLegacyType() rejects '+', so such a
    //  value is only returned when ulocimp_toLegacyType() maps it. Confirm
    //  which behavior is intended.
    return isWellFormedLegacyType(value) ? value : NULL;
}
2227
2228
/*eof*/