Coverage Report

Created: 2025-06-13 06:34

/src/icu/icu4c/source/common/uloc.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (C) 1997-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*
9
* File ULOC.CPP
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   04/01/97    aliu        Creation.
15
*   08/21/98    stephen     JDK 1.2 sync
16
*   12/08/98    rtg         New Locale implementation and C API
17
*   03/15/99    damiba      overhaul.
18
*   04/06/99    stephen     changed setDefault() to realloc and copy
19
*   06/14/99    stephen     Changed calls to ures_open for new params
20
*   07/21/99    stephen     Modified setDefault() to propagate to C++
21
*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
22
*                           brought canonicalization code into line with spec
23
*****************************************************************************/
24
25
/*
26
   POSIX's locale format, from putil.c: [no spaces]
27
28
     ll [ _CC ] [ . MM ] [ @ VV]
29
30
     l = lang, C = ctry, M = charmap, V = variant
31
*/
32
33
#include <algorithm>
34
#include <optional>
35
#include <string_view>
36
37
#include "unicode/bytestream.h"
38
#include "unicode/errorcode.h"
39
#include "unicode/stringpiece.h"
40
#include "unicode/utypes.h"
41
#include "unicode/ustring.h"
42
#include "unicode/uloc.h"
43
44
#include "bytesinkutil.h"
45
#include "putilimp.h"
46
#include "ustr_imp.h"
47
#include "ulocimp.h"
48
#include "umutex.h"
49
#include "cstring.h"
50
#include "cmemory.h"
51
#include "locmap.h"
52
#include "uarrsort.h"
53
#include "uenumimp.h"
54
#include "uassert.h"
55
#include "charstr.h"
56
57
U_NAMESPACE_USE
58
59
/* ### Declarations **************************************************/
60
61
/* Locale stuff from locid.cpp */
62
U_CFUNC void locale_set_default(const char *id);
63
U_CFUNC const char *locale_get_default();
64
65
namespace {
66
67
/* ### Data tables **************************************************/
68
69
/**
70
 * Table of language codes, both 2- and 3-letter, with preference
71
 * given to 2-letter codes where possible.  Includes 3-letter codes
72
 * that lack a 2-letter equivalent.
73
 *
74
 * This list must be in sorted order.  This list is returned directly
75
 * to the user by some API.
76
 *
77
 * This list must be kept in sync with LANGUAGES_3, with corresponding
78
 * entries matched.
79
 *
80
 * This table should be terminated with a nullptr entry, followed by a
81
 * second list, and another nullptr entry.  The first list is visible to
82
 * user code when this array is returned by API.  The second list
83
 * contains codes we support, but do not expose through user API.
84
 *
85
 * Notes
86
 *
87
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
88
 * include the revisions up to 2001/7/27 *CWB*
89
 *
90
 * The 3 character codes are the terminology codes like RFC 3066.  This
91
 * is compatible with prior ICU codes
92
 *
93
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
94
 * table but now at the end of the table because 3 character codes are
95
 * duplicates.  This avoids bad searches going from 3 to 2 character
96
 * codes.
97
 *
98
 * The range qaa-qtz is reserved for local use
99
 */
100
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
101
/* ISO639 table version is 20150505 */
102
/* Subsequent hand addition of selected languages */
103
/* Layout: [codes exposed through API..., nullptr, codes supported but not
 * exposed..., nullptr].  Entry i must denote the same language as
 * LANGUAGES_3[i], so any insertion or removal here needs the matching edit
 * in LANGUAGES_3. */
constexpr const char* LANGUAGES[] = {
104
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
105
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
106
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
107
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
108
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
109
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
110
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
111
    "bgc", "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
112
    "blo", "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
113
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
114
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
115
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
116
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
117
    "cs",  "csb", "csw", "cu",  "cv",  "cy",
118
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
119
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
120
    "dyo", "dyu", "dz",  "dzg",
121
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
122
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
123
    "ext",
124
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
125
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
126
    "frs", "fur", "fy",
127
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
128
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
129
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
130
    "gur", "guz", "gv",  "gwi",
131
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
132
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
133
    "hup", "hy",  "hz",
134
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
135
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
136
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
137
    "jv",
138
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
139
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
140
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
141
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
142
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
143
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
144
    "kv",  "kw",  "kxv", "ky",
145
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
146
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
147
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
148
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
149
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
150
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
151
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
152
    "ml",  "mn",  "mnc", "mni",
153
    "moh", "mos", "mr",  "mrj",
154
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
155
    "my",  "mye", "myv", "mzn",
156
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
157
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
158
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
159
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
160
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
161
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
162
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
163
    "pon", "prg", "pro", "ps",  "pt",
164
    "qu",  "quc", "qug",
165
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
166
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
167
    "rw",  "rwk",
168
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
169
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
170
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
171
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
172
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
173
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
174
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
175
    "sv",  "sw",  "swb", "syc", "syr", "szl",
176
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
177
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr",
178
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tok", "tpi",
179
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
180
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
181
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
182
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vmw",
183
    "vo", "vot", "vro", "vun",
184
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
185
    "xal", "xh",  "xmf", "xnr", "xog",
186
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
187
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
188
    "zun", "zxx", "zza",
189
/* First terminator: ends the list that is visible to user code. */
nullptr,
190
    "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  /* obsolete language codes */
191
/* Second terminator: ends the list of supported-but-not-exposed codes. */
nullptr
192
};
193
194
/* Deprecated 2-letter language codes, terminated by two nullptr entries.
 * NOTE(review): presumably index-aligned with REPLACEMENT_LANGUAGES (same
 * length, same terminators) -- confirm against the lookup code. */
constexpr const char* DEPRECATED_LANGUAGES[]={
195
    "in", "iw", "ji", "jw", "mo", nullptr, nullptr
196
};
197
/* Current language codes, terminated by two nullptr entries.
 * NOTE(review): presumably entry i replaces DEPRECATED_LANGUAGES[i] (the two
 * tables have the same length and terminators) -- confirm against the
 * lookup code. */
constexpr const char* REPLACEMENT_LANGUAGES[]={
198
    "id", "he", "yi", "jv", "ro", nullptr, nullptr
199
};
200
201
/**
202
 * Table of 3-letter language codes.
203
 *
204
 * This is a lookup table used to convert 3-letter language codes to
205
 * their 2-letter equivalent, where possible.  It must be kept in sync
206
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
207
 * same language as LANGUAGES_3[i].  The commented-out lines are
208
 * copied from LANGUAGES to make eyeballing this baby easier.
209
 *
210
 * Where a 3-letter language code has no 2-letter equivalent, the
211
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
212
 *
213
 * This table should be terminated with a nullptr entry, followed by a
214
 * second list, and another nullptr entry.  The two lists correspond to
215
 * the two lists in LANGUAGES.
216
 */
217
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
218
/* ISO639 table version is 20150505 */
219
/* Subsequent hand addition of selected languages */
220
/* Layout mirrors LANGUAGES: [main list..., nullptr, second list..., nullptr].
 * Entry i must denote the same language as LANGUAGES[i]; where no 2-letter
 * code exists the same 3-letter code appears in both tables. */
constexpr const char* LANGUAGES_3[] = {
221
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
222
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
223
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
224
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
225
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
226
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
227
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
228
    "bgc", "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
229
    "blo", "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
230
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
231
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
232
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
233
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
234
    "ces", "csb", "csw", "chu", "chv", "cym",
235
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
236
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
237
    "dyo", "dyu", "dzo", "dzg",
238
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
239
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
240
    "ext",
241
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
242
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
243
    "frs", "fur", "fry",
244
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
245
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
246
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
247
    "gur", "guz", "glv", "gwi",
248
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
249
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
250
    "hup", "hye", "her",
251
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
252
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
253
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
254
    "jav",
255
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
256
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
257
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
258
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
259
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
260
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
261
    "kom", "cor", "kxv", "kir",
262
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
263
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
264
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
265
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
266
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
267
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
268
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
269
    "mal", "mon", "mnc", "mni",
270
    "moh", "mos", "mar", "mrj",
271
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
272
    "mya", "mye", "myv", "mzn",
273
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
274
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
275
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
276
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
277
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
278
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
279
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
280
    "pon", "prg", "pro", "pus", "por",
281
    "que", "quc", "qug",
282
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
283
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
284
    "kin", "rwk",
285
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
286
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
287
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
288
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
289
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
290
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
291
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
292
    "swe", "swa", "swb", "syc", "syr", "szl",
293
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
294
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",
295
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tok", "tpi",
296
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
297
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
298
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
299
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vmw",
300
    "vol", "vot", "vro", "vun",
301
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
302
    "xal", "xho", "xmf", "xnr", "xog",
303
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
304
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
305
    "zun", "zxx", "zza",
306
/* First terminator: ends the list matching the first list in LANGUAGES. */
nullptr,
307
/*  "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  */
308
    "ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl",
309
/* Second terminator: ends the list matching the second list in LANGUAGES. */
nullptr
310
};
311
312
/**
313
 * Table of 2-letter country codes.
314
 *
315
 * This list must be in sorted order.  This list is returned directly
316
 * to the user by some API.
317
 *
318
 * This list must be kept in sync with COUNTRIES_3, with corresponding
319
 * entries matched.
320
 *
321
 * This table should be terminated with a nullptr entry, followed by a
322
 * second list, and another nullptr entry.  The first list is visible to
323
 * user code when this array is returned by API.  The second list
324
 * contains codes we support, but do not expose through user API.
325
 *
326
 * Notes:
327
 *
328
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
329
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
330
 * new codes keeping the old ones for compatibility updated to include
331
 * 1999/12/03 revisions *CWB*
332
 *
333
 * RO(ROM) is now RO(ROU) according to
334
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
335
 */
336
/* Layout: [codes exposed through API..., nullptr, codes supported but not
 * exposed..., nullptr].  Entry i must denote the same country as
 * COUNTRIES_3[i], so any insertion or removal here needs the matching edit
 * in COUNTRIES_3. */
constexpr const char* COUNTRIES[] = {
337
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
338
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
339
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
340
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
341
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
342
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CQ",  "CR",
343
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",
344
    "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",
345
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
346
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
347
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
348
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
349
    "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
350
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
351
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
352
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
353
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
354
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
355
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
356
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
357
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
358
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
359
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
360
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
361
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
362
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
363
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
364
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
365
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
366
    "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
367
/* First terminator: ends the list that is visible to user code. */
nullptr,
368
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
369
/* Second terminator: ends the list of supported-but-not-exposed codes. */
nullptr
370
};
371
372
/* Deprecated 2-letter country codes, terminated by two nullptr entries.
 * Entry i corresponds to REPLACEMENT_COUNTRIES[i]; that table carries a
 * commented-out copy of this row to keep the two aligned. */
constexpr const char* DEPRECATED_COUNTRIES[] = {
373
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", nullptr, nullptr /* deprecated country list */
374
};
375
/* Current country codes, terminated by two nullptr entries.  Entry i is
 * listed under the deprecated code it pairs with (see the commented-out row
 * below, copied from DEPRECATED_COUNTRIES); several deprecated codes may
 * share one replacement (e.g. "CS" and "YU" both pair with "RS"). */
constexpr const char* REPLACEMENT_COUNTRIES[] = {
376
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
377
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", nullptr, nullptr  /* replacement country codes */
378
};
379
380
/**
381
 * Table of 3-letter country codes.
382
 *
383
 * This is a lookup table used to convert 3-letter country codes to
384
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
385
 * For all valid i, COUNTRIES[i] must refer to the same country as
386
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
387
 * to make eyeballing this baby easier.
388
 *
389
 * This table should be terminated with a nullptr entry, followed by a
390
 * second list, and another nullptr entry.  The two lists correspond to
391
 * the two lists in COUNTRIES.
392
 */
393
/* Layout mirrors COUNTRIES: [main list..., nullptr, second list..., nullptr].
 * Each commented-out row is the corresponding COUNTRIES row, kept directly
 * above its 3-letter equivalents so the alignment can be checked by eye. */
constexpr const char* COUNTRIES_3[] = {
394
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
395
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
396
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
397
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
398
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
399
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
400
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
401
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
402
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
403
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
404
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CQ",  "CR",     */
405
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRQ", "CRI",
406
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",     */
407
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
408
/*  "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",     */
409
    "DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI",
410
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
411
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
412
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
413
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
414
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
415
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
416
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
417
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
418
/*  "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
419
    "XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
420
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
421
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
422
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
423
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
424
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
425
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
426
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
427
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
428
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
429
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
430
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
431
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
432
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
433
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
434
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
435
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
436
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
437
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
438
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
439
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
440
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
441
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
442
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
443
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
444
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
445
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
446
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
447
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
448
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
449
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
450
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
451
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
452
/*  "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
453
    "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
454
/* First terminator: ends the list matching the first list in COUNTRIES. */
nullptr,
455
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
456
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
457
/* Second terminator: ends the list matching the second list in COUNTRIES. */
nullptr
458
};
459
460
/** One canonicalization rule: a locale ID and the ID it is rewritten to. */
struct CanonicalizationMap {
    const char *id;          /* locale ID to match on input */
    const char *canonicalID; /* canonical ID emitted in its place */
};
464
465
/**
466
 * A map to canonicalize locale IDs.  This handles a variety of
467
 * different semantic kinds of transformations.
468
 */
469
/* Each row is { id, canonicalID }: the locale ID to recognise and the ID
 * that replaces it. */
constexpr CanonicalizationMap CANONICALIZE_MAP[] = {
470
    { "art__LOJBAN",    "jbo" }, /* registered name */
471
    { "hy__AREVELA",    "hy" }, /* Registered IANA variant */
472
    { "hy__AREVMDA",    "hyw" }, /* Registered IANA variant */
473
    { "zh__GUOYU",      "zh" }, /* registered name */
474
    { "zh__HAKKA",      "hak" }, /* registered name */
475
    { "zh__XIANG",      "hsn" }, /* registered name */
476
    // subtags with 3 chars won't be treated as variants.
477
    { "zh_GAN",         "gan" }, /* registered name */
478
    { "zh_MIN_NAN",     "nan" }, /* registered name */
479
    { "zh_WUU",         "wuu" }, /* registered name */
480
    { "zh_YUE",         "yue" }, /* registered name */
481
};
482
483
/* ### BCP47 Conversion *******************************************/
484
/**
 * Returns the length of the shortest separator-terminated subtag in localeID.
 *
 * Subtags are runs of characters delimited by '_' or '-'.  A run is only
 * measured when a separator closes it, so the final subtag (the one after
 * the last separator) never lowers the result.  Empty runs produced by
 * leading or doubled separators are ignored.  When no run qualifies, the
 * full length of localeID is returned.
 *
 * @param localeID locale ID to scan
 * @return the shortest measured subtag length, or localeID's length
 */
int32_t getShortestSubtagLength(std::string_view localeID) {
    const int32_t total = static_cast<int32_t>(localeID.length());
    int32_t shortest = total;
    int32_t run = 0;  /* characters seen in the subtag currently being read */

    int32_t pos = 0;
    while (pos < total) {
        const char ch = localeID[pos++];
        const bool isSeparator = (ch == '_' || ch == '-');
        if (!isSeparator) {
            ++run;
            continue;
        }
        /* A separator closes the current run; empty runs are skipped. */
        if (run != 0 && run < shortest) {
            shortest = run;
        }
        run = 0;
    }

    return shortest;
}
509
/* Test if the locale id has BCP47 u extension and does not have '@' */
510
2.85k
inline bool _hasBCP47Extension(std::string_view id) {
511
2.85k
    return id.find('@') == std::string_view::npos && getShortestSubtagLength(id) == 1;
512
2.85k
}
513
514
/* ### Keywords **************************************************/
515
0
/* True when c is one of the characters '0' through '9'. */
inline bool UPRV_ISDIGIT(char c) {
    return !(c < '0' || c > '9');
}
516
2.94k
/* True when c is accepted by uprv_isASCIILetter() or by UPRV_ISDIGIT(). */
inline bool UPRV_ISALPHANUM(char c) {
    if (uprv_isASCIILetter(c)) {
        return true;
    }
    return UPRV_ISDIGIT(c);
}
517
/* Punctuation and symbols permitted in legacy keyword values: '_', '-', '+', '/'. */
inline bool UPRV_OK_VALUE_PUNCTUATION(char c) {
    switch (c) {
    case '_':
    case '-':
    case '+':
    case '/':
        return true;
    default:
        return false;
    }
}
519
520
}  // namespace
521
522
0
/* Size, in chars, of the fixed keyword-name buffer in KeywordStruct. */
#define ULOC_KEYWORD_BUFFER_LEN 25
523
0
/* NOTE(review): presumably the maximum number of keywords handled for one
   locale ID; its use lies outside this excerpt -- confirm. */
#define ULOC_MAX_NO_KEYWORDS 25
524
525
/**
 * Finds where the keyword section of a locale ID begins.
 *
 * @param localeID locale ID to scan
 * @return pointer into localeID's buffer at the first '@', or nullptr when
 *         there is none.  On EBCDIC builds, if no '@' is found, each known
 *         variant encoding of the '@' sign is tried in table order and the
 *         first occurrence of the first one present is returned.
 */
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(std::string_view localeID) {
    const size_t atPos = localeID.find('@');
    if (atPos != std::string_view::npos) {
        return localeID.data() + atPos;
    }
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    /* The @ sign is a variant character: the one compiled in on one EBCDIC
       machine is not necessarily the one used on another EBCDIC-based
       machine, so every known encoding of it is checked. */
    static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
    for (const uint8_t *candidate = ebcdicSigns; *candidate; ++candidate) {
        const size_t signPos = localeID.find(*candidate);
        if (signPos != std::string_view::npos) {
            return localeID.data() + signPos;
        }
    }
#endif
    return nullptr;
}
547
548
namespace {
549
550
/**
551
 * @param keywordName incoming name to be canonicalized
552
 * @param status return status (keyword too long)
553
 * @return the keyword name
554
 */
555
CharString locale_canonKeywordName(std::string_view keywordName, UErrorCode& status)
556
420
{
557
420
  if (U_FAILURE(status)) { return {}; }
558
420
  CharString result;
559
560
2.94k
  for (char c : keywordName) {
561
2.94k
    if (!UPRV_ISALPHANUM(c)) {
562
0
      status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
563
0
      return {};
564
0
    }
565
2.94k
    result.append(uprv_tolower(c), status);
566
2.94k
  }
567
420
  if (result.isEmpty()) {
568
0
    status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
569
0
    return {};
570
0
  }
571
572
420
  return result;
573
420
}
574
575
typedef struct {
576
    char keyword[ULOC_KEYWORD_BUFFER_LEN];
577
    int32_t keywordLen;
578
    const char *valueStart;
579
    int32_t valueLen;
580
} KeywordStruct;
581
582
int32_t U_CALLCONV
583
0
compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
584
0
    const char* leftString = static_cast<const KeywordStruct*>(left)->keyword;
585
0
    const char* rightString = static_cast<const KeywordStruct*>(right)->keyword;
586
0
    return uprv_strcmp(leftString, rightString);
587
0
}
588
589
}  // namespace
590
591
/* Convenience overload: runs the ByteSink overload of ulocimp_getKeywords()
   and returns what it wrote as a CharString. */
U_EXPORT CharString
ulocimp_getKeywords(std::string_view localeID,
                    char prev,
                    bool valuesToo,
                    UErrorCode& status)
{
    auto writeKeywords = [&](ByteSink& sink, UErrorCode& errorCode) {
        ulocimp_getKeywords(localeID, prev, sink, valuesToo, errorCode);
    };
    return ByteSinkUtil::viaByteSinkToCharString(writeKeywords, status);
}
607
608
/**
 * Parses a "key=value;key=value" keyword list, lowercases the keyword names,
 * drops repeated keywords (the first occurrence wins), sorts by keyword name
 * and writes the result to the sink.
 *
 * @param localeID  the keyword list to parse (the text following the '@')
 * @param prev      the character preceding localeID; nothing is written
 *                  unless it is '@'
 * @param sink      receives "key=value" pairs joined by ';' when valuesToo is
 *                  true, otherwise each keyword name followed by a NUL byte
 * @param valuesToo whether values are written along with the keyword names
 * @param status    U_INVALID_FORMAT_ERROR for a missing '=', a ';' before the
 *                  '=', an empty keyword name or an empty value;
 *                  U_INTERNAL_PROGRAM_ERROR when there are more than
 *                  ULOC_MAX_NO_KEYWORDS keywords or a keyword name does not
 *                  fit in ULOC_KEYWORD_BUFFER_LEN
 */
U_EXPORT void
ulocimp_getKeywords(std::string_view localeID,
                    char prev,
                    ByteSink& sink,
                    bool valuesToo,
                    UErrorCode& status)
{
    if (U_FAILURE(status)) { return; }

    /* only text that follows an '@' is a keyword definition */
    if (prev != '@') { return; }

    KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
    constexpr int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
    int32_t numKeywords = 0;

    /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
    do {
        /* skip leading spaces; the emptiness test must come first because
           front() on an empty string_view is undefined behavior */
        while (!localeID.empty() && localeID.front() == ' ') {
            localeID.remove_prefix(1);
        }
        if (localeID.empty()) { /* handle trailing "; " */
            break;
        }
        if (numKeywords == maxKeywords) {
            status = U_INTERNAL_PROGRAM_ERROR;
            return;
        }
        const size_t equalSign = localeID.find('=');
        const size_t semicolon = localeID.find(';');
        /* lack of '=' [foo@currency] is illegal */
        /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
        if (equalSign == std::string_view::npos ||
            (semicolon != std::string_view::npos && semicolon < equalSign)) {
            status = U_INVALID_FORMAT_ERROR;
            return;
        }
        /* zero-length keyword is an error. */
        if (equalSign == 0) {
            status = U_INVALID_FORMAT_ERROR;
            return;
        }
        /* need to normalize both keyword and keyword name */
        if (equalSign >= ULOC_KEYWORD_BUFFER_LEN) {
            /* keyword name too long for internal buffer */
            status = U_INTERNAL_PROGRAM_ERROR;
            return;
        }

        /* copy the name without its spaces, lowercased; the length check
           above guarantees room for the terminating NUL */
        KeywordStruct& entry = keywordList[numKeywords];
        int32_t n = 0;
        for (size_t i = 0; i < equalSign; ++i) {
            if (localeID[i] != ' ') {
                entry.keyword[n++] = uprv_tolower(localeID[i]);
            }
        }
        entry.keyword[n] = 0;
        entry.keywordLen = n;

        /* now grab the value part. First we skip the '=' */
        size_t valuePos = equalSign + 1;
        /* then we skip leading spaces */
        while (valuePos < localeID.length() && localeID[valuePos] == ' ') {
            valuePos++;
        }

        /* Premature end or zero-length value */
        if (valuePos == localeID.length() || valuePos == semicolon) {
            status = U_INVALID_FORMAT_ERROR;
            return;
        }

        entry.valueStart = localeID.data() + valuePos;

        /* cut the value out of the current pair and advance localeID to the next pair */
        std::string_view value = localeID;
        if (semicolon != std::string_view::npos) {
            value.remove_suffix(value.length() - semicolon);
            localeID.remove_prefix(semicolon + 1);
        } else {
            localeID = {};
        }
        value.remove_prefix(valuePos);
        /* drop trailing spaces from the value */
        if (size_t last = value.find_last_not_of(' '); last != std::string_view::npos) {
            value.remove_suffix(value.length() - last - 1);
        }
        entry.valueLen = static_cast<int32_t>(value.length());

        /* If this is a duplicate keyword, then ignore it */
        bool duplicate = false;
        for (int32_t j = 0; j < numKeywords; ++j) {
            if (uprv_strcmp(keywordList[j].keyword, entry.keyword) == 0) {
                duplicate = true;
                break;
            }
        }
        if (!duplicate) {
            ++numKeywords;
        }
    } while (!localeID.empty());

    /* now we have a list of keywords */
    /* we need to sort it */
    uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, nullptr, false, &status);

    /* Now construct the keyword part */
    for (int32_t i = 0; i < numKeywords; i++) {
        sink.Append(keywordList[i].keyword, keywordList[i].keywordLen);
        if (valuesToo) {
            sink.Append("=", 1);
            sink.Append(keywordList[i].valueStart, keywordList[i].valueLen);
            if (i < numKeywords - 1) {
                sink.Append(";", 1);
            }
        } else {
            sink.Append("\0", 1);
        }
    }
}
727
728
/* C API entry point: looks up keywordName in localeID and writes its value,
   NUL-terminated, into buffer via the ByteSink overload of
   ulocimp_getKeywordValue(). A null or empty keywordName sets
   U_ILLEGAL_ARGUMENT_ERROR and returns 0. */
U_CAPI int32_t U_EXPORT2
uloc_getKeywordValue(const char* localeID,
                     const char* keywordName,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status)
{
    if (U_FAILURE(*status)) { return 0; }

    const bool nameMissing = keywordName == nullptr || *keywordName == '\0';
    if (nameMissing) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    auto writeValue = [&](ByteSink& sink, UErrorCode& errorCode) {
        ulocimp_getKeywordValue(localeID, keywordName, sink, errorCode);
    };
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        buffer, bufferCapacity, writeValue, *status);
}
746
747
/* Convenience overload: runs the ByteSink overload of ulocimp_getKeywordValue()
   and returns what it wrote as a CharString. */
U_EXPORT CharString
ulocimp_getKeywordValue(const char* localeID,
                        std::string_view keywordName,
                        UErrorCode& status)
{
    auto writeValue = [&](ByteSink& sink, UErrorCode& errorCode) {
        ulocimp_getKeywordValue(localeID, keywordName, sink, errorCode);
    };
    return ByteSinkUtil::viaByteSinkToCharString(writeValue, status);
}
758
759
/**
 * Looks up one keyword in a locale ID and appends its value to the sink.
 *
 * The keyword name is matched in canonical (lowercased) form. Spaces around
 * keyword names and values in the locale ID are tolerated. The value is
 * appended exactly as written; it is not lowercased. When the keyword is not
 * present nothing is appended.
 *
 * @param localeID    NUL-terminated locale ID; nullptr is an error
 * @param keywordName keyword to look up; empty is an error
 * @param sink        receives the value, one byte at a time
 * @param status      U_ILLEGAL_ARGUMENT_ERROR for bad arguments or for a
 *                    malformed keyword list in the locale ID
 */
U_EXPORT void
ulocimp_getKeywordValue(const char* localeID,
                        std::string_view keywordName,
                        icu::ByteSink& sink,
                        UErrorCode& status)
{
    if (U_FAILURE(status)) { return; }

    if (localeID == nullptr || keywordName.empty()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return;
    }

    /* owns the converted ID for as long as `id` may point into it */
    CharString converted;

    CharString wantedName = locale_canonKeywordName(keywordName, status);
    if (U_FAILURE(status)) {
      return;
    }

    /* a BCP47 tag with an extension is converted first; fall back to the
       original ID when the conversion fails or yields nothing */
    const char* id = localeID;
    if (_hasBCP47Extension(localeID)) {
        converted = ulocimp_forLanguageTag(localeID, -1, nullptr, status);
        if (U_SUCCESS(status) && !converted.isEmpty()) {
            id = converted.data();
        }
    }

    /* `entry` points at the '@' or ';' that introduces the current pair;
       no keywords at all means there is nothing to do */
    const char* entry = locale_getKeywordsStart(id);
    while (entry != nullptr) {
        const char* name = entry + 1; /* skip @ or ; */
        const char* equals = uprv_strchr(name, '=');
        if (equals == nullptr) {
            status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
            return;
        }
        /* strip leading & trailing spaces (TC decided to tolerate these) */
        while (*name == ' ') {
            ++name;
        }
        const char* nameEnd = equals;
        while (nameEnd > name && nameEnd[-1] == ' ') {
            --nameEnd;
        }
        /* now nameEnd points to first char after the keyName */
        if (name == nameEnd) {
            status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
            return;
        }
        /* copy & normalize keyName from locale */
        CharString foundName;
        for (; name < nameEnd; ++name) {
          if (!UPRV_ISALPHANUM(*name)) {
            status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
            return;
          }
          foundName.append(uprv_tolower(*name), status);
        }
        if (U_FAILURE(status)) {
            return;
        }

        /* advance to the next pair before looking at this pair's value */
        entry = uprv_strchr(equals, ';');

        if (wantedName == foundName) {
            /* current entry matches the keyword. */
            const char* value = equals + 1; /* skip '=' */
            /* First strip leading & trailing spaces (TC decided to tolerate these) */
            while (*value == ' ') {
              ++value;
            }
            const char* valueEnd = entry != nullptr ? entry : value + uprv_strlen(value);
            while (valueEnd > value && valueEnd[-1] == ' ') {
              --valueEnd;
            }
            /* Now copy the value, but check well-formedness */
            if (value == valueEnd) {
              status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
              return;
            }
            for (; value < valueEnd; ++value) {
              if (!UPRV_ISALPHANUM(*value) && !UPRV_OK_VALUE_PUNCTUATION(*value)) {
                status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                return;
              }
              /* Should we lowercase value to return here? Tests expect as-is. */
              sink.Append(value, 1);
            }
            return;
        }
    }
}
862
863
/**
 * C API entry point: sets, replaces or removes a keyword in the locale ID held
 * in buffer, editing the buffer in place.
 *
 * @param keywordName    keyword to set; null or empty is an error
 * @param keywordValue   new value; null is passed on as an empty value
 * @param buffer         NUL-terminated locale ID, updated in place; null is
 *                       an error
 * @param bufferCapacity capacity of buffer; must be greater than 1 and not
 *                       smaller than the current string length
 * @param status         U_ILLEGAL_ARGUMENT_ERROR for bad arguments;
 *                       U_BUFFER_OVERFLOW_ERROR when the result does not fit
 * @return the length of the resulting locale ID; on U_BUFFER_OVERFLOW_ERROR
 *         the length that would be needed; 0 on any other error
 */
U_CAPI int32_t U_EXPORT2
uloc_setKeywordValue(const char* keywordName,
                     const char* keywordValue,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status)
{
    if (U_FAILURE(*status)) { return 0; }

    if (keywordName == nullptr || *keywordName == 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* buffer is read with uprv_strlen() below, so it must not be null */
    if (buffer == nullptr || bufferCapacity <= 1) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    const int32_t bufLen = static_cast<int32_t>(uprv_strlen(buffer));
    if (bufferCapacity < bufLen) {
        /* The capacity is less than the length?! Is this NUL terminated? */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    /* keywords points at the '@' inside buffer, or is null when there is none */
    char* keywords = const_cast<char*>(
        locale_getKeywordsStart({buffer, static_cast<std::string_view::size_type>(bufLen)}));
    const int32_t baseLen =
        keywords == nullptr ? bufLen : static_cast<int32_t>(keywords - buffer);
    // Remove -1 from the capacity so that this function can guarantee NUL termination.
    CheckedArrayByteSink sink(keywords == nullptr ? buffer + bufLen : keywords,
                              bufferCapacity - baseLen - 1);
    int32_t reslen = ulocimp_setKeywordValue(
        keywords == nullptr ? std::string_view() : keywords,
        keywordName,
        keywordValue == nullptr ? std::string_view() : keywordValue,
        sink,
        *status);

    if (U_FAILURE(*status)) {
        return *status == U_BUFFER_OVERFLOW_ERROR ? reslen + baseLen : 0;
    }

    // See the documentation for this function, it's guaranteed to never
    // overflow the buffer but instead abort with BUFFER_OVERFLOW_ERROR.
    // In this case, nothing has been written to the sink, so it cannot have Overflowed().
    U_ASSERT(!sink.Overflowed());
    U_ASSERT(reslen >= 0);
    return u_terminateChars(buffer, bufferCapacity, reslen + baseLen, status);
}
912
913
/* Sets, replaces or removes a keyword in the locale ID held by a CharString:
   the existing keyword section (if any) is cut off and the ByteSink overload
   appends the updated keyword section after the base name. */
U_EXPORT void
ulocimp_setKeywordValue(std::string_view keywordName,
                        std::string_view keywordValue,
                        CharString& localeID,
                        UErrorCode& status)
{
    if (U_FAILURE(status)) { return; }

    std::string_view keywords;
    const char* const atSign = locale_getKeywordsStart(localeID.toStringPiece());
    if (atSign != nullptr) {
        // This is safe because CharString::truncate() doesn't actually erase any
        // data, but simply sets the position for where new data will be written.
        const int32_t baseLength = static_cast<int32_t>(atSign - localeID.data());
        keywords = localeID.toStringPiece();
        keywords.remove_prefix(baseLength);
        localeID.truncate(baseLength);
    }

    CharStringByteSink sink(&localeID);
    ulocimp_setKeywordValue(keywords, keywordName, keywordValue, sink, status);
}
932
933
/**
 * Builds an updated keyword section ("@key=value;key=value") with one keyword
 * set, replaced or removed, and writes it to the sink.
 *
 * @param keywords     existing keyword section starting at its '@', or a
 *                     string of at most one character when there is none
 * @param keywordName  keyword to set; empty is an error
 * @param keywordValue new value; empty means remove the keyword
 * @param sink         receives the updated keyword section
 * @param status       U_ILLEGAL_ARGUMENT_ERROR for a malformed name, value or
 *                     existing keyword section; U_BUFFER_OVERFLOW_ERROR when
 *                     the sink cannot provide enough room
 * @return the length of the keyword section written (or needed, on overflow);
 *         0 on argument errors or when there is nothing to write
 */
U_EXPORT int32_t
ulocimp_setKeywordValue(std::string_view keywords,
                        std::string_view keywordName,
                        std::string_view keywordValue,
                        ByteSink& sink,
                        UErrorCode& status)
{
    if (U_FAILURE(status)) { return 0; }

    /* TODO: sorting. removal. */
    if (status == U_STRING_NOT_TERMINATED_WARNING) {
        status = U_ZERO_ERROR;
    }
    if (keywordName.empty()) {
        status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }
    CharString newName = locale_canonKeywordName(keywordName, status);
    if (U_FAILURE(status)) {
        return 0;
    }

    CharString newValue;
    for (size_t i = 0; i < keywordValue.size(); ++i) {
        const char ch = keywordValue[i];
        if (!UPRV_ISALPHANUM(ch) && !UPRV_OK_VALUE_PUNCTUATION(ch)) {
            status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
            return 0;
        }
        /* Should we force lowercase in value to set? */
        newValue.append(ch, status);
    }
    if (U_FAILURE(status)) {
        return 0;
    }

    /* shortcut - no existing keywords */
    if (keywords.size() <= 1) {
        if (newValue.isEmpty()) { /* no keywords = nothing to remove */
            U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
            return 0;
        }

        const int32_t total = 1 + newName.length() + 1 + newValue.length();
        int32_t capacity = 0;
        char* out = sink.GetAppendBuffer(
                total, total, nullptr, total, &capacity);
        if (out == nullptr || capacity < total) {
            status = U_BUFFER_OVERFLOW_ERROR;
            return total; /* no change */
        }

        char* cursor = out;
        *cursor = '@';
        ++cursor;
        uprv_memcpy(cursor, newName.data(), newName.length());
        cursor += newName.length();
        *cursor = '=';
        ++cursor;
        uprv_memcpy(cursor, newValue.data(), newValue.length());
        sink.Append(out, total);
        U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
        return total;
    }

    CharString rebuilt;           /* the updated keyword section */
    bool inputHandled = false;    /* whether the input keyword has been emitted or removed */
    char separator = '@';         /* '@' before the first pair, ';' before the others */

    /* appends separator, key, '=' and value to the rebuilt keyword section */
    const auto appendPair = [&](const CharString& key, const char* value, int32_t valueLength) {
        rebuilt.append(separator, status);
        separator = ';'; /* for any subsequent key-value pair */
        rebuilt.append(key, status);
        rebuilt.append('=', status);
        rebuilt.append(value, valueLength, status);
    };

    /* search for keyword; entryPos is the index of the '@' or ';' that starts a pair */
    size_t entryPos = 0;
    do {
        size_t namePos = entryPos + 1; /* skip @ or ; */
        const size_t equalsPos = keywords.find('=', namePos);
        if (equalsPos == std::string_view::npos) {
            status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
            return 0;
        }
        /* strip leading & trailing spaces (TC decided to tolerate these) */
        while (namePos < keywords.size() && keywords[namePos] == ' ') {
            ++namePos;
        }
        size_t nameEnd = equalsPos;
        while (nameEnd > namePos && keywords[nameEnd - 1] == ' ') {
            --nameEnd;
        }
        /* now nameEnd is the index of the first char after the keyName */
        if (namePos == nameEnd) {
            status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
            return 0;
        }
        /* copy & normalize keyName from locale */
        CharString existingName;
        for (; namePos < nameEnd; ++namePos) {
            const char ch = keywords[namePos];
            if (!UPRV_ISALPHANUM(ch)) {
                status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
                return 0;
            }
            existingName.append(uprv_tolower(ch), status);
        }
        if (U_FAILURE(status)) {
            return 0;
        }

        const size_t nextSemicolon = keywords.find(';', equalsPos);

        /* start processing the value part */
        size_t valuePos = equalsPos + 1; /* skip '=' */
        /* First strip leading & trailing spaces (TC decided to tolerate these) */
        while (valuePos < keywords.size() && keywords[valuePos] == ' ') {
            ++valuePos;
        }
        size_t valueEnd =
            nextSemicolon == std::string_view::npos ? keywords.size() : nextSemicolon;
        while (valueEnd > valuePos && keywords[valueEnd - 1] == ' ') {
            --valueEnd;
        }
        if (valuePos == valueEnd) {
            status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
            return 0;
        }

        const int order = uprv_strcmp(newName.data(), existingName.data());
        if (order == 0) {
            /* Current entry matches the input keyword. Update the entry */
            if (!newValue.isEmpty()) { /* updating a value */
                appendPair(newName, newValue.data(), newValue.length());
            } /* else removing this entry, don't emit anything */
            inputHandled = true;
        } else {
            /* input keyword sorts earlier than current entry, add before current entry */
            if (order < 0 && !newValue.isEmpty() && !inputHandled) {
                /* insert new entry at this location */
                appendPair(newName, newValue.data(), newValue.length());
                inputHandled = true;
            }
            /* copy the current entry */
            appendPair(existingName, keywords.data() + valuePos,
                       static_cast<int32_t>(valueEnd - valuePos));
        }
        if (nextSemicolon == std::string_view::npos && !newValue.isEmpty() && !inputHandled) {
            /* append new entry at the end, it sorts later than existing entries */
            appendPair(newName, newValue.data(), newValue.length());
            inputHandled = true;
        }
        entryPos = nextSemicolon;
    } while (entryPos != std::string_view::npos); /* end loop searching */

    /* Any error from rebuilt.append above would be internal and not due to
     * problems with the passed-in locale. So if we did encounter problems with the
     * passed-in locale above, those errors took precedence and overrode any error
     * status from rebuilt.append, and also caused a return of 0. If there
     * are errors here they are from rebuilt.append; they do cause an
     * error return but the passed-in locale is unmodified and the original length is
     * returned.
     */
    if (!inputHandled || U_FAILURE(status)) {
        /* if input key/value specified removal of a keyword not present in locale, or
         * there was an error in CharString.append, leave original locale alone. */
        U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
        // The sink is expected to be a buffer which already contains the full
        // locale string, so when it isn't going to be modified there's no need
        // to actually write any data to it, as the data is already there. Only
        // the first character needs to be overwritten (changing '\0' to '@').
        const int32_t unchangedLength = static_cast<int32_t>(keywords.size());
        int32_t capacity = 0;
        char* out = sink.GetAppendBuffer(
                unchangedLength, unchangedLength, nullptr, unchangedLength, &capacity);
        if (out == nullptr || capacity < unchangedLength) {
            status = U_BUFFER_OVERFLOW_ERROR;
        } else {
            *out = '@';
            sink.Append(out, unchangedLength);
        }
        return unchangedLength;
    }

    // Check to see can we fit the rebuilt keyword section, if not, return
    // U_BUFFER_OVERFLOW_ERROR without copying it into the sink.
    // We do this because this API function does not behave like most others:
    // It promises never to set a U_STRING_NOT_TERMINATED_WARNING.
    // When the contents fits but without the terminating NUL, in this case we need to not change
    // the buffer contents and return with a buffer overflow error.
    const int32_t rebuiltLength = rebuilt.length();
    if (rebuiltLength > 0) {
        int32_t capacity = 0;
        char* out = sink.GetAppendBuffer(
                rebuiltLength, rebuiltLength, nullptr, rebuiltLength, &capacity);
        if (out == nullptr || capacity < rebuiltLength) {
            status = U_BUFFER_OVERFLOW_ERROR;
            return rebuiltLength;
        }
        uprv_memcpy(out, rebuilt.data(), rebuiltLength);
        sink.Append(out, rebuiltLength);
    }
    U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
    return rebuiltLength;
}
1143
1144
/* ### ID parsing implementation **************************************************/
1145
1146
namespace {
1147
1148
3.18k
inline bool _isPrefixLetter(char a) {
    switch (a) {
    case 'x':
    case 'X':
    case 'i':
    case 'I':
        return true;
    default:
        return false;
    }
}
1149
1150
/*returns true if one of the special prefixes is here (s=string)
1151
  'x-' or 'i-' */
1152
3.18k
inline bool _isIDPrefix(std::string_view s) {
1153
3.18k
    return s.size() >= 2 && _isPrefixLetter(s[0]) && _isIDSeparator(s[1]);
1154
3.18k
}
1155
1156
/* Dot terminates it because of POSIX form  where dot precedes the codepage
1157
 * except for variant
1158
 */
1159
20.4k
inline bool _isTerminator(char a) {
    switch (a) {
    case '.':
    case '@':
        return true;
    default:
        return false;
    }
}
1160
1161
11
/* Reports whether p begins with "-t-", "-u-" or "-x-" (letter in either case). */
inline bool _isBCP47Extension(std::string_view p) {
    if (p.size() < 3 || p[0] != '-') {
        return false;
    }
    switch (p[1]) {
    case 't': case 'T':
    case 'u': case 'U':
    case 'x': case 'X':
        return p[2] == '-';
    default:
        return false;
    }
}
1169
1170
/**
1171
 * Lookup 'key' in the array 'list'.  The array 'list' should contain
1172
 * a nullptr entry, followed by more entries, and a second nullptr entry.
1173
 *
1174
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1175
 * COUNTRIES_3.
1176
 */
1177
std::optional<int16_t> _findIndex(const char* const* list, const char* key)
1178
724
{
1179
724
    const char* const* anchor = list;
1180
724
    int32_t pass = 0;
1181
1182
    /* Make two passes through two nullptr-terminated arrays at 'list' */
1183
774
    while (pass++ < 2) {
1184
226k
        while (*list) {
1185
226k
            if (uprv_strcmp(key, *list) == 0) {
1186
699
                return static_cast<int16_t>(list - anchor);
1187
699
            }
1188
225k
            list++;
1189
225k
        }
1190
50
        ++list;     /* skip final nullptr *CWB*/
1191
50
    }
1192
25
    return std::nullopt;
1193
724
}
1194
1195
}  // namespace
1196
1197
/* Maps a country ID listed in DEPRECATED_COUNTRIES to the entry at the same
   index in REPLACEMENT_COUNTRIES; any other ID is returned unchanged. */
U_CFUNC const char*
uloc_getCurrentCountryID(const char* oldID){
    const std::optional<int16_t> index = _findIndex(DEPRECATED_COUNTRIES, oldID);
    if (!index.has_value()) {
        return oldID;
    }
    return REPLACEMENT_COUNTRIES[*index];
}
1202
/* Maps a language ID listed in DEPRECATED_LANGUAGES to the entry at the same
   index in REPLACEMENT_LANGUAGES; any other ID is returned unchanged. */
U_CFUNC const char*
uloc_getCurrentLanguageID(const char* oldID){
    const std::optional<int16_t> index = _findIndex(DEPRECATED_LANGUAGES, oldID);
    if (!index.has_value()) {
        return oldID;
    }
    return REPLACEMENT_LANGUAGES[*index];
}
1207
1208
namespace {
1209
1210
/*
1211
 * the internal functions _getLanguage(), _getScript(), _getRegion(), _getVariant()
1212
 * avoid duplicating code to handle the earlier locale ID pieces
1213
 * in the functions for the later ones by
1214
 * setting the *pEnd pointer to where they stopped parsing
1215
 *
1216
 * TODO try to use this in Locale
1217
 */
1218
1219
3.18k
size_t _getLanguage(std::string_view localeID, ByteSink* sink, UErrorCode& status) {
1220
3.18k
    size_t skip = 0;
1221
3.18k
    if (localeID.size() == 4 && uprv_strnicmp(localeID.data(), "root", 4) == 0) {
1222
0
        skip = 4;
1223
0
        localeID.remove_prefix(skip);
1224
3.18k
    } else if (localeID.size() >= 3 && uprv_strnicmp(localeID.data(), "und", 3) == 0 &&
1225
3.18k
               (localeID.size() == 3 ||
1226
0
                localeID[3] == '-' ||
1227
0
                localeID[3] == '_' ||
1228
0
                localeID[3] == '@')) {
1229
0
        skip = 3;
1230
0
        localeID.remove_prefix(skip);
1231
0
    }
1232
1233
3.18k
    constexpr int32_t MAXLEN = ULOC_LANG_CAPACITY - 1;  // Minus NUL.
1234
1235
    /* if it starts with i- or x- then copy that prefix */
1236
3.18k
    size_t len = _isIDPrefix(localeID) ? 2 : 0;
1237
10.3k
    while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
1238
7.15k
        if (len == MAXLEN) {
1239
0
            status = U_ILLEGAL_ARGUMENT_ERROR;
1240
0
            return 0;
1241
0
        }
1242
7.15k
        len++;
1243
7.15k
    }
1244
1245
3.18k
    if (sink == nullptr || len == 0) { return skip + len; }
1246
1247
2.88k
    int32_t minCapacity = uprv_max(static_cast<int32_t>(len), 4);  // Minimum 3 letters plus NUL.
1248
2.88k
    char scratch[MAXLEN];
1249
2.88k
    int32_t capacity = 0;
1250
2.88k
    char* buffer = sink->GetAppendBuffer(
1251
2.88k
            minCapacity, minCapacity, scratch, UPRV_LENGTHOF(scratch), &capacity);
1252
1253
9.34k
    for (size_t i = 0; i < len; ++i) {
1254
6.46k
        buffer[i] = uprv_tolower(localeID[i]);
1255
6.46k
    }
1256
2.88k
    if (localeID.size() >= 2 && _isIDSeparator(localeID[1])) {
1257
0
        buffer[1] = '-';
1258
0
    }
1259
1260
2.88k
    if (len == 3) {
1261
        /* convert 3 character code to 2 character code if possible *CWB*/
1262
699
        U_ASSERT(capacity >= 4);
1263
699
        buffer[3] = '\0';
1264
699
        std::optional<int16_t> offset = _findIndex(LANGUAGES_3, buffer);
1265
699
        if (offset.has_value()) {
1266
699
            const char* const alias = LANGUAGES[*offset];
1267
699
            sink->Append(alias, static_cast<int32_t>(uprv_strlen(alias)));
1268
699
            return skip + len;
1269
699
        }
1270
699
    }
1271
1272
2.18k
    sink->Append(buffer, static_cast<int32_t>(len));
1273
2.18k
    return skip + len;
1274
2.88k
}
1275
1276
2.56k
size_t _getScript(std::string_view localeID, ByteSink* sink) {
1277
2.56k
    constexpr int32_t LENGTH = 4;
1278
1279
2.56k
    size_t len = 0;
1280
8.91k
    while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len]) &&
1281
8.91k
            uprv_isASCIILetter(localeID[len])) {
1282
6.35k
        if (len == LENGTH) { return 0; }
1283
6.35k
        len++;
1284
6.35k
    }
1285
2.56k
    if (len != LENGTH) { return 0; }
1286
1287
647
    if (sink == nullptr) { return len; }
1288
1289
487
    char scratch[LENGTH];
1290
487
    int32_t capacity = 0;
1291
487
    char* buffer = sink->GetAppendBuffer(
1292
487
            LENGTH, LENGTH, scratch, UPRV_LENGTHOF(scratch), &capacity);
1293
1294
487
    buffer[0] = uprv_toupper(localeID[0]);
1295
1.94k
    for (int32_t i = 1; i < LENGTH; ++i) {
1296
1.46k
        buffer[i] = uprv_tolower(localeID[i]);
1297
1.46k
    }
1298
1299
487
    sink->Append(buffer, LENGTH);
1300
487
    return len;
1301
647
}
1302
1303
2.03k
size_t _getRegion(std::string_view localeID, ByteSink* sink) {
1304
2.03k
    constexpr int32_t MINLEN = 2;
1305
2.03k
    constexpr int32_t MAXLEN = ULOC_COUNTRY_CAPACITY - 1;  // Minus NUL.
1306
1307
2.03k
    size_t len = 0;
1308
6.13k
    while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
1309
4.10k
        if (len == MAXLEN) { return 0; }
1310
4.10k
        len++;
1311
4.10k
    }
1312
2.03k
    if (len < MINLEN) { return 0; }
1313
1314
2.03k
    if (sink == nullptr) { return len; }
1315
1316
1.87k
    char scratch[ULOC_COUNTRY_CAPACITY];
1317
1.87k
    int32_t capacity = 0;
1318
1.87k
    char* buffer = sink->GetAppendBuffer(
1319
1.87k
            ULOC_COUNTRY_CAPACITY,
1320
1.87k
            ULOC_COUNTRY_CAPACITY,
1321
1.87k
            scratch,
1322
1.87k
            UPRV_LENGTHOF(scratch),
1323
1.87k
            &capacity);
1324
1325
5.64k
    for (size_t i = 0; i < len; ++i) {
1326
3.77k
        buffer[i] = uprv_toupper(localeID[i]);
1327
3.77k
    }
1328
1329
1.87k
    if (len == 3) {
1330
        /* convert 3 character code to 2 character code if possible *CWB*/
1331
25
        U_ASSERT(capacity >= 4);
1332
25
        buffer[3] = '\0';
1333
25
        std::optional<int16_t> offset = _findIndex(COUNTRIES_3, buffer);
1334
25
        if (offset.has_value()) {
1335
0
            const char* const alias = COUNTRIES[*offset];
1336
0
            sink->Append(alias, static_cast<int32_t>(uprv_strlen(alias)));
1337
0
            return len;
1338
0
        }
1339
25
    }
1340
1341
1.87k
    sink->Append(buffer, static_cast<int32_t>(len));
1342
1.87k
    return len;
1343
1.87k
}
1344
1345
/**
1346
 * @param needSeparator if true, then add leading '_' if any variants
1347
 * are added to 'variant'
1348
 */
1349
size_t
1350
_getVariant(std::string_view localeID,
1351
            char prev,
1352
            ByteSink* sink,
1353
            bool needSeparator,
1354
11
            UErrorCode& status) {
1355
11
    if (U_FAILURE(status) || localeID.empty()) return 0;
1356
1357
    // Reasonable upper limit for variants
1358
    // There are no strict limitation of the syntax of variant in the legacy
1359
    // locale format. If the locale is constructed from unicode_locale_id
1360
    // as defined in UTS35, then we know each unicode_variant_subtag
1361
    // could have max length of 8 ((alphanum{5,8} | digit alphanum{3})
1362
    // 179 would allow 20 unicode_variant_subtag with sep in the
1363
    // unicode_locale_id
1364
    // 8*20 + 1*(20-1) = 179
1365
11
    constexpr int32_t MAX_VARIANTS_LENGTH = 179;
1366
1367
    /* get one or more variant tags and separate them with '_' */
1368
11
    size_t index = 0;
1369
11
    if (_isIDSeparator(prev)) {
1370
        /* get a variant string after a '-' or '_' */
1371
11
        for (std::string_view sub = localeID;;) {
1372
11
            size_t next = sub.find_first_of(".@_-");
1373
            // For historical reasons, a trailing separator is included in the variant.
1374
11
            bool finished = next == std::string_view::npos || next + 1 == sub.length();
1375
11
            size_t limit = finished ? sub.length() : next;
1376
11
            index += limit;
1377
11
            if (index > MAX_VARIANTS_LENGTH) {
1378
0
                status = U_ILLEGAL_ARGUMENT_ERROR;
1379
0
                return 0;
1380
0
            }
1381
1382
11
            if (sink != nullptr) {
1383
11
                if (needSeparator) {
1384
0
                    sink->Append("_", 1);
1385
11
                } else {
1386
11
                    needSeparator = true;
1387
11
                }
1388
1389
11
                int32_t length = static_cast<int32_t>(limit);
1390
11
                int32_t minCapacity = uprv_min(length, MAX_VARIANTS_LENGTH);
1391
11
                char scratch[MAX_VARIANTS_LENGTH];
1392
11
                int32_t capacity = 0;
1393
11
                char* buffer = sink->GetAppendBuffer(
1394
11
                        minCapacity, minCapacity, scratch, UPRV_LENGTHOF(scratch), &capacity);
1395
1396
66
                for (size_t i = 0; i < limit; ++i) {
1397
55
                    buffer[i] = uprv_toupper(sub[i]);
1398
55
                }
1399
11
                sink->Append(buffer, length);
1400
11
            }
1401
1402
11
            if (finished) { return index; }
1403
0
            sub.remove_prefix(next);
1404
0
            if (_isTerminator(sub.front()) || _isBCP47Extension(sub)) { return index; }
1405
0
            sub.remove_prefix(1);
1406
0
            index++;
1407
0
        }
1408
11
    }
1409
1410
0
    size_t skip = 0;
1411
    /* if there is no variant tag after a '-' or '_' then look for '@' */
1412
0
    if (prev == '@') {
1413
        /* keep localeID */
1414
0
    } else if (const char* p = locale_getKeywordsStart(localeID); p != nullptr) {
1415
0
        skip = 1 + p - localeID.data(); /* point after the '@' */
1416
0
        localeID.remove_prefix(skip);
1417
0
    } else {
1418
0
        return 0;
1419
0
    }
1420
0
    for (; index < localeID.size() && !_isTerminator(localeID[index]); index++) {
1421
0
        if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH
1422
0
            status = U_ILLEGAL_ARGUMENT_ERROR;
1423
0
            return 0;
1424
0
        }
1425
0
        if (needSeparator) {
1426
0
            if (sink != nullptr) {
1427
0
                sink->Append("_", 1);
1428
0
            }
1429
0
            needSeparator = false;
1430
0
        }
1431
0
        if (sink != nullptr) {
1432
0
            char c = uprv_toupper(localeID[index]);
1433
0
            if (c == '-' || c == ',') c = '_';
1434
0
            sink->Append(&c, 1);
1435
0
        }
1436
0
    }
1437
0
    return skip + index;
1438
0
}
1439
1440
}  // namespace
1441
1442
/**
 * Returns the language subtag of localeID as a CharString.
 *
 * Delegates to ulocimp_getSubtags with a sink in the language slot only.
 */
U_EXPORT CharString
ulocimp_getLanguage(std::string_view localeID, UErrorCode& status) {
    auto extract = [&](ByteSink& out, UErrorCode& ec) {
        ulocimp_getSubtags(localeID, &out, nullptr, nullptr, nullptr, nullptr, ec);
    };
    return ByteSinkUtil::viaByteSinkToCharString(extract, status);
}
1457
1458
/**
 * Returns the script subtag of localeID as a CharString.
 *
 * Delegates to ulocimp_getSubtags with a sink in the script slot only.
 */
U_EXPORT CharString
ulocimp_getScript(std::string_view localeID, UErrorCode& status) {
    auto extract = [&](ByteSink& out, UErrorCode& ec) {
        ulocimp_getSubtags(localeID, nullptr, &out, nullptr, nullptr, nullptr, ec);
    };
    return ByteSinkUtil::viaByteSinkToCharString(extract, status);
}
1473
1474
/**
 * Returns the region subtag of localeID as a CharString.
 *
 * Delegates to ulocimp_getSubtags with a sink in the region slot only.
 */
U_EXPORT CharString
ulocimp_getRegion(std::string_view localeID, UErrorCode& status) {
    auto extract = [&](ByteSink& out, UErrorCode& ec) {
        ulocimp_getSubtags(localeID, nullptr, nullptr, &out, nullptr, nullptr, ec);
    };
    return ByteSinkUtil::viaByteSinkToCharString(extract, status);
}
1489
1490
/**
 * Returns the variant subtag(s) of localeID as a CharString.
 *
 * Delegates to ulocimp_getSubtags with a sink in the variant slot only.
 */
U_EXPORT CharString
ulocimp_getVariant(std::string_view localeID, UErrorCode& status) {
    auto extract = [&](ByteSink& out, UErrorCode& ec) {
        ulocimp_getSubtags(localeID, nullptr, nullptr, nullptr, &out, nullptr, ec);
    };
    return ByteSinkUtil::viaByteSinkToCharString(extract, status);
}
1505
1506
/**
 * CharString convenience overload of ulocimp_getSubtags.
 *
 * Wraps each non-null CharString in a CharStringByteSink and forwards to the
 * ByteSink overload; a null destination is forwarded as a null sink.
 *
 * @param localeID the locale ID to split
 * @param language receives the language, or nullptr if not wanted
 * @param script   receives the script, or nullptr if not wanted
 * @param region   receives the region, or nullptr if not wanted
 * @param variant  receives the variant, or nullptr if not wanted
 * @param pEnd     forwarded unchanged to the ByteSink overload
 * @param status   error code; nothing is done if it already indicates failure
 */
U_EXPORT void
ulocimp_getSubtags(
        std::string_view localeID,
        CharString* language,
        CharString* script,
        CharString* region,
        CharString* variant,
        const char** pEnd,
        UErrorCode& status) {
    if (U_FAILURE(status)) { return; }

    // The sinks live in these optionals for the duration of the call.
    std::optional<CharStringByteSink> languageSink;
    std::optional<CharStringByteSink> scriptSink;
    std::optional<CharStringByteSink> regionSink;
    std::optional<CharStringByteSink> variantSink;

    // Creates a sink for 'dest' in 'slot' when a destination was supplied.
    const auto bind = [](std::optional<CharStringByteSink>& slot, CharString* dest) -> ByteSink* {
        if (dest == nullptr) { return nullptr; }
        return &slot.emplace(dest);
    };

    ByteSink* const languageOut = bind(languageSink, language);
    ByteSink* const scriptOut = bind(scriptSink, script);
    ByteSink* const regionOut = bind(regionSink, region);
    ByteSink* const variantOut = bind(variantSink, variant);

    ulocimp_getSubtags(localeID, languageOut, scriptOut, regionOut, variantOut, pEnd, status);
}
1536
1537
/**
 * Splits localeID into language, script, region and variant.
 *
 * The pieces are parsed left to right with _getLanguage, _getScript,
 * _getRegion and _getVariant. Parsing stops early once no remaining output
 * (sink or pEnd) could still be affected. After the legacy variant, a BCP 47
 * extension is scanned for "-va-posix", which adds "POSIX" to the variant.
 *
 * @param localeID the locale ID to split
 * @param language sink for the language, or nullptr
 * @param script   sink for the script, or nullptr
 * @param region   sink for the region, or nullptr
 * @param variant  sink for the variant, or nullptr
 * @param pEnd     if not nullptr, updated after each parsed piece to point
 *                 into localeID just past what has been consumed so far
 * @param status   error code; nothing is done if it already indicates
 *                 failure, and parsing stops if a helper reports an error
 */
U_EXPORT void
ulocimp_getSubtags(
        std::string_view localeID,
        ByteSink* language,
        ByteSink* script,
        ByteSink* region,
        ByteSink* variant,
        const char** pEnd,
        UErrorCode& status) {
    if (U_FAILURE(status)) { return; }

    const bool wantEnd = pEnd != nullptr;
    // Publishes the current parse position to the caller, if requested.
    const auto markEnd = [&] {
        if (wantEnd) { *pEnd = localeID.data(); }
    };

    /* ---- language ---- */
    if (!wantEnd &&
            language == nullptr &&
            script == nullptr &&
            region == nullptr &&
            variant == nullptr) {
        return;  // nothing was asked for
    }
    markEnd();
    if (localeID.empty()) { return; }

    {
        const size_t len = _getLanguage(localeID, language, status);
        if (U_FAILURE(status)) { return; }
        if (len > 0) {
            localeID.remove_prefix(len);
        }
    }

    /* ---- script ---- */
    if (!wantEnd &&
            script == nullptr &&
            region == nullptr &&
            variant == nullptr) {
        return;
    }
    markEnd();
    if (localeID.empty()) { return; }

    if (_isIDSeparator(localeID.front())) {
        const size_t len = _getScript(localeID.substr(1), script);
        if (len > 0) {
            localeID.remove_prefix(len + 1);  // separator + script
            markEnd();
        }
    }

    /* ---- region ---- */
    if ((region == nullptr && variant == nullptr && !wantEnd) || localeID.empty()) { return; }

    bool hasRegion = false;
    if (_isIDSeparator(localeID.front())) {
        const size_t len = _getRegion(localeID.substr(1), region);
        if (len > 0) {
            hasRegion = true;
            localeID.remove_prefix(len + 1);  // separator + region
            markEnd();
        }
    }

    /* ---- legacy variant ---- */
    if ((variant == nullptr && !wantEnd) || localeID.empty()) { return; }

    bool hasVariant = false;
    if (_isIDSeparator(localeID.front()) && !_isBCP47Extension(localeID)) {
        /* If there was no country ID, skip a possible extra IDSeparator */
        const size_t skip =
                !hasRegion && localeID.size() > 1 && _isIDSeparator(localeID[1]) ? 2 : 1;
        const size_t len =
                _getVariant(localeID.substr(skip), localeID[0], variant, false, status);
        if (U_FAILURE(status)) { return; }
        if (len > 0) {
            hasVariant = true;
            localeID.remove_prefix(skip + len);
            markEnd();
        }
    }

    /* ---- "-va-posix" inside a BCP 47 extension ---- */
    if ((variant == nullptr && !wantEnd) || localeID.empty()) { return; }
    if (!_isBCP47Extension(localeID)) { return; }

    localeID.remove_prefix(2);  // drop the leading '-' and the singleton letter
    constexpr char vaposix[] = "-va-posix";
    constexpr size_t length = sizeof vaposix - 1;

    // Walk the rest in chunks of two '-'-introduced subtags at a time.
    for (;;) {
        const size_t second = localeID.find('-', 1);
        if (second == std::string_view::npos) { break; }
        const size_t chunkEnd = localeID.find('-', second + 1);
        // substr clamps at the end of the view when chunkEnd is npos.
        const std::string_view chunk = localeID.substr(0, chunkEnd);

        if (chunk.length() == length && uprv_strnicmp(chunk.data(), vaposix, length) == 0) {
            if (variant != nullptr) {
                if (hasVariant) { variant->Append("_", 1); }
                constexpr char posix[] = "POSIX";
                variant->Append(posix, sizeof posix - 1);
            }
            if (wantEnd) { *pEnd = localeID.data() + length; }
        }

        if (chunkEnd == std::string_view::npos) { break; }
        localeID.remove_prefix(chunkEnd);
    }
}
1647
1648
/* Keyword enumeration */
1649
1650
/** Per-enumeration state of a keyword list enumeration. */
struct UKeywordsContext {
    /** Start of the stored keyword strings; released when the enumeration is closed. */
    char* keywords;
    /** Keyword that the next call to the "next" callback will return. */
    char* current;
};
1654
1655
U_CDECL_BEGIN
1656
1657
static void U_CALLCONV
1658
0
uloc_kw_closeKeywords(UEnumeration *enumerator) {
1659
0
    uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1660
0
    uprv_free(enumerator->context);
1661
0
    uprv_free(enumerator);
1662
0
}
1663
1664
static int32_t U_CALLCONV
1665
0
uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1666
0
    char *kw = ((UKeywordsContext *)en->context)->keywords;
1667
0
    int32_t result = 0;
1668
0
    while(*kw) {
1669
0
        result++;
1670
0
        kw += uprv_strlen(kw)+1;
1671
0
    }
1672
0
    return result;
1673
0
}
1674
1675
static const char * U_CALLCONV
1676
uloc_kw_nextKeyword(UEnumeration* en,
1677
                    int32_t* resultLength,
1678
0
                    UErrorCode* /*status*/) {
1679
0
    const char* result = ((UKeywordsContext *)en->context)->current;
1680
0
    int32_t len = 0;
1681
0
    if(*result) {
1682
0
        len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1683
0
        ((UKeywordsContext *)en->context)->current += len+1;
1684
0
    } else {
1685
0
        result = nullptr;
1686
0
    }
1687
0
    if (resultLength) {
1688
0
        *resultLength = len;
1689
0
    }
1690
0
    return result;
1691
0
}
1692
1693
static void U_CALLCONV
1694
uloc_kw_resetKeywords(UEnumeration* en,
1695
0
                      UErrorCode* /*status*/) {
1696
0
    ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1697
0
}
1698
1699
U_CDECL_END
1700
1701
1702
/*
 * Template for keyword enumerations. uloc_openKeywordList copies it into each
 * new UEnumeration and then sets that copy's `context`.
 * NOTE(review): the initializers are positional; the slot order is presumably
 * that of UEnumeration in uenumimp.h -- confirm before reordering.
 */
static const UEnumeration gKeywordsEnum = {
    nullptr,                /* left null in the template */
    nullptr,                /* left null in the template */
    uloc_kw_closeKeywords,  /* frees keywords, context and enumeration */
    uloc_kw_countKeywords,  /* counts the stored keywords */
    uenum_unextDefault,
    uloc_kw_nextKeyword,    /* returns the current keyword and advances */
    uloc_kw_resetKeywords   /* rewinds to the first keyword */
};
1711
1712
/**
 * Creates a keyword enumeration over a private copy of keywordList.
 *
 * @param keywordList     consecutive NUL-terminated keywords; may be nullptr
 *                        only when keywordListSize is 0
 * @param keywordListSize number of bytes to copy from keywordList; must not
 *                        be negative
 * @param status          error code; set to U_ILLEGAL_ARGUMENT_ERROR for an
 *                        invalid list/size and to U_MEMORY_ALLOCATION_ERROR
 *                        when an allocation fails
 * @return a new enumeration owned by the caller, or nullptr on failure
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
{
    if (status == nullptr || U_FAILURE(*status)) { return nullptr; }

    // A negative size (or a missing list with a non-zero size) would make the
    // allocation and the copy below run out of bounds.
    if (keywordListSize < 0 || (keywordList == nullptr && keywordListSize != 0)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return nullptr;
    }

    LocalMemory<UKeywordsContext> myContext;
    LocalMemory<UEnumeration> result;

    myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
    result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
    if (myContext.isNull() || result.isNull()) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));

    // Two trailing NULs: the count/next callbacks walk string by string until
    // they reach an empty string, so the copy must end in one even when the
    // caller's list does not finish with a NUL. The size is widened before
    // adding so it cannot overflow.
    const size_t listSize = static_cast<size_t>(keywordListSize);
    myContext->keywords = static_cast<char *>(uprv_malloc(listSize + 2));
    if (myContext->keywords == nullptr) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return nullptr;
    }
    if (listSize > 0) {
        uprv_memcpy(myContext->keywords, keywordList, listSize);
    }
    myContext->keywords[listSize] = 0;
    myContext->keywords[listSize + 1] = 0;
    myContext->current = myContext->keywords;

    // Ownership of both allocations passes to the returned enumeration.
    result->context = myContext.orphan();
    return result.orphan();
}
1738
1739
/**
 * Opens an enumeration over the keywords of a locale ID.
 *
 * @param localeID the locale ID; nullptr selects the default locale. An ID
 *                 with a BCP 47 extension is first converted with
 *                 ulocimp_forLanguageTag (the original ID is used if that
 *                 yields nothing).
 * @param status   error code; nullptr or a failure value makes the call a no-op
 * @return a new keyword enumeration, or nullptr if the ID has no keyword
 *         section or an error occurred
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char* localeID,
                        UErrorCode* status)
{
    if (status == nullptr || U_FAILURE(*status)) {
        return nullptr;
    }

    CharString tempBuffer;  // backs the cursor when the ID was converted
    const char* cursor = localeID;

    if (localeID == nullptr) {
        cursor = uloc_getDefault();
    } else if (_hasBCP47Extension(localeID)) {
        tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, *status);
        if (U_SUCCESS(*status) && !tempBuffer.isEmpty()) {
            cursor = tempBuffer.data();
        }
    }

    // Skip language/script/region/variant; only the end position is wanted.
    ulocimp_getSubtags(
            cursor,
            nullptr,
            nullptr,
            nullptr,
            nullptr,
            &cursor,
            *status);
    if (U_FAILURE(*status)) {
        return nullptr;
    }

    /* keywords are located after '@' */
    const char* const at = locale_getKeywordsStart(cursor);
    if (at == nullptr) {
        return nullptr;
    }

    CharString keywords = ulocimp_getKeywords(at + 1, '@', false, *status);
    if (U_FAILURE(*status)) {
        return nullptr;
    }
    return uloc_openKeywordList(keywords.data(), keywords.length(), status);
}
1782
1783
1784
/* bit-flags for 'options' parameter of _canonicalize */
1785
3.98k
#define _ULOC_STRIP_KEYWORDS 0x2
1786
8.18k
#define _ULOC_CANONICALIZE   0x1
1787
1788
namespace {
1789
1790
9.73k
inline bool OPTION_SET(uint32_t options, uint32_t mask) {
    // True when at least one bit of 'mask' is set in 'options'.
    const uint32_t selected = options & mask;
    return selected != 0;
}
1791
1792
constexpr char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
1793
constexpr int32_t I_DEFAULT_LENGTH = UPRV_LENGTHOF(i_default);
1794
1795
/**
1796
 * Canonicalize the given localeID, to level 1 or to level 2,
1797
 * depending on the options.  To specify level 1, pass in options=0.
1798
 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1799
 *
1800
 * This is the code underlying uloc_getName and uloc_canonicalize.
1801
 */
1802
void
1803
_canonicalize(std::string_view localeID,
1804
              ByteSink& sink,
1805
              uint32_t options,
1806
2.43k
              UErrorCode& err) {
1807
2.43k
    if (U_FAILURE(err)) {
1808
0
        return;
1809
0
    }
1810
1811
2.43k
    int32_t j, fieldCount=0;
1812
2.43k
    CharString tempBuffer;  // if localeID has a BCP47 extension, tmpLocaleID points to this
1813
2.43k
    CharString localeIDWithHyphens;  // if localeID has a BPC47 extension and have _, tmpLocaleID points to this
1814
2.43k
    std::string_view origLocaleID;
1815
2.43k
    std::string_view tmpLocaleID;
1816
2.43k
    size_t keywordAssign = std::string_view::npos;
1817
2.43k
    size_t separatorIndicator = std::string_view::npos;
1818
1819
2.43k
    if (_hasBCP47Extension(localeID)) {
1820
0
        std::string_view localeIDPtr = localeID;
1821
1822
        // convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string
1823
0
        if (localeID.size() >= 2 && localeID.find('_') != std::string_view::npos && localeID[1] != '-' && localeID[1] != '_') {
1824
0
            localeIDWithHyphens.append(localeID, err);
1825
0
            if (U_SUCCESS(err)) {
1826
0
                for (char* p = localeIDWithHyphens.data(); *p != '\0'; ++p) {
1827
0
                    if (*p == '_') {
1828
0
                        *p = '-';
1829
0
                    }
1830
0
                }
1831
0
                localeIDPtr = localeIDWithHyphens.toStringPiece();
1832
0
            }
1833
0
        }
1834
1835
0
        tempBuffer = ulocimp_forLanguageTag(localeIDPtr.data(), static_cast<int32_t>(localeIDPtr.size()), nullptr, err);
1836
0
        tmpLocaleID = U_SUCCESS(err) && !tempBuffer.isEmpty() ? static_cast<std::string_view>(tempBuffer.toStringPiece()) : localeIDPtr;
1837
2.43k
    } else {
1838
2.43k
        tmpLocaleID=localeID;
1839
2.43k
    }
1840
1841
2.43k
    origLocaleID=tmpLocaleID;
1842
1843
    /* get all pieces, one after another, and separate with '_' */
1844
2.43k
    CharString tag;
1845
2.43k
    CharString script;
1846
2.43k
    CharString country;
1847
2.43k
    CharString variant;
1848
2.43k
    const char* end = nullptr;
1849
2.43k
    ulocimp_getSubtags(
1850
2.43k
            tmpLocaleID,
1851
2.43k
            &tag,
1852
2.43k
            &script,
1853
2.43k
            &country,
1854
2.43k
            &variant,
1855
2.43k
            &end,
1856
2.43k
            err);
1857
2.43k
    if (U_FAILURE(err)) {
1858
0
        return;
1859
0
    }
1860
2.43k
    U_ASSERT(end != nullptr);
1861
2.43k
    if (end > tmpLocaleID.data()) {
1862
2.43k
        tmpLocaleID.remove_prefix(end - tmpLocaleID.data());
1863
2.43k
    }
1864
1865
2.43k
    if (tag.length() == I_DEFAULT_LENGTH && origLocaleID.length() >= I_DEFAULT_LENGTH &&
1866
2.43k
            uprv_strncmp(origLocaleID.data(), i_default, I_DEFAULT_LENGTH) == 0) {
1867
0
        tag.clear();
1868
0
        tag.append(uloc_getDefault(), err);
1869
2.43k
    } else {
1870
2.43k
        if (!script.isEmpty()) {
1871
324
            ++fieldCount;
1872
324
            tag.append('_', err);
1873
324
            tag.append(script, err);
1874
324
        }
1875
2.43k
        if (!country.isEmpty()) {
1876
1.71k
            ++fieldCount;
1877
1.71k
            tag.append('_', err);
1878
1.71k
            tag.append(country, err);
1879
1.71k
        }
1880
2.43k
        if (!variant.isEmpty()) {
1881
10
            ++fieldCount;
1882
10
            if (country.isEmpty()) {
1883
0
                tag.append('_', err);
1884
0
            }
1885
10
            tag.append('_', err);
1886
10
            tag.append(variant, err);
1887
10
        }
1888
2.43k
    }
1889
1890
    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1891
2.43k
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && !tmpLocaleID.empty() && tmpLocaleID.front() == '.') {
1892
0
        tag.append('.', err);
1893
0
        tmpLocaleID.remove_prefix(1);
1894
0
        size_t length;
1895
0
        if (size_t atPos = tmpLocaleID.find('@'); atPos != std::string_view::npos) {
1896
0
            length = atPos;
1897
0
        } else {
1898
0
            length = tmpLocaleID.length();
1899
0
        }
1900
        // The longest charset name we found in IANA charset registry
1901
        // https://www.iana.org/assignments/character-sets/ is
1902
        // "Extended_UNIX_Code_Packed_Format_for_Japanese" in length 45.
1903
        // we therefore restrict the length here to be 64 which is a power of 2
1904
        // number that is longer than 45.
1905
0
        constexpr size_t kMaxCharsetLength = 64;
1906
0
        if (length > kMaxCharsetLength) {
1907
0
           err = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
1908
0
           return;
1909
0
        }
1910
0
        if (length > 0) {
1911
0
            tag.append(tmpLocaleID.data(), static_cast<int32_t>(length), err);
1912
0
            tmpLocaleID.remove_prefix(length);
1913
0
        }
1914
0
    }
1915
1916
    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1917
       After this, tmpLocaleID either starts at '@' or is empty. */
1918
2.43k
    if (const char* start = locale_getKeywordsStart(tmpLocaleID); start != nullptr) {
1919
0
        if (start > tmpLocaleID.data()) {
1920
0
            tmpLocaleID.remove_prefix(start - tmpLocaleID.data());
1921
0
        }
1922
0
        keywordAssign = tmpLocaleID.find('=');
1923
0
        separatorIndicator = tmpLocaleID.find(';');
1924
2.43k
    } else {
1925
2.43k
        tmpLocaleID = {};
1926
2.43k
    }
1927
1928
    /* Copy POSIX-style variant, if any [mr@FOO] */
1929
2.43k
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1930
2.43k
        !tmpLocaleID.empty() && keywordAssign == std::string_view::npos) {
1931
0
        tag.append(tmpLocaleID, err);
1932
0
        tmpLocaleID = {};
1933
0
    }
1934
1935
2.43k
    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1936
        /* Handle @FOO variant if @ is present and not followed by = */
1937
886
        if (!tmpLocaleID.empty() && keywordAssign == std::string_view::npos) {
1938
            /* Add missing '_' if needed */
1939
0
            if (fieldCount < 2 || (fieldCount < 3 && !script.isEmpty())) {
1940
0
                do {
1941
0
                    tag.append('_', err);
1942
0
                    ++fieldCount;
1943
0
                } while(fieldCount<2);
1944
0
            }
1945
1946
0
            CharStringByteSink s(&tag);
1947
0
            std::string_view sub = tmpLocaleID;
1948
0
            sub.remove_prefix(1);
1949
0
            _getVariant(sub, '@', &s, !variant.isEmpty(), err);
1950
0
            if (U_FAILURE(err)) { return; }
1951
0
        }
1952
1953
        /* Look up the ID in the canonicalization map */
1954
9.74k
        for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1955
8.86k
            StringPiece id(CANONICALIZE_MAP[j].id);
1956
8.86k
            if (tag == id) {
1957
0
                if (id.empty() && !tmpLocaleID.empty()) {
1958
0
                    break; /* Don't remap "" if keywords present */
1959
0
                }
1960
0
                tag.clear();
1961
0
                tag.append(CANONICALIZE_MAP[j].canonicalID, err);
1962
0
                break;
1963
0
            }
1964
8.86k
        }
1965
886
    }
1966
1967
2.43k
    sink.Append(tag.data(), tag.length());
1968
1969
2.43k
    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1970
887
        if (!tmpLocaleID.empty() && keywordAssign != std::string_view::npos &&
1971
887
            (separatorIndicator == std::string_view::npos || separatorIndicator > keywordAssign)) {
1972
0
            sink.Append("@", 1);
1973
0
            ++fieldCount;
1974
0
            tmpLocaleID.remove_prefix(1);
1975
0
            ulocimp_getKeywords(tmpLocaleID, '@', sink, true, err);
1976
0
        }
1977
887
    }
1978
2.43k
}
1979
1980
}  // namespace
1981
1982
/* ### ID parsing API **************************************************/
1983
1984
/**
 * Writes the parent of localeID into a caller-supplied char buffer.
 *
 * Thin wrapper that runs ulocimp_getParent through
 * ByteSinkUtil::viaByteSinkToTerminatedChars.
 *
 * @param localeID       the locale ID whose parent is wanted
 * @param parent         destination buffer
 * @param parentCapacity capacity of 'parent'
 * @param err            error code, passed through to the helper
 * @return the value returned by viaByteSinkToTerminatedChars
 */
U_CAPI int32_t  U_EXPORT2
uloc_getParent(const char*    localeID,
               char* parent,
               int32_t parentCapacity,
               UErrorCode* err)
{
    auto writeParent = [&](ByteSink& out, UErrorCode& ec) {
        ulocimp_getParent(localeID, out, ec);
    };
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        parent, parentCapacity, writeParent, *err);
}
1997
1998
/**
 * Returns the parent of localeID as a CharString.
 *
 * Thin wrapper around the ByteSink overload of ulocimp_getParent.
 */
U_EXPORT CharString
ulocimp_getParent(const char* localeID,
                  UErrorCode& err)
{
    auto writeParent = [&](ByteSink& out, UErrorCode& ec) {
        ulocimp_getParent(localeID, out, ec);
    };
    return ByteSinkUtil::viaByteSinkToCharString(writeParent, err);
}
2008
2009
/**
 * Appends the parent of localeID to sink.
 *
 * The parent is the text preceding the last '_' in the ID. Nothing is
 * appended when the ID contains no '_' or when the only '_' is the first
 * character. When the ID starts with "und_" (compared case-insensitively),
 * the leading three characters are skipped before appending.
 *
 * @param localeID locale identifier; a null pointer selects uloc_getDefault().
 * @param sink     receives the parent ID bytes.
 * @param err      in/out error code; the function does nothing if it
 *                 already indicates failure.
 */
U_EXPORT void
ulocimp_getParent(const char* localeID,
                  icu::ByteSink& sink,
                  UErrorCode& err)
{
    if (U_FAILURE(err)) { return; }

    if (localeID == nullptr) {
        localeID = uloc_getDefault();
    }

    // Length of the prefix that precedes the final underscore (0 if none).
    const char* const separator = uprv_strrchr(localeID, '_');
    int32_t parentLength =
        (separator != nullptr) ? static_cast<int32_t>(separator - localeID) : 0;

    if (parentLength <= 0) {
        return;
    }

    if (uprv_strnicmp(localeID, "und_", 4) == 0) {
        // Drop the "und" prefix but keep the underscore that follows it.
        localeID += 3;
        parentLength -= 3;
    }
    sink.Append(localeID, parentLength);
}
2037
2038
/**
 * C API wrapper that writes the language subtag of a locale ID into a
 * caller buffer.
 *
 * Calls ulocimp_getSubtags() with the sink in the first of its output
 * slots and every other output left null.
 *
 * @param localeID         locale identifier; null selects uloc_getDefault().
 * @param language         destination buffer.
 * @param languageCapacity capacity of the destination buffer.
 * @param err              in/out error code; must not be null.
 * @return the value produced by ByteSinkUtil::viaByteSinkToTerminatedChars().
 */
U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char*    localeID,
         char* language,
         int32_t languageCapacity,
         UErrorCode* err)
{
    const char* const id = (localeID != nullptr) ? localeID : uloc_getDefault();

    /* uloc_getLanguage will return a 2 character iso-639 code if one exists. */
    auto writeLanguage = [id](ByteSink& sink, UErrorCode& status) {
        ulocimp_getSubtags(id, &sink, nullptr, nullptr, nullptr, nullptr, status);
    };
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        language, languageCapacity, writeLanguage, *err);
}
2063
2064
/**
 * C API wrapper that writes the script subtag of a locale ID into a
 * caller buffer.
 *
 * Calls ulocimp_getSubtags() with the sink in the second of its output
 * slots and every other output left null.
 *
 * @param localeID       locale identifier; null selects uloc_getDefault().
 * @param script         destination buffer.
 * @param scriptCapacity capacity of the destination buffer.
 * @param err            in/out error code; must not be null.
 * @return the value produced by ByteSinkUtil::viaByteSinkToTerminatedChars().
 */
U_CAPI int32_t U_EXPORT2
uloc_getScript(const char*    localeID,
         char* script,
         int32_t scriptCapacity,
         UErrorCode* err)
{
    const char* const id = (localeID != nullptr) ? localeID : uloc_getDefault();

    auto writeScript = [id](ByteSink& sink, UErrorCode& status) {
        ulocimp_getSubtags(id, nullptr, &sink, nullptr, nullptr, nullptr, status);
    };
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        script, scriptCapacity, writeScript, *err);
}
2088
2089
/**
 * C API wrapper that writes the country (region) subtag of a locale ID
 * into a caller buffer.
 *
 * Calls ulocimp_getSubtags() with the sink in the third of its output
 * slots and every other output left null.
 *
 * @param localeID        locale identifier; null selects uloc_getDefault().
 * @param country         destination buffer.
 * @param countryCapacity capacity of the destination buffer.
 * @param err             in/out error code; must not be null.
 * @return the value produced by ByteSinkUtil::viaByteSinkToTerminatedChars().
 */
U_CAPI int32_t  U_EXPORT2
uloc_getCountry(const char* localeID,
            char* country,
            int32_t countryCapacity,
            UErrorCode* err)
{
    const char* const id = (localeID != nullptr) ? localeID : uloc_getDefault();

    auto writeCountry = [id](ByteSink& sink, UErrorCode& status) {
        ulocimp_getSubtags(id, nullptr, nullptr, &sink, nullptr, nullptr, status);
    };
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        country, countryCapacity, writeCountry, *err);
}
2113
2114
/**
 * C API wrapper that writes the variant subtag of a locale ID into a
 * caller buffer.
 *
 * Calls ulocimp_getSubtags() with the sink in the fourth of its output
 * slots and every other output left null.
 *
 * @param localeID        locale identifier; null selects uloc_getDefault().
 * @param variant         destination buffer.
 * @param variantCapacity capacity of the destination buffer.
 * @param err             in/out error code; must not be null.
 * @return the value produced by ByteSinkUtil::viaByteSinkToTerminatedChars().
 */
U_CAPI int32_t  U_EXPORT2
uloc_getVariant(const char* localeID,
                char* variant,
                int32_t variantCapacity,
                UErrorCode* err)
{
    const char* const id = (localeID != nullptr) ? localeID : uloc_getDefault();

    auto writeVariant = [id](ByteSink& sink, UErrorCode& status) {
        ulocimp_getSubtags(id, nullptr, nullptr, nullptr, &sink, nullptr, status);
    };
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        variant, variantCapacity, writeVariant, *err);
}
2138
2139
/**
 * C API wrapper that writes the full name of a locale ID into a caller
 * buffer by way of the ByteSink overload of ulocimp_getName().
 *
 * @param localeID     locale identifier; null selects uloc_getDefault().
 * @param name         destination buffer.
 * @param nameCapacity capacity of the destination buffer.
 * @param err          in/out error code; must not be null.
 * @return the value produced by ByteSinkUtil::viaByteSinkToTerminatedChars().
 */
U_CAPI int32_t  U_EXPORT2
uloc_getName(const char* localeID,
             char* name,
             int32_t nameCapacity,
             UErrorCode* err)
{
    const char* const id = (localeID != nullptr) ? localeID : uloc_getDefault();

    auto writeName = [id](ByteSink& sink, UErrorCode& status) {
        ulocimp_getName(id, sink, status);
    };
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        name, nameCapacity, writeName, *err);
}
2155
2156
/**
 * Returns the full name of localeID as a CharString.
 *
 * Convenience overload: runs the ByteSink overload of ulocimp_getName()
 * through ByteSinkUtil::viaByteSinkToCharString().
 *
 * @param localeID locale identifier to process.
 * @param err      in/out error code passed through to the helper.
 */
U_EXPORT CharString
ulocimp_getName(std::string_view localeID,
                UErrorCode& err)
{
    auto writeName = [&](ByteSink& sink, UErrorCode& status) {
        ulocimp_getName(localeID, sink, status);
    };
    return ByteSinkUtil::viaByteSinkToCharString(writeName, err);
}
2166
2167
/**
 * Appends the full name of localeID to sink.
 *
 * Delegates to _canonicalize() with no option flags set (options == 0).
 *
 * @param localeID locale identifier to process.
 * @param sink     receives the resulting bytes.
 * @param err      in/out error code forwarded to _canonicalize().
 */
U_EXPORT void
ulocimp_getName(std::string_view localeID,
                ByteSink& sink,
                UErrorCode& err)
{
    constexpr uint32_t kNoOptions = 0;
    _canonicalize(localeID, sink, kNoOptions, err);
}
2174
2175
/**
 * C API wrapper that writes the base name of a locale ID into a caller
 * buffer by way of the ByteSink overload of ulocimp_getBaseName().
 *
 * @param localeID     locale identifier; null selects uloc_getDefault().
 * @param name         destination buffer.
 * @param nameCapacity capacity of the destination buffer.
 * @param err          in/out error code; must not be null.
 * @return the value produced by ByteSinkUtil::viaByteSinkToTerminatedChars().
 */
U_CAPI int32_t  U_EXPORT2
uloc_getBaseName(const char* localeID,
                 char* name,
                 int32_t nameCapacity,
                 UErrorCode* err)
{
    const char* const id = (localeID != nullptr) ? localeID : uloc_getDefault();

    auto writeBaseName = [id](ByteSink& sink, UErrorCode& status) {
        ulocimp_getBaseName(id, sink, status);
    };
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        name, nameCapacity, writeBaseName, *err);
}
2191
2192
/**
 * Returns the base name of localeID as a CharString.
 *
 * Convenience overload: runs the ByteSink overload of
 * ulocimp_getBaseName() through ByteSinkUtil::viaByteSinkToCharString().
 *
 * @param localeID locale identifier to process.
 * @param err      in/out error code passed through to the helper.
 */
U_EXPORT CharString
ulocimp_getBaseName(std::string_view localeID,
                    UErrorCode& err)
{
    auto writeBaseName = [&](ByteSink& sink, UErrorCode& status) {
        ulocimp_getBaseName(localeID, sink, status);
    };
    return ByteSinkUtil::viaByteSinkToCharString(writeBaseName, err);
}
2202
2203
/**
 * Appends the base name of localeID to sink.
 *
 * Delegates to _canonicalize() with the _ULOC_STRIP_KEYWORDS option, so
 * the keyword section of the ID is not written.
 *
 * @param localeID locale identifier to process.
 * @param sink     receives the resulting bytes.
 * @param err      in/out error code forwarded to _canonicalize().
 */
U_EXPORT void
ulocimp_getBaseName(std::string_view localeID,
                    ByteSink& sink,
                    UErrorCode& err)
{
    _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err);
}
2210
2211
/**
 * C API wrapper that writes the canonical form of a locale ID into a
 * caller buffer by way of the ByteSink overload of ulocimp_canonicalize().
 *
 * @param localeID     locale identifier; null selects uloc_getDefault().
 * @param name         destination buffer.
 * @param nameCapacity capacity of the destination buffer.
 * @param err          in/out error code; must not be null.
 * @return the value produced by ByteSinkUtil::viaByteSinkToTerminatedChars().
 */
U_CAPI int32_t  U_EXPORT2
uloc_canonicalize(const char* localeID,
                  char* name,
                  int32_t nameCapacity,
                  UErrorCode* err)
{
    const char* const id = (localeID != nullptr) ? localeID : uloc_getDefault();

    auto writeCanonical = [id](ByteSink& sink, UErrorCode& status) {
        ulocimp_canonicalize(id, sink, status);
    };
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
        name, nameCapacity, writeCanonical, *err);
}
2227
2228
/**
 * Returns the canonical form of localeID as a CharString.
 *
 * Convenience overload: runs the ByteSink overload of
 * ulocimp_canonicalize() through ByteSinkUtil::viaByteSinkToCharString().
 *
 * @param localeID locale identifier to process.
 * @param err      in/out error code passed through to the helper.
 */
U_EXPORT CharString
ulocimp_canonicalize(std::string_view localeID,
                     UErrorCode& err)
{
    auto writeCanonical = [&](ByteSink& sink, UErrorCode& status) {
        ulocimp_canonicalize(localeID, sink, status);
    };
    return ByteSinkUtil::viaByteSinkToCharString(writeCanonical, err);
}
2238
2239
/**
 * Appends the canonical form of localeID to sink.
 *
 * Delegates to _canonicalize() with the _ULOC_CANONICALIZE option set.
 *
 * @param localeID locale identifier to process.
 * @param sink     receives the resulting bytes.
 * @param err      in/out error code forwarded to _canonicalize().
 */
U_EXPORT void
ulocimp_canonicalize(std::string_view localeID,
                     ByteSink& sink,
                     UErrorCode& err)
{
    _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err);
}
2246
2247
/**
 * Looks up the LANGUAGES_3 entry that corresponds to the language of a
 * locale ID.
 *
 * The language is extracted with ulocimp_getLanguage(), located in the
 * LANGUAGES table via _findIndex(), and the entry at the same index of
 * LANGUAGES_3 is returned.
 *
 * @param localeID locale identifier; null selects uloc_getDefault().
 * @return the matching LANGUAGES_3 entry, or "" when extraction fails or
 *         the language is not present in LANGUAGES.
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Language(const char* localeID)
{
    const char* const id = (localeID != nullptr) ? localeID : uloc_getDefault();

    UErrorCode err = U_ZERO_ERROR;
    CharString lang = ulocimp_getLanguage(id, err);
    if (U_FAILURE(err)) {
        return "";
    }

    std::optional<int16_t> offset = _findIndex(LANGUAGES, lang.data());
    if (!offset.has_value()) {
        return "";
    }
    return LANGUAGES_3[*offset];
}
2262
2263
/**
 * Looks up the COUNTRIES_3 entry that corresponds to the region of a
 * locale ID.
 *
 * The region is extracted with ulocimp_getRegion(), located in the
 * COUNTRIES table via _findIndex(), and the entry at the same index of
 * COUNTRIES_3 is returned.
 *
 * @param localeID locale identifier; null selects uloc_getDefault().
 * @return the matching COUNTRIES_3 entry, or "" when extraction fails or
 *         the region is not present in COUNTRIES.
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Country(const char* localeID)
{
    const char* const id = (localeID != nullptr) ? localeID : uloc_getDefault();

    UErrorCode err = U_ZERO_ERROR;
    CharString cntry = ulocimp_getRegion(id, err);
    if (U_FAILURE(err)) {
        return "";
    }

    std::optional<int16_t> offset = _findIndex(COUNTRIES, cntry.data());
    if (!offset.has_value()) {
        return "";
    }
    return COUNTRIES_3[*offset];
}
2278
2279
/**
 * Maps a locale ID to an LCID value.
 *
 * Resolution order:
 *   1. uprv_convertToLCIDPlatform(); a non-zero result is returned as is.
 *   2. If the ID contains '@' and has a non-empty "collation" keyword,
 *      the base name plus only that keyword is passed to
 *      uprv_convertToLCID().
 *   3. Otherwise the original ID is passed to uprv_convertToLCID().
 *
 * @param localeID locale identifier.
 * @return the LCID, or 0 when localeID is null, shorter than two
 *         characters, or when the platform lookup or language extraction
 *         reports a failure.
 */
U_CAPI uint32_t  U_EXPORT2
uloc_getLCID(const char* localeID)
{
    /* Check for incomplete id. */
    if (localeID == nullptr || uprv_strlen(localeID) < 2) {
        return 0;
    }

    UErrorCode status = U_ZERO_ERROR;

    // First, attempt Windows platform lookup if available, but fall
    // through to catch any special cases (ICU vs Windows name differences).
    const uint32_t platformLcid = uprv_convertToLCIDPlatform(localeID, &status);
    if (U_FAILURE(status)) {
        return 0;
    }
    if (platformLcid > 0) {
        // The platform found an LCID, return that.
        return platformLcid;
    }

    CharString langID = ulocimp_getLanguage(localeID, status);
    if (U_FAILURE(status)) {
        return 0;
    }

    if (uprv_strchr(localeID, '@') != nullptr) {
        // uprv_convertToLCID does not support keywords other than collation.
        // Remove all keywords except collation.
        CharString collVal = ulocimp_getKeywordValue(localeID, "collation", status);
        if (U_SUCCESS(status) && !collVal.isEmpty()) {
            CharString tmpLocaleID = ulocimp_getBaseName(localeID, status);
            ulocimp_setKeywordValue("collation", collVal.toStringPiece(), tmpLocaleID, status);
            if (U_SUCCESS(status)) {
                return uprv_convertToLCID(langID.data(), tmpLocaleID.data(), &status);
            }
        }

        // fall through - all keywords are simply ignored
        status = U_ZERO_ERROR;
    }

    return uprv_convertToLCID(langID.data(), localeID, &status);
}
2324
2325
/**
 * Writes the locale ID for an LCID into a caller buffer.
 *
 * Thin forwarder: all arguments are handed to uprv_convertToPosix() and
 * its result is returned unchanged.
 *
 * @param hostid         the LCID to convert.
 * @param locale         destination buffer.
 * @param localeCapacity capacity of the destination buffer.
 * @param status         in/out error code, forwarded as is.
 * @return the value returned by uprv_convertToPosix().
 */
U_CAPI int32_t U_EXPORT2
uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
                UErrorCode *status)
{
    const int32_t written = uprv_convertToPosix(hostid, locale, localeCapacity, status);
    return written;
}
2331
2332
/* ### Default locale **************************************************/
2333
2334
/**
 * Returns the current default locale ID, as reported by
 * locale_get_default().
 */
U_CAPI const char*  U_EXPORT2
uloc_getDefault()
{
    const char* const defaultId = locale_get_default();
    return defaultId;
}
2339
2340
/**
 * Sets the default locale by forwarding to locale_set_default().
 *
 * @param newDefaultLocale locale ID to install, forwarded unchanged.
 * @param err              in/out error code; must not be null. If it
 *                         already indicates failure nothing happens.
 *                         It is never written by this function.
 */
U_CAPI void  U_EXPORT2
uloc_setDefault(const char*   newDefaultLocale,
             UErrorCode* err)
{
    /* the error code isn't currently used for anything beyond this guard */
    if (U_SUCCESS(*err)) {
        /* propagate change to C++ */
        locale_set_default(newDefaultLocale);
    }
}
2351
2352
/**
2353
 * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
2354
 * to an array of pointers to arrays of char.  All of these pointers are owned
2355
 * by ICU-- do not delete them, and do not write through them.  The array is
2356
 * terminated with a null pointer.
2357
 */
2358
U_CAPI const char* const*  U_EXPORT2
2359
uloc_getISOLanguages()
2360
0
{
2361
0
    return LANGUAGES;
2362
0
}
2363
2364
/**
2365
 * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2366
 * pointer to an array of pointers to arrays of char.  All of these pointers are
2367
 * owned by ICU-- do not delete them, and do not write through them.  The array is
2368
 * terminated with a null pointer.
2369
 */
2370
U_CAPI const char* const*  U_EXPORT2
2371
uloc_getISOCountries()
2372
0
{
2373
0
    return COUNTRIES;
2374
0
}
2375
2376
/**
 * C API wrapper around ulocimp_toBcpKeyWithFallback().
 *
 * @param keyword NUL-terminated keyword to convert.
 * @return pointer to the converted key, or nullptr when keyword is null
 *         or empty or when no conversion is available.
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char* keyword)
{
    const bool hasKeyword = (keyword != nullptr) && (*keyword != '\0');
    if (!hasKeyword) {
        return nullptr;
    }

    std::optional<std::string_view> result = ulocimp_toBcpKeyWithFallback(keyword);
    if (!result.has_value()) {
        return nullptr;
    }
    return result->data();  // Known to be NUL terminated.
}
2383
2384
/**
 * Converts a keyword with ulocimp_toBcpKey(), falling back to the input
 * itself when no mapping exists but the input passes
 * ultag_isUnicodeLocaleKey().
 *
 * @param keyword keyword to convert.
 * @return the mapped key, the original keyword (fallback), or an empty
 *         optional when neither applies.
 */
U_EXPORT std::optional<std::string_view>
ulocimp_toBcpKeyWithFallback(std::string_view keyword)
{
    std::optional<std::string_view> bcpKey = ulocimp_toBcpKey(keyword);
    if (bcpKey.has_value()) {
        return bcpKey;
    }

    // unknown keyword, but syntax is fine..
    if (ultag_isUnicodeLocaleKey(keyword.data(), static_cast<int32_t>(keyword.size()))) {
        return keyword;
    }
    return std::nullopt;
}
2395
2396
/**
 * C API wrapper around ulocimp_toBcpTypeWithFallback().
 *
 * @param keyword NUL-terminated keyword the value belongs to.
 * @param value   NUL-terminated value (type) to convert.
 * @return pointer to the converted type, or nullptr when either argument
 *         is null or empty or when no conversion is available.
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char* keyword, const char* value)
{
    const bool hasKeyword = (keyword != nullptr) && (*keyword != '\0');
    const bool hasValue = (value != nullptr) && (*value != '\0');
    if (!hasKeyword || !hasValue) {
        return nullptr;
    }

    std::optional<std::string_view> result = ulocimp_toBcpTypeWithFallback(keyword, value);
    if (!result.has_value()) {
        return nullptr;
    }
    return result->data();  // Known to be NUL terminated.
}
2404
2405
/**
 * Converts a keyword value with ulocimp_toBcpType(), falling back to the
 * input value itself when no mapping exists but the value passes
 * ultag_isUnicodeLocaleType().
 *
 * @param keyword keyword the value belongs to.
 * @param value   value (type) to convert.
 * @return the mapped type, the original value (fallback), or an empty
 *         optional when neither applies.
 */
U_EXPORT std::optional<std::string_view>
ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value)
{
    std::optional<std::string_view> bcpType = ulocimp_toBcpType(keyword, value);
    if (bcpType.has_value()) {
        return bcpType;
    }

    // unknown type, but syntax is fine..
    if (ultag_isUnicodeLocaleType(value.data(), static_cast<int32_t>(value.size()))) {
        return value;
    }
    return std::nullopt;
}
2416
2417
namespace {
2418
2419
bool
2420
isWellFormedLegacyKey(std::string_view key)
2421
0
{
2422
0
    return std::all_of(key.begin(), key.end(), UPRV_ISALPHANUM);
2423
0
}
2424
2425
bool
2426
isWellFormedLegacyType(std::string_view legacyType)
2427
0
{
2428
0
    int32_t alphaNumLen = 0;
2429
0
    for (char c : legacyType) {
2430
0
        if (c == '_' || c == '/' || c == '-') {
2431
0
            if (alphaNumLen == 0) {
2432
0
                return false;
2433
0
            }
2434
0
            alphaNumLen = 0;
2435
0
        } else if (UPRV_ISALPHANUM(c)) {
2436
0
            alphaNumLen++;
2437
0
        } else {
2438
0
            return false;
2439
0
        }
2440
0
    }
2441
0
    return alphaNumLen != 0;
2442
0
}
2443
2444
}  // namespace
2445
2446
/**
 * C API wrapper around ulocimp_toLegacyKeyWithFallback().
 *
 * @param keyword NUL-terminated keyword to convert.
 * @return pointer to the converted key, or nullptr when keyword is null
 *         or empty or when no conversion is available.
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword)
{
    const bool hasKeyword = (keyword != nullptr) && (*keyword != '\0');
    if (!hasKeyword) {
        return nullptr;
    }

    std::optional<std::string_view> result = ulocimp_toLegacyKeyWithFallback(keyword);
    if (!result.has_value()) {
        return nullptr;
    }
    return result->data();  // Known to be NUL terminated.
}
2453
2454
/**
 * Converts a keyword with ulocimp_toLegacyKey(), falling back to the
 * input itself when no mapping exists but the input passes
 * isWellFormedLegacyKey().
 *
 * @param keyword keyword to convert.
 * @return the mapped key, the original keyword (fallback), or an empty
 *         optional when neither applies.
 */
U_EXPORT std::optional<std::string_view>
ulocimp_toLegacyKeyWithFallback(std::string_view keyword)
{
    std::optional<std::string_view> legacyKey = ulocimp_toLegacyKey(keyword);
    if (legacyKey.has_value()) {
        return legacyKey;
    }

    // No mapping: accept the keyword as is if it is well-formed under the
    // legacy locale syntax.
    //
    // Note:
    //  LDML/CLDR provides some definition of keyword syntax in
    //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
    //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
    //  Keys can only consist of [0-9a-zA-Z].
    if (isWellFormedLegacyKey(keyword)) {
        return keyword;
    }
    return std::nullopt;
}
2470
2471
/**
 * C API wrapper around ulocimp_toLegacyTypeWithFallback().
 *
 * @param keyword NUL-terminated keyword the value belongs to.
 * @param value   NUL-terminated value (type) to convert.
 * @return pointer to the converted type, or nullptr when either argument
 *         is null or empty or when no conversion is available.
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char* keyword, const char* value)
{
    const bool hasKeyword = (keyword != nullptr) && (*keyword != '\0');
    const bool hasValue = (value != nullptr) && (*value != '\0');
    if (!hasKeyword || !hasValue) {
        return nullptr;
    }

    std::optional<std::string_view> result = ulocimp_toLegacyTypeWithFallback(keyword, value);
    if (!result.has_value()) {
        return nullptr;
    }
    return result->data();  // Known to be NUL terminated.
}
2479
2480
/**
 * Converts a keyword value with ulocimp_toLegacyType(), falling back to
 * the input value itself when no mapping exists but the value passes
 * isWellFormedLegacyType().
 *
 * @param keyword keyword the value belongs to.
 * @param value   value (type) to convert.
 * @return the mapped type, the original value (fallback), or an empty
 *         optional when neither applies.
 */
U_EXPORT std::optional<std::string_view>
ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value)
{
    std::optional<std::string_view> legacyType = ulocimp_toLegacyType(keyword, value);
    if (legacyType.has_value()) {
        return legacyType;
    }

    // No mapping: accept the value as is if it is well-formed under the
    // legacy locale syntax.
    //
    // Note:
    //  LDML/CLDR provides some definition of keyword syntax in
    //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
    //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
    //  Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
    //  we allow the separators '/', '_' and '-' in the middle
    //  (e.g. "Asia/Tel_Aviv"). A '+' is NOT accepted by
    //  isWellFormedLegacyType(), so a value such as "Etc/GMT+1" is only
    //  returned when ulocimp_toLegacyType() above already produced a mapping.
    if (isWellFormedLegacyType(value)) {
        return value;
    }
    return std::nullopt;
}
2497
2498
/*eof*/