Coverage Report

Created: 2025-06-13 06:34

/src/icu/icu4c/source/common/uloc.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (C) 1997-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*
9
* File ULOC.CPP
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   04/01/97    aliu        Creation.
15
*   08/21/98    stephen     JDK 1.2 sync
16
*   12/08/98    rtg         New Locale implementation and C API
17
*   03/15/99    damiba      overhaul.
18
*   04/06/99    stephen     changed setDefault() to realloc and copy
19
*   06/14/99    stephen     Changed calls to ures_open for new params
20
*   07/21/99    stephen     Modified setDefault() to propagate to C++
21
*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
22
*                           brought canonicalization code into line with spec
23
*****************************************************************************/
24
25
/*
26
   POSIX's locale format, from putil.c: [no spaces]
27
28
     ll [ _CC ] [ . MM ] [ @ VV]
29
30
     l = lang, C = ctry, M = charmap, V = variant
31
*/
32
33
#include <algorithm>
34
#include <optional>
35
#include <string_view>
36
37
#include "unicode/bytestream.h"
38
#include "unicode/errorcode.h"
39
#include "unicode/stringpiece.h"
40
#include "unicode/utypes.h"
41
#include "unicode/ustring.h"
42
#include "unicode/uloc.h"
43
44
#include "bytesinkutil.h"
45
#include "putilimp.h"
46
#include "ustr_imp.h"
47
#include "ulocimp.h"
48
#include "umutex.h"
49
#include "cstring.h"
50
#include "cmemory.h"
51
#include "locmap.h"
52
#include "uarrsort.h"
53
#include "uenumimp.h"
54
#include "uassert.h"
55
#include "charstr.h"
56
57
U_NAMESPACE_USE
58
59
/* ### Declarations **************************************************/
60
61
/* Locale stuff from locid.cpp */
62
U_CFUNC void locale_set_default(const char *id);
63
U_CFUNC const char *locale_get_default();
64
65
namespace {
66
67
/* ### Data tables **************************************************/
68
69
/**
70
 * Table of language codes, both 2- and 3-letter, with preference
71
 * given to 2-letter codes where possible.  Includes 3-letter codes
72
 * that lack a 2-letter equivalent.
73
 *
74
 * This list must be in sorted order.  This list is returned directly
75
 * to the user by some API.
76
 *
77
 * This list must be kept in sync with LANGUAGES_3, with corresponding
78
 * entries matched.
79
 *
80
 * This table should be terminated with a nullptr entry, followed by a
81
 * second list, and another nullptr entry.  The first list is visible to
82
 * user code when this array is returned by API.  The second list
83
 * contains codes we support, but do not expose through user API.
84
 *
85
 * Notes
86
 *
87
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
88
 * include the revisions up to 2001/7/27 *CWB*
89
 *
90
 * The 3 character codes are the terminology codes like RFC 3066.  This
91
 * is compatible with prior ICU codes
92
 *
93
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
94
 * table but now at the end of the table because 3 character codes are
95
 * duplicates.  This avoids bad searches going from 3 to 2 character
96
 * codes.
97
 *
98
 * The range qaa-qtz is reserved for local use
99
 */
100
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
101
/* ISO639 table version is 20150505 */
102
/* Subsequent hand addition of selected languages */
103
constexpr const char* LANGUAGES[] = {
104
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
105
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
106
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
107
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
108
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
109
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
110
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
111
    "bgc", "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
112
    "blo", "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
113
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
114
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
115
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
116
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
117
    "cs",  "csb", "csw", "cu",  "cv",  "cy",
118
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
119
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
120
    "dyo", "dyu", "dz",  "dzg",
121
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
122
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
123
    "ext",
124
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
125
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
126
    "frs", "fur", "fy",
127
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
128
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
129
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
130
    "gur", "guz", "gv",  "gwi",
131
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
132
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
133
    "hup", "hy",  "hz",
134
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
135
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
136
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
137
    "jv",
138
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
139
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
140
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
141
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
142
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
143
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
144
    "kv",  "kw",  "kxv", "ky",
145
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
146
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
147
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
148
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
149
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
150
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
151
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
152
    "ml",  "mn",  "mnc", "mni",
153
    "moh", "mos", "mr",  "mrj",
154
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
155
    "my",  "mye", "myv", "mzn",
156
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
157
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
158
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
159
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
160
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
161
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
162
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
163
    "pon", "prg", "pro", "ps",  "pt",
164
    "qu",  "quc", "qug",
165
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
166
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
167
    "rw",  "rwk",
168
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
169
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
170
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
171
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
172
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
173
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
174
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
175
    "sv",  "sw",  "swb", "syc", "syr", "szl",
176
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
177
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr",
178
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tok", "tpi",
179
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
180
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
181
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
182
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vmw",
183
    "vo", "vot", "vro", "vun",
184
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
185
    "xal", "xh",  "xmf", "xnr", "xog",
186
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
187
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
188
    "zun", "zxx", "zza",
189
nullptr,
190
    "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  /* obsolete language codes */
191
nullptr
192
};
193
194
constexpr const char* DEPRECATED_LANGUAGES[]={
195
    "in", "iw", "ji", "jw", "mo", nullptr, nullptr
196
};
197
constexpr const char* REPLACEMENT_LANGUAGES[]={
198
    "id", "he", "yi", "jv", "ro", nullptr, nullptr
199
};
200
201
/**
202
 * Table of 3-letter language codes.
203
 *
204
 * This is a lookup table used to convert 3-letter language codes to
205
 * their 2-letter equivalent, where possible.  It must be kept in sync
206
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
207
 * same language as LANGUAGES_3[i].  The commented-out lines are
208
 * copied from LANGUAGES to make eyeballing this baby easier.
209
 *
210
 * Where a 3-letter language code has no 2-letter equivalent, the
211
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
212
 *
213
 * This table should be terminated with a nullptr entry, followed by a
214
 * second list, and another nullptr entry.  The two lists correspond to
215
 * the two lists in LANGUAGES.
216
 */
217
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
218
/* ISO639 table version is 20150505 */
219
/* Subsequent hand addition of selected languages */
220
constexpr const char* LANGUAGES_3[] = {
221
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
222
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
223
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
224
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
225
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
226
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
227
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
228
    "bgc", "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
229
    "blo", "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
230
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
231
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
232
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
233
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
234
    "ces", "csb", "csw", "chu", "chv", "cym",
235
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
236
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
237
    "dyo", "dyu", "dzo", "dzg",
238
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
239
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
240
    "ext",
241
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
242
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
243
    "frs", "fur", "fry",
244
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
245
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
246
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
247
    "gur", "guz", "glv", "gwi",
248
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
249
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
250
    "hup", "hye", "her",
251
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
252
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
253
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
254
    "jav",
255
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
256
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
257
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
258
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
259
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
260
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
261
    "kom", "cor", "kxv", "kir",
262
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
263
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
264
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
265
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
266
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
267
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
268
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
269
    "mal", "mon", "mnc", "mni",
270
    "moh", "mos", "mar", "mrj",
271
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
272
    "mya", "mye", "myv", "mzn",
273
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
274
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
275
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
276
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
277
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
278
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pcm", "pdc",
279
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
280
    "pon", "prg", "pro", "pus", "por",
281
    "que", "quc", "qug",
282
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
283
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
284
    "kin", "rwk",
285
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
286
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
287
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
288
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
289
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
290
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
291
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
292
    "swe", "swa", "swb", "syc", "syr", "szl",
293
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
294
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr",
295
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tok", "tpi",
296
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
297
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
298
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
299
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vmw",
300
    "vol", "vot", "vro", "vun",
301
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
302
    "xal", "xho", "xmf", "xnr", "xog",
303
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
304
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
305
    "zun", "zxx", "zza",
306
nullptr,
307
/*  "in",  "iw",  "ji",  "jw",  "mo",  "sh",  "swc", "tl",  */
308
    "ind", "heb", "yid", "jaw", "mol", "srp", "swc", "tgl",
309
nullptr
310
};
311
312
/**
313
 * Table of 2-letter country codes.
314
 *
315
 * This list must be in sorted order.  This list is returned directly
316
 * to the user by some API.
317
 *
318
 * This list must be kept in sync with COUNTRIES_3, with corresponding
319
 * entries matched.
320
 *
321
 * This table should be terminated with a nullptr entry, followed by a
322
 * second list, and another nullptr entry.  The first list is visible to
323
 * user code when this array is returned by API.  The second list
324
 * contains codes we support, but do not expose through user API.
325
 *
326
 * Notes:
327
 *
328
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
329
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
330
 * new codes keeping the old ones for compatibility updated to include
331
 * 1999/12/03 revisions *CWB*
332
 *
333
 * RO(ROM) is now RO(ROU) according to
334
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
335
 */
336
constexpr const char* COUNTRIES[] = {
337
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
338
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
339
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
340
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
341
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
342
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CQ",  "CR",
343
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",
344
    "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",
345
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
346
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
347
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
348
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
349
    "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
350
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
351
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
352
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
353
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
354
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
355
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
356
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
357
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
358
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
359
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
360
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
361
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
362
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
363
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
364
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
365
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
366
    "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
367
nullptr,
368
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
369
nullptr
370
};
371
372
constexpr const char* DEPRECATED_COUNTRIES[] = {
373
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", nullptr, nullptr /* deprecated country list */
374
};
375
constexpr const char* REPLACEMENT_COUNTRIES[] = {
376
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
377
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", nullptr, nullptr  /* replacement country codes */
378
};
379
380
/**
381
 * Table of 3-letter country codes.
382
 *
383
 * This is a lookup table used to convert 3-letter country codes to
384
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
385
 * For all valid i, COUNTRIES[i] must refer to the same country as
386
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
387
 * to make eyeballing this baby easier.
388
 *
389
 * This table should be terminated with a nullptr entry, followed by a
390
 * second list, and another nullptr entry.  The two lists correspond to
391
 * the two lists in COUNTRIES.
392
 */
393
constexpr const char* COUNTRIES_3[] = {
394
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
395
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
396
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
397
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
398
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
399
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
400
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
401
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
402
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
403
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
404
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CQ",  "CR",     */
405
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRQ", "CRI",
406
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DG",  "DJ",  "DK",     */
407
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DGA", "DJI", "DNK",
408
/*  "DM",  "DO",  "DZ",  "EA",  "EC",  "EE",  "EG",  "EH",  "ER",     */
409
    "DMA", "DOM", "DZA", "XEA", "ECU", "EST", "EGY", "ESH", "ERI",
410
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
411
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
412
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
413
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
414
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
415
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
416
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
417
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
418
/*  "IC",  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
419
    "XIC", "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
420
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
421
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
422
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
423
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
424
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
425
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
426
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
427
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
428
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
429
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
430
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
431
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
432
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
433
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
434
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
435
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
436
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
437
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
438
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
439
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
440
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
441
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
442
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
443
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
444
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
445
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
446
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
447
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
448
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
449
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
450
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
451
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
452
/*  "WS",  "XK",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
453
    "WSM", "XKK", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
454
nullptr,
455
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
456
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
457
nullptr
458
};
459
460
typedef struct CanonicalizationMap {
461
    const char *id;          /* input ID */
462
    const char *canonicalID; /* canonicalized output ID */
463
} CanonicalizationMap;
464
465
/**
466
 * A map to canonicalize locale IDs.  This handles a variety of
467
 * different semantic kinds of transformations.
468
 */
469
constexpr CanonicalizationMap CANONICALIZE_MAP[] = {
470
    { "art__LOJBAN",    "jbo" }, /* registered name */
471
    { "hy__AREVELA",    "hy" }, /* Registered IANA variant */
472
    { "hy__AREVMDA",    "hyw" }, /* Registered IANA variant */
473
    { "zh__GUOYU",      "zh" }, /* registered name */
474
    { "zh__HAKKA",      "hak" }, /* registered name */
475
    { "zh__XIANG",      "hsn" }, /* registered name */
476
    // subtags with 3 chars won't be treated as variants.
477
    { "zh_GAN",         "gan" }, /* registered name */
478
    { "zh_MIN_NAN",     "nan" }, /* registered name */
479
    { "zh_WUU",         "wuu" }, /* registered name */
480
    { "zh_YUE",         "yue" }, /* registered name */
481
};
482
483
/* ### BCP47 Conversion *******************************************/
484
/* Gets the size of the shortest subtag in the given localeID. */
485
43.8k
int32_t getShortestSubtagLength(std::string_view localeID) {
486
43.8k
    int32_t localeIDLength = static_cast<int32_t>(localeID.length());
487
43.8k
    int32_t length = localeIDLength;
488
43.8k
    int32_t tmpLength = 0;
489
43.8k
    int32_t i;
490
43.8k
    bool reset = true;
491
492
120k
    for (i = 0; i < localeIDLength; i++) {
493
76.4k
        if (localeID[i] != '_' && localeID[i] != '-') {
494
67.9k
            if (reset) {
495
27.1k
                tmpLength = 0;
496
27.1k
                reset = false;
497
27.1k
            }
498
67.9k
            tmpLength++;
499
67.9k
        } else {
500
8.45k
            if (tmpLength != 0 && tmpLength < length) {
501
7.93k
                length = tmpLength;
502
7.93k
            }
503
8.45k
            reset = true;
504
8.45k
        }
505
76.4k
    }
506
507
43.8k
    return length;
508
43.8k
}
509
/* Test if the locale id has BCP47 u extension and does not have '@' */
510
79.2k
inline bool _hasBCP47Extension(std::string_view id) {
511
79.2k
    return id.find('@') == std::string_view::npos && getShortestSubtagLength(id) == 1;
512
79.2k
}
513
514
/* ### Keywords **************************************************/
515
5.97k
inline bool UPRV_ISDIGIT(char c) { return c >= '0' && c <= '9'; }
516
529k
inline bool UPRV_ISALPHANUM(char c) { return uprv_isASCIILetter(c) || UPRV_ISDIGIT(c); }
517
/* Punctuation/symbols allowed in legacy key values */
518
2.07k
inline bool UPRV_OK_VALUE_PUNCTUATION(char c) { return c == '_' || c == '-' || c == '+' || c == '/'; }
519
520
}  // namespace
521
522
18.2k
#define ULOC_KEYWORD_BUFFER_LEN 25
523
18.2k
#define ULOC_MAX_NO_KEYWORDS 25
524
525
U_CAPI const char * U_EXPORT2
526
89.2k
locale_getKeywordsStart(std::string_view localeID) {
527
89.2k
    if (size_t pos = localeID.find('@'); pos != std::string_view::npos) {
528
35.4k
        return localeID.data() + pos;
529
35.4k
    }
530
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
531
    else {
532
        /* We do this because the @ sign is variant, and the @ sign used on one
533
        EBCDIC machine won't be compiled the same way on other EBCDIC based
534
        machines. */
535
        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
536
        const uint8_t *charToFind = ebcdicSigns;
537
        while(*charToFind) {
538
            if (size_t pos = localeID.find(*charToFind); pos != std::string_view::npos) {
539
                return localeID.data() + pos;
540
            }
541
            charToFind++;
542
        }
543
    }
544
#endif
545
53.8k
    return nullptr;
546
89.2k
}
547
548
namespace {
549
550
/**
551
 * @param keywordName incoming name to be canonicalized
552
 * @param status return status (keyword too long)
553
 * @return the keyword name
554
 */
555
CharString locale_canonKeywordName(std::string_view keywordName, UErrorCode& status)
556
40.4k
{
557
40.4k
  if (U_FAILURE(status)) { return {}; }
558
40.4k
  CharString result;
559
560
226k
  for (char c : keywordName) {
561
226k
    if (!UPRV_ISALPHANUM(c)) {
562
0
      status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
563
0
      return {};
564
0
    }
565
226k
    result.append(uprv_tolower(c), status);
566
226k
  }
567
40.4k
  if (result.isEmpty()) {
568
0
    status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
569
0
    return {};
570
0
  }
571
572
40.4k
  return result;
573
40.4k
}
574
575
typedef struct {
576
    char keyword[ULOC_KEYWORD_BUFFER_LEN];
577
    int32_t keywordLen;
578
    const char *valueStart;
579
    int32_t valueLen;
580
} KeywordStruct;
581
582
int32_t U_CALLCONV
583
0
compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
584
0
    const char* leftString = static_cast<const KeywordStruct*>(left)->keyword;
585
0
    const char* rightString = static_cast<const KeywordStruct*>(right)->keyword;
586
0
    return uprv_strcmp(leftString, rightString);
587
0
}
588
589
}  // namespace
590
591
U_EXPORT CharString
592
ulocimp_getKeywords(std::string_view localeID,
593
                    char prev,
594
                    bool valuesToo,
595
                    UErrorCode& status)
596
4.63k
{
597
4.63k
    return ByteSinkUtil::viaByteSinkToCharString(
598
4.63k
        [&](ByteSink& sink, UErrorCode& status) {
599
4.63k
            ulocimp_getKeywords(localeID,
600
4.63k
                                prev,
601
4.63k
                                sink,
602
4.63k
                                valuesToo,
603
4.63k
                                status);
604
4.63k
        },
605
4.63k
        status);
606
4.63k
}
607
608
U_EXPORT void
609
ulocimp_getKeywords(std::string_view localeID,
610
                    char prev,
611
                    ByteSink& sink,
612
                    bool valuesToo,
613
                    UErrorCode& status)
614
18.2k
{
615
18.2k
    if (U_FAILURE(status)) { return; }
616
617
18.2k
    KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
618
619
18.2k
    int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
620
18.2k
    int32_t numKeywords = 0;
621
18.2k
    size_t equalSign = std::string_view::npos;
622
18.2k
    size_t semicolon = std::string_view::npos;
623
18.2k
    int32_t i = 0, j, n;
624
625
18.2k
    if(prev == '@') { /* start of keyword definition */
626
        /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
627
18.2k
        do {
628
18.2k
            bool duplicate = false;
629
            /* skip leading spaces */
630
18.2k
            while (localeID.front() == ' ') {
631
0
                localeID.remove_prefix(1);
632
0
            }
633
18.2k
            if (localeID.empty()) { /* handle trailing "; " */
634
0
                break;
635
0
            }
636
18.2k
            if(numKeywords == maxKeywords) {
637
0
                status = U_INTERNAL_PROGRAM_ERROR;
638
0
                return;
639
0
            }
640
18.2k
            equalSign = localeID.find('=');
641
18.2k
            semicolon = localeID.find(';');
642
            /* lack of '=' [foo@currency] is illegal */
643
            /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
644
18.2k
            if (equalSign == std::string_view::npos ||
645
18.2k
                (semicolon != std::string_view::npos && semicolon < equalSign)) {
646
0
                status = U_INVALID_FORMAT_ERROR;
647
0
                return;
648
0
            }
649
            /* zero-length keyword is an error. */
650
18.2k
            if (equalSign == 0) {
651
0
                status = U_INVALID_FORMAT_ERROR;
652
0
                return;
653
0
            }
654
            /* need to normalize both keyword and keyword name */
655
18.2k
            if (equalSign >= ULOC_KEYWORD_BUFFER_LEN) {
656
                /* keyword name too long for internal buffer */
657
0
                status = U_INTERNAL_PROGRAM_ERROR;
658
0
                return;
659
0
            }
660
163k
            for (i = 0, n = 0; static_cast<size_t>(i) < equalSign; ++i) {
661
145k
                if (localeID[i] != ' ') {
662
145k
                    keywordList[numKeywords].keyword[n++] = uprv_tolower(localeID[i]);
663
145k
                }
664
145k
            }
665
666
18.2k
            keywordList[numKeywords].keyword[n] = 0;
667
18.2k
            keywordList[numKeywords].keywordLen = n;
668
            /* now grab the value part. First we skip the '=' */
669
18.2k
            equalSign++;
670
            /* then we leading spaces */
671
18.2k
            while (equalSign < localeID.length() && localeID[equalSign] == ' ') {
672
0
                equalSign++;
673
0
            }
674
675
            /* Premature end or zero-length value */
676
18.2k
            if (equalSign == localeID.length() || equalSign == semicolon) {
677
0
                status = U_INVALID_FORMAT_ERROR;
678
0
                return;
679
0
            }
680
681
18.2k
            keywordList[numKeywords].valueStart = localeID.data() + equalSign;
682
683
18.2k
            std::string_view value = localeID;
684
18.2k
            if (semicolon != std::string_view::npos) {
685
0
                value.remove_suffix(value.length() - semicolon);
686
0
                localeID.remove_prefix(semicolon + 1);
687
18.2k
            } else {
688
18.2k
                localeID = {};
689
18.2k
            }
690
18.2k
            value.remove_prefix(equalSign);
691
18.2k
            if (size_t last = value.find_last_not_of(' '); last != std::string_view::npos) {
692
18.2k
                value.remove_suffix(value.length() - last - 1);
693
18.2k
            }
694
18.2k
            keywordList[numKeywords].valueLen = static_cast<int32_t>(value.length());
695
696
            /* If this is a duplicate keyword, then ignore it */
697
18.2k
            for (j=0; j<numKeywords; ++j) {
698
0
                if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
699
0
                    duplicate = true;
700
0
                    break;
701
0
                }
702
0
            }
703
18.2k
            if (!duplicate) {
704
18.2k
                ++numKeywords;
705
18.2k
            }
706
18.2k
        } while (!localeID.empty());
707
708
        /* now we have a list of keywords */
709
        /* we need to sort it */
710
18.2k
        uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, nullptr, false, &status);
711
712
        /* Now construct the keyword part */
713
36.4k
        for(i = 0; i < numKeywords; i++) {
714
18.2k
            sink.Append(keywordList[i].keyword, keywordList[i].keywordLen);
715
18.2k
            if(valuesToo) {
716
13.5k
                sink.Append("=", 1);
717
13.5k
                sink.Append(keywordList[i].valueStart, keywordList[i].valueLen);
718
13.5k
                if(i < numKeywords - 1) {
719
0
                    sink.Append(";", 1);
720
0
                }
721
13.5k
            } else {
722
4.63k
                sink.Append("\0", 1);
723
4.63k
            }
724
18.2k
        }
725
18.2k
    }
726
18.2k
}
727
728
U_CAPI int32_t U_EXPORT2
729
uloc_getKeywordValue(const char* localeID,
730
                     const char* keywordName,
731
                     char* buffer, int32_t bufferCapacity,
732
                     UErrorCode* status)
733
10.6k
{
734
10.6k
    if (U_FAILURE(*status)) { return 0; }
735
10.6k
    if (keywordName == nullptr || *keywordName == '\0') {
736
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
737
0
        return 0;
738
0
    }
739
10.6k
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
740
10.6k
        buffer, bufferCapacity,
741
10.6k
        [&](ByteSink& sink, UErrorCode& status) {
742
10.6k
            ulocimp_getKeywordValue(localeID, keywordName, sink, status);
743
10.6k
        },
744
10.6k
        *status);
745
10.6k
}
746
747
U_EXPORT CharString
748
ulocimp_getKeywordValue(const char* localeID,
749
                        std::string_view keywordName,
750
                        UErrorCode& status)
751
15.8k
{
752
15.8k
    return ByteSinkUtil::viaByteSinkToCharString(
753
15.8k
        [&](ByteSink& sink, UErrorCode& status) {
754
15.8k
            ulocimp_getKeywordValue(localeID, keywordName, sink, status);
755
15.8k
        },
756
15.8k
        status);
757
15.8k
}
758
759
U_EXPORT void
760
ulocimp_getKeywordValue(const char* localeID,
761
                        std::string_view keywordName,
762
                        icu::ByteSink& sink,
763
                        UErrorCode& status)
764
31.1k
{
765
31.1k
    if (U_FAILURE(status)) { return; }
766
767
31.1k
    if (localeID == nullptr || keywordName.empty()) {
768
0
        status = U_ILLEGAL_ARGUMENT_ERROR;
769
0
        return;
770
0
    }
771
772
31.1k
    const char* startSearchHere = nullptr;
773
31.1k
    const char* nextSeparator = nullptr;
774
775
31.1k
    CharString tempBuffer;
776
31.1k
    const char* tmpLocaleID;
777
778
31.1k
    CharString canonKeywordName = locale_canonKeywordName(keywordName, status);
779
31.1k
    if (U_FAILURE(status)) {
780
0
      return;
781
0
    }
782
783
31.1k
    if (localeID != nullptr && _hasBCP47Extension(localeID)) {
784
0
        tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, status);
785
0
        tmpLocaleID = U_SUCCESS(status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
786
31.1k
    } else {
787
31.1k
        tmpLocaleID=localeID;
788
31.1k
    }
789
790
31.1k
    startSearchHere = locale_getKeywordsStart(tmpLocaleID);
791
31.1k
    if(startSearchHere == nullptr) {
792
        /* no keywords, return at once */
793
9.27k
        return;
794
9.27k
    }
795
796
    /* find the first keyword */
797
36.2k
    while(startSearchHere) {
798
21.8k
        const char* keyValueTail;
799
800
21.8k
        startSearchHere++; /* skip @ or ; */
801
21.8k
        nextSeparator = uprv_strchr(startSearchHere, '=');
802
21.8k
        if(!nextSeparator) {
803
0
            status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
804
0
            return;
805
0
        }
806
        /* strip leading & trailing spaces (TC decided to tolerate these) */
807
21.8k
        while(*startSearchHere == ' ') {
808
0
            startSearchHere++;
809
0
        }
810
21.8k
        keyValueTail = nextSeparator;
811
21.8k
        while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
812
0
            keyValueTail--;
813
0
        }
814
        /* now keyValueTail points to first char after the keyName */
815
        /* copy & normalize keyName from locale */
816
21.8k
        if (startSearchHere == keyValueTail) {
817
0
            status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
818
0
            return;
819
0
        }
820
21.8k
        CharString localeKeywordName;
821
196k
        while (startSearchHere < keyValueTail) {
822
174k
          if (!UPRV_ISALPHANUM(*startSearchHere)) {
823
0
            status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
824
0
            return;
825
0
          }
826
174k
          localeKeywordName.append(uprv_tolower(*startSearchHere++), status);
827
174k
        }
828
21.8k
        if (U_FAILURE(status)) {
829
0
            return;
830
0
        }
831
832
21.8k
        startSearchHere = uprv_strchr(nextSeparator, ';');
833
834
21.8k
        if (canonKeywordName == localeKeywordName) {
835
             /* current entry matches the keyword. */
836
7.49k
           nextSeparator++; /* skip '=' */
837
            /* First strip leading & trailing spaces (TC decided to tolerate these) */
838
7.49k
            while(*nextSeparator == ' ') {
839
0
              nextSeparator++;
840
0
            }
841
7.49k
            keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
842
7.49k
            while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
843
0
              keyValueTail--;
844
0
            }
845
            /* Now copy the value, but check well-formedness */
846
7.49k
            if (nextSeparator == keyValueTail) {
847
0
              status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
848
0
              return;
849
0
            }
850
65.1k
            while (nextSeparator < keyValueTail) {
851
57.6k
              if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
852
0
                status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
853
0
                return;
854
0
              }
855
              /* Should we lowercase value to return here? Tests expect as-is. */
856
57.6k
              sink.Append(nextSeparator++, 1);
857
57.6k
            }
858
7.49k
            return;
859
7.49k
        }
860
21.8k
    }
861
21.8k
}
862
863
U_CAPI int32_t U_EXPORT2
864
uloc_setKeywordValue(const char* keywordName,
865
                     const char* keywordValue,
866
                     char* buffer, int32_t bufferCapacity,
867
                     UErrorCode* status)
868
0
{
869
0
    if (U_FAILURE(*status)) { return 0; }
870
871
0
    if (keywordName == nullptr || *keywordName == 0) {
872
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
873
0
        return 0;
874
0
    }
875
876
0
    if (bufferCapacity <= 1) {
877
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
878
0
        return 0;
879
0
    }
880
881
0
    int32_t bufLen = (int32_t)uprv_strlen(buffer);
882
0
    if(bufferCapacity<bufLen) {
883
        /* The capacity is less than the length?! Is this NUL terminated? */
884
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
885
0
        return 0;
886
0
    }
887
888
0
    char* keywords = const_cast<char*>(
889
0
        locale_getKeywordsStart({buffer, static_cast<std::string_view::size_type>(bufLen)}));
890
0
    int32_t baseLen = keywords == nullptr ? bufLen : keywords - buffer;
891
    // Remove -1 from the capacity so that this function can guarantee NUL termination.
892
0
    CheckedArrayByteSink sink(keywords == nullptr ? buffer + bufLen : keywords,
893
0
                              bufferCapacity - baseLen - 1);
894
0
    int32_t reslen = ulocimp_setKeywordValue(
895
0
        keywords == nullptr ? std::string_view() : keywords,
896
0
        keywordName,
897
0
        keywordValue == nullptr ? std::string_view() : keywordValue,
898
0
        sink,
899
0
        *status);
900
901
0
    if (U_FAILURE(*status)) {
902
0
        return *status == U_BUFFER_OVERFLOW_ERROR ? reslen + baseLen : 0;
903
0
    }
904
905
    // See the documentation for this function, it's guaranteed to never
906
    // overflow the buffer but instead abort with BUFFER_OVERFLOW_ERROR.
907
    // In this case, nothing has been written to the sink, so it cannot have Overflowed().
908
0
    U_ASSERT(!sink.Overflowed());
909
0
    U_ASSERT(reslen >= 0);
910
0
    return u_terminateChars(buffer, bufferCapacity, reslen + baseLen, status);
911
0
}
912
913
U_EXPORT void
914
ulocimp_setKeywordValue(std::string_view keywordName,
915
                        std::string_view keywordValue,
916
                        CharString& localeID,
917
                        UErrorCode& status)
918
0
{
919
0
    if (U_FAILURE(status)) { return; }
920
0
    std::string_view keywords;
921
0
    if (const char* start = locale_getKeywordsStart(localeID.toStringPiece()); start != nullptr) {
922
        // This is safe because CharString::truncate() doesn't actually erase any
923
        // data, but simply sets the position for where new data will be written.
924
0
        int32_t size = start - localeID.data();
925
0
        keywords = localeID.toStringPiece();
926
0
        keywords.remove_prefix(size);
927
0
        localeID.truncate(size);
928
0
    }
929
0
    CharStringByteSink sink(&localeID);
930
0
    ulocimp_setKeywordValue(keywords, keywordName, keywordValue, sink, status);
931
0
}
932
933
U_EXPORT int32_t
934
ulocimp_setKeywordValue(std::string_view keywords,
935
                        std::string_view keywordName,
936
                        std::string_view keywordValue,
937
                        ByteSink& sink,
938
                        UErrorCode& status)
939
9.27k
{
940
9.27k
    if (U_FAILURE(status)) { return 0; }
941
942
    /* TODO: sorting. removal. */
943
9.27k
    int32_t needLen = 0;
944
9.27k
    int32_t rc;
945
9.27k
    CharString updatedKeysAndValues;
946
9.27k
    bool handledInputKeyAndValue = false;
947
9.27k
    char keyValuePrefix = '@';
948
949
9.27k
    if (status == U_STRING_NOT_TERMINATED_WARNING) {
950
0
        status = U_ZERO_ERROR;
951
0
    }
952
9.27k
    if (keywordName.empty()) {
953
0
        status = U_ILLEGAL_ARGUMENT_ERROR;
954
0
        return 0;
955
0
    }
956
9.27k
    CharString canonKeywordName = locale_canonKeywordName(keywordName, status);
957
9.27k
    if (U_FAILURE(status)) {
958
0
        return 0;
959
0
    }
960
961
9.27k
    CharString canonKeywordValue;
962
70.7k
    for (char c : keywordValue) {
963
70.7k
        if (!UPRV_ISALPHANUM(c) && !UPRV_OK_VALUE_PUNCTUATION(c)) {
964
0
            status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
965
0
            return 0;
966
0
        }
967
        /* Should we force lowercase in value to set? */
968
70.7k
        canonKeywordValue.append(c, status);
969
70.7k
    }
970
9.27k
    if (U_FAILURE(status)) {
971
0
        return 0;
972
0
    }
973
974
9.27k
    if (keywords.size() <= 1) {
975
9.27k
        if (canonKeywordValue.isEmpty()) { /* no keywords = nothing to remove */
976
0
            U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
977
0
            return 0;
978
0
        }
979
980
9.27k
        needLen = 1 + canonKeywordName.length() + 1 + canonKeywordValue.length();
981
9.27k
        int32_t capacity = 0;
982
9.27k
        char* buffer = sink.GetAppendBuffer(
983
9.27k
                needLen, needLen, nullptr, needLen, &capacity);
984
9.27k
        if (capacity < needLen || buffer == nullptr) {
985
0
            status = U_BUFFER_OVERFLOW_ERROR;
986
0
            return needLen; /* no change */
987
0
        }
988
9.27k
        char* it = buffer;
989
990
9.27k
        *it++ = '@';
991
9.27k
        uprv_memcpy(it, canonKeywordName.data(), canonKeywordName.length());
992
9.27k
        it += canonKeywordName.length();
993
9.27k
        *it++ = '=';
994
9.27k
        uprv_memcpy(it, canonKeywordValue.data(), canonKeywordValue.length());
995
9.27k
        sink.Append(buffer, needLen);
996
9.27k
        U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
997
9.27k
        return needLen;
998
9.27k
    } /* end shortcut - no @ */
999
1000
    /* search for keyword */
1001
0
    for (size_t keywordStart = 0; keywordStart != std::string_view::npos;) {
1002
0
        keywordStart++; /* skip @ or ; */
1003
0
        size_t nextEqualsign = keywords.find('=', keywordStart);
1004
0
        if (nextEqualsign == std::string_view::npos) {
1005
0
            status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
1006
0
            return 0;
1007
0
        }
1008
        /* strip leading & trailing spaces (TC decided to tolerate these) */
1009
0
        while (keywordStart < keywords.size() && keywords[keywordStart] == ' ') {
1010
0
            keywordStart++;
1011
0
        }
1012
0
        size_t keyValueTail = nextEqualsign;
1013
0
        while (keyValueTail > keywordStart && keywords[keyValueTail - 1] == ' ') {
1014
0
            keyValueTail--;
1015
0
        }
1016
        /* now keyValueTail points to first char after the keyName */
1017
        /* copy & normalize keyName from locale */
1018
0
        if (keywordStart == keyValueTail) {
1019
0
            status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
1020
0
            return 0;
1021
0
        }
1022
0
        CharString localeKeywordName;
1023
0
        while (keywordStart < keyValueTail) {
1024
0
            if (!UPRV_ISALPHANUM(keywords[keywordStart])) {
1025
0
                status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
1026
0
                return 0;
1027
0
            }
1028
0
            localeKeywordName.append(uprv_tolower(keywords[keywordStart++]), status);
1029
0
        }
1030
0
        if (U_FAILURE(status)) {
1031
0
            return 0;
1032
0
        }
1033
1034
0
        size_t nextSeparator = keywords.find(';', nextEqualsign);
1035
1036
        /* start processing the value part */
1037
0
        nextEqualsign++; /* skip '=' */
1038
        /* First strip leading & trailing spaces (TC decided to tolerate these) */
1039
0
        while (nextEqualsign < keywords.size() && keywords[nextEqualsign] == ' ') {
1040
0
            nextEqualsign++;
1041
0
        }
1042
0
        keyValueTail = nextSeparator == std::string_view::npos ? keywords.size() : nextSeparator;
1043
0
        while (keyValueTail > nextEqualsign && keywords[keyValueTail - 1] == ' ') {
1044
0
            keyValueTail--;
1045
0
        }
1046
0
        if (nextEqualsign == keyValueTail) {
1047
0
            status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1048
0
            return 0;
1049
0
        }
1050
1051
0
        rc = uprv_strcmp(canonKeywordName.data(), localeKeywordName.data());
1052
0
        if(rc == 0) {
1053
            /* Current entry matches the input keyword. Update the entry */
1054
0
            if (!canonKeywordValue.isEmpty()) { /* updating a value */
1055
0
                updatedKeysAndValues.append(keyValuePrefix, status);
1056
0
                keyValuePrefix = ';'; /* for any subsequent key-value pair */
1057
0
                updatedKeysAndValues.append(canonKeywordName, status);
1058
0
                updatedKeysAndValues.append('=', status);
1059
0
                updatedKeysAndValues.append(canonKeywordValue, status);
1060
0
            } /* else removing this entry, don't emit anything */
1061
0
            handledInputKeyAndValue = true;
1062
0
        } else {
1063
           /* input keyword sorts earlier than current entry, add before current entry */
1064
0
            if (rc < 0 && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) {
1065
                /* insert new entry at this location */
1066
0
                updatedKeysAndValues.append(keyValuePrefix, status);
1067
0
                keyValuePrefix = ';'; /* for any subsequent key-value pair */
1068
0
                updatedKeysAndValues.append(canonKeywordName, status);
1069
0
                updatedKeysAndValues.append('=', status);
1070
0
                updatedKeysAndValues.append(canonKeywordValue, status);
1071
0
                handledInputKeyAndValue = true;
1072
0
            }
1073
            /* copy the current entry */
1074
0
            updatedKeysAndValues.append(keyValuePrefix, status);
1075
0
            keyValuePrefix = ';'; /* for any subsequent key-value pair */
1076
0
            updatedKeysAndValues.append(localeKeywordName, status);
1077
0
            updatedKeysAndValues.append('=', status);
1078
0
            updatedKeysAndValues.append(keywords.data() + nextEqualsign,
1079
0
                                        static_cast<int32_t>(keyValueTail - nextEqualsign), status);
1080
0
        }
1081
0
        if (nextSeparator == std::string_view::npos && !canonKeywordValue.isEmpty() && !handledInputKeyAndValue) {
1082
            /* append new entry at the end, it sorts later than existing entries */
1083
0
            updatedKeysAndValues.append(keyValuePrefix, status);
1084
            /* skip keyValuePrefix update, no subsequent key-value pair */
1085
0
            updatedKeysAndValues.append(canonKeywordName, status);
1086
0
            updatedKeysAndValues.append('=', status);
1087
0
            updatedKeysAndValues.append(canonKeywordValue, status);
1088
0
            handledInputKeyAndValue = true;
1089
0
        }
1090
0
        keywordStart = nextSeparator;
1091
0
    } /* end loop searching */
1092
1093
    /* Any error from updatedKeysAndValues.append above would be internal and not due to
1094
     * problems with the passed-in locale. So if we did encounter problems with the
1095
     * passed-in locale above, those errors took precedence and overrode any error
1096
     * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1097
     * are errors here they are from updatedKeysAndValues.append; they do cause an
1098
     * error return but the passed-in locale is unmodified and the original bufLen is
1099
     * returned.
1100
     */
1101
0
    if (!handledInputKeyAndValue || U_FAILURE(status)) {
1102
        /* if input key/value specified removal of a keyword not present in locale, or
1103
         * there was an error in CharString.append, leave original locale alone. */
1104
0
        U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
1105
        // The sink is expected to be a buffer which already contains the full
1106
        // locale string, so when it isn't going to be modified there's no need
1107
        // to actually write any data to it, as the data is already there. Only
1108
        // the first character needs to be overwritten (changing '\0' to '@').
1109
0
        needLen = static_cast<int32_t>(keywords.size());
1110
0
        int32_t capacity = 0;
1111
0
        char* buffer = sink.GetAppendBuffer(
1112
0
                needLen, needLen, nullptr, needLen, &capacity);
1113
0
        if (capacity < needLen || buffer == nullptr) {
1114
0
            status = U_BUFFER_OVERFLOW_ERROR;
1115
0
        } else {
1116
0
            *buffer = '@';
1117
0
            sink.Append(buffer, needLen);
1118
0
        }
1119
0
        return needLen;
1120
0
    }
1121
1122
0
    needLen = updatedKeysAndValues.length();
1123
    // Check to see can we fit the updatedKeysAndValues, if not, return
1124
    // U_BUFFER_OVERFLOW_ERROR without copy updatedKeysAndValues into it.
1125
    // We do this because this API function does not behave like most others:
1126
    // It promises never to set a U_STRING_NOT_TERMINATED_WARNING.
1127
    // When the contents fits but without the terminating NUL, in this case we need to not change
1128
    // the buffer contents and return with a buffer overflow error.
1129
0
    if (needLen > 0) {
1130
0
        int32_t capacity = 0;
1131
0
        char* buffer = sink.GetAppendBuffer(
1132
0
                needLen, needLen, nullptr, needLen, &capacity);
1133
0
        if (capacity < needLen || buffer == nullptr) {
1134
0
            status = U_BUFFER_OVERFLOW_ERROR;
1135
0
            return needLen;
1136
0
        }
1137
0
        uprv_memcpy(buffer, updatedKeysAndValues.data(), needLen);
1138
0
        sink.Append(buffer, needLen);
1139
0
    }
1140
0
    U_ASSERT(status != U_STRING_NOT_TERMINATED_WARNING);
1141
0
    return needLen;
1142
0
}
1143
1144
/* ### ID parsing implementation **************************************************/
1145
1146
namespace {
1147
1148
36.4k
inline bool _isPrefixLetter(char a) { return a == 'x' || a == 'X' || a == 'i' || a == 'I'; }
1149
1150
/*returns true if one of the special prefixes is here (s=string)
1151
  'x-' or 'i-' */
1152
45.6k
inline bool _isIDPrefix(std::string_view s) {
1153
45.6k
    return s.size() >= 2 && _isPrefixLetter(s[0]) && _isIDSeparator(s[1]);
1154
45.6k
}
1155
1156
/* Dot terminates it because of POSIX form  where dot precedes the codepage
1157
 * except for variant
1158
 */
1159
266k
inline bool _isTerminator(char a) { return a == '.' || a == '@'; }
1160
1161
18.4k
inline bool _isBCP47Extension(std::string_view p) {
1162
18.4k
    return p.size() >= 3 &&
1163
18.4k
           p[0] == '-' &&
1164
18.4k
           (p[1] == 't' || p[1] == 'T' ||
1165
11
            p[1] == 'u' || p[1] == 'U' ||
1166
11
            p[1] == 'x' || p[1] == 'X') &&
1167
18.4k
           p[2] == '-';
1168
18.4k
}
1169
1170
/**
1171
 * Lookup 'key' in the array 'list'.  The array 'list' should contain
1172
 * a nullptr entry, followed by more entries, and a second nullptr entry.
1173
 *
1174
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1175
 * COUNTRIES_3.
1176
 */
1177
std::optional<int16_t> _findIndex(const char* const* list, const char* key)
1178
8.36k
{
1179
8.36k
    const char* const* anchor = list;
1180
8.36k
    int32_t pass = 0;
1181
1182
    /* Make two passes through two nullptr-terminated arrays at 'list' */
1183
8.89k
    while (pass++ < 2) {
1184
2.72M
        while (*list) {
1185
2.72M
            if (uprv_strcmp(key, *list) == 0) {
1186
8.09k
                return static_cast<int16_t>(list - anchor);
1187
8.09k
            }
1188
2.72M
            list++;
1189
2.72M
        }
1190
528
        ++list;     /* skip final nullptr *CWB*/
1191
528
    }
1192
264
    return std::nullopt;
1193
8.36k
}
1194
1195
}  // namespace
1196
1197
U_CFUNC const char*
1198
0
uloc_getCurrentCountryID(const char* oldID){
1199
0
    std::optional<int16_t> offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1200
0
    return offset.has_value() ? REPLACEMENT_COUNTRIES[*offset] : oldID;
1201
0
}
1202
U_CFUNC const char*
1203
0
uloc_getCurrentLanguageID(const char* oldID){
1204
0
    std::optional<int16_t> offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1205
0
    return offset.has_value() ? REPLACEMENT_LANGUAGES[*offset] : oldID;
1206
0
}
1207
1208
namespace {
1209
1210
/*
1211
 * the internal functions _getLanguage(), _getScript(), _getRegion(), _getVariant()
1212
 * avoid duplicating code to handle the earlier locale ID pieces
1213
 * in the functions for the later ones by
1214
 * setting the *pEnd pointer to where they stopped parsing
1215
 *
1216
 * TODO try to use this in Locale
1217
 */
1218
1219
45.6k
size_t _getLanguage(std::string_view localeID, ByteSink* sink, UErrorCode& status) {
1220
45.6k
    size_t skip = 0;
1221
45.6k
    if (localeID.size() == 4 && uprv_strnicmp(localeID.data(), "root", 4) == 0) {
1222
0
        skip = 4;
1223
0
        localeID.remove_prefix(skip);
1224
45.6k
    } else if (localeID.size() >= 3 && uprv_strnicmp(localeID.data(), "und", 3) == 0 &&
1225
45.6k
               (localeID.size() == 3 ||
1226
9.27k
                localeID[3] == '-' ||
1227
9.27k
                localeID[3] == '_' ||
1228
9.27k
                localeID[3] == '@')) {
1229
9.27k
        skip = 3;
1230
9.27k
        localeID.remove_prefix(skip);
1231
9.27k
    }
1232
1233
45.6k
    constexpr int32_t MAXLEN = ULOC_LANG_CAPACITY - 1;  // Minus NUL.
1234
1235
    /* if it starts with i- or x- then copy that prefix */
1236
45.6k
    size_t len = _isIDPrefix(localeID) ? 2 : 0;
1237
127k
    while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
1238
82.0k
        if (len == MAXLEN) {
1239
0
            status = U_ILLEGAL_ARGUMENT_ERROR;
1240
0
            return 0;
1241
0
        }
1242
82.0k
        len++;
1243
82.0k
    }
1244
1245
45.6k
    if (sink == nullptr || len == 0) { return skip + len; }
1246
1247
27.9k
    int32_t minCapacity = uprv_max(static_cast<int32_t>(len), 4);  // Minimum 3 letters plus NUL.
1248
27.9k
    char scratch[MAXLEN];
1249
27.9k
    int32_t capacity = 0;
1250
27.9k
    char* buffer = sink->GetAppendBuffer(
1251
27.9k
            minCapacity, minCapacity, scratch, UPRV_LENGTHOF(scratch), &capacity);
1252
1253
91.9k
    for (size_t i = 0; i < len; ++i) {
1254
63.9k
        buffer[i] = uprv_tolower(localeID[i]);
1255
63.9k
    }
1256
27.9k
    if (localeID.size() >= 2 && _isIDSeparator(localeID[1])) {
1257
0
        buffer[1] = '-';
1258
0
    }
1259
1260
27.9k
    if (len == 3) {
1261
        /* convert 3 character code to 2 character code if possible *CWB*/
1262
8.09k
        U_ASSERT(capacity >= 4);
1263
8.09k
        buffer[3] = '\0';
1264
8.09k
        std::optional<int16_t> offset = _findIndex(LANGUAGES_3, buffer);
1265
8.09k
        if (offset.has_value()) {
1266
8.09k
            const char* const alias = LANGUAGES[*offset];
1267
8.09k
            sink->Append(alias, static_cast<int32_t>(uprv_strlen(alias)));
1268
8.09k
            return skip + len;
1269
8.09k
        }
1270
8.09k
    }
1271
1272
19.8k
    sink->Append(buffer, static_cast<int32_t>(len));
1273
19.8k
    return skip + len;
1274
27.9k
}
1275
1276
25.8k
size_t _getScript(std::string_view localeID, ByteSink* sink) {
1277
25.8k
    constexpr int32_t LENGTH = 4;
1278
1279
25.8k
    size_t len = 0;
1280
98.0k
    while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len]) &&
1281
98.0k
            uprv_isASCIILetter(localeID[len])) {
1282
72.2k
        if (len == LENGTH) { return 0; }
1283
72.2k
        len++;
1284
72.2k
    }
1285
25.8k
    if (len != LENGTH) { return 0; }
1286
1287
10.5k
    if (sink == nullptr) { return len; }
1288
1289
4.62k
    char scratch[LENGTH];
1290
4.62k
    int32_t capacity = 0;
1291
4.62k
    char* buffer = sink->GetAppendBuffer(
1292
4.62k
            LENGTH, LENGTH, scratch, UPRV_LENGTHOF(scratch), &capacity);
1293
1294
4.62k
    buffer[0] = uprv_toupper(localeID[0]);
1295
18.5k
    for (int32_t i = 1; i < LENGTH; ++i) {
1296
13.8k
        buffer[i] = uprv_tolower(localeID[i]);
1297
13.8k
    }
1298
1299
4.62k
    sink->Append(buffer, LENGTH);
1300
4.62k
    return len;
1301
10.5k
}
1302
1303
23.6k
size_t _getRegion(std::string_view localeID, ByteSink* sink) {
1304
23.6k
    constexpr int32_t MINLEN = 2;
1305
23.6k
    constexpr int32_t MAXLEN = ULOC_COUNTRY_CAPACITY - 1;  // Minus NUL.
1306
1307
23.6k
    size_t len = 0;
1308
71.2k
    while (len < localeID.size() && !_isTerminator(localeID[len]) && !_isIDSeparator(localeID[len])) {
1309
47.5k
        if (len == MAXLEN) { return 0; }
1310
47.5k
        len++;
1311
47.5k
    }
1312
23.6k
    if (len < MINLEN) { return 0; }
1313
1314
23.6k
    if (sink == nullptr) { return len; }
1315
1316
23.6k
    char scratch[ULOC_COUNTRY_CAPACITY];
1317
23.6k
    int32_t capacity = 0;
1318
23.6k
    char* buffer = sink->GetAppendBuffer(
1319
23.6k
            ULOC_COUNTRY_CAPACITY,
1320
23.6k
            ULOC_COUNTRY_CAPACITY,
1321
23.6k
            scratch,
1322
23.6k
            UPRV_LENGTHOF(scratch),
1323
23.6k
            &capacity);
1324
1325
71.0k
    for (size_t i = 0; i < len; ++i) {
1326
47.4k
        buffer[i] = uprv_toupper(localeID[i]);
1327
47.4k
    }
1328
1329
23.6k
    if (len == 3) {
1330
        /* convert 3 character code to 2 character code if possible *CWB*/
1331
264
        U_ASSERT(capacity >= 4);
1332
264
        buffer[3] = '\0';
1333
264
        std::optional<int16_t> offset = _findIndex(COUNTRIES_3, buffer);
1334
264
        if (offset.has_value()) {
1335
0
            const char* const alias = COUNTRIES[*offset];
1336
0
            sink->Append(alias, static_cast<int32_t>(uprv_strlen(alias)));
1337
0
            return len;
1338
0
        }
1339
264
    }
1340
1341
23.6k
    sink->Append(buffer, static_cast<int32_t>(len));
1342
23.6k
    return len;
1343
23.6k
}
1344
1345
/**
1346
 * @param needSeparator if true, then add leading '_' if any variants
1347
 * are added to 'variant'
1348
 */
1349
size_t
1350
_getVariant(std::string_view localeID,
1351
            char prev,
1352
            ByteSink* sink,
1353
            bool needSeparator,
1354
59
            UErrorCode& status) {
1355
59
    if (U_FAILURE(status) || localeID.empty()) return 0;
1356
1357
    // Reasonable upper limit for variants
1358
    // There are no strict limitation of the syntax of variant in the legacy
1359
    // locale format. If the locale is constructed from unicode_locale_id
1360
    // as defined in UTS35, then we know each unicode_variant_subtag
1361
    // could have max length of 8 ((alphanum{5,8} | digit alphanum{3})
1362
    // 179 would allow 20 unicode_variant_subtag with sep in the
1363
    // unicode_locale_id
1364
    // 8*20 + 1*(20-1) = 179
1365
59
    constexpr int32_t MAX_VARIANTS_LENGTH = 179;
1366
1367
    /* get one or more variant tags and separate them with '_' */
1368
59
    size_t index = 0;
1369
59
    if (_isIDSeparator(prev)) {
1370
        /* get a variant string after a '-' or '_' */
1371
59
        for (std::string_view sub = localeID;;) {
1372
59
            size_t next = sub.find_first_of(".@_-");
1373
            // For historical reasons, a trailing separator is included in the variant.
1374
59
            bool finished = next == std::string_view::npos || next + 1 == sub.length();
1375
59
            size_t limit = finished ? sub.length() : next;
1376
59
            index += limit;
1377
59
            if (index > MAX_VARIANTS_LENGTH) {
1378
0
                status = U_ILLEGAL_ARGUMENT_ERROR;
1379
0
                return 0;
1380
0
            }
1381
1382
59
            if (sink != nullptr) {
1383
59
                if (needSeparator) {
1384
0
                    sink->Append("_", 1);
1385
59
                } else {
1386
59
                    needSeparator = true;
1387
59
                }
1388
1389
59
                int32_t length = static_cast<int32_t>(limit);
1390
59
                int32_t minCapacity = uprv_min(length, MAX_VARIANTS_LENGTH);
1391
59
                char scratch[MAX_VARIANTS_LENGTH];
1392
59
                int32_t capacity = 0;
1393
59
                char* buffer = sink->GetAppendBuffer(
1394
59
                        minCapacity, minCapacity, scratch, UPRV_LENGTHOF(scratch), &capacity);
1395
1396
354
                for (size_t i = 0; i < limit; ++i) {
1397
295
                    buffer[i] = uprv_toupper(sub[i]);
1398
295
                }
1399
59
                sink->Append(buffer, length);
1400
59
            }
1401
1402
59
            if (finished) { return index; }
1403
36
            sub.remove_prefix(next);
1404
36
            if (_isTerminator(sub.front()) || _isBCP47Extension(sub)) { return index; }
1405
0
            sub.remove_prefix(1);
1406
0
            index++;
1407
0
        }
1408
59
    }
1409
1410
0
    size_t skip = 0;
1411
    /* if there is no variant tag after a '-' or '_' then look for '@' */
1412
0
    if (prev == '@') {
1413
        /* keep localeID */
1414
0
    } else if (const char* p = locale_getKeywordsStart(localeID); p != nullptr) {
1415
0
        skip = 1 + p - localeID.data(); /* point after the '@' */
1416
0
        localeID.remove_prefix(skip);
1417
0
    } else {
1418
0
        return 0;
1419
0
    }
1420
0
    for (; index < localeID.size() && !_isTerminator(localeID[index]); index++) {
1421
0
        if (index >= MAX_VARIANTS_LENGTH) { // same as length > MAX_VARIANTS_LENGTH
1422
0
            status = U_ILLEGAL_ARGUMENT_ERROR;
1423
0
            return 0;
1424
0
        }
1425
0
        if (needSeparator) {
1426
0
            if (sink != nullptr) {
1427
0
                sink->Append("_", 1);
1428
0
            }
1429
0
            needSeparator = false;
1430
0
        }
1431
0
        if (sink != nullptr) {
1432
0
            char c = uprv_toupper(localeID[index]);
1433
0
            if (c == '-' || c == ',') c = '_';
1434
0
            sink->Append(&c, 1);
1435
0
        }
1436
0
    }
1437
0
    return skip + index;
1438
0
}
1439
1440
}  // namespace
1441
1442
U_EXPORT CharString
1443
0
ulocimp_getLanguage(std::string_view localeID, UErrorCode& status) {
1444
0
    return ByteSinkUtil::viaByteSinkToCharString(
1445
0
        [&](ByteSink& sink, UErrorCode& status) {
1446
0
            ulocimp_getSubtags(
1447
0
                    localeID,
1448
0
                    &sink,
1449
0
                    nullptr,
1450
0
                    nullptr,
1451
0
                    nullptr,
1452
0
                    nullptr,
1453
0
                    status);
1454
0
        },
1455
0
        status);
1456
0
}
1457
1458
U_EXPORT CharString
1459
0
ulocimp_getScript(std::string_view localeID, UErrorCode& status) {
1460
0
    return ByteSinkUtil::viaByteSinkToCharString(
1461
0
        [&](ByteSink& sink, UErrorCode& status) {
1462
0
            ulocimp_getSubtags(
1463
0
                    localeID,
1464
0
                    nullptr,
1465
0
                    &sink,
1466
0
                    nullptr,
1467
0
                    nullptr,
1468
0
                    nullptr,
1469
0
                    status);
1470
0
        },
1471
0
        status);
1472
0
}
1473
1474
U_EXPORT CharString
1475
13.0k
ulocimp_getRegion(std::string_view localeID, UErrorCode& status) {
1476
13.0k
    return ByteSinkUtil::viaByteSinkToCharString(
1477
13.0k
        [&](ByteSink& sink, UErrorCode& status) {
1478
13.0k
            ulocimp_getSubtags(
1479
13.0k
                    localeID,
1480
13.0k
                    nullptr,
1481
13.0k
                    nullptr,
1482
13.0k
                    &sink,
1483
13.0k
                    nullptr,
1484
13.0k
                    nullptr,
1485
13.0k
                    status);
1486
13.0k
        },
1487
13.0k
        status);
1488
13.0k
}
1489
1490
U_EXPORT CharString
1491
0
ulocimp_getVariant(std::string_view localeID, UErrorCode& status) {
1492
0
    return ByteSinkUtil::viaByteSinkToCharString(
1493
0
        [&](ByteSink& sink, UErrorCode& status) {
1494
0
            ulocimp_getSubtags(
1495
0
                    localeID,
1496
0
                    nullptr,
1497
0
                    nullptr,
1498
0
                    nullptr,
1499
0
                    &sink,
1500
0
                    nullptr,
1501
0
                    status);
1502
0
        },
1503
0
        status);
1504
0
}
1505
1506
U_EXPORT void
1507
ulocimp_getSubtags(
1508
        std::string_view localeID,
1509
        CharString* language,
1510
        CharString* script,
1511
        CharString* region,
1512
        CharString* variant,
1513
        const char** pEnd,
1514
57.7k
        UErrorCode& status) {
1515
57.7k
    if (U_FAILURE(status)) { return; }
1516
1517
57.7k
    std::optional<CharStringByteSink> languageSink;
1518
57.7k
    std::optional<CharStringByteSink> scriptSink;
1519
57.7k
    std::optional<CharStringByteSink> regionSink;
1520
57.7k
    std::optional<CharStringByteSink> variantSink;
1521
1522
57.7k
    if (language != nullptr) { languageSink.emplace(language); }
1523
57.7k
    if (script != nullptr) { scriptSink.emplace(script); }
1524
57.7k
    if (region != nullptr) { regionSink.emplace(region); }
1525
57.7k
    if (variant != nullptr) { variantSink.emplace(variant); }
1526
1527
57.7k
    ulocimp_getSubtags(
1528
57.7k
            localeID,
1529
57.7k
            languageSink.has_value() ? &*languageSink : nullptr,
1530
57.7k
            scriptSink.has_value() ? &*scriptSink : nullptr,
1531
57.7k
            regionSink.has_value() ? &*regionSink : nullptr,
1532
57.7k
            variantSink.has_value() ? &*variantSink : nullptr,
1533
57.7k
            pEnd,
1534
57.7k
            status);
1535
57.7k
}
1536
1537
U_EXPORT void
1538
ulocimp_getSubtags(
1539
        std::string_view localeID,
1540
        ByteSink* language,
1541
        ByteSink* script,
1542
        ByteSink* region,
1543
        ByteSink* variant,
1544
        const char** pEnd,
1545
70.8k
        UErrorCode& status) {
1546
70.8k
    if (U_FAILURE(status)) { return; }
1547
1548
70.8k
    if (pEnd != nullptr) {
1549
57.6k
        *pEnd = localeID.data();
1550
57.6k
    } else if (language == nullptr &&
1551
13.2k
               script == nullptr &&
1552
13.2k
               region == nullptr &&
1553
13.2k
               variant == nullptr) {
1554
0
        return;
1555
0
    }
1556
1557
70.8k
    if (localeID.empty()) { return; }
1558
1559
45.6k
    bool hasRegion = false;
1560
1561
45.6k
    {
1562
45.6k
        size_t len = _getLanguage(localeID, language, status);
1563
45.6k
        if (U_FAILURE(status)) { return; }
1564
45.6k
        if (len > 0) {
1565
45.6k
            localeID.remove_prefix(len);
1566
45.6k
        }
1567
45.6k
    }
1568
1569
45.6k
    if (pEnd != nullptr) {
1570
37.1k
        *pEnd = localeID.data();
1571
37.1k
    } else if (script == nullptr &&
1572
8.57k
               region == nullptr &&
1573
8.57k
               variant == nullptr) {
1574
0
        return;
1575
0
    }
1576
1577
45.6k
    if (localeID.empty()) { return; }
1578
1579
34.9k
    if (_isIDSeparator(localeID.front())) {
1580
25.8k
        std::string_view sub = localeID;
1581
25.8k
        sub.remove_prefix(1);
1582
25.8k
        size_t len = _getScript(sub, script);
1583
25.8k
        if (len > 0) {
1584
10.5k
            localeID.remove_prefix(len + 1);
1585
10.5k
            if (pEnd != nullptr) { *pEnd = localeID.data(); }
1586
10.5k
        }
1587
25.8k
    }
1588
1589
34.9k
    if ((region == nullptr && variant == nullptr && pEnd == nullptr) || localeID.empty()) { return; }
1590
1591
34.5k
    if (_isIDSeparator(localeID.front())) {
1592
23.6k
        std::string_view sub = localeID;
1593
23.6k
        sub.remove_prefix(1);
1594
23.6k
        size_t len = _getRegion(sub, region);
1595
23.6k
        if (len > 0) {
1596
23.6k
            hasRegion = true;
1597
23.6k
            localeID.remove_prefix(len + 1);
1598
23.6k
            if (pEnd != nullptr) { *pEnd = localeID.data(); }
1599
23.6k
        }
1600
23.6k
    }
1601
1602
34.5k
    if ((variant == nullptr && pEnd == nullptr) || localeID.empty()) { return; }
1603
1604
18.4k
    bool hasVariant = false;
1605
1606
18.4k
    if (_isIDSeparator(localeID.front()) && !_isBCP47Extension(localeID)) {
1607
59
        std::string_view sub = localeID;
1608
        /* If there was no country ID, skip a possible extra IDSeparator */
1609
59
        size_t skip = !hasRegion && localeID.size() > 1 && _isIDSeparator(localeID[1]) ? 2 : 1;
1610
59
        sub.remove_prefix(skip);
1611
59
        size_t len = _getVariant(sub, localeID[0], variant, false, status);
1612
59
        if (U_FAILURE(status)) { return; }
1613
59
        if (len > 0) {
1614
59
            hasVariant = true;
1615
59
            localeID.remove_prefix(skip + len);
1616
59
            if (pEnd != nullptr) { *pEnd = localeID.data(); }
1617
59
        }
1618
59
    }
1619
1620
18.4k
    if ((variant == nullptr && pEnd == nullptr) || localeID.empty()) { return; }
1621
1622
18.4k
    if (_isBCP47Extension(localeID)) {
1623
0
        localeID.remove_prefix(2);
1624
0
        constexpr char vaposix[] = "-va-posix";
1625
0
        constexpr size_t length = sizeof vaposix - 1;
1626
0
        for (size_t next;; localeID.remove_prefix(next)) {
1627
0
            next = localeID.find('-', 1);
1628
0
            if (next == std::string_view::npos) { break; }
1629
0
            next = localeID.find('-', next + 1);
1630
0
            bool finished = next == std::string_view::npos;
1631
0
            std::string_view sub = localeID;
1632
0
            if (!finished) { sub.remove_suffix(sub.length() - next); }
1633
1634
0
            if (sub.length() == length && uprv_strnicmp(sub.data(), vaposix, length) == 0) {
1635
0
                if (variant != nullptr) {
1636
0
                    if (hasVariant) { variant->Append("_", 1); }
1637
0
                    constexpr char posix[] = "POSIX";
1638
0
                    variant->Append(posix, sizeof posix - 1);
1639
0
                }
1640
0
                if (pEnd != nullptr) { *pEnd = localeID.data() + length; }
1641
0
            }
1642
1643
0
            if (finished) { break; }
1644
0
        }
1645
0
    }
1646
18.4k
}
1647
1648
/* Keyword enumeration */
1649
1650
typedef struct UKeywordsContext {
1651
    char* keywords;
1652
    char* current;
1653
} UKeywordsContext;
1654
1655
U_CDECL_BEGIN
1656
1657
static void U_CALLCONV
1658
0
uloc_kw_closeKeywords(UEnumeration *enumerator) {
1659
0
    uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1660
0
    uprv_free(enumerator->context);
1661
0
    uprv_free(enumerator);
1662
0
}
1663
1664
static int32_t U_CALLCONV
1665
0
uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1666
0
    char *kw = ((UKeywordsContext *)en->context)->keywords;
1667
0
    int32_t result = 0;
1668
0
    while(*kw) {
1669
0
        result++;
1670
0
        kw += uprv_strlen(kw)+1;
1671
0
    }
1672
0
    return result;
1673
0
}
1674
1675
static const char * U_CALLCONV
1676
uloc_kw_nextKeyword(UEnumeration* en,
1677
                    int32_t* resultLength,
1678
0
                    UErrorCode* /*status*/) {
1679
0
    const char* result = ((UKeywordsContext *)en->context)->current;
1680
0
    int32_t len = 0;
1681
0
    if(*result) {
1682
0
        len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1683
0
        ((UKeywordsContext *)en->context)->current += len+1;
1684
0
    } else {
1685
0
        result = nullptr;
1686
0
    }
1687
0
    if (resultLength) {
1688
0
        *resultLength = len;
1689
0
    }
1690
0
    return result;
1691
0
}
1692
1693
static void U_CALLCONV
1694
uloc_kw_resetKeywords(UEnumeration* en,
1695
0
                      UErrorCode* /*status*/) {
1696
0
    ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1697
0
}
1698
1699
U_CDECL_END
1700
1701
1702
static const UEnumeration gKeywordsEnum = {
1703
    nullptr,
1704
    nullptr,
1705
    uloc_kw_closeKeywords,
1706
    uloc_kw_countKeywords,
1707
    uenum_unextDefault,
1708
    uloc_kw_nextKeyword,
1709
    uloc_kw_resetKeywords
1710
};
1711
1712
U_CAPI UEnumeration* U_EXPORT2
1713
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1714
0
{
1715
0
    if (U_FAILURE(*status)) { return nullptr; }
1716
1717
0
    LocalMemory<UKeywordsContext> myContext;
1718
0
    LocalMemory<UEnumeration> result;
1719
1720
0
    myContext.adoptInstead(static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext))));
1721
0
    result.adoptInstead(static_cast<UEnumeration *>(uprv_malloc(sizeof(UEnumeration))));
1722
0
    if (myContext.isNull() || result.isNull()) {
1723
0
        *status = U_MEMORY_ALLOCATION_ERROR;
1724
0
        return nullptr;
1725
0
    }
1726
0
    uprv_memcpy(result.getAlias(), &gKeywordsEnum, sizeof(UEnumeration));
1727
0
    myContext->keywords = static_cast<char *>(uprv_malloc(keywordListSize+1));
1728
0
    if (myContext->keywords == nullptr) {
1729
0
        *status = U_MEMORY_ALLOCATION_ERROR;
1730
0
        return nullptr;
1731
0
    }
1732
0
    uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1733
0
    myContext->keywords[keywordListSize] = 0;
1734
0
    myContext->current = myContext->keywords;
1735
0
    result->context = myContext.orphan();
1736
0
    return result.orphan();
1737
0
}
1738
1739
U_CAPI UEnumeration* U_EXPORT2
1740
uloc_openKeywords(const char* localeID,
1741
                        UErrorCode* status)
1742
0
{
1743
0
    if(status==nullptr || U_FAILURE(*status)) {
1744
0
        return nullptr;
1745
0
    }
1746
1747
0
    CharString tempBuffer;
1748
0
    const char* tmpLocaleID;
1749
1750
0
    if (localeID != nullptr && _hasBCP47Extension(localeID)) {
1751
0
        tempBuffer = ulocimp_forLanguageTag(localeID, -1, nullptr, *status);
1752
0
        tmpLocaleID = U_SUCCESS(*status) && !tempBuffer.isEmpty() ? tempBuffer.data() : localeID;
1753
0
    } else {
1754
0
        if (localeID==nullptr) {
1755
0
            localeID=uloc_getDefault();
1756
0
        }
1757
0
        tmpLocaleID=localeID;
1758
0
    }
1759
1760
0
    ulocimp_getSubtags(
1761
0
            tmpLocaleID,
1762
0
            nullptr,
1763
0
            nullptr,
1764
0
            nullptr,
1765
0
            nullptr,
1766
0
            &tmpLocaleID,
1767
0
            *status);
1768
0
    if (U_FAILURE(*status)) {
1769
0
        return nullptr;
1770
0
    }
1771
1772
    /* keywords are located after '@' */
1773
0
    if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != nullptr) {
1774
0
        CharString keywords = ulocimp_getKeywords(tmpLocaleID + 1, '@', false, *status);
1775
0
        if (U_FAILURE(*status)) {
1776
0
            return nullptr;
1777
0
        }
1778
0
        return uloc_openKeywordList(keywords.data(), keywords.length(), status);
1779
0
    }
1780
0
    return nullptr;
1781
0
}
1782
1783
1784
/* bit-flags for 'options' parameter of _canonicalize */
1785
51.0k
#define _ULOC_STRIP_KEYWORDS 0x2
1786
157k
#define _ULOC_CANONICALIZE   0x1
1787
1788
namespace {
1789
1790
192k
inline bool OPTION_SET(uint32_t options, uint32_t mask) { return (options & mask) != 0; }
1791
1792
constexpr char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
1793
constexpr int32_t I_DEFAULT_LENGTH = UPRV_LENGTHOF(i_default);
1794
1795
/**
1796
 * Canonicalize the given localeID, to level 1 or to level 2,
1797
 * depending on the options.  To specify level 1, pass in options=0.
1798
 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1799
 *
1800
 * This is the code underlying uloc_getName and uloc_canonicalize.
1801
 */
1802
void
1803
_canonicalize(std::string_view localeID,
1804
              ByteSink& sink,
1805
              uint32_t options,
1806
48.1k
              UErrorCode& err) {
1807
48.1k
    if (U_FAILURE(err)) {
1808
0
        return;
1809
0
    }
1810
1811
48.1k
    int32_t j, fieldCount=0;
1812
48.1k
    CharString tempBuffer;  // if localeID has a BCP47 extension, tmpLocaleID points to this
1813
48.1k
    CharString localeIDWithHyphens;  // if localeID has a BPC47 extension and have _, tmpLocaleID points to this
1814
48.1k
    std::string_view origLocaleID;
1815
48.1k
    std::string_view tmpLocaleID;
1816
48.1k
    size_t keywordAssign = std::string_view::npos;
1817
48.1k
    size_t separatorIndicator = std::string_view::npos;
1818
1819
48.1k
    if (_hasBCP47Extension(localeID)) {
1820
0
        std::string_view localeIDPtr = localeID;
1821
1822
        // convert all underbars to hyphens, unless the "BCP47 extension" comes at the beginning of the string
1823
0
        if (localeID.size() >= 2 && localeID.find('_') != std::string_view::npos && localeID[1] != '-' && localeID[1] != '_') {
1824
0
            localeIDWithHyphens.append(localeID, err);
1825
0
            if (U_SUCCESS(err)) {
1826
0
                for (char* p = localeIDWithHyphens.data(); *p != '\0'; ++p) {
1827
0
                    if (*p == '_') {
1828
0
                        *p = '-';
1829
0
                    }
1830
0
                }
1831
0
                localeIDPtr = localeIDWithHyphens.toStringPiece();
1832
0
            }
1833
0
        }
1834
1835
0
        tempBuffer = ulocimp_forLanguageTag(localeIDPtr.data(), static_cast<int32_t>(localeIDPtr.size()), nullptr, err);
1836
0
        tmpLocaleID = U_SUCCESS(err) && !tempBuffer.isEmpty() ? static_cast<std::string_view>(tempBuffer.toStringPiece()) : localeIDPtr;
1837
48.1k
    } else {
1838
48.1k
        tmpLocaleID=localeID;
1839
48.1k
    }
1840
1841
48.1k
    origLocaleID=tmpLocaleID;
1842
1843
    /* get all pieces, one after another, and separate with '_' */
1844
48.1k
    CharString tag;
1845
48.1k
    CharString script;
1846
48.1k
    CharString country;
1847
48.1k
    CharString variant;
1848
48.1k
    const char* end = nullptr;
1849
48.1k
    ulocimp_getSubtags(
1850
48.1k
            tmpLocaleID,
1851
48.1k
            &tag,
1852
48.1k
            &script,
1853
48.1k
            &country,
1854
48.1k
            &variant,
1855
48.1k
            &end,
1856
48.1k
            err);
1857
48.1k
    if (U_FAILURE(err)) {
1858
0
        return;
1859
0
    }
1860
48.1k
    U_ASSERT(end != nullptr);
1861
48.1k
    if (end > tmpLocaleID.data()) {
1862
32.2k
        tmpLocaleID.remove_prefix(end - tmpLocaleID.data());
1863
32.2k
    }
1864
1865
48.1k
    if (tag.length() == I_DEFAULT_LENGTH && origLocaleID.length() >= I_DEFAULT_LENGTH &&
1866
48.1k
            uprv_strncmp(origLocaleID.data(), i_default, I_DEFAULT_LENGTH) == 0) {
1867
0
        tag.clear();
1868
0
        tag.append(uloc_getDefault(), err);
1869
48.1k
    } else {
1870
48.1k
        if (!script.isEmpty()) {
1871
3.76k
            ++fieldCount;
1872
3.76k
            tag.append('_', err);
1873
3.76k
            tag.append(script, err);
1874
3.76k
        }
1875
48.1k
        if (!country.isEmpty()) {
1876
13.9k
            ++fieldCount;
1877
13.9k
            tag.append('_', err);
1878
13.9k
            tag.append(country, err);
1879
13.9k
        }
1880
48.1k
        if (!variant.isEmpty()) {
1881
50
            ++fieldCount;
1882
50
            if (country.isEmpty()) {
1883
9
                tag.append('_', err);
1884
9
            }
1885
50
            tag.append('_', err);
1886
50
            tag.append(variant, err);
1887
50
        }
1888
48.1k
    }
1889
1890
    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1891
48.1k
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && !tmpLocaleID.empty() && tmpLocaleID.front() == '.') {
1892
0
        tag.append('.', err);
1893
0
        tmpLocaleID.remove_prefix(1);
1894
0
        size_t length;
1895
0
        if (size_t atPos = tmpLocaleID.find('@'); atPos != std::string_view::npos) {
1896
0
            length = atPos;
1897
0
        } else {
1898
0
            length = tmpLocaleID.length();
1899
0
        }
1900
        // The longest charset name we found in IANA charset registry
1901
        // https://www.iana.org/assignments/character-sets/ is
1902
        // "Extended_UNIX_Code_Packed_Format_for_Japanese" in length 45.
1903
        // we therefore restrict the length here to be 64 which is a power of 2
1904
        // number that is longer than 45.
1905
0
        constexpr size_t kMaxCharsetLength = 64;
1906
0
        if (length > kMaxCharsetLength) {
1907
0
           err = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
1908
0
           return;
1909
0
        }
1910
0
        if (length > 0) {
1911
0
            tag.append(tmpLocaleID.data(), static_cast<int32_t>(length), err);
1912
0
            tmpLocaleID.remove_prefix(length);
1913
0
        }
1914
0
    }
1915
1916
    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1917
       After this, tmpLocaleID either starts at '@' or is empty. */
1918
48.1k
    if (const char* start = locale_getKeywordsStart(tmpLocaleID); start != nullptr) {
1919
13.5k
        if (start > tmpLocaleID.data()) {
1920
0
            tmpLocaleID.remove_prefix(start - tmpLocaleID.data());
1921
0
        }
1922
13.5k
        keywordAssign = tmpLocaleID.find('=');
1923
13.5k
        separatorIndicator = tmpLocaleID.find(';');
1924
34.5k
    } else {
1925
34.5k
        tmpLocaleID = {};
1926
34.5k
    }
1927
1928
    /* Copy POSIX-style variant, if any [mr@FOO] */
1929
48.1k
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1930
48.1k
        !tmpLocaleID.empty() && keywordAssign == std::string_view::npos) {
1931
0
        tag.append(tmpLocaleID, err);
1932
0
        tmpLocaleID = {};
1933
0
    }
1934
1935
48.1k
    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1936
        /* Handle @FOO variant if @ is present and not followed by = */
1937
13.2k
        if (!tmpLocaleID.empty() && keywordAssign == std::string_view::npos) {
1938
            /* Add missing '_' if needed */
1939
0
            if (fieldCount < 2 || (fieldCount < 3 && !script.isEmpty())) {
1940
0
                do {
1941
0
                    tag.append('_', err);
1942
0
                    ++fieldCount;
1943
0
                } while(fieldCount<2);
1944
0
            }
1945
1946
0
            CharStringByteSink s(&tag);
1947
0
            std::string_view sub = tmpLocaleID;
1948
0
            sub.remove_prefix(1);
1949
0
            _getVariant(sub, '@', &s, !variant.isEmpty(), err);
1950
0
            if (U_FAILURE(err)) { return; }
1951
0
        }
1952
1953
        /* Look up the ID in the canonicalization map */
1954
145k
        for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1955
132k
            StringPiece id(CANONICALIZE_MAP[j].id);
1956
132k
            if (tag == id) {
1957
0
                if (id.empty() && !tmpLocaleID.empty()) {
1958
0
                    break; /* Don't remap "" if keywords present */
1959
0
                }
1960
0
                tag.clear();
1961
0
                tag.append(CANONICALIZE_MAP[j].canonicalID, err);
1962
0
                break;
1963
0
            }
1964
132k
        }
1965
13.2k
    }
1966
1967
48.1k
    sink.Append(tag.data(), tag.length());
1968
1969
48.1k
    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1970
45.3k
        if (!tmpLocaleID.empty() && keywordAssign != std::string_view::npos &&
1971
45.3k
            (separatorIndicator == std::string_view::npos || separatorIndicator > keywordAssign)) {
1972
13.5k
            sink.Append("@", 1);
1973
13.5k
            ++fieldCount;
1974
13.5k
            tmpLocaleID.remove_prefix(1);
1975
13.5k
            ulocimp_getKeywords(tmpLocaleID, '@', sink, true, err);
1976
13.5k
        }
1977
45.3k
    }
1978
48.1k
}
1979
1980
}  // namespace
1981
1982
/* ### ID parsing API **************************************************/
1983
1984
U_CAPI int32_t  U_EXPORT2
1985
uloc_getParent(const char*    localeID,
1986
               char* parent,
1987
               int32_t parentCapacity,
1988
               UErrorCode* err)
1989
0
{
1990
0
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
1991
0
        parent, parentCapacity,
1992
0
        [&](ByteSink& sink, UErrorCode& status) {
1993
0
            ulocimp_getParent(localeID, sink, status);
1994
0
        },
1995
0
        *err);
1996
0
}
1997
1998
U_EXPORT CharString
1999
ulocimp_getParent(const char* localeID,
2000
                  UErrorCode& err)
2001
0
{
2002
0
    return ByteSinkUtil::viaByteSinkToCharString(
2003
0
        [&](ByteSink& sink, UErrorCode& status) {
2004
0
            ulocimp_getParent(localeID, sink, status);
2005
0
        },
2006
0
        err);
2007
0
}
2008
2009
U_EXPORT void
2010
ulocimp_getParent(const char* localeID,
2011
                  icu::ByteSink& sink,
2012
                  UErrorCode& err)
2013
0
{
2014
0
    if (U_FAILURE(err)) { return; }
2015
2016
0
    const char *lastUnderscore;
2017
0
    int32_t i;
2018
2019
0
    if (localeID == nullptr)
2020
0
        localeID = uloc_getDefault();
2021
2022
0
    lastUnderscore=uprv_strrchr(localeID, '_');
2023
0
    if(lastUnderscore!=nullptr) {
2024
0
        i = static_cast<int32_t>(lastUnderscore - localeID);
2025
0
    } else {
2026
0
        i=0;
2027
0
    }
2028
2029
0
    if (i > 0) {
2030
0
        if (uprv_strnicmp(localeID, "und_", 4) == 0) {
2031
0
            localeID += 3;
2032
0
            i -= 3;
2033
0
        }
2034
0
        sink.Append(localeID, i);
2035
0
    }
2036
0
}
2037
2038
U_CAPI int32_t U_EXPORT2
2039
uloc_getLanguage(const char*    localeID,
2040
         char* language,
2041
         int32_t languageCapacity,
2042
         UErrorCode* err)
2043
0
{
2044
0
    if (localeID == nullptr) {
2045
0
        localeID = uloc_getDefault();
2046
0
    }
2047
2048
    /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
2049
0
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
2050
0
        language, languageCapacity,
2051
0
        [&](ByteSink& sink, UErrorCode& status) {
2052
0
            ulocimp_getSubtags(
2053
0
                    localeID,
2054
0
                    &sink,
2055
0
                    nullptr,
2056
0
                    nullptr,
2057
0
                    nullptr,
2058
0
                    nullptr,
2059
0
                    status);
2060
0
        },
2061
0
        *err);
2062
0
}
2063
2064
U_CAPI int32_t U_EXPORT2
2065
uloc_getScript(const char*    localeID,
2066
         char* script,
2067
         int32_t scriptCapacity,
2068
         UErrorCode* err)
2069
0
{
2070
0
    if (localeID == nullptr) {
2071
0
        localeID = uloc_getDefault();
2072
0
    }
2073
2074
0
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
2075
0
        script, scriptCapacity,
2076
0
        [&](ByteSink& sink, UErrorCode& status) {
2077
0
            ulocimp_getSubtags(
2078
0
                    localeID,
2079
0
                    nullptr,
2080
0
                    &sink,
2081
0
                    nullptr,
2082
0
                    nullptr,
2083
0
                    nullptr,
2084
0
                    status);
2085
0
        },
2086
0
        *err);
2087
0
}
2088
2089
U_CAPI int32_t  U_EXPORT2
2090
uloc_getCountry(const char* localeID,
2091
            char* country,
2092
            int32_t countryCapacity,
2093
            UErrorCode* err)
2094
0
{
2095
0
    if (localeID == nullptr) {
2096
0
        localeID = uloc_getDefault();
2097
0
    }
2098
2099
0
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
2100
0
        country, countryCapacity,
2101
0
        [&](ByteSink& sink, UErrorCode& status) {
2102
0
            ulocimp_getSubtags(
2103
0
                    localeID,
2104
0
                    nullptr,
2105
0
                    nullptr,
2106
0
                    &sink,
2107
0
                    nullptr,
2108
0
                    nullptr,
2109
0
                    status);
2110
0
        },
2111
0
        *err);
2112
0
}
2113
2114
U_CAPI int32_t  U_EXPORT2
2115
uloc_getVariant(const char* localeID,
2116
                char* variant,
2117
                int32_t variantCapacity,
2118
                UErrorCode* err)
2119
0
{
2120
0
    if (localeID == nullptr) {
2121
0
        localeID = uloc_getDefault();
2122
0
    }
2123
2124
0
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
2125
0
        variant, variantCapacity,
2126
0
        [&](ByteSink& sink, UErrorCode& status) {
2127
0
            ulocimp_getSubtags(
2128
0
                    localeID,
2129
0
                    nullptr,
2130
0
                    nullptr,
2131
0
                    nullptr,
2132
0
                    &sink,
2133
0
                    nullptr,
2134
0
                    status);
2135
0
        },
2136
0
        *err);
2137
0
}
2138
2139
U_CAPI int32_t  U_EXPORT2
2140
uloc_getName(const char* localeID,
2141
             char* name,
2142
             int32_t nameCapacity,
2143
             UErrorCode* err)
2144
0
{
2145
0
    if (localeID == nullptr) {
2146
0
        localeID = uloc_getDefault();
2147
0
    }
2148
0
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
2149
0
        name, nameCapacity,
2150
0
        [&](ByteSink& sink, UErrorCode& status) {
2151
0
            ulocimp_getName(localeID, sink, status);
2152
0
        },
2153
0
        *err);
2154
0
}
2155
2156
U_EXPORT CharString
2157
ulocimp_getName(std::string_view localeID,
2158
                UErrorCode& err)
2159
0
{
2160
0
    return ByteSinkUtil::viaByteSinkToCharString(
2161
0
        [&](ByteSink& sink, UErrorCode& status) {
2162
0
            ulocimp_getName(localeID, sink, status);
2163
0
        },
2164
0
        err);
2165
0
}
2166
2167
U_EXPORT void
2168
ulocimp_getName(std::string_view localeID,
2169
                ByteSink& sink,
2170
                UErrorCode& err)
2171
32.0k
{
2172
32.0k
    _canonicalize(localeID, sink, 0, err);
2173
32.0k
}
2174
2175
U_CAPI int32_t  U_EXPORT2
2176
uloc_getBaseName(const char* localeID,
2177
                 char* name,
2178
                 int32_t nameCapacity,
2179
                 UErrorCode* err)
2180
0
{
2181
0
    if (localeID == nullptr) {
2182
0
        localeID = uloc_getDefault();
2183
0
    }
2184
0
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
2185
0
        name, nameCapacity,
2186
0
        [&](ByteSink& sink, UErrorCode& status) {
2187
0
            ulocimp_getBaseName(localeID, sink, status);
2188
0
        },
2189
0
        *err);
2190
0
}
2191
2192
U_EXPORT CharString
2193
ulocimp_getBaseName(std::string_view localeID,
2194
                    UErrorCode& err)
2195
2.85k
{
2196
2.85k
    return ByteSinkUtil::viaByteSinkToCharString(
2197
2.85k
        [&](ByteSink& sink, UErrorCode& status) {
2198
2.85k
            ulocimp_getBaseName(localeID, sink, status);
2199
2.85k
        },
2200
2.85k
        err);
2201
2.85k
}
2202
2203
U_EXPORT void
2204
ulocimp_getBaseName(std::string_view localeID,
2205
                    ByteSink& sink,
2206
                    UErrorCode& err)
2207
2.85k
{
2208
2.85k
    _canonicalize(localeID, sink, _ULOC_STRIP_KEYWORDS, err);
2209
2.85k
}
2210
2211
U_CAPI int32_t  U_EXPORT2
2212
uloc_canonicalize(const char* localeID,
2213
                  char* name,
2214
                  int32_t nameCapacity,
2215
                  UErrorCode* err)
2216
0
{
2217
0
    if (localeID == nullptr) {
2218
0
        localeID = uloc_getDefault();
2219
0
    }
2220
0
    return ByteSinkUtil::viaByteSinkToTerminatedChars(
2221
0
        name, nameCapacity,
2222
0
        [&](ByteSink& sink, UErrorCode& status) {
2223
0
            ulocimp_canonicalize(localeID, sink, status);
2224
0
        },
2225
0
        *err);
2226
0
}
2227
2228
U_EXPORT CharString
2229
ulocimp_canonicalize(std::string_view localeID,
2230
                     UErrorCode& err)
2231
12.3k
{
2232
12.3k
    return ByteSinkUtil::viaByteSinkToCharString(
2233
12.3k
        [&](ByteSink& sink, UErrorCode& status) {
2234
12.3k
            ulocimp_canonicalize(localeID, sink, status);
2235
12.3k
        },
2236
12.3k
        err);
2237
12.3k
}
2238
2239
U_EXPORT void
2240
ulocimp_canonicalize(std::string_view localeID,
2241
                     ByteSink& sink,
2242
                     UErrorCode& err)
2243
13.2k
{
2244
13.2k
    _canonicalize(localeID, sink, _ULOC_CANONICALIZE, err);
2245
13.2k
}
2246
2247
U_CAPI const char*  U_EXPORT2
2248
uloc_getISO3Language(const char* localeID)
2249
0
{
2250
0
    UErrorCode err = U_ZERO_ERROR;
2251
2252
0
    if (localeID == nullptr)
2253
0
    {
2254
0
        localeID = uloc_getDefault();
2255
0
    }
2256
0
    CharString lang = ulocimp_getLanguage(localeID, err);
2257
0
    if (U_FAILURE(err))
2258
0
        return "";
2259
0
    std::optional<int16_t> offset = _findIndex(LANGUAGES, lang.data());
2260
0
    return offset.has_value() ? LANGUAGES_3[*offset] : "";
2261
0
}
2262
2263
U_CAPI const char*  U_EXPORT2
2264
uloc_getISO3Country(const char* localeID)
2265
0
{
2266
0
    UErrorCode err = U_ZERO_ERROR;
2267
2268
0
    if (localeID == nullptr)
2269
0
    {
2270
0
        localeID = uloc_getDefault();
2271
0
    }
2272
0
    CharString cntry = ulocimp_getRegion(localeID, err);
2273
0
    if (U_FAILURE(err))
2274
0
        return "";
2275
0
    std::optional<int16_t> offset = _findIndex(COUNTRIES, cntry.data());
2276
0
    return offset.has_value() ? COUNTRIES_3[*offset] : "";
2277
0
}
2278
2279
U_CAPI uint32_t  U_EXPORT2
2280
uloc_getLCID(const char* localeID)
2281
0
{
2282
0
    UErrorCode status = U_ZERO_ERROR;
2283
0
    uint32_t   lcid = 0;
2284
2285
    /* Check for incomplete id. */
2286
0
    if (!localeID || uprv_strlen(localeID) < 2) {
2287
0
        return 0;
2288
0
    }
2289
2290
    // First, attempt Windows platform lookup if available, but fall
2291
    // through to catch any special cases (ICU vs Windows name differences).
2292
0
    lcid = uprv_convertToLCIDPlatform(localeID, &status);
2293
0
    if (U_FAILURE(status)) {
2294
0
        return 0;
2295
0
    }
2296
0
    if (lcid > 0) {
2297
        // Windows found an LCID, return that
2298
0
        return lcid;
2299
0
    }
2300
2301
0
    CharString langID = ulocimp_getLanguage(localeID, status);
2302
0
    if (U_FAILURE(status)) {
2303
0
        return 0;
2304
0
    }
2305
2306
0
    if (uprv_strchr(localeID, '@')) {
2307
        // uprv_convertToLCID does not support keywords other than collation.
2308
        // Remove all keywords except collation.
2309
0
        CharString collVal = ulocimp_getKeywordValue(localeID, "collation", status);
2310
0
        if (U_SUCCESS(status) && !collVal.isEmpty()) {
2311
0
            CharString tmpLocaleID = ulocimp_getBaseName(localeID, status);
2312
0
            ulocimp_setKeywordValue("collation", collVal.toStringPiece(), tmpLocaleID, status);
2313
0
            if (U_SUCCESS(status)) {
2314
0
                return uprv_convertToLCID(langID.data(), tmpLocaleID.data(), &status);
2315
0
            }
2316
0
        }
2317
2318
        // fall through - all keywords are simply ignored
2319
0
        status = U_ZERO_ERROR;
2320
0
    }
2321
2322
0
    return uprv_convertToLCID(langID.data(), localeID, &status);
2323
0
}
2324
2325
U_CAPI int32_t U_EXPORT2
2326
uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2327
                UErrorCode *status)
2328
0
{
2329
0
    return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2330
0
}
2331
2332
/* ### Default locale **************************************************/
2333
2334
U_CAPI const char*  U_EXPORT2
2335
uloc_getDefault()
2336
5.72k
{
2337
5.72k
    return locale_get_default();
2338
5.72k
}
2339
2340
U_CAPI void  U_EXPORT2
2341
uloc_setDefault(const char*   newDefaultLocale,
2342
             UErrorCode* err)
2343
0
{
2344
0
    if (U_FAILURE(*err))
2345
0
        return;
2346
    /* the error code isn't currently used for anything by this function*/
2347
2348
    /* propagate change to C++ */
2349
0
    locale_set_default(newDefaultLocale);
2350
0
}
2351
2352
/**
2353
 * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
2354
 * to an array of pointers to arrays of char.  All of these pointers are owned
2355
 * by ICU-- do not delete them, and do not write through them.  The array is
2356
 * terminated with a null pointer.
2357
 */
2358
U_CAPI const char* const*  U_EXPORT2
2359
uloc_getISOLanguages()
2360
0
{
2361
0
    return LANGUAGES;
2362
0
}
2363
2364
/**
2365
 * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2366
 * pointer to an array of pointers to arrays of char.  All of these pointers are
2367
 * owned by ICU-- do not delete them, and do not write through them.  The array is
2368
 * terminated with a null pointer.
2369
 */
2370
U_CAPI const char* const*  U_EXPORT2
2371
uloc_getISOCountries()
2372
0
{
2373
0
    return COUNTRIES;
2374
0
}
2375
2376
U_CAPI const char* U_EXPORT2
2377
uloc_toUnicodeLocaleKey(const char* keyword)
2378
0
{
2379
0
    if (keyword == nullptr || *keyword == '\0') { return nullptr; }
2380
0
    std::optional<std::string_view> result = ulocimp_toBcpKeyWithFallback(keyword);
2381
0
    return result.has_value() ? result->data() : nullptr;  // Known to be NUL terminated.
2382
0
}
2383
2384
U_EXPORT std::optional<std::string_view>
2385
ulocimp_toBcpKeyWithFallback(std::string_view keyword)
2386
4.63k
{
2387
4.63k
    std::optional<std::string_view> bcpKey = ulocimp_toBcpKey(keyword);
2388
4.63k
    if (!bcpKey.has_value() &&
2389
4.63k
        ultag_isUnicodeLocaleKey(keyword.data(), static_cast<int32_t>(keyword.size()))) {
2390
        // unknown keyword, but syntax is fine..
2391
0
        return keyword;
2392
0
    }
2393
4.63k
    return bcpKey;
2394
4.63k
}
2395
2396
U_CAPI const char* U_EXPORT2
2397
uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2398
4.63k
{
2399
4.63k
    if (keyword == nullptr || *keyword == '\0' ||
2400
4.63k
        value == nullptr || *value == '\0') { return nullptr; }
2401
4.63k
    std::optional<std::string_view> result = ulocimp_toBcpTypeWithFallback(keyword, value);
2402
4.63k
    return result.has_value() ? result->data() : nullptr;  // Known to be NUL terminated.
2403
4.63k
}
2404
2405
U_EXPORT std::optional<std::string_view>
2406
ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value)
2407
9.27k
{
2408
9.27k
    std::optional<std::string_view> bcpType = ulocimp_toBcpType(keyword, value);
2409
9.27k
    if (!bcpType.has_value() &&
2410
9.27k
        ultag_isUnicodeLocaleType(value.data(), static_cast<int32_t>(value.size()))) {
2411
        // unknown keyword, but syntax is fine..
2412
0
        return value;
2413
0
    }
2414
9.27k
    return bcpType;
2415
9.27k
}
2416
2417
namespace {
2418
2419
bool
2420
isWellFormedLegacyKey(std::string_view key)
2421
0
{
2422
0
    return std::all_of(key.begin(), key.end(), UPRV_ISALPHANUM);
2423
0
}
2424
2425
bool
2426
isWellFormedLegacyType(std::string_view legacyType)
2427
0
{
2428
0
    int32_t alphaNumLen = 0;
2429
0
    for (char c : legacyType) {
2430
0
        if (c == '_' || c == '/' || c == '-') {
2431
0
            if (alphaNumLen == 0) {
2432
0
                return false;
2433
0
            }
2434
0
            alphaNumLen = 0;
2435
0
        } else if (UPRV_ISALPHANUM(c)) {
2436
0
            alphaNumLen++;
2437
0
        } else {
2438
0
            return false;
2439
0
        }
2440
0
    }
2441
0
    return alphaNumLen != 0;
2442
0
}
2443
2444
}  // namespace
2445
2446
U_CAPI const char* U_EXPORT2
2447
uloc_toLegacyKey(const char* keyword)
2448
0
{
2449
0
    if (keyword == nullptr || *keyword == '\0') { return nullptr; }
2450
0
    std::optional<std::string_view> result = ulocimp_toLegacyKeyWithFallback(keyword);
2451
0
    return result.has_value() ? result->data() : nullptr;  // Known to be NUL terminated.
2452
0
}
2453
2454
U_EXPORT std::optional<std::string_view>
2455
ulocimp_toLegacyKeyWithFallback(std::string_view keyword)
2456
4.63k
{
2457
4.63k
    std::optional<std::string_view> legacyKey = ulocimp_toLegacyKey(keyword);
2458
4.63k
    if (!legacyKey.has_value() && isWellFormedLegacyKey(keyword)) {
2459
        // Checks if the specified locale key is well-formed with the legacy locale syntax.
2460
        //
2461
        // Note:
2462
        //  LDML/CLDR provides some definition of keyword syntax in
2463
        //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2464
        //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2465
        //  Keys can only consist of [0-9a-zA-Z].
2466
0
        return keyword;
2467
0
    }
2468
4.63k
    return legacyKey;
2469
4.63k
}
2470
2471
U_CAPI const char* U_EXPORT2
2472
uloc_toLegacyType(const char* keyword, const char* value)
2473
0
{
2474
0
    if (keyword == nullptr || *keyword == '\0' ||
2475
0
        value == nullptr || *value == '\0') { return nullptr; }
2476
0
    std::optional<std::string_view> result = ulocimp_toLegacyTypeWithFallback(keyword, value);
2477
0
    return result.has_value() ? result->data() : nullptr;  // Known to be NUL terminated.
2478
0
}
2479
2480
U_EXPORT std::optional<std::string_view>
2481
ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value)
2482
4.63k
{
2483
4.63k
    std::optional<std::string_view> legacyType = ulocimp_toLegacyType(keyword, value);
2484
4.63k
    if (!legacyType.has_value() && isWellFormedLegacyType(value)) {
2485
        // Checks if the specified locale type is well-formed with the legacy locale syntax.
2486
        //
2487
        // Note:
2488
        //  LDML/CLDR provides some definition of keyword syntax in
2489
        //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2490
        //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2491
        //  Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2492
        //  we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
2493
0
        return value;
2494
0
    }
2495
4.63k
    return legacyType;
2496
4.63k
}
2497
2498
/*eof*/