Coverage Report

Created: 2023-06-07 07:17

/src/icu/source/common/uloc.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (C) 1997-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*
9
* File ULOC.CPP
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   04/01/97    aliu        Creation.
15
*   08/21/98    stephen     JDK 1.2 sync
16
*   12/08/98    rtg         New Locale implementation and C API
17
*   03/15/99    damiba      overhaul.
18
*   04/06/99    stephen     changed setDefault() to realloc and copy
19
*   06/14/99    stephen     Changed calls to ures_open for new params
20
*   07/21/99    stephen     Modified setDefault() to propagate to C++
21
*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
22
*                           brought canonicalization code into line with spec
23
*****************************************************************************/
24
25
/*
26
   POSIX's locale format, from putil.c: [no spaces]
27
28
     ll [ _CC ] [ . MM ] [ @ VV]
29
30
     l = lang, C = ctry, M = charmap, V = variant
31
*/
32
33
#include "unicode/utypes.h"
34
#include "unicode/ustring.h"
35
#include "unicode/uloc.h"
36
37
#include "putilimp.h"
38
#include "ustr_imp.h"
39
#include "ulocimp.h"
40
#include "umutex.h"
41
#include "cstring.h"
42
#include "cmemory.h"
43
#include "locmap.h"
44
#include "uarrsort.h"
45
#include "uenumimp.h"
46
#include "uassert.h"
47
#include "charstr.h"
48
49
#include <stdio.h> /* for sprintf */
50
51
U_NAMESPACE_USE
52
53
/* ### Declarations **************************************************/
54
55
/* Locale stuff from locid.cpp */
56
U_CFUNC void locale_set_default(const char *id);
57
U_CFUNC const char *locale_get_default(void);
58
U_CFUNC int32_t
59
locale_getKeywords(const char *localeID,
60
            char prev,
61
            char *keywords, int32_t keywordCapacity,
62
            char *values, int32_t valuesCapacity, int32_t *valLen,
63
            UBool valuesToo,
64
            UErrorCode *status);
65
66
/* ### Data tables **************************************************/
67
68
/**
69
 * Table of language codes, both 2- and 3-letter, with preference
70
 * given to 2-letter codes where possible.  Includes 3-letter codes
71
 * that lack a 2-letter equivalent.
72
 *
73
 * This list must be in sorted order.  This list is returned directly
74
 * to the user by some API.
75
 *
76
 * This list must be kept in sync with LANGUAGES_3, with corresponding
77
 * entries matched.
78
 *
79
 * This table should be terminated with a NULL entry, followed by a
80
 * second list, and another NULL entry.  The first list is visible to
81
 * user code when this array is returned by API.  The second list
82
 * contains codes we support, but do not expose through user API.
83
 *
84
 * Notes
85
 *
86
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
87
 * include the revisions up to 2001/7/27 *CWB*
88
 *
89
 * The 3 character codes are the terminology codes like RFC 3066.  This
90
 * is compatible with prior ICU codes
91
 *
92
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
93
 * table but now at the end of the table because 3 character codes are
94
 * duplicates.  This avoids bad searches going from 3 to 2 character
95
 * codes.
96
 *
97
 * The range qaa-qtz is reserved for local use
98
 */
99
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
100
/* ISO639 table version is 20150505 */
101
static const char * const LANGUAGES[] = {
102
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
103
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
104
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
105
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
106
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
107
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
108
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
109
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
110
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
111
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
112
    "ca",  "cad", "car", "cay", "cch", "ce",  "ceb", "cgg",
113
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
114
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
115
    "cs",  "csb", "cu",  "cv",  "cy",
116
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
117
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
118
    "dyo", "dyu", "dz",  "dzg",
119
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
120
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
121
    "ext",
122
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
123
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
124
    "frs", "fur", "fy",
125
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
126
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
127
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
128
    "gur", "guz", "gv",  "gwi",
129
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
130
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
131
    "hup", "hy",  "hz",
132
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
133
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
134
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
135
    "jv",
136
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
137
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
138
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
139
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
140
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
141
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
142
    "kv",  "kw",  "ky",
143
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
144
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
145
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
146
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
147
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
148
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
149
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
150
    "ml",  "mn",  "mnc", "mni", "moh", "mos", "mr",  "mrj",
151
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
152
    "my",  "mye", "myv", "mzn",
153
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
154
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
155
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
156
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
157
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
158
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
159
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
160
    "pon", "prg", "pro", "ps",  "pt",
161
    "qu",  "quc", "qug",
162
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
163
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
164
    "rw",  "rwk",
165
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
166
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
167
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
168
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
169
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
170
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
171
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
172
    "sv",  "sw",  "swb", "swc", "syc", "syr", "szl",
173
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
174
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr", "tl",
175
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
176
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
177
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
178
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
179
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
180
    "vot", "vro", "vun",
181
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
182
    "xal", "xh",  "xmf", "xog",
183
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
184
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
185
    "zun", "zxx", "zza",
186
NULL,
187
    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
188
NULL
189
};
190
191
/*
 * Withdrawn 2-letter language codes.  Entries line up by index with
 * REPLACEMENT_LANGUAGES, which holds the code that supersedes each one.
 * The list ends with two NULL entries.
 */
static const char* const DEPRECATED_LANGUAGES[]={
192
    "in", "iw", "ji", "jw", NULL, NULL
193
};
194
/*
 * Current language codes that replace the entries of DEPRECATED_LANGUAGES,
 * index for index ("in" -> "id", "iw" -> "he", "ji" -> "yi", "jw" -> "jv").
 * The list ends with two NULL entries.
 */
static const char* const REPLACEMENT_LANGUAGES[]={
195
    "id", "he", "yi", "jv", NULL, NULL
196
};
197
198
/**
199
 * Table of 3-letter language codes.
200
 *
201
 * This is a lookup table used to convert 3-letter language codes to
202
 * their 2-letter equivalent, where possible.  It must be kept in sync
203
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
204
 * same language as LANGUAGES_3[i].  The commented-out lines are
205
 * copied from LANGUAGES to make eyeballing this baby easier.
206
 *
207
 * Where a 3-letter language code has no 2-letter equivalent, the
208
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
209
 *
210
 * This table should be terminated with a NULL entry, followed by a
211
 * second list, and another NULL entry.  The two lists correspond to
212
 * the two lists in LANGUAGES.
213
 */
214
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
215
/* ISO639 table version is 20150505 */
216
static const char * const LANGUAGES_3[] = {
217
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
218
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
219
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
220
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
221
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
222
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
223
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
224
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
225
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
226
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
227
    "cat", "cad", "car", "cay", "cch", "che", "ceb", "cgg",
228
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
229
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
230
    "ces", "csb", "chu", "chv", "cym",
231
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
232
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
233
    "dyo", "dyu", "dzo", "dzg",
234
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
235
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
236
    "ext",
237
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
238
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
239
    "frs", "fur", "fry",
240
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
241
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
242
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
243
    "gur", "guz", "glv", "gwi",
244
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
245
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
246
    "hup", "hye", "her",
247
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
248
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
249
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
250
    "jav",
251
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
252
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
253
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
254
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
255
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
256
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
257
    "kom", "cor", "kir",
258
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
259
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
260
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
261
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
262
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
263
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
264
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
265
    "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
266
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
267
    "mya", "mye", "myv", "mzn",
268
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
269
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
270
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
271
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
272
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
273
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
274
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
275
    "pon", "prg", "pro", "pus", "por",
276
    "que", "quc", "qug",
277
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
278
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
279
    "kin", "rwk",
280
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
281
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
282
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
283
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
284
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
285
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
286
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
287
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
288
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
289
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
290
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
291
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
292
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
293
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
294
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
295
    "vot", "vro", "vun",
296
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
297
    "xal", "xho", "xmf", "xog",
298
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
299
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
300
    "zun", "zxx", "zza",
301
NULL,
302
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
303
    "ind", "heb", "yid", "jaw", "srp",
304
NULL
305
};
306
307
/**
308
 * Table of 2-letter country codes.
309
 *
310
 * This list must be in sorted order.  This list is returned directly
311
 * to the user by some API.
312
 *
313
 * This list must be kept in sync with COUNTRIES_3, with corresponding
314
 * entries matched.
315
 *
316
 * This table should be terminated with a NULL entry, followed by a
317
 * second list, and another NULL entry.  The first list is visible to
318
 * user code when this array is returned by API.  The second list
319
 * contains codes we support, but do not expose through user API.
320
 *
321
 * Notes:
322
 *
323
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
324
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
325
 * new codes keeping the old ones for compatibility updated to include
326
 * 1999/12/03 revisions *CWB*
327
 *
328
 * RO(ROM) is now RO(ROU) according to
329
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
330
 */
331
static const char * const COUNTRIES[] = {
332
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
333
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
334
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
335
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
336
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
337
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
338
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
339
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
340
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
341
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
342
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
343
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
344
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
345
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
346
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
347
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
348
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
349
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
350
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
351
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
352
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
353
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
354
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
355
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
356
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
357
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
358
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
359
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
360
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
361
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
362
NULL,
363
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
364
NULL
365
};
366
367
/*
 * Deprecated 2-letter country codes.  Entries line up by index with
 * REPLACEMENT_COUNTRIES, which holds the code that supersedes each one.
 * The list ends with two NULL entries.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
368
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
369
};
370
/*
 * Current country codes that replace the entries of DEPRECATED_COUNTRIES,
 * index for index.  The commented-out row below repeats the deprecated
 * codes so the pairing can be checked by eye.
 * The list ends with two NULL entries.
 */
static const char* const REPLACEMENT_COUNTRIES[] = {
371
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
372
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
373
};
374
375
/**
376
 * Table of 3-letter country codes.
377
 *
378
 * This is a lookup table used to convert 3-letter country codes to
379
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
380
 * For all valid i, COUNTRIES[i] must refer to the same country as
381
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
382
 * to make eyeballing this baby easier.
383
 *
384
 * This table should be terminated with a NULL entry, followed by a
385
 * second list, and another NULL entry.  The two lists correspond to
386
 * the two lists in COUNTRIES.
387
 */
388
static const char * const COUNTRIES_3[] = {
389
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
390
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
391
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
392
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
393
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
394
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
395
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
396
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
397
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
398
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
399
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
400
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
401
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
402
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
403
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
404
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
405
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
406
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
407
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
408
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
409
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
410
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
411
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
412
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
413
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
414
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
415
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
416
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
417
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
418
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
419
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
420
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
421
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
422
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
423
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
424
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
425
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
426
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
427
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
428
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
429
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
430
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
431
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
432
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
433
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
434
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
435
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
436
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
437
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
438
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
439
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
440
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
441
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
442
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
443
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
444
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
445
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
446
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
447
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
448
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
449
NULL,
450
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
451
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
452
NULL
453
};
454
455
/*
 * One row of a locale-ID canonicalization table: maps a legacy or
 * non-standard input ID to its canonical ID, optionally together with a
 * keyword/value pair that belongs to the canonical form.
 */
typedef struct CanonicalizationMap {
456
    const char *id;          /* input ID to be matched */
457
    const char *canonicalID; /* canonicalized output ID */
458
    const char *keyword;     /* keyword accompanying canonicalID, or NULL if none */
459
    const char *value;       /* value for keyword, or NULL when keyword is NULL */
460
} CanonicalizationMap;
461
462
/**
463
 * A map to canonicalize locale IDs.  This handles a variety of
464
 * different semantic kinds of transformations.
465
 */
466
static const CanonicalizationMap CANONICALIZE_MAP[] = {
467
    { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
468
    { "c",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
469
    { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
470
    { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
471
    { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
472
    { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
473
    { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
474
    { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
475
    { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
476
    { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
477
    { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
478
    { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
479
    { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
480
    { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
481
    { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
482
    { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
483
    { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
484
    { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
485
    { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
486
    { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
487
    { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
488
    { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
489
    { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
490
    { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
491
    { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
492
    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
493
    { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
494
    { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
495
    { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
496
    { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
497
    { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
498
    { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
499
    { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
500
    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
501
    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
502
    { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
503
    { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
504
    { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
505
    { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
506
    { "zh_GAN",         "gan", NULL, NULL }, /* registered name */
507
    { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
508
    { "zh_HAKKA",       "hak", NULL, NULL }, /* registered name */
509
    { "zh_MIN_NAN",     "nan", NULL, NULL }, /* registered name */
510
    { "zh_WUU",         "wuu", NULL, NULL }, /* registered name */
511
    { "zh_XIANG",       "hsn", NULL, NULL }, /* registered name */
512
    { "zh_YUE",         "yue", NULL, NULL }, /* registered name */
513
};
514
515
/*
 * One row of a variant table: associates a variant name with a
 * keyword/value pair.
 */
typedef struct VariantMap {
516
    const char *variant;     /* variant name to be matched */
517
    const char *keyword;     /* keyword, or NULL if none */
518
    const char *value;       /* value for keyword, or NULL when keyword is NULL */
519
} VariantMap;
520
521
/*
 * Variant names paired with a keyword and value: EURO with currency=EUR,
 * PINYIN and STROKE with the collation type of the same name.
 */
static const VariantMap VARIANT_MAP[] = {
522
    { "EURO",   "currency", "EUR" },
523
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
524
    { "STROKE", "collation", "stroke" }  /* Solaris variant */
525
};
526
527
/* ### BCP47 Conversion *******************************************/
528
/*
 * Test whether a locale ID looks like a BCP47 language tag: the ID is not
 * NULL, contains no '@', and getShortestSubtagLength() reports a shortest
 * subtag of exactly one character.
 *
 * Only the macro argument is inspected; the macro no longer depends on the
 * caller having a variable named `localeID` in scope.  `id` is evaluated
 * more than once, so pass an expression without side effects.
 */
#define _hasBCP47Extension(id) ((id) != NULL && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
530
/*
 * Converts the BCP47 id to Unicode id.
 *
 * Calls uloc_forLanguageTag(id, buffer, length, NULL, err).  When that call
 * returns a positive length and *err is not a failure, finalID is set to
 * buffer; otherwise finalID is set to id, i.e. the input is used unchanged.
 *
 * The expansion is wrapped in do { } while (0) so that it behaves as a single
 * statement: it can be used as the body of an unbraced if/else without
 * capturing the caller's `else`.  Invoke it with a trailing semicolon.
 */
#define _ConvertBCP47(finalID, id, buffer, length, err) \
    do { \
        if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 || U_FAILURE(*(err))) { \
            (finalID) = (id); \
        } else { \
            (finalID) = (buffer); \
        } \
    } while (0)
537
/*
 * Gets the size of the shortest subtag in the given localeID.
 *
 * Subtags are runs of characters delimited by '_' or '-'.  Only non-empty
 * runs that are followed by a separator take part in the minimum; the run
 * after the last separator is not measured.  When no such run exists (no
 * separator, or only leading separators) the length of the whole string is
 * returned.
 */
static int32_t getShortestSubtagLength(const char *localeID) {
    int32_t shortest = -1;   /* -1 until a separator-terminated subtag is seen */
    int32_t runLength = 0;   /* characters in the subtag currently being read */
    int32_t totalLength = 0; /* characters consumed so far */
    const char *cursor;

    for (cursor = localeID; *cursor != 0; ++cursor, ++totalLength) {
        if (*cursor == '_' || *cursor == '-') {
            /* A separator closes the current run; empty runs are ignored. */
            if (runLength != 0 && (shortest < 0 || runLength < shortest)) {
                shortest = runLength;
            }
            runLength = 0;
        } else {
            ++runLength;
        }
    }

    return (shortest < 0) ? totalLength : shortest;
}
562
563
/* ### Keywords **************************************************/
564
0
/* True when c is one of the characters '0'..'9'.  c is evaluated twice. */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
565
0
/* True when c is an ASCII letter (per uprv_isASCIILetter) or a digit.  c is evaluated more than once. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
566
/* Punctuation/symbols allowed in legacy key values */
567
0
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')
568
569
0
/* Size in bytes of a canonical keyword-name buffer, including the terminating NUL. */
#define ULOC_KEYWORD_BUFFER_LEN 25
570
0
/* Capacity of the keyword list built by _getKeywords; one more keyword yields U_INTERNAL_PROGRAM_ERROR. */
#define ULOC_MAX_NO_KEYWORDS 25
571
572
/*
 * Locate the start of the keyword section of a locale ID.
 *
 * Returns a pointer to the first '@' in localeID, or NULL when there is
 * none.  On EBCDIC builds the '@' code point is variant, so a list of known
 * EBCDIC encodings of '@' is tried as well, in table order.
 */
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID) {
    const char *atSign = uprv_strchr(localeID, '@');
    if (atSign != NULL) {
        return atSign;
    }
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    {
        /* We do this because the @ sign is variant, and the @ sign used on one
        EBCDIC machine won't be compiled the same way on other EBCDIC based
        machines. */
        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
        const uint8_t *candidate;
        for (candidate = ebcdicSigns; *candidate != 0; ++candidate) {
            const char *hit = uprv_strchr(localeID, *candidate);
            if (hit != NULL) {
                return hit;
            }
        }
    }
#endif
    return NULL;
}
595
596
/**
597
 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
598
 * @param keywordName incoming name to be canonicalized
599
 * @param status return status (keyword too long)
600
 * @return length of the keyword name
601
 */
602
static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
603
0
{
604
0
  int32_t keywordNameLen = 0;
605
606
0
  for (; *keywordName != 0; keywordName++) {
607
0
    if (!UPRV_ISALPHANUM(*keywordName)) {
608
0
      *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
609
0
      return 0;
610
0
    }
611
0
    if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
612
0
      buf[keywordNameLen++] = uprv_tolower(*keywordName);
613
0
    } else {
614
      /* keyword name too long for internal buffer */
615
0
      *status = U_INTERNAL_PROGRAM_ERROR;
616
0
      return 0;
617
0
    }
618
0
  }
619
0
  if (keywordNameLen == 0) {
620
0
    *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
621
0
    return 0;
622
0
  }
623
0
  buf[keywordNameLen] = 0; /* terminate */
624
625
0
  return keywordNameLen;
626
0
}
627
628
/*
 * One keyword entry collected while parsing a locale ID's keyword list.
 * Entries are ordered by their keyword string (see compareKeywordStructs).
 */
typedef struct {
629
    char keyword[ULOC_KEYWORD_BUFFER_LEN]; /* NUL-terminated keyword name; the sort key */
630
    int32_t keywordLen;                    /* presumably the length of keyword, excluding the NUL - confirm in _getKeywords */
631
    const char *valueStart;                /* presumably points at the value text, not a copy - confirm in _getKeywords */
632
    int32_t valueLen;                      /* presumably the number of characters at valueStart - confirm in _getKeywords */
633
} KeywordStruct;
634
635
/**
 * Comparison callback handed to uprv_sortArray(): orders two KeywordStruct
 * entries by the uprv_strcmp() result of their keyword names.
 * The context argument is unused.
 */
static int32_t U_CALLCONV
compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
    const KeywordStruct *lhs = (const KeywordStruct *)left;
    const KeywordStruct *rhs = (const KeywordStruct *)right;
    return uprv_strcmp(lhs->keyword, rhs->keyword);
}
641
642
/**
643
 * Both addKeyword and addValue must already be in canonical form.
644
 * Either both addKeyword and addValue are NULL, or neither is NULL.
645
 * If they are not NULL they must be zero terminated.
646
 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
647
 */
648
static int32_t
649
_getKeywords(const char *localeID,
650
             char prev,
651
             char *keywords, int32_t keywordCapacity,
652
             char *values, int32_t valuesCapacity, int32_t *valLen,
653
             UBool valuesToo,
654
             const char* addKeyword,
655
             const char* addValue,
656
             UErrorCode *status)
657
0
{
658
0
    KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
659
660
0
    int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
661
0
    int32_t numKeywords = 0;
662
0
    const char* pos = localeID;
663
0
    const char* equalSign = NULL;
664
0
    const char* semicolon = NULL;
665
0
    int32_t i = 0, j, n;
666
0
    int32_t keywordsLen = 0;
667
0
    int32_t valuesLen = 0;
668
669
0
    if(prev == '@') { /* start of keyword definition */
670
        /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
671
0
        do {
672
0
            UBool duplicate = FALSE;
673
            /* skip leading spaces */
674
0
            while(*pos == ' ') {
675
0
                pos++;
676
0
            }
677
0
            if (!*pos) { /* handle trailing "; " */
678
0
                break;
679
0
            }
680
0
            if(numKeywords == maxKeywords) {
681
0
                *status = U_INTERNAL_PROGRAM_ERROR;
682
0
                return 0;
683
0
            }
684
0
            equalSign = uprv_strchr(pos, '=');
685
0
            semicolon = uprv_strchr(pos, ';');
686
            /* lack of '=' [foo@currency] is illegal */
687
            /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
688
0
            if(!equalSign || (semicolon && semicolon<equalSign)) {
689
0
                *status = U_INVALID_FORMAT_ERROR;
690
0
                return 0;
691
0
            }
692
            /* need to normalize both keyword and keyword name */
693
0
            if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
694
                /* keyword name too long for internal buffer */
695
0
                *status = U_INTERNAL_PROGRAM_ERROR;
696
0
                return 0;
697
0
            }
698
0
            for(i = 0, n = 0; i < equalSign - pos; ++i) {
699
0
                if (pos[i] != ' ') {
700
0
                    keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
701
0
                }
702
0
            }
703
704
            /* zero-length keyword is an error. */
705
0
            if (n == 0) {
706
0
                *status = U_INVALID_FORMAT_ERROR;
707
0
                return 0;
708
0
            }
709
710
0
            keywordList[numKeywords].keyword[n] = 0;
711
0
            keywordList[numKeywords].keywordLen = n;
712
            /* now grab the value part. First we skip the '=' */
713
0
            equalSign++;
714
            /* then we leading spaces */
715
0
            while(*equalSign == ' ') {
716
0
                equalSign++;
717
0
            }
718
719
            /* Premature end or zero-length value */
720
0
            if (!*equalSign || equalSign == semicolon) {
721
0
                *status = U_INVALID_FORMAT_ERROR;
722
0
                return 0;
723
0
            }
724
725
0
            keywordList[numKeywords].valueStart = equalSign;
726
727
0
            pos = semicolon;
728
0
            i = 0;
729
0
            if(pos) {
730
0
                while(*(pos - i - 1) == ' ') {
731
0
                    i++;
732
0
                }
733
0
                keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
734
0
                pos++;
735
0
            } else {
736
0
                i = (int32_t)uprv_strlen(equalSign);
737
0
                while(i && equalSign[i-1] == ' ') {
738
0
                    i--;
739
0
                }
740
0
                keywordList[numKeywords].valueLen = i;
741
0
            }
742
            /* If this is a duplicate keyword, then ignore it */
743
0
            for (j=0; j<numKeywords; ++j) {
744
0
                if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
745
0
                    duplicate = TRUE;
746
0
                    break;
747
0
                }
748
0
            }
749
0
            if (!duplicate) {
750
0
                ++numKeywords;
751
0
            }
752
0
        } while(pos);
753
754
        /* Handle addKeyword/addValue. */
755
0
        if (addKeyword != NULL) {
756
0
            UBool duplicate = FALSE;
757
0
            U_ASSERT(addValue != NULL);
758
            /* Search for duplicate; if found, do nothing. Explicit keyword
759
               overrides addKeyword. */
760
0
            for (j=0; j<numKeywords; ++j) {
761
0
                if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
762
0
                    duplicate = TRUE;
763
0
                    break;
764
0
                }
765
0
            }
766
0
            if (!duplicate) {
767
0
                if (numKeywords == maxKeywords) {
768
0
                    *status = U_INTERNAL_PROGRAM_ERROR;
769
0
                    return 0;
770
0
                }
771
0
                uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
772
0
                keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
773
0
                keywordList[numKeywords].valueStart = addValue;
774
0
                keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
775
0
                ++numKeywords;
776
0
            }
777
0
        } else {
778
0
            U_ASSERT(addValue == NULL);
779
0
        }
780
781
        /* now we have a list of keywords */
782
        /* we need to sort it */
783
0
        uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
784
785
        /* Now construct the keyword part */
786
0
        for(i = 0; i < numKeywords; i++) {
787
0
            if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
788
0
                uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
789
0
                if(valuesToo) {
790
0
                    keywords[keywordsLen + keywordList[i].keywordLen] = '=';
791
0
                } else {
792
0
                    keywords[keywordsLen + keywordList[i].keywordLen] = 0;
793
0
                }
794
0
            }
795
0
            keywordsLen += keywordList[i].keywordLen + 1;
796
0
            if(valuesToo) {
797
0
                if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
798
0
                    uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
799
0
                }
800
0
                keywordsLen += keywordList[i].valueLen;
801
802
0
                if(i < numKeywords - 1) {
803
0
                    if(keywordsLen < keywordCapacity) {
804
0
                        keywords[keywordsLen] = ';';
805
0
                    }
806
0
                    keywordsLen++;
807
0
                }
808
0
            }
809
0
            if(values) {
810
0
                if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
811
0
                    uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
812
0
                    values[valuesLen + keywordList[i].valueLen] = 0;
813
0
                }
814
0
                valuesLen += keywordList[i].valueLen + 1;
815
0
            }
816
0
        }
817
0
        if(values) {
818
0
            values[valuesLen] = 0;
819
0
            if(valLen) {
820
0
                *valLen = valuesLen;
821
0
            }
822
0
        }
823
0
        return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
824
0
    } else {
825
0
        return 0;
826
0
    }
827
0
}
828
829
/**
 * Entry point for keyword extraction without an additional keyword:
 * forwards every argument to _getKeywords() and passes NULL for both
 * addKeyword and addValue.
 */
U_CFUNC int32_t
locale_getKeywords(const char *localeID,
                   char prev,
                   char *keywords, int32_t keywordCapacity,
                   char *values, int32_t valuesCapacity, int32_t *valLen,
                   UBool valuesToo,
                   UErrorCode *status) {
    const char *const noAddedKeyword = NULL;
    const char *const noAddedValue = NULL;

    return _getKeywords(localeID, prev,
                        keywords, keywordCapacity,
                        values, valuesCapacity, valLen,
                        valuesToo,
                        noAddedKeyword, noAddedValue,
                        status);
}
840
841
/**
 * Looks up the value of one keyword in a locale ID.
 *
 * The requested name is canonicalized with locale_canonKeywordName(); each
 * keyword name found in the locale ID is trimmed of surrounding spaces,
 * validated and lowercased before the comparison. The value of the first
 * matching entry is copied to buffer as-is (not lowercased), without its
 * surrounding spaces.
 *
 * @param localeID       locale ID to search; a BCP47 tag is converted first
 * @param keywordName    keyword to look for; must not be NULL or empty
 * @param buffer         receives the value
 * @param bufferCapacity capacity of buffer
 * @param status         U_ILLEGAL_ARGUMENT_ERROR for a bad keywordName or a
 *                       malformed keyword list, U_INTERNAL_PROGRAM_ERROR for a
 *                       keyword name longer than the internal buffer
 * @return the full length of the value (via u_terminateChars), or 0 when the
 *         keyword is absent, on error, or when status/localeID is unusable
 */
U_CAPI int32_t U_EXPORT2
uloc_getKeywordValue(const char* localeID,
                     const char* keywordName,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status)
{
    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char* tmpLocaleID;

    if (status == NULL || U_FAILURE(*status) || localeID == NULL) {
        return 0;
    }

    if (keywordName == NULL || keywordName[0] == 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
    }

    locale_canonKeywordName(keywordNameBuffer, keywordName, status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    if (_hasBCP47Extension(localeID)) {
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
    } else {
        tmpLocaleID = localeID;
    }

    /* cursor sits on the '@' or ';' in front of the entry being examined;
       NULL means there are no (more) keywords */
    const char* cursor = locale_getKeywordsStart(tmpLocaleID);
    while (cursor != NULL) {
        ++cursor; /* skip @ or ; */

        const char* equals = uprv_strchr(cursor, '=');
        if (equals == NULL) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
            return 0;
        }

        /* strip leading & trailing spaces (TC decided to tolerate these) */
        while (*cursor == ' ') {
            ++cursor;
        }
        const char* nameEnd = equals;
        while (nameEnd > cursor && nameEnd[-1] == ' ') {
            --nameEnd;
        }
        /* nameEnd now points to the first char after the key name */
        if (cursor == nameEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
            return 0;
        }

        /* copy & normalize the key name from the locale */
        int32_t nameLen = 0;
        for (; cursor < nameEnd; ++cursor) {
            if (!UPRV_ISALPHANUM(*cursor)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
                return 0;
            }
            if (nameLen >= ULOC_KEYWORD_BUFFER_LEN - 1) {
                /* keyword name too long for internal buffer */
                *status = U_INTERNAL_PROGRAM_ERROR;
                return 0;
            }
            localeKeywordNameBuffer[nameLen++] = uprv_tolower(*cursor);
        }
        localeKeywordNameBuffer[nameLen] = 0; /* terminate */

        /* move on to the separator in front of the next entry, if any */
        cursor = uprv_strchr(equals, ';');

        if (uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) != 0) {
            continue; /* not the keyword we are looking for */
        }

        /* current entry matches the keyword */
        const char* value = equals + 1; /* skip '=' */
        /* First strip leading & trailing spaces (TC decided to tolerate these) */
        while (*value == ' ') {
            ++value;
        }
        const char* valueEnd = (cursor != NULL) ? cursor : value + uprv_strlen(value);
        while (valueEnd > value && valueEnd[-1] == ' ') {
            --valueEnd;
        }
        if (value == valueEnd) {
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
            return 0;
        }

        /* Now copy the value, but check well-formedness */
        int32_t valueLen = 0;
        for (; value < valueEnd; ++value) {
            if (!UPRV_ISALPHANUM(*value) && !UPRV_OK_VALUE_PUNCTUATION(*value)) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                return 0;
            }
            if (valueLen < bufferCapacity) {
                /* Should we lowercase value to return here? Tests expect as-is. */
                buffer[valueLen] = *value;
            }
            /* keep counting so we return the correct length in case of overflow */
            ++valueLen;
        }
        return u_terminateChars(buffer, bufferCapacity, valueLen, status);
    }

    return 0;
}
959
960
U_CAPI int32_t U_EXPORT2
961
uloc_setKeywordValue(const char* keywordName,
962
                     const char* keywordValue,
963
                     char* buffer, int32_t bufferCapacity,
964
                     UErrorCode* status)
965
0
{
966
    /* TODO: sorting. removal. */
967
0
    int32_t keywordNameLen;
968
0
    int32_t keywordValueLen;
969
0
    int32_t bufLen;
970
0
    int32_t needLen = 0;
971
0
    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
972
0
    char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
973
0
    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
974
0
    int32_t rc;
975
0
    char* nextSeparator = NULL;
976
0
    char* nextEqualsign = NULL;
977
0
    char* startSearchHere = NULL;
978
0
    char* keywordStart = NULL;
979
0
    CharString updatedKeysAndValues;
980
0
    int32_t updatedKeysAndValuesLen;
981
0
    UBool handledInputKeyAndValue = FALSE;
982
0
    char keyValuePrefix = '@';
983
984
0
    if(U_FAILURE(*status)) {
985
0
        return -1;
986
0
    }
987
0
    if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
988
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
989
0
        return 0;
990
0
    }
991
0
    bufLen = (int32_t)uprv_strlen(buffer);
992
0
    if(bufferCapacity<bufLen) {
993
        /* The capacity is less than the length?! Is this NULL terminated? */
994
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
995
0
        return 0;
996
0
    }
997
0
    keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
998
0
    if(U_FAILURE(*status)) {
999
0
        return 0;
1000
0
    }
1001
1002
0
    keywordValueLen = 0;
1003
0
    if(keywordValue) {
1004
0
        while (*keywordValue != 0) {
1005
0
            if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
1006
0
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
1007
0
                return 0;
1008
0
            }
1009
0
            if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
1010
                /* Should we force lowercase in value to set? */
1011
0
                keywordValueBuffer[keywordValueLen++] = *keywordValue++;
1012
0
            } else {
1013
                /* keywordValue too long for internal buffer */
1014
0
                *status = U_INTERNAL_PROGRAM_ERROR;
1015
0
                return 0;
1016
0
            }
1017
0
        }
1018
0
    }
1019
0
    keywordValueBuffer[keywordValueLen] = 0; /* terminate */
1020
1021
0
    startSearchHere = (char*)locale_getKeywordsStart(buffer);
1022
0
    if(startSearchHere == NULL || (startSearchHere[1]==0)) {
1023
0
        if(keywordValueLen == 0) { /* no keywords = nothing to remove */
1024
0
            return bufLen;
1025
0
        }
1026
1027
0
        needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1028
0
        if(startSearchHere) { /* had a single @ */
1029
0
            needLen--; /* already had the @ */
1030
            /* startSearchHere points at the @ */
1031
0
        } else {
1032
0
            startSearchHere=buffer+bufLen;
1033
0
        }
1034
0
        if(needLen >= bufferCapacity) {
1035
0
            *status = U_BUFFER_OVERFLOW_ERROR;
1036
0
            return needLen; /* no change */
1037
0
        }
1038
0
        *startSearchHere++ = '@';
1039
0
        uprv_strcpy(startSearchHere, keywordNameBuffer);
1040
0
        startSearchHere += keywordNameLen;
1041
0
        *startSearchHere++ = '=';
1042
0
        uprv_strcpy(startSearchHere, keywordValueBuffer);
1043
0
        return needLen;
1044
0
    } /* end shortcut - no @ */
1045
1046
0
    keywordStart = startSearchHere;
1047
    /* search for keyword */
1048
0
    while(keywordStart) {
1049
0
        const char* keyValueTail;
1050
0
        int32_t keyValueLen;
1051
1052
0
        keywordStart++; /* skip @ or ; */
1053
0
        nextEqualsign = uprv_strchr(keywordStart, '=');
1054
0
        if (!nextEqualsign) {
1055
0
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
1056
0
            return 0;
1057
0
        }
1058
        /* strip leading & trailing spaces (TC decided to tolerate these) */
1059
0
        while(*keywordStart == ' ') {
1060
0
            keywordStart++;
1061
0
        }
1062
0
        keyValueTail = nextEqualsign;
1063
0
        while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
1064
0
            keyValueTail--;
1065
0
        }
1066
        /* now keyValueTail points to first char after the keyName */
1067
        /* copy & normalize keyName from locale */
1068
0
        if (keywordStart == keyValueTail) {
1069
0
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
1070
0
            return 0;
1071
0
        }
1072
0
        keyValueLen = 0;
1073
0
        while (keywordStart < keyValueTail) {
1074
0
            if (!UPRV_ISALPHANUM(*keywordStart)) {
1075
0
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
1076
0
                return 0;
1077
0
            }
1078
0
            if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
1079
0
                localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
1080
0
            } else {
1081
                /* keyword name too long for internal buffer */
1082
0
                *status = U_INTERNAL_PROGRAM_ERROR;
1083
0
                return 0;
1084
0
            }
1085
0
        }
1086
0
        localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
1087
1088
0
        nextSeparator = uprv_strchr(nextEqualsign, ';');
1089
1090
        /* start processing the value part */
1091
0
        nextEqualsign++; /* skip '=' */
1092
        /* First strip leading & trailing spaces (TC decided to tolerate these) */
1093
0
        while(*nextEqualsign == ' ') {
1094
0
            nextEqualsign++;
1095
0
        }
1096
0
        keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
1097
0
        while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
1098
0
            keyValueTail--;
1099
0
        }
1100
0
        if (nextEqualsign == keyValueTail) {
1101
0
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1102
0
            return 0;
1103
0
        }
1104
1105
0
        rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1106
0
        if(rc == 0) {
1107
            /* Current entry matches the input keyword. Update the entry */
1108
0
            if(keywordValueLen > 0) { /* updating a value */
1109
0
                updatedKeysAndValues.append(keyValuePrefix, *status);
1110
0
                keyValuePrefix = ';'; /* for any subsequent key-value pair */
1111
0
                updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1112
0
                updatedKeysAndValues.append('=', *status);
1113
0
                updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1114
0
            } /* else removing this entry, don't emit anything */
1115
0
            handledInputKeyAndValue = TRUE;
1116
0
        } else {
1117
           /* input keyword sorts earlier than current entry, add before current entry */
1118
0
            if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1119
                /* insert new entry at this location */
1120
0
                updatedKeysAndValues.append(keyValuePrefix, *status);
1121
0
                keyValuePrefix = ';'; /* for any subsequent key-value pair */
1122
0
                updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1123
0
                updatedKeysAndValues.append('=', *status);
1124
0
                updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1125
0
                handledInputKeyAndValue = TRUE;
1126
0
            }
1127
            /* copy the current entry */
1128
0
            updatedKeysAndValues.append(keyValuePrefix, *status);
1129
0
            keyValuePrefix = ';'; /* for any subsequent key-value pair */
1130
0
            updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1131
0
            updatedKeysAndValues.append('=', *status);
1132
0
            updatedKeysAndValues.append(nextEqualsign, keyValueTail-nextEqualsign, *status);
1133
0
        }
1134
0
        if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1135
            /* append new entry at the end, it sorts later than existing entries */
1136
0
            updatedKeysAndValues.append(keyValuePrefix, *status);
1137
            /* skip keyValuePrefix update, no subsequent key-value pair */
1138
0
            updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1139
0
            updatedKeysAndValues.append('=', *status);
1140
0
            updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1141
0
            handledInputKeyAndValue = TRUE;
1142
0
        }
1143
0
        keywordStart = nextSeparator;
1144
0
    } /* end loop searching */
1145
1146
    /* Any error from updatedKeysAndValues.append above would be internal and not due to
1147
     * problems with the passed-in locale. So if we did encounter problems with the
1148
     * passed-in locale above, those errors took precedence and overrode any error
1149
     * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1150
     * are errors here they are from updatedKeysAndValues.append; they do cause an
1151
     * error return but the passed-in locale is unmodified and the original bufLen is
1152
     * returned.
1153
     */
1154
0
    if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1155
        /* if input key/value specified removal of a keyword not present in locale, or
1156
         * there was an error in CharString.append, leave original locale alone. */
1157
0
        return bufLen;
1158
0
    }
1159
1160
0
    updatedKeysAndValuesLen = updatedKeysAndValues.length();
1161
    /* needLen = length of the part before '@' + length of updated key-value part including '@' */
1162
0
    needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
1163
0
    if(needLen >= bufferCapacity) {
1164
0
        *status = U_BUFFER_OVERFLOW_ERROR;
1165
0
        return needLen; /* no change */
1166
0
    }
1167
0
    if (updatedKeysAndValuesLen > 0) {
1168
0
        uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
1169
0
    }
1170
0
    buffer[needLen]=0;
1171
0
    return needLen;
1172
0
}
1173
1174
/* ### ID parsing implementation **************************************************/
1175
1176
4.76k
/* TRUE if a is one of the letters that can start a special prefix: x, X, i, I.
 * Note: the argument is evaluated more than once; do not pass expressions
 * with side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 * Note: the argument is evaluated more than once; do not pass expressions
 * with side effects.
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1186
1187
1.19k
static char* _strnchr(const char* str, int32_t len, char c) {
1188
1.19k
    U_ASSERT(str != 0 && len >= 0);
1189
7.14k
    while (len-- != 0) {
1190
5.95k
        char d = *str;
1191
5.95k
        if (d == c) {
1192
0
            return (char*) str;
1193
5.95k
        } else if (d == 0) {
1194
0
            break;
1195
0
        }
1196
5.95k
        ++str;
1197
5.95k
    }
1198
1.19k
    return NULL;
1199
1.19k
}
1200
1201
/**
1202
 * Lookup 'key' in the array 'list'.  The array 'list' should contain
1203
 * a NULL entry, followed by more entries, and a second NULL entry.
1204
 *
1205
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1206
 * COUNTRIES_3.
1207
 */
1208
static int16_t _findIndex(const char* const* list, const char* key)
1209
0
{
1210
0
    const char* const* anchor = list;
1211
0
    int32_t pass = 0;
1212
1213
    /* Make two passes through two NULL-terminated arrays at 'list' */
1214
0
    while (pass++ < 2) {
1215
0
        while (*list) {
1216
0
            if (uprv_strcmp(key, *list) == 0) {
1217
0
                return (int16_t)(list - anchor);
1218
0
            }
1219
0
            list++;
1220
0
        }
1221
0
        ++list;     /* skip final NULL *CWB*/
1222
0
    }
1223
0
    return -1;
1224
0
}
1225
1226
/* count the length of src while copying it to dest; return strlen(src) */
1227
static inline int32_t
1228
0
_copyCount(char *dest, int32_t destCapacity, const char *src) {
1229
0
    const char *anchor;
1230
0
    char c;
1231
1232
0
    anchor=src;
1233
0
    for(;;) {
1234
0
        if((c=*src)==0) {
1235
0
            return (int32_t)(src-anchor);
1236
0
        }
1237
0
        if(destCapacity<=0) {
1238
0
            return (int32_t)((src-anchor)+uprv_strlen(src));
1239
0
        }
1240
0
        ++src;
1241
0
        *dest++=c;
1242
0
        --destCapacity;
1243
0
    }
1244
0
}
1245
1246
/**
 * Maps a country ID listed in DEPRECATED_COUNTRIES to the entry at the same
 * index in REPLACEMENT_COUNTRIES; any other ID is returned unchanged.
 */
U_CFUNC const char*
uloc_getCurrentCountryID(const char* oldID){
    const int32_t index = _findIndex(DEPRECATED_COUNTRIES, oldID);
    return (index >= 0) ? REPLACEMENT_COUNTRIES[index] : oldID;
}
1254
/**
 * Maps a language ID listed in DEPRECATED_LANGUAGES to the entry at the same
 * index in REPLACEMENT_LANGUAGES; any other ID is returned unchanged.
 */
U_CFUNC const char*
uloc_getCurrentLanguageID(const char* oldID){
    const int32_t index = _findIndex(DEPRECATED_LANGUAGES, oldID);
    return (index >= 0) ? REPLACEMENT_LANGUAGES[index] : oldID;
}
1262
/*
1263
 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1264
 * avoid duplicating code to handle the earlier locale ID pieces
1265
 * in the functions for the later ones by
1266
 * setting the *pEnd pointer to where they stopped parsing
1267
 *
1268
 * TODO try to use this in Locale
1269
 */
1270
U_CFUNC int32_t
1271
ulocimp_getLanguage(const char *localeID,
1272
                    char *language, int32_t languageCapacity,
1273
2.38k
                    const char **pEnd) {
1274
2.38k
    int32_t i=0;
1275
2.38k
    int32_t offset;
1276
2.38k
    char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1277
1278
    /* if it starts with i- or x- then copy that prefix */
1279
2.38k
    if(_isIDPrefix(localeID)) {
1280
0
        if(i<languageCapacity) {
1281
0
            language[i]=(char)uprv_tolower(*localeID);
1282
0
        }
1283
0
        if(i<languageCapacity) {
1284
0
            language[i+1]='-';
1285
0
        }
1286
0
        i+=2;
1287
0
        localeID+=2;
1288
0
    }
1289
1290
    /* copy the language as far as possible and count its length */
1291
7.14k
    while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1292
4.76k
        if(i<languageCapacity) {
1293
4.76k
            language[i]=(char)uprv_tolower(*localeID);
1294
4.76k
        }
1295
4.76k
        if(i<3) {
1296
4.76k
            U_ASSERT(i>=0);
1297
4.76k
            lang[i]=(char)uprv_tolower(*localeID);
1298
4.76k
        }
1299
4.76k
        i++;
1300
4.76k
        localeID++;
1301
4.76k
    }
1302
1303
2.38k
    if(i==3) {
1304
        /* convert 3 character code to 2 character code if possible *CWB*/
1305
0
        offset=_findIndex(LANGUAGES_3, lang);
1306
0
        if(offset>=0) {
1307
0
            i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1308
0
        }
1309
0
    }
1310
1311
2.38k
    if(pEnd!=NULL) {
1312
2.38k
        *pEnd=localeID;
1313
2.38k
    }
1314
2.38k
    return i;
1315
2.38k
}
1316
1317
/**
 * Extracts a script subtag from the start of localeID.
 *
 * A script is recognized only when the leading run of ASCII letters is
 * exactly 4 characters long; it is written title-cased (first letter upper
 * case, the rest lower case). No NUL terminator is added.
 *
 * @param localeID       position in the locale ID where a script may start
 * @param script         output buffer
 * @param scriptCapacity capacity of script
 * @param pEnd           if not NULL, receives localeID+4 when a script was
 *                       found, otherwise localeID itself
 * @return the number of characters written (4 limited to scriptCapacity),
 *         or 0 when there is no script
 */
U_CFUNC int32_t
ulocimp_getScript(const char *localeID,
                  char *script, int32_t scriptCapacity,
                  const char **pEnd)
{
    /* count the letters of the second item */
    int32_t letters = 0;
    while(!_isTerminator(localeID[letters]) && !_isIDSeparator(localeID[letters])
            && uprv_isASCIILetter(localeID[letters])) {
        ++letters;
    }

    /* If it's exactly 4 characters long, then it's a script and not a country. */
    if (letters != 4) {
        if (pEnd != NULL) {
            *pEnd = localeID;
        }
        return 0;
    }

    if (pEnd != NULL) {
        *pEnd = localeID + 4;
    }

    const int32_t copyLen = (scriptCapacity < 4) ? scriptCapacity : 4;
    for (int32_t i = 0; i < copyLen; ++i) {
        if (i == 0) {
            script[i] = (char)uprv_toupper(localeID[i]);
        } else {
            script[i] = (char)uprv_tolower(localeID[i]);
        }
    }
    return copyLen;
}
1355
1356
/**
 * Extracts a country subtag from the start of localeID.
 *
 * A country is recognized only when the run of characters up to the next
 * separator or terminator is 2 or 3 characters long. It is written upper
 * case; a 3-character code found in COUNTRIES_3 is replaced by the
 * corresponding COUNTRIES entry. No NUL terminator is added.
 *
 * @param localeID        position in the locale ID where a country may start
 * @param country         output buffer
 * @param countryCapacity capacity of country
 * @param pEnd            if not NULL, receives the position just after the
 *                        country subtag, or localeID itself when no country
 *                        was recognized
 * @return the full length of the country code written to country (even if it
 *         did not fit), or 0 when there is no country
 */
U_CFUNC int32_t
ulocimp_getCountry(const char *localeID,
                   char *country, int32_t countryCapacity,
                   const char **pEnd)
{
    int32_t idLen=0;      /* number of characters of localeID that form the subtag */
    int32_t resultLen=0;  /* length of the code written to 'country' */
    char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
    int32_t offset;

    /* copy the country as far as possible and count its length */
    while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
        if(idLen<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
            cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
        }
        idLen++;
    }

    /* the country should be either length 2 or 3 */
    if (idLen == 2 || idLen == 3) {
        UBool gotCountry = FALSE;
        /* convert 3 character code to 2 character code if possible *CWB*/
        if(idLen==3) {
            offset=_findIndex(COUNTRIES_3, cnty);
            if(offset>=0) {
                resultLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
                gotCountry = TRUE;
            }
        }
        if (!gotCountry) {
            int32_t i = 0;
            for (i = 0; i < idLen; i++) {
                if (i < countryCapacity) {
                    country[i]=(char)uprv_toupper(localeID[i]);
                }
            }
            resultLen = idLen;
        }
        /* Advance by the number of input characters consumed, which differs
           from resultLen when a 3-letter code was converted to 2 letters. */
        localeID+=idLen;
    }

    if(pEnd!=NULL) {
        *pEnd=localeID;
    }

    return resultLen;
}
1403
1404
/**
1405
 * @param needSeparator if true, then add leading '_' if any variants
1406
 * are added to 'variant'
1407
 */
1408
static int32_t
1409
_getVariantEx(const char *localeID,
1410
              char prev,
1411
              char *variant, int32_t variantCapacity,
1412
2.38k
              UBool needSeparator) {
1413
2.38k
    int32_t i=0;
1414
1415
    /* get one or more variant tags and separate them with '_' */
1416
2.38k
    if(_isIDSeparator(prev)) {
1417
        /* get a variant string after a '-' or '_' */
1418
14.2k
        while(!_isTerminator(*localeID)) {
1419
11.9k
            if (needSeparator) {
1420
0
                if (i<variantCapacity) {
1421
0
                    variant[i] = '_';
1422
0
                }
1423
0
                ++i;
1424
0
                needSeparator = FALSE;
1425
0
            }
1426
11.9k
            if(i<variantCapacity) {
1427
11.9k
                variant[i]=(char)uprv_toupper(*localeID);
1428
11.9k
                if(variant[i]=='-') {
1429
0
                    variant[i]='_';
1430
0
                }
1431
11.9k
            }
1432
11.9k
            i++;
1433
11.9k
            localeID++;
1434
11.9k
        }
1435
2.38k
    }
1436
1437
    /* if there is no variant tag after a '-' or '_' then look for '@' */
1438
2.38k
    if(i==0) {
1439
0
        if(prev=='@') {
1440
            /* keep localeID */
1441
0
        } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1442
0
            ++localeID; /* point after the '@' */
1443
0
        } else {
1444
0
            return 0;
1445
0
        }
1446
0
        while(!_isTerminator(*localeID)) {
1447
0
            if (needSeparator) {
1448
0
                if (i<variantCapacity) {
1449
0
                    variant[i] = '_';
1450
0
                }
1451
0
                ++i;
1452
0
                needSeparator = FALSE;
1453
0
            }
1454
0
            if(i<variantCapacity) {
1455
0
                variant[i]=(char)uprv_toupper(*localeID);
1456
0
                if(variant[i]=='-' || variant[i]==',') {
1457
0
                    variant[i]='_';
1458
0
                }
1459
0
            }
1460
0
            i++;
1461
0
            localeID++;
1462
0
        }
1463
0
    }
1464
1465
2.38k
    return i;
1466
2.38k
}
1467
1468
/**
 * Convenience form of _getVariantEx() that never adds a leading '_'.
 */
static int32_t
_getVariant(const char *localeID,
            char prev,
            char *variant, int32_t variantCapacity) {
    const UBool addLeadingSeparator = FALSE;
    return _getVariantEx(localeID, prev, variant, variantCapacity, addLeadingSeparator);
}
1474
1475
/**
1476
 * Delete ALL instances of a variant from the given list of one or
1477
 * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1478
 * @param variants the source string of one or more variants,
1479
 * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
1480
 * terminated; if it is, trailing zero will NOT be maintained.
1481
 * @param variantsLen length of variants
1482
 * @param toDelete variant to delete, without separators, e.g.  "EURO"
1483
 * or "PREEURO"; not zero terminated
1484
 * @param toDeleteLen length of toDelete
1485
 * @return number of characters deleted from variants
1486
 */
1487
static int32_t
1488
_deleteVariant(char* variants, int32_t variantsLen,
1489
               const char* toDelete, int32_t toDeleteLen)
1490
3.57k
{
1491
3.57k
    int32_t delta = 0; /* number of chars deleted */
1492
3.57k
    for (;;) {
1493
3.57k
        UBool flag = FALSE;
1494
3.57k
        if (variantsLen < toDeleteLen) {
1495
2.38k
            return delta;
1496
2.38k
        }
1497
1.19k
        if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1498
1.19k
            (variantsLen == toDeleteLen ||
1499
0
             (flag=(variants[toDeleteLen] == '_'))))
1500
0
        {
1501
0
            int32_t d = toDeleteLen + (flag?1:0);
1502
0
            variantsLen -= d;
1503
0
            delta += d;
1504
0
            if (variantsLen > 0) {
1505
0
                uprv_memmove(variants, variants+d, variantsLen);
1506
0
            }
1507
1.19k
        } else {
1508
1.19k
            char* p = _strnchr(variants, variantsLen, '_');
1509
1.19k
            if (p == NULL) {
1510
1.19k
                return delta;
1511
1.19k
            }
1512
0
            ++p;
1513
0
            variantsLen -= (int32_t)(p - variants);
1514
0
            variants = p;
1515
0
        }
1516
1.19k
    }
1517
3.57k
}
1518
1519
/* Keyword enumeration */
1520
1521
/*
 * State of a keyword enumeration.  The list is stored as consecutive
 * zero-terminated strings followed by an empty string.
 */
typedef struct UKeywordsContext {
    char* keywords;   /* start of the list; released by uloc_kw_closeKeywords */
    char* current;    /* next item to hand out; points into keywords */
} UKeywordsContext;
1525
1526
U_CDECL_BEGIN
1527
1528
static void U_CALLCONV
1529
0
uloc_kw_closeKeywords(UEnumeration *enumerator) {
1530
0
    uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1531
0
    uprv_free(enumerator->context);
1532
0
    uprv_free(enumerator);
1533
0
}
1534
1535
/*
 * count callback: walks the list of zero-terminated strings from its start
 * until the empty string and returns how many items were seen.
 */
static int32_t U_CALLCONV
uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
    int32_t count = 0;
    char *item = ((UKeywordsContext *)en->context)->keywords;
    for (; *item; item += uprv_strlen(item)+1) {
        ++count;
    }
    return count;
}
1545
1546
static const char * U_CALLCONV
1547
uloc_kw_nextKeyword(UEnumeration* en,
1548
                    int32_t* resultLength,
1549
0
                    UErrorCode* /*status*/) {
1550
0
    const char* result = ((UKeywordsContext *)en->context)->current;
1551
0
    int32_t len = 0;
1552
0
    if(*result) {
1553
0
        len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1554
0
        ((UKeywordsContext *)en->context)->current += len+1;
1555
0
    } else {
1556
0
        result = NULL;
1557
0
    }
1558
0
    if (resultLength) {
1559
0
        *resultLength = len;
1560
0
    }
1561
0
    return result;
1562
0
}
1563
1564
static void U_CALLCONV
1565
uloc_kw_resetKeywords(UEnumeration* en,
1566
0
                      UErrorCode* /*status*/) {
1567
0
    ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1568
0
}
1569
1570
U_CDECL_END
1571
1572
1573
/*
 * Template from which uloc_openKeywordList() copies each new keyword
 * enumeration.  The two leading pointer slots are left NULL here; the
 * remaining slots hold the callbacks, in the order required by UEnumeration.
 */
static const UEnumeration gKeywordsEnum = {
    NULL,
    NULL,
    uloc_kw_closeKeywords,   /* frees list, context and enumeration */
    uloc_kw_countKeywords,   /* counts the items in the list */
    uenum_unextDefault,
    uloc_kw_nextKeyword,     /* returns the next item */
    uloc_kw_resetKeywords    /* rewinds to the first item */
};
1582
1583
/*
 * Creates an enumeration over a copy of keywordList.
 *
 * keywordListSize bytes are copied and one extra zero byte is appended, so
 * the caller's buffer is not referenced after this function returns.
 *
 * On allocation failure *status is set to U_MEMORY_ALLOCATION_ERROR,
 * everything allocated so far is released, and NULL is returned.  NULL is
 * also returned, without any work, if *status already indicates a failure.
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
{
    UKeywordsContext *myContext = NULL;
    UEnumeration *result = NULL;

    if(U_FAILURE(*status)) {
        return NULL;
    }
    result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
    /* Null pointer test */
    if (result == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
    myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
    if (myContext == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(result);
        return NULL;
    }
    myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
    /* the copy below must not run on a failed allocation */
    if (myContext->keywords == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(myContext);
        uprv_free(result);
        return NULL;
    }
    uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
    myContext->keywords[keywordListSize] = 0;
    myContext->current = myContext->keywords;
    result->context = myContext;
    return result;
}
1612
1613
/*
 * Opens an enumeration over the keywords of localeID (the part after '@').
 * A NULL localeID means the default locale.  Returns NULL if status is NULL
 * or already a failure, or if no keywords were extracted.
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char* localeID,
                        UErrorCode* status)
{
    char keywords[256];
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char* tmpLocaleID;
    int32_t keywordsLen = 0;

    if (status == NULL || U_FAILURE(*status)) {
        return NULL;
    }

    if (_hasBCP47Extension(localeID)) {
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
    } else {
        if (localeID == NULL) {
            localeID = uloc_getDefault();
        }
        tmpLocaleID = localeID;
    }

    /* Step over the language, then the optional script and the country */
    ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
    if (_isIDSeparator(*tmpLocaleID)) {
        const char *afterScript;
        ulocimp_getScript(tmpLocaleID+1, NULL, 0, &afterScript);
        if (afterScript != tmpLocaleID+1) {
            /* a script was present */
            tmpLocaleID = afterScript;
        }
        if (_isIDSeparator(*tmpLocaleID)) {
            ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
            if (_isIDSeparator(*tmpLocaleID)) {
                /* result is not used; tmpLocaleID stays where it is */
                _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
            }
        }
    }

    /* keywords are located after '@' */
    tmpLocaleID = locale_getKeywordsStart(tmpLocaleID);
    if (tmpLocaleID != NULL) {
        keywordsLen = locale_getKeywords(tmpLocaleID+1, '@', keywords, (int32_t)sizeof(keywords),
                                         NULL, 0, NULL, FALSE, status);
    }

    if (keywordsLen == 0) {
        return NULL;
    }
    return uloc_openKeywordList(keywords, keywordsLen, status);
}
1666
1667
1668
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * True if any bit of 'mask' is set in 'options'.  Both arguments are
 * parenthesized so that compound expressions such as (A | B) are safe.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The tag "i-default"; deliberately not zero-terminated, compared by length. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1676
1677
/**
1678
 * Canonicalize the given localeID, to level 1 or to level 2,
1679
 * depending on the options.  To specify level 1, pass in options=0.
1680
 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1681
 *
1682
 * This is the code underlying uloc_getName and uloc_canonicalize.
1683
 */
1684
static int32_t
1685
_canonicalize(const char* localeID,
1686
              char* result,
1687
              int32_t resultCapacity,
1688
              uint32_t options,
1689
2.38k
              UErrorCode* err) {
1690
2.38k
    int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1691
2.38k
    char localeBuffer[ULOC_FULLNAME_CAPACITY];
1692
2.38k
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
1693
2.38k
    const char* origLocaleID;
1694
2.38k
    const char* tmpLocaleID;
1695
2.38k
    const char* keywordAssign = NULL;
1696
2.38k
    const char* separatorIndicator = NULL;
1697
2.38k
    const char* addKeyword = NULL;
1698
2.38k
    const char* addValue = NULL;
1699
2.38k
    char* name;
1700
2.38k
    char* variant = NULL; /* pointer into name, or NULL */
1701
1702
2.38k
    if (U_FAILURE(*err)) {
1703
0
        return 0;
1704
0
    }
1705
1706
2.38k
    if (_hasBCP47Extension(localeID)) {
1707
0
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1708
2.38k
    } else {
1709
2.38k
        if (localeID==NULL) {
1710
0
           localeID=uloc_getDefault();
1711
0
        }
1712
2.38k
        tmpLocaleID=localeID;
1713
2.38k
    }
1714
1715
2.38k
    origLocaleID=tmpLocaleID;
1716
1717
    /* if we are doing a full canonicalization, then put results in
1718
       localeBuffer, if necessary; otherwise send them to result. */
1719
2.38k
    if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1720
2.38k
        (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
1721
0
        name = localeBuffer;
1722
0
        nameCapacity = (int32_t)sizeof(localeBuffer);
1723
2.38k
    } else {
1724
2.38k
        name = result;
1725
2.38k
        nameCapacity = resultCapacity;
1726
2.38k
    }
1727
1728
    /* get all pieces, one after another, and separate with '_' */
1729
2.38k
    len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1730
1731
2.38k
    if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1732
0
        const char *d = uloc_getDefault();
1733
1734
0
        len = (int32_t)uprv_strlen(d);
1735
1736
0
        if (name != NULL) {
1737
0
            uprv_strncpy(name, d, len);
1738
0
        }
1739
2.38k
    } else if(_isIDSeparator(*tmpLocaleID)) {
1740
2.38k
        const char *scriptID;
1741
1742
2.38k
        ++fieldCount;
1743
2.38k
        if(len<nameCapacity) {
1744
2.38k
            name[len]='_';
1745
2.38k
        }
1746
2.38k
        ++len;
1747
1748
2.38k
        scriptSize=ulocimp_getScript(tmpLocaleID+1,
1749
2.38k
            (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1750
2.38k
        if(scriptSize > 0) {
1751
            /* Found optional script */
1752
0
            tmpLocaleID = scriptID;
1753
0
            ++fieldCount;
1754
0
            len+=scriptSize;
1755
0
            if (_isIDSeparator(*tmpLocaleID)) {
1756
                /* If there is something else, then we add the _ */
1757
0
                if(len<nameCapacity) {
1758
0
                    name[len]='_';
1759
0
                }
1760
0
                ++len;
1761
0
            }
1762
0
        }
1763
1764
2.38k
        if (_isIDSeparator(*tmpLocaleID)) {
1765
2.38k
            const char *cntryID;
1766
2.38k
            int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1767
2.38k
                (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1768
2.38k
            if (cntrySize > 0) {
1769
                /* Found optional country */
1770
2.38k
                tmpLocaleID = cntryID;
1771
2.38k
                len+=cntrySize;
1772
2.38k
            }
1773
2.38k
            if(_isIDSeparator(*tmpLocaleID)) {
1774
                /* If there is something else, then we add the _  if we found country before. */
1775
2.38k
                if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
1776
2.38k
                    ++fieldCount;
1777
2.38k
                    if(len<nameCapacity) {
1778
2.38k
                        name[len]='_';
1779
2.38k
                    }
1780
2.38k
                    ++len;
1781
2.38k
                }
1782
1783
2.38k
                variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1784
2.38k
                    (len<nameCapacity ? name+len : NULL), nameCapacity-len);
1785
2.38k
                if (variantSize > 0) {
1786
2.38k
                    variant = len<nameCapacity ? name+len : NULL;
1787
2.38k
                    len += variantSize;
1788
2.38k
                    tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1789
2.38k
                }
1790
2.38k
            }
1791
2.38k
        }
1792
2.38k
    }
1793
1794
    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1795
2.38k
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1796
0
        UBool done = FALSE;
1797
0
        do {
1798
0
            char c = *tmpLocaleID;
1799
0
            switch (c) {
1800
0
            case 0:
1801
0
            case '@':
1802
0
                done = TRUE;
1803
0
                break;
1804
0
            default:
1805
0
                if (len<nameCapacity) {
1806
0
                    name[len] = c;
1807
0
                }
1808
0
                ++len;
1809
0
                ++tmpLocaleID;
1810
0
                break;
1811
0
            }
1812
0
        } while (!done);
1813
0
    }
1814
1815
    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1816
       After this, tmpLocaleID either points to '@' or is NULL */
1817
2.38k
    if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1818
0
        keywordAssign = uprv_strchr(tmpLocaleID, '=');
1819
0
        separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1820
0
    }
1821
1822
    /* Copy POSIX-style variant, if any [mr@FOO] */
1823
2.38k
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1824
2.38k
        tmpLocaleID != NULL && keywordAssign == NULL) {
1825
0
        for (;;) {
1826
0
            char c = *tmpLocaleID;
1827
0
            if (c == 0) {
1828
0
                break;
1829
0
            }
1830
0
            if (len<nameCapacity) {
1831
0
                name[len] = c;
1832
0
            }
1833
0
            ++len;
1834
0
            ++tmpLocaleID;
1835
0
        }
1836
0
    }
1837
1838
2.38k
    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1839
        /* Handle @FOO variant if @ is present and not followed by = */
1840
1.19k
        if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1841
0
            int32_t posixVariantSize;
1842
            /* Add missing '_' if needed */
1843
0
            if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1844
0
                do {
1845
0
                    if(len<nameCapacity) {
1846
0
                        name[len]='_';
1847
0
                    }
1848
0
                    ++len;
1849
0
                    ++fieldCount;
1850
0
                } while(fieldCount<2);
1851
0
            }
1852
0
            posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1853
0
                                             (UBool)(variantSize > 0));
1854
0
            if (posixVariantSize > 0) {
1855
0
                if (variant == NULL) {
1856
0
                    variant = name+len;
1857
0
                }
1858
0
                len += posixVariantSize;
1859
0
                variantSize += posixVariantSize;
1860
0
            }
1861
0
        }
1862
1863
        /* Handle generic variants first */
1864
1.19k
        if (variant) {
1865
4.76k
            for (j=0; j<UPRV_LENGTHOF(VARIANT_MAP); j++) {
1866
3.57k
                const char* variantToCompare = VARIANT_MAP[j].variant;
1867
3.57k
                int32_t n = (int32_t)uprv_strlen(variantToCompare);
1868
3.57k
                int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1869
3.57k
                len -= variantLen;
1870
3.57k
                if (variantLen > 0) {
1871
0
                    if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
1872
0
                        --len;
1873
0
                    }
1874
0
                    addKeyword = VARIANT_MAP[j].keyword;
1875
0
                    addValue = VARIANT_MAP[j].value;
1876
0
                    break;
1877
0
                }
1878
3.57k
            }
1879
1.19k
            if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
1880
0
                --len;
1881
0
            }
1882
1.19k
        }
1883
1884
        /* Look up the ID in the canonicalization map */
1885
55.9k
        for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1886
54.7k
            const char* id = CANONICALIZE_MAP[j].id;
1887
54.7k
            int32_t n = (int32_t)uprv_strlen(id);
1888
54.7k
            if (len == n && uprv_strncmp(name, id, n) == 0) {
1889
0
                if (n == 0 && tmpLocaleID != NULL) {
1890
0
                    break; /* Don't remap "" if keywords present */
1891
0
                }
1892
0
                len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1893
0
                if (CANONICALIZE_MAP[j].keyword) {
1894
0
                    addKeyword = CANONICALIZE_MAP[j].keyword;
1895
0
                    addValue = CANONICALIZE_MAP[j].value;
1896
0
                }
1897
0
                break;
1898
0
            }
1899
54.7k
        }
1900
1.19k
    }
1901
1902
2.38k
    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1903
2.38k
        if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1904
2.38k
            (!separatorIndicator || separatorIndicator > keywordAssign)) {
1905
0
            if(len<nameCapacity) {
1906
0
                name[len]='@';
1907
0
            }
1908
0
            ++len;
1909
0
            ++fieldCount;
1910
0
            len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1911
0
                                NULL, 0, NULL, TRUE, addKeyword, addValue, err);
1912
2.38k
        } else if (addKeyword != NULL) {
1913
0
            U_ASSERT(addValue != NULL && len < nameCapacity);
1914
            /* inelegant but works -- later make _getKeywords do this? */
1915
0
            len += _copyCount(name+len, nameCapacity-len, "@");
1916
0
            len += _copyCount(name+len, nameCapacity-len, addKeyword);
1917
0
            len += _copyCount(name+len, nameCapacity-len, "=");
1918
0
            len += _copyCount(name+len, nameCapacity-len, addValue);
1919
0
        }
1920
2.38k
    }
1921
1922
2.38k
    if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1923
0
        uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1924
0
    }
1925
1926
2.38k
    return u_terminateChars(result, resultCapacity, len, err);
1927
2.38k
}
1928
1929
/* ### ID parsing API **************************************************/
1930
1931
/*
 * Writes to 'parent' the part of localeID that precedes its last '_'.
 * If there is no '_' (or it is the first character) the parent is empty.
 * A NULL localeID means the default locale.  'parent' may be the same
 * pointer as localeID, in which case nothing is copied and the string is
 * only terminated at the new length.
 *
 * Returns 0 without doing anything if err is NULL or already a failure;
 * otherwise returns the value of u_terminateChars() for the parent length.
 */
U_CAPI int32_t  U_EXPORT2
uloc_getParent(const char*    localeID,
               char* parent,
               int32_t parentCapacity,
               UErrorCode* err)
{
    const char *lastUnderscore;
    int32_t i;

    /* same entry check as the other uloc_getXxx() functions */
    if (err==NULL || U_FAILURE(*err)) {
        return 0;
    }

    if (localeID == NULL) {
        localeID = uloc_getDefault();
    }

    lastUnderscore=uprv_strrchr(localeID, '_');
    if(lastUnderscore!=NULL) {
        i=(int32_t)(lastUnderscore-localeID);
    } else {
        i=0;
    }

    /* copy only into a real buffer; a non-positive capacity must not reach
       uprv_memcpy as a byte count */
    if(i>0 && parent != localeID && parent != NULL && parentCapacity > 0) {
        uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
    }
    return u_terminateChars(parent, parentCapacity, i, err);
}
1958
1959
/*
 * Extracts the language of localeID (NULL means the default locale) via
 * ulocimp_getLanguage() and terminates the output with u_terminateChars().
 * Returns 0 if err is NULL or already a failure.
 */
U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char*    localeID,
         char* language,
         int32_t languageCapacity,
         UErrorCode* err)
{
    /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
    if (err==NULL || U_FAILURE(*err)) {
        return 0;
    }

    const char *id = (localeID != NULL) ? localeID : uloc_getDefault();
    int32_t langLen = ulocimp_getLanguage(id, language, languageCapacity, NULL);
    return u_terminateChars(language, languageCapacity, langLen, err);
}
1979
1980
/*
 * Extracts the script of localeID (NULL means the default locale).  The
 * script is looked for right after the separator that follows the
 * language; without such a separator the result is empty.
 * Returns 0 if err is NULL or already a failure.
 */
U_CAPI int32_t U_EXPORT2
uloc_getScript(const char*    localeID,
         char* script,
         int32_t scriptCapacity,
         UErrorCode* err)
{
    if (err==NULL || U_FAILURE(*err)) {
        return 0;
    }

    const char *cursor = (localeID != NULL) ? localeID : uloc_getDefault();
    int32_t scriptLen = 0;

    /* skip the language */
    ulocimp_getLanguage(cursor, NULL, 0, &cursor);
    if (_isIDSeparator(*cursor)) {
        scriptLen = ulocimp_getScript(cursor+1, script, scriptCapacity, NULL);
    }
    return u_terminateChars(script, scriptCapacity, scriptLen, err);
}
2003
2004
/*
 * Extracts the country of localeID (NULL means the default locale).  The
 * language and an optional script are skipped first; the country is then
 * read after the next separator, if there is one.
 * Returns 0 if err is NULL or already a failure.
 */
U_CAPI int32_t  U_EXPORT2
uloc_getCountry(const char* localeID,
            char* country,
            int32_t countryCapacity,
            UErrorCode* err)
{
    if (err==NULL || U_FAILURE(*err)) {
        return 0;
    }

    const char *cursor = (localeID != NULL) ? localeID : uloc_getDefault();
    int32_t countryLen = 0;

    /* Skip the language */
    ulocimp_getLanguage(cursor, NULL, 0, &cursor);
    if (_isIDSeparator(*cursor)) {
        const char *afterScript;
        /* Skip the script if available */
        ulocimp_getScript(cursor+1, NULL, 0, &afterScript);
        if (afterScript != cursor+1) {
            /* a script was present */
            cursor = afterScript;
        }
        if (_isIDSeparator(*cursor)) {
            countryLen = ulocimp_getCountry(cursor+1, country, countryCapacity, NULL);
        }
    }
    return u_terminateChars(country, countryCapacity, countryLen, err);
}
2036
2037
/*
 * Extracts the variant of localeID (NULL means the default locale).  The
 * language, an optional script and an optional country are skipped; the
 * variant is then read with _getVariant() after the next separator.
 *
 * POSIX variants after '@' are deliberately not handled here any more
 * (removed by weiv); use the canonicalization function for those.
 *
 * Returns 0 if err is NULL or already a failure.
 */
U_CAPI int32_t  U_EXPORT2
uloc_getVariant(const char* localeID,
                char* variant,
                int32_t variantCapacity,
                UErrorCode* err)
{
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char* tmpLocaleID;
    int32_t variantLen = 0;

    if (err==NULL || U_FAILURE(*err)) {
        return 0;
    }

    if (_hasBCP47Extension(localeID)) {
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
    } else {
        if (localeID == NULL) {
            localeID = uloc_getDefault();
        }
        tmpLocaleID = localeID;
    }

    /* Skip the language */
    ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
    if (_isIDSeparator(*tmpLocaleID)) {
        const char *afterScript;
        /* Skip the script if available */
        ulocimp_getScript(tmpLocaleID+1, NULL, 0, &afterScript);
        if (afterScript != tmpLocaleID+1) {
            tmpLocaleID = afterScript;
        }
        /* Skip the Country */
        if (_isIDSeparator(*tmpLocaleID)) {
            const char *afterCountry;
            ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &afterCountry);
            if (afterCountry != tmpLocaleID+1) {
                tmpLocaleID = afterCountry;
            }
            if (_isIDSeparator(*tmpLocaleID)) {
                /* If there was no country ID, skip a possible extra IDSeparator */
                if (tmpLocaleID != afterCountry && _isIDSeparator(tmpLocaleID[1])) {
                    tmpLocaleID++;
                }
                variantLen = _getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
            }
        }
    }

    return u_terminateChars(variant, variantCapacity, variantLen, err);
}
2097
2098
/* Level 1 canonicalization: _canonicalize() with no option bits set. */
U_CAPI int32_t  U_EXPORT2
uloc_getName(const char* localeID,
             char* name,
             int32_t nameCapacity,
             UErrorCode* err)
{
    const uint32_t noOptions = 0;
    return _canonicalize(localeID, name, nameCapacity, noOptions, err);
}
2106
2107
/* Like uloc_getName(), but with keywords stripped (_ULOC_STRIP_KEYWORDS). */
U_CAPI int32_t  U_EXPORT2
uloc_getBaseName(const char* localeID,
                 char* name,
                 int32_t nameCapacity,
                 UErrorCode* err)
{
    const uint32_t options = _ULOC_STRIP_KEYWORDS;
    return _canonicalize(localeID, name, nameCapacity, options, err);
}
2115
2116
/* Level 2 canonicalization: _canonicalize() with _ULOC_CANONICALIZE set. */
U_CAPI int32_t  U_EXPORT2
uloc_canonicalize(const char* localeID,
                  char* name,
                  int32_t nameCapacity,
                  UErrorCode* err)
{
    const uint32_t options = _ULOC_CANONICALIZE;
    return _canonicalize(localeID, name, nameCapacity, options, err);
}
2124
2125
/*
 * Returns the LANGUAGES_3 entry at the index where the language of localeID
 * is found in LANGUAGES.  Returns "" if the language cannot be extracted or
 * is not in the table.  A NULL localeID means the default locale.
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Language(const char* localeID)
{
    char lang[ULOC_LANG_CAPACITY];
    UErrorCode err = U_ZERO_ERROR;
    int16_t offset;

    if (localeID == NULL) {
        localeID = uloc_getDefault();
    }

    uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
    if (U_FAILURE(err)) {
        return "";
    }

    offset = _findIndex(LANGUAGES, lang);
    return (offset < 0) ? "" : LANGUAGES_3[offset];
}
2144
2145
/*
 * Returns the COUNTRIES_3 entry at the index where the country of localeID
 * is found in COUNTRIES.  Returns "" if the country cannot be extracted or
 * is not in the table.  A NULL localeID means the default locale.
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Country(const char* localeID)
{
    char cntry[ULOC_LANG_CAPACITY];
    UErrorCode err = U_ZERO_ERROR;
    int16_t offset;

    if (localeID == NULL) {
        localeID = uloc_getDefault();
    }

    uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
    if (U_FAILURE(err)) {
        return "";
    }

    offset = _findIndex(COUNTRIES, cntry);
    return (offset < 0) ? "" : COUNTRIES_3[offset];
}
2165
2166
/*
 * Maps localeID to an LCID.  Returns 0 for a NULL or too short
 * (fewer than 2 characters) ID, or if the language cannot be extracted.
 *
 * The platform lookup is tried first.  Otherwise uprv_convertToLCID() is
 * used; if the ID has keywords, it is first rebuilt as
 * "base name + collation keyword", and if that rebuild does not succeed
 * the original ID is passed on unchanged.
 */
U_CAPI uint32_t  U_EXPORT2
uloc_getLCID(const char* localeID)
{
    UErrorCode status = U_ZERO_ERROR;
    char       langID[ULOC_FULLNAME_CAPACITY];
    uint32_t   lcid = 0;

    /* Check for incomplete id. */
    if (!localeID || uprv_strlen(localeID) < 2) {
        return 0;
    }

    // Attempt platform lookup if available
    lcid = uprv_convertToLCIDPlatform(localeID);
    if (lcid > 0) {
        // Windows found an LCID, return that
        return lcid;
    }

    uloc_getLanguage(localeID, langID, sizeof(langID), &status);
    if (U_FAILURE(status)) {
        return 0;
    }

    if (uprv_strchr(localeID, '@')) {
        // uprv_convertToLCID does not support keywords other than collation.
        // Remove all keywords except collation.
        char collVal[ULOC_KEYWORDS_CAPACITY];
        char tmpLocaleID[ULOC_FULLNAME_CAPACITY];

        int32_t collLen = uloc_getKeywordValue(localeID, "collation", collVal,
            UPRV_LENGTHOF(collVal) - 1, &status);

        if (U_SUCCESS(status) && collLen > 0) {
            collVal[collLen] = 0;

            int32_t baseLen = uloc_getBaseName(localeID, tmpLocaleID,
                UPRV_LENGTHOF(tmpLocaleID) - 1, &status);

            if (U_SUCCESS(status) && baseLen > 0) {
                tmpLocaleID[baseLen] = 0;

                int32_t fullLen = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
                    UPRV_LENGTHOF(tmpLocaleID) - baseLen - 1, &status);

                if (U_SUCCESS(status) && fullLen > 0) {
                    tmpLocaleID[fullLen] = 0;
                    return uprv_convertToLCID(langID, tmpLocaleID, &status);
                }
            }
        }

        // fall through - all keywords are simply ignored
        status = U_ZERO_ERROR;
    }

    return uprv_convertToLCID(langID, localeID, &status);
}
2226
2227
/* Thin wrapper: forwards all arguments to uprv_convertToPosix(). */
U_CAPI int32_t U_EXPORT2
uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
                UErrorCode *status)
{
    int32_t length = uprv_convertToPosix(hostid, locale, localeCapacity, status);
    return length;
}
2233
2234
/* ### Default locale **************************************************/
2235
2236
/* Returns the default locale ID as reported by locale_get_default(). */
U_CAPI const char*  U_EXPORT2
uloc_getDefault()
{
    const char *defaultID = locale_get_default();
    return defaultID;
}
2241
2242
/*
 * Sets the default locale by propagating the change to C++ through
 * locale_set_default().  Nothing happens if *err already indicates a
 * failure; apart from that entry check the error code isn't currently used
 * for anything by this function.
 */
U_CAPI void  U_EXPORT2
uloc_setDefault(const char*   newDefaultLocale,
             UErrorCode* err)
{
    if (U_SUCCESS(*err)) {
        /* propagate change to C++ */
        locale_set_default(newDefaultLocale);
    }
}
2253
2254
/**
2255
 * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
2256
 * to an array of pointers to arrays of char.  All of these pointers are owned
2257
 * by ICU-- do not delete them, and do not write through them.  The array is
2258
 * terminated with a null pointer.
2259
 */
2260
U_CAPI const char* const*  U_EXPORT2
2261
uloc_getISOLanguages()
2262
0
{
2263
0
    return LANGUAGES;
2264
0
}
2265
2266
/**
2267
 * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2268
 * pointer to an array of pointers to arrays of char.  All of these pointers are
2269
 * owned by ICU-- do not delete them, and do not write through them.  The array is
2270
 * terminated with a null pointer.
2271
 */
2272
U_CAPI const char* const*  U_EXPORT2
2273
uloc_getISOCountries()
2274
0
{
2275
0
    return COUNTRIES;
2276
0
}
2277
2278
2279
/* this function to be moved into cstring.c later */
2280
static char gDecimal = 0;
2281
2282
static /* U_CAPI */
2283
double
2284
/* U_EXPORT2 */
2285
0
_uloc_strtod(const char *start, char **end) {
2286
0
    char *decimal;
2287
0
    char *myEnd;
2288
0
    char buf[30];
2289
0
    double rv;
2290
0
    if (!gDecimal) {
2291
0
        char rep[5];
2292
        /* For machines that decide to change the decimal on you,
2293
        and try to be too smart with localization.
2294
        This normally should be just a '.'. */
2295
0
        sprintf(rep, "%+1.1f", 1.0);
2296
0
        gDecimal = rep[2];
2297
0
    }
2298
2299
0
    if(gDecimal == '.') {
2300
0
        return uprv_strtod(start, end); /* fall through to OS */
2301
0
    } else {
2302
0
        uprv_strncpy(buf, start, 29);
2303
0
        buf[29]=0;
2304
0
        decimal = uprv_strchr(buf, '.');
2305
0
        if(decimal) {
2306
0
            *decimal = gDecimal;
2307
0
        } else {
2308
0
            return uprv_strtod(start, end); /* no decimal point */
2309
0
        }
2310
0
        rv = uprv_strtod(buf, &myEnd);
2311
0
        if(end) {
2312
0
            *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2313
0
        }
2314
0
        return rv;
2315
0
    }
2316
0
}
2317
2318
typedef struct {
2319
    float q;
2320
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
2321
    char locale[ULOC_FULLNAME_CAPACITY+1];
2322
} _acceptLangItem;
2323
2324
/*
 * Sort comparator for _acceptLangItem entries.
 * Orders by q weight, highest first; entries whose weights compare neither
 * greater nor smaller are ordered by a case-insensitive comparison of their
 * locale names. The context argument is unused.
 */
static int32_t U_CALLCONV
uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
{
    const _acceptLangItem *lhs = static_cast<const _acceptLangItem *>(a);
    const _acceptLangItem *rhs = static_cast<const _acceptLangItem *>(b);

    if (rhs->q < lhs->q) {
        return -1;  /* lhs has the larger weight, so it sorts first */
    }
    if (rhs->q > lhs->q) {
        return 1;   /* rhs has the larger weight */
    }

    /* weights tie: fall back to the locale names */
    return uprv_stricmp(lhs->locale, rhs->locale);
}
2352
2353
/*
2354
mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2355
*/
2356
2357
/**
 * Chooses an available locale based on an HTTP "Accept-Language" header value.
 *
 * The header is split at commas. Each item may carry a ";q=<weight>"
 * parameter (optional whitespace is tolerated around ';', 'q' and '=');
 * items without one get weight 1.0. Each language range is trimmed,
 * canonicalized with uloc_canonicalize(), and the items are then sorted by
 * descending weight (ties by case-insensitive name) before being handed to
 * uloc_acceptLanguage().
 *
 * @param result             Buffer that receives the chosen locale.
 * @param resultAvailable    Capacity of result.
 * @param outResult          Optional; receives the kind of match.
 * @param httpAcceptLanguage NUL-terminated header value. NULL is rejected
 *                           with U_ILLEGAL_ARGUMENT_ERROR.
 * @param availableLocales   Enumeration of candidate locales.
 * @param status             In/out error code. Set to U_BUFFER_OVERFLOW_ERROR
 *                           when a language range is longer than
 *                           ULOC_FULLNAME_CAPACITY, and to
 *                           U_MEMORY_ALLOCATION_ERROR when growing the item
 *                           list or allocating the name array fails.
 * @return -1 on any failure or when the header contains no items; otherwise
 *         the value returned by uloc_acceptLanguage().
 */
U_CAPI int32_t U_EXPORT2
uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
                            const char *httpAcceptLanguage,
                            UEnumeration* availableLocales,
                            UErrorCode *status)
{
    MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
    char tmp[ULOC_FULLNAME_CAPACITY +1];
    int32_t n = 0;
    const char *itemEnd;
    const char *paramEnd;
    const char *s;
    const char *t;
    int32_t res;
    int32_t i;

    if(U_FAILURE(*status)) {
        return -1;
    }
    if(httpAcceptLanguage == NULL) {
        /* must be checked before the header is measured or scanned */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }
    const char *headerEnd = httpAcceptLanguage + uprv_strlen(httpAcceptLanguage);

    for(s=httpAcceptLanguage;*s;) {
        /* isspace() is only defined for unsigned char values, hence the casts */
        while(isspace((unsigned char)*s)) { /* eat space at the beginning */
            s++;
        }
        itemEnd=uprv_strchr(s,',');
        paramEnd=uprv_strchr(s,';');
        if(!itemEnd) {
            itemEnd = headerEnd; /* end of string */
        }
        if(paramEnd && paramEnd<itemEnd) {
            /* semicolon (;) is closer than end (,) */
            t = paramEnd+1;
            /* HTTP permits whitespace after ';' (e.g. "en; q=0.5") */
            while(isspace((unsigned char)*t)) {
                t++;
            }
            if(*t=='q') {
                t++;
            }
            while(isspace((unsigned char)*t)) {
                t++;
            }
            if(*t=='=') {
                t++;
            }
            while(isspace((unsigned char)*t)) {
                t++;
            }
            items[n].q = (float)_uloc_strtod(t,NULL);
        } else {
            /* no semicolon - it's 1.0 */
            items[n].q = 1.0f;
            paramEnd = itemEnd;
        }
        items[n].dummy=0;
        /* eat spaces prior to semi; t ends one past the last kept character */
        for(t=paramEnd;(t>s)&&isspace((unsigned char)t[-1]);t--)
            ;
        if((t-s) > ULOC_FULLNAME_CAPACITY) {
          *status = U_BUFFER_OVERFLOW_ERROR;
          return -1; // too big
        }
        int32_t slen = (int32_t)(t-s);
        uprv_strncpy(items[n].locale, s, slen);
        items[n].locale[slen]=0; // terminate
        int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
        if(U_FAILURE(*status)) return -1;
        if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {
            // canonicalization had an effect- copy back
            uprv_strncpy(items[n].locale, tmp, clen);
            items[n].locale[clen] = 0; // terminate
        }
        n++;
        s = itemEnd;
        while(*s==',') { /* eat duplicate commas */
            s++;
        }
        if(n>=items.getCapacity()) { // If we need more items
          if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
              *status = U_MEMORY_ALLOCATION_ERROR;
              return -1;
          }
#if defined(ULOC_DEBUG)
          fprintf(stderr,"malloced at size %d\n", items.getCapacity());
#endif
        }
    }
    if(n==0) {
        /* Empty header: nothing to match. Report "no match" rather than
           attempting a zero-length allocation below. */
        if(outResult) {
            *outResult = ULOC_ACCEPT_FAILED;
        }
        return -1;
    }
    uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
    if (U_FAILURE(*status)) {
        return -1;
    }
    LocalMemory<const char*> strs(NULL);
    if (strs.allocateInsteadAndReset(n) == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return -1;
    }
    /* the name array only borrows the strings owned by items */
    for(i=0;i<n;i++) {
        strs[i]=items[i].locale;
    }
    res =  uloc_acceptLanguage(result, resultAvailable, outResult,
                               strs.getAlias(), n, availableLocales, status);
    return res;
}
2462
2463
2464
U_CAPI int32_t U_EXPORT2
2465
uloc_acceptLanguage(char *result, int32_t resultAvailable,
2466
                    UAcceptResult *outResult, const char **acceptList,
2467
                    int32_t acceptListCount,
2468
                    UEnumeration* availableLocales,
2469
                    UErrorCode *status)
2470
0
{
2471
0
    int32_t i,j;
2472
0
    int32_t len;
2473
0
    int32_t maxLen=0;
2474
0
    char tmp[ULOC_FULLNAME_CAPACITY+1];
2475
0
    const char *l;
2476
0
    char **fallbackList;
2477
0
    if(U_FAILURE(*status)) {
2478
0
        return -1;
2479
0
    }
2480
0
    fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
2481
0
    if(fallbackList==NULL) {
2482
0
        *status = U_MEMORY_ALLOCATION_ERROR;
2483
0
        return -1;
2484
0
    }
2485
0
    for(i=0;i<acceptListCount;i++) {
2486
#if defined(ULOC_DEBUG)
2487
        fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2488
#endif
2489
0
        while((l=uenum_next(availableLocales, NULL, status))) {
2490
#if defined(ULOC_DEBUG)
2491
            fprintf(stderr,"  %s\n", l);
2492
#endif
2493
0
            len = (int32_t)uprv_strlen(l);
2494
0
            if(!uprv_strcmp(acceptList[i], l)) {
2495
0
                if(outResult) {
2496
0
                    *outResult = ULOC_ACCEPT_VALID;
2497
0
                }
2498
#if defined(ULOC_DEBUG)
2499
                fprintf(stderr, "MATCH! %s\n", l);
2500
#endif
2501
0
                if(len>0) {
2502
0
                    uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2503
0
                }
2504
0
                for(j=0;j<i;j++) {
2505
0
                    uprv_free(fallbackList[j]);
2506
0
                }
2507
0
                uprv_free(fallbackList);
2508
0
                return u_terminateChars(result, resultAvailable, len, status);
2509
0
            }
2510
0
            if(len>maxLen) {
2511
0
                maxLen = len;
2512
0
            }
2513
0
        }
2514
0
        uenum_reset(availableLocales, status);
2515
        /* save off parent info */
2516
0
        if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
2517
0
            fallbackList[i] = uprv_strdup(tmp);
2518
0
        } else {
2519
0
            fallbackList[i]=0;
2520
0
        }
2521
0
    }
2522
2523
0
    for(maxLen--;maxLen>0;maxLen--) {
2524
0
        for(i=0;i<acceptListCount;i++) {
2525
0
            if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2526
#if defined(ULOC_DEBUG)
2527
                fprintf(stderr,"Try: [%s]", fallbackList[i]);
2528
#endif
2529
0
                while((l=uenum_next(availableLocales, NULL, status))) {
2530
#if defined(ULOC_DEBUG)
2531
                    fprintf(stderr,"  %s\n", l);
2532
#endif
2533
0
                    len = (int32_t)uprv_strlen(l);
2534
0
                    if(!uprv_strcmp(fallbackList[i], l)) {
2535
0
                        if(outResult) {
2536
0
                            *outResult = ULOC_ACCEPT_FALLBACK;
2537
0
                        }
2538
#if defined(ULOC_DEBUG)
2539
                        fprintf(stderr, "fallback MATCH! %s\n", l);
2540
#endif
2541
0
                        if(len>0) {
2542
0
                            uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2543
0
                        }
2544
0
                        for(j=0;j<acceptListCount;j++) {
2545
0
                            uprv_free(fallbackList[j]);
2546
0
                        }
2547
0
                        uprv_free(fallbackList);
2548
0
                        return u_terminateChars(result, resultAvailable, len, status);
2549
0
                    }
2550
0
                }
2551
0
                uenum_reset(availableLocales, status);
2552
2553
0
                if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
2554
0
                    uprv_free(fallbackList[i]);
2555
0
                    fallbackList[i] = uprv_strdup(tmp);
2556
0
                } else {
2557
0
                    uprv_free(fallbackList[i]);
2558
0
                    fallbackList[i]=0;
2559
0
                }
2560
0
            }
2561
0
        }
2562
0
        if(outResult) {
2563
0
            *outResult = ULOC_ACCEPT_FAILED;
2564
0
        }
2565
0
    }
2566
0
    for(i=0;i<acceptListCount;i++) {
2567
0
        uprv_free(fallbackList[i]);
2568
0
    }
2569
0
    uprv_free(fallbackList);
2570
0
    return -1;
2571
0
}
2572
2573
/*
 * Maps a locale keyword to its Unicode locale (BCP 47) key.
 * Returns the result of ulocimp_toBcpKey() when it yields one. Otherwise the
 * input pointer itself is returned if ultag_isUnicodeLocaleKey() accepts it,
 * and NULL if it does not.
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char* keyword)
{
    const char* mappedKey = ulocimp_toBcpKey(keyword);
    if (mappedKey != NULL) {
        return mappedKey;
    }
    // No mapping found: pass the keyword through when its syntax is acceptable.
    return ultag_isUnicodeLocaleKey(keyword, -1) ? keyword : NULL;
}
2583
2584
/*
 * Maps a keyword value to its Unicode locale (BCP 47) type.
 * Returns the result of ulocimp_toBcpType() when it yields one. Otherwise the
 * input value pointer itself is returned if ultag_isUnicodeLocaleType()
 * accepts it, and NULL if it does not.
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char* keyword, const char* value)
{
    const char* mappedType = ulocimp_toBcpType(keyword, value, NULL, NULL);
    if (mappedType != NULL) {
        return mappedType;
    }
    // No mapping found: pass the value through when its syntax is acceptable.
    return ultag_isUnicodeLocaleType(value, -1) ? value : NULL;
}
2594
2595
/*
 * Returns TRUE when every character of legacyKey satisfies UPRV_ISALPHANUM.
 * An empty string therefore yields TRUE.
 */
static UBool
isWellFormedLegacyKey(const char* legacyKey)
{
    for (const char* cursor = legacyKey; *cursor != 0; ++cursor) {
        if (!UPRV_ISALPHANUM(*cursor)) {
            return FALSE;
        }
    }
    return TRUE;
}
2607
2608
/*
 * Returns TRUE when legacyType consists of one or more non-empty runs of
 * UPRV_ISALPHANUM characters separated by single '_', '/' or '-' characters.
 * Any other character, an empty run (leading, trailing or doubled separator),
 * or an empty string yields FALSE.
 */
static UBool
isWellFormedLegacyType(const char* legacyType)
{
    int32_t runLength = 0;  /* alphanumerics seen since the last separator */
    for (const char* cursor = legacyType; *cursor != 0; ++cursor) {
        const char c = *cursor;
        if (c == '_' || c == '/' || c == '-') {
            if (runLength == 0) {
                return FALSE;   /* separator with nothing in front of it */
            }
            runLength = 0;
            continue;
        }
        if (!UPRV_ISALPHANUM(c)) {
            return FALSE;
        }
        runLength++;
    }
    return (runLength != 0);
}
2628
2629
/*
 * Maps a locale keyword to its legacy key.
 * Returns the result of ulocimp_toLegacyKey() when it yields one. Otherwise
 * the input pointer itself is returned if isWellFormedLegacyKey() accepts it,
 * and NULL if it does not.
 *
 * Keyword syntax is described in LDML/CLDR:
 *  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
 *  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
 * Keys can only consist of [0-9a-zA-Z].
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword)
{
    const char* mappedKey = ulocimp_toLegacyKey(keyword);
    if (mappedKey == NULL && isWellFormedLegacyKey(keyword)) {
        // No mapping, but the keyword is well-formed in the legacy syntax.
        return keyword;
    }
    return mappedKey;
}
2647
2648
/*
 * Maps a keyword value to its legacy type.
 * Returns the result of ulocimp_toLegacyType() when it yields one. Otherwise
 * the input value pointer itself is returned if isWellFormedLegacyType()
 * accepts it, and NULL if it does not.
 *
 * Keyword syntax is described in LDML/CLDR:
 *  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
 *  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
 * Values (types) consist of [0-9a-zA-Z]; legacy values may additionally
 * contain separators in the middle (e.g. "Asia/Tel_Aviv").
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char* keyword, const char* value)
{
    const char* mappedType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
    if (mappedType == NULL && isWellFormedLegacyType(value)) {
        // No mapping, but the value is well-formed in the legacy syntax.
        return value;
    }
    return mappedType;
}
2667
2668
/*eof*/