Coverage Report

Created: 2026-03-12 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/icu/source/common/uloc.cpp
Line
Count
Source
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (C) 1997-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*
9
* File ULOC.CPP
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   04/01/97    aliu        Creation.
15
*   08/21/98    stephen     JDK 1.2 sync
16
*   12/08/98    rtg         New Locale implementation and C API
17
*   03/15/99    damiba      overhaul.
18
*   04/06/99    stephen     changed setDefault() to realloc and copy
19
*   06/14/99    stephen     Changed calls to ures_open for new params
20
*   07/21/99    stephen     Modified setDefault() to propagate to C++
21
*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
22
*                           brought canonicalization code into line with spec
23
*****************************************************************************/
24
25
/*
26
   POSIX's locale format, from putil.c: [no spaces]
27
28
     ll [ _CC ] [ . MM ] [ @ VV]
29
30
     l = lang, C = ctry, M = charmap, V = variant
31
*/
32
33
#include "unicode/utypes.h"
34
#include "unicode/ustring.h"
35
#include "unicode/uloc.h"
36
37
#include "putilimp.h"
38
#include "ustr_imp.h"
39
#include "ulocimp.h"
40
#include "umutex.h"
41
#include "cstring.h"
42
#include "cmemory.h"
43
#include "locmap.h"
44
#include "uarrsort.h"
45
#include "uenumimp.h"
46
#include "uassert.h"
47
#include "charstr.h"
48
49
#include <stdio.h> /* for sprintf */
50
51
U_NAMESPACE_USE
52
53
/* ### Declarations **************************************************/
54
55
/* Locale stuff from locid.cpp */
56
U_CFUNC void locale_set_default(const char *id);
57
U_CFUNC const char *locale_get_default(void);
58
U_CFUNC int32_t
59
locale_getKeywords(const char *localeID,
60
            char prev,
61
            char *keywords, int32_t keywordCapacity,
62
            char *values, int32_t valuesCapacity, int32_t *valLen,
63
            UBool valuesToo,
64
            UErrorCode *status);
65
66
/* ### Data tables **************************************************/
67
68
/**
69
 * Table of language codes, both 2- and 3-letter, with preference
70
 * given to 2-letter codes where possible.  Includes 3-letter codes
71
 * that lack a 2-letter equivalent.
72
 *
73
 * This list must be in sorted order.  This list is returned directly
74
 * to the user by some API.
75
 *
76
 * This list must be kept in sync with LANGUAGES_3, with corresponding
77
 * entries matched.
78
 *
79
 * This table should be terminated with a NULL entry, followed by a
80
 * second list, and another NULL entry.  The first list is visible to
81
 * user code when this array is returned by API.  The second list
82
 * contains codes we support, but do not expose through user API.
83
 *
84
 * Notes
85
 *
86
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
87
 * include the revisions up to 2001/7/27 *CWB*
88
 *
89
 * The 3 character codes are the terminology codes like RFC 3066.  This
90
 * is compatible with prior ICU codes
91
 *
92
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
93
 * table but now at the end of the table because 3 character codes are
94
 * duplicates.  This avoids bad searches going from 3 to 2 character
95
 * codes.
96
 *
97
 * The range qaa-qtz is reserved for local use
98
 */
99
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
100
/* ISO639 table version is 20150505 */
101
/* Subsequent hand addition of selected languages */
102
static const char * const LANGUAGES[] = {
103
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
104
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
105
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
106
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
107
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
108
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
109
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
110
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
111
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
112
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
113
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
114
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
115
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
116
    "cs",  "csb", "cu",  "cv",  "cy",
117
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
118
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
119
    "dyo", "dyu", "dz",  "dzg",
120
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
121
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
122
    "ext",
123
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
124
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
125
    "frs", "fur", "fy",
126
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
127
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
128
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
129
    "gur", "guz", "gv",  "gwi",
130
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
131
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
132
    "hup", "hy",  "hz",
133
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
134
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
135
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
136
    "jv",
137
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
138
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
139
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
140
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
141
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
142
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
143
    "kv",  "kw",  "ky",
144
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
145
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
146
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
147
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
148
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
149
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
150
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
151
    "ml",  "mn",  "mnc", "mni", "moh", "mos", "mr",  "mrj",
152
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
153
    "my",  "mye", "myv", "mzn",
154
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
155
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
156
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
157
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
158
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
159
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
160
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
161
    "pon", "prg", "pro", "ps",  "pt",
162
    "qu",  "quc", "qug",
163
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
164
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
165
    "rw",  "rwk",
166
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
167
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
168
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
169
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
170
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
171
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
172
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
173
    "sv",  "sw",  "swb", "swc", "syc", "syr", "szl",
174
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
175
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr", "tl",
176
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
177
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
178
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
179
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
180
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
181
    "vot", "vro", "vun",
182
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
183
    "xal", "xh",  "xmf", "xog",
184
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
185
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
186
    "zun", "zxx", "zza",
187
NULL,
188
    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
189
NULL
190
};
191
192
static const char* const DEPRECATED_LANGUAGES[]={
193
    "in", "iw", "ji", "jw", NULL, NULL
194
};
195
static const char* const REPLACEMENT_LANGUAGES[]={
196
    "id", "he", "yi", "jv", NULL, NULL
197
};
198
199
/**
200
 * Table of 3-letter language codes.
201
 *
202
 * This is a lookup table used to convert 3-letter language codes to
203
 * their 2-letter equivalent, where possible.  It must be kept in sync
204
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
205
 * same language as LANGUAGES_3[i].  The commented-out lines are
206
 * copied from LANGUAGES to make eyeballing this baby easier.
207
 *
208
 * Where a 3-letter language code has no 2-letter equivalent, the
209
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
210
 *
211
 * This table should be terminated with a NULL entry, followed by a
212
 * second list, and another NULL entry.  The two lists correspond to
213
 * the two lists in LANGUAGES.
214
 */
215
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
216
/* ISO639 table version is 20150505 */
217
/* Subsequent hand addition of selected languages */
218
static const char * const LANGUAGES_3[] = {
219
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
220
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
221
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
222
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
223
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
224
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
225
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
226
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
227
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
228
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
229
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
230
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
231
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
232
    "ces", "csb", "chu", "chv", "cym",
233
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
234
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
235
    "dyo", "dyu", "dzo", "dzg",
236
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
237
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
238
    "ext",
239
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
240
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
241
    "frs", "fur", "fry",
242
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
243
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
244
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
245
    "gur", "guz", "glv", "gwi",
246
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
247
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
248
    "hup", "hye", "her",
249
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
250
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
251
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
252
    "jav",
253
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
254
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
255
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
256
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
257
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
258
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
259
    "kom", "cor", "kir",
260
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
261
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
262
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
263
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
264
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
265
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
266
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
267
    "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
268
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
269
    "mya", "mye", "myv", "mzn",
270
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
271
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
272
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
273
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
274
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
275
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
276
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
277
    "pon", "prg", "pro", "pus", "por",
278
    "que", "quc", "qug",
279
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
280
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
281
    "kin", "rwk",
282
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
283
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
284
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
285
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
286
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
287
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
288
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
289
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
290
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
291
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
292
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
293
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
294
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
295
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
296
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
297
    "vot", "vro", "vun",
298
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
299
    "xal", "xho", "xmf", "xog",
300
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
301
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
302
    "zun", "zxx", "zza",
303
NULL,
304
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
305
    "ind", "heb", "yid", "jaw", "srp",
306
NULL
307
};
308
309
/**
310
 * Table of 2-letter country codes.
311
 *
312
 * This list must be in sorted order.  This list is returned directly
313
 * to the user by some API.
314
 *
315
 * This list must be kept in sync with COUNTRIES_3, with corresponding
316
 * entries matched.
317
 *
318
 * This table should be terminated with a NULL entry, followed by a
319
 * second list, and another NULL entry.  The first list is visible to
320
 * user code when this array is returned by API.  The second list
321
 * contains codes we support, but do not expose through user API.
322
 *
323
 * Notes:
324
 *
325
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
326
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
327
 * new codes keeping the old ones for compatibility updated to include
328
 * 1999/12/03 revisions *CWB*
329
 *
330
 * RO(ROM) is now RO(ROU) according to
331
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
332
 */
333
static const char * const COUNTRIES[] = {
334
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
335
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
336
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
337
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
338
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
339
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
340
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
341
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
342
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
343
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
344
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
345
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
346
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
347
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
348
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
349
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
350
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
351
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
352
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
353
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
354
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
355
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
356
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
357
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
358
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
359
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
360
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
361
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
362
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
363
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
364
NULL,
365
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
366
NULL
367
};
368
369
static const char* const DEPRECATED_COUNTRIES[] = {
370
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
371
};
372
static const char* const REPLACEMENT_COUNTRIES[] = {
373
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
374
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
375
};
376
377
/**
378
 * Table of 3-letter country codes.
379
 *
380
 * This is a lookup table used to convert 3-letter country codes to
381
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
382
 * For all valid i, COUNTRIES[i] must refer to the same country as
383
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
384
 * to make eyeballing this baby easier.
385
 *
386
 * This table should be terminated with a NULL entry, followed by a
387
 * second list, and another NULL entry.  The two lists correspond to
388
 * the two lists in COUNTRIES.
389
 */
390
static const char * const COUNTRIES_3[] = {
391
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
392
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
393
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
394
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
395
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
396
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
397
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
398
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
399
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
400
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
401
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
402
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
403
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
404
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
405
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
406
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
407
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
408
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
409
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
410
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
411
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
412
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
413
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
414
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
415
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
416
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
417
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
418
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
419
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
420
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
421
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
422
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
423
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
424
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
425
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
426
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
427
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
428
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
429
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
430
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
431
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
432
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
433
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
434
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
435
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
436
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
437
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
438
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
439
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
440
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
441
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
442
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
443
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
444
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
445
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
446
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
447
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
448
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
449
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
450
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
451
NULL,
452
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
453
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
454
NULL
455
};
456
457
typedef struct CanonicalizationMap {
458
    const char *id;          /* input ID */
459
    const char *canonicalID; /* canonicalized output ID */
460
    const char *keyword;     /* keyword, or NULL if none */
461
    const char *value;       /* keyword value, or NULL if kw==NULL */
462
} CanonicalizationMap;
463
464
/**
465
 * A map to canonicalize locale IDs.  This handles a variety of
466
 * different semantic kinds of transformations.
467
 */
468
static const CanonicalizationMap CANONICALIZE_MAP[] = {
469
    { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
470
    { "c",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
471
    { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
472
    { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
473
    { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
474
    { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
475
    { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
476
    { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
477
    { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
478
    { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
479
    { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
480
    { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
481
    { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
482
    { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
483
    { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
484
    { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
485
    { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
486
    { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
487
    { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
488
    { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
489
    { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
490
    { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
491
    { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
492
    { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
493
    { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
494
    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
495
    { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
496
    { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
497
    { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
498
    { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
499
    { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
500
    { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
501
    { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
502
    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
503
    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
504
    { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
505
    { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
506
    { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
507
    { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
508
    { "zh_GAN",         "gan", NULL, NULL }, /* registered name */
509
    { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
510
    { "zh_HAKKA",       "hak", NULL, NULL }, /* registered name */
511
    { "zh_MIN_NAN",     "nan", NULL, NULL }, /* registered name */
512
    { "zh_WUU",         "wuu", NULL, NULL }, /* registered name */
513
    { "zh_XIANG",       "hsn", NULL, NULL }, /* registered name */
514
    { "zh_YUE",         "yue", NULL, NULL }, /* registered name */
515
};
516
517
typedef struct VariantMap {
518
    const char *variant;          /* input ID */
519
    const char *keyword;     /* keyword, or NULL if none */
520
    const char *value;       /* keyword value, or NULL if kw==NULL */
521
} VariantMap;
522
523
static const VariantMap VARIANT_MAP[] = {
524
    { "EURO",   "currency", "EUR" },
525
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
526
    { "STROKE", "collation", "stroke" }  /* Solaris variant */
527
};
528
529
/* ### BCP47 Conversion *******************************************/
530
/* Test if the locale id has BCP47 u extension and does not have '@' */
531
0
#define _hasBCP47Extension(id) (id && uprv_strstr(id, "@") == NULL && getShortestSubtagLength(localeID) == 1)
532
/* Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails */
533
#define _ConvertBCP47(finalID, id, buffer, length,err) \
534
0
        if (uloc_forLanguageTag(id, buffer, length, NULL, err) <= 0 ||  \
535
0
                U_FAILURE(*err) || *err == U_STRING_NOT_TERMINATED_WARNING) { \
536
0
            finalID=id; \
537
0
            if (*err == U_STRING_NOT_TERMINATED_WARNING) { *err = U_BUFFER_OVERFLOW_ERROR; } \
538
0
        } else { \
539
0
            finalID=buffer; \
540
0
        }
541
/* Gets the size of the shortest subtag in the given localeID. */
542
0
static int32_t getShortestSubtagLength(const char *localeID) {
543
0
    int32_t localeIDLength = static_cast<int32_t>(uprv_strlen(localeID));
544
0
    int32_t length = localeIDLength;
545
0
    int32_t tmpLength = 0;
546
0
    int32_t i;
547
0
    UBool reset = TRUE;
548
549
0
    for (i = 0; i < localeIDLength; i++) {
550
0
        if (localeID[i] != '_' && localeID[i] != '-') {
551
0
            if (reset) {
552
0
                tmpLength = 0;
553
0
                reset = FALSE;
554
0
            }
555
0
            tmpLength++;
556
0
        } else {
557
0
            if (tmpLength != 0 && tmpLength < length) {
558
0
                length = tmpLength;
559
0
            }
560
0
            reset = TRUE;
561
0
        }
562
0
    }
563
564
0
    return length;
565
0
}
566
567
/* ### Keywords **************************************************/
568
0
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
569
0
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
570
/* Punctuation/symbols allowed in legacy key values */
571
0
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')
572
573
0
#define ULOC_KEYWORD_BUFFER_LEN 25
574
0
#define ULOC_MAX_NO_KEYWORDS 25
575
576
U_CAPI const char * U_EXPORT2
577
0
locale_getKeywordsStart(const char *localeID) {
578
0
    const char *result = NULL;
579
0
    if((result = uprv_strchr(localeID, '@')) != NULL) {
580
0
        return result;
581
0
    }
582
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
583
    else {
584
        /* We do this because the @ sign is variant, and the @ sign used on one
585
        EBCDIC machine won't be compiled the same way on other EBCDIC based
586
        machines. */
587
        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
588
        const uint8_t *charToFind = ebcdicSigns;
589
        while(*charToFind) {
590
            if((result = uprv_strchr(localeID, *charToFind)) != NULL) {
591
                return result;
592
            }
593
            charToFind++;
594
        }
595
    }
596
#endif
597
0
    return NULL;
598
0
}
599
600
/**
601
 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
602
 * @param keywordName incoming name to be canonicalized
603
 * @param status return status (keyword too long)
604
 * @return length of the keyword name
605
 */
606
static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
607
0
{
608
0
  int32_t keywordNameLen = 0;
609
610
0
  for (; *keywordName != 0; keywordName++) {
611
0
    if (!UPRV_ISALPHANUM(*keywordName)) {
612
0
      *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
613
0
      return 0;
614
0
    }
615
0
    if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
616
0
      buf[keywordNameLen++] = uprv_tolower(*keywordName);
617
0
    } else {
618
      /* keyword name too long for internal buffer */
619
0
      *status = U_INTERNAL_PROGRAM_ERROR;
620
0
      return 0;
621
0
    }
622
0
  }
623
0
  if (keywordNameLen == 0) {
624
0
    *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
625
0
    return 0;
626
0
  }
627
0
  buf[keywordNameLen] = 0; /* terminate */
628
629
0
  return keywordNameLen;
630
0
}
631
632
typedef struct {
633
    char keyword[ULOC_KEYWORD_BUFFER_LEN];
634
    int32_t keywordLen;
635
    const char *valueStart;
636
    int32_t valueLen;
637
} KeywordStruct;
638
639
static int32_t U_CALLCONV
640
0
compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
641
0
    const char* leftString = ((const KeywordStruct *)left)->keyword;
642
0
    const char* rightString = ((const KeywordStruct *)right)->keyword;
643
0
    return uprv_strcmp(leftString, rightString);
644
0
}
645
646
/**
647
 * Both addKeyword and addValue must already be in canonical form.
648
 * Either both addKeyword and addValue are NULL, or neither is NULL.
649
 * If they are not NULL they must be zero terminated.
650
 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
651
 */
652
static int32_t
653
_getKeywords(const char *localeID,
654
             char prev,
655
             char *keywords, int32_t keywordCapacity,
656
             char *values, int32_t valuesCapacity, int32_t *valLen,
657
             UBool valuesToo,
658
             const char* addKeyword,
659
             const char* addValue,
660
             UErrorCode *status)
661
0
{
662
0
    KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
663
664
0
    int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
665
0
    int32_t numKeywords = 0;
666
0
    const char* pos = localeID;
667
0
    const char* equalSign = NULL;
668
0
    const char* semicolon = NULL;
669
0
    int32_t i = 0, j, n;
670
0
    int32_t keywordsLen = 0;
671
0
    int32_t valuesLen = 0;
672
673
0
    if(prev == '@') { /* start of keyword definition */
674
        /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
675
0
        do {
676
0
            UBool duplicate = FALSE;
677
            /* skip leading spaces */
678
0
            while(*pos == ' ') {
679
0
                pos++;
680
0
            }
681
0
            if (!*pos) { /* handle trailing "; " */
682
0
                break;
683
0
            }
684
0
            if(numKeywords == maxKeywords) {
685
0
                *status = U_INTERNAL_PROGRAM_ERROR;
686
0
                return 0;
687
0
            }
688
0
            equalSign = uprv_strchr(pos, '=');
689
0
            semicolon = uprv_strchr(pos, ';');
690
            /* lack of '=' [foo@currency] is illegal */
691
            /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
692
0
            if(!equalSign || (semicolon && semicolon<equalSign)) {
693
0
                *status = U_INVALID_FORMAT_ERROR;
694
0
                return 0;
695
0
            }
696
            /* need to normalize both keyword and keyword name */
697
0
            if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
698
                /* keyword name too long for internal buffer */
699
0
                *status = U_INTERNAL_PROGRAM_ERROR;
700
0
                return 0;
701
0
            }
702
0
            for(i = 0, n = 0; i < equalSign - pos; ++i) {
703
0
                if (pos[i] != ' ') {
704
0
                    keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
705
0
                }
706
0
            }
707
708
            /* zero-length keyword is an error. */
709
0
            if (n == 0) {
710
0
                *status = U_INVALID_FORMAT_ERROR;
711
0
                return 0;
712
0
            }
713
714
0
            keywordList[numKeywords].keyword[n] = 0;
715
0
            keywordList[numKeywords].keywordLen = n;
716
            /* now grab the value part. First we skip the '=' */
717
0
            equalSign++;
718
            /* then we leading spaces */
719
0
            while(*equalSign == ' ') {
720
0
                equalSign++;
721
0
            }
722
723
            /* Premature end or zero-length value */
724
0
            if (!*equalSign || equalSign == semicolon) {
725
0
                *status = U_INVALID_FORMAT_ERROR;
726
0
                return 0;
727
0
            }
728
729
0
            keywordList[numKeywords].valueStart = equalSign;
730
731
0
            pos = semicolon;
732
0
            i = 0;
733
0
            if(pos) {
734
0
                while(*(pos - i - 1) == ' ') {
735
0
                    i++;
736
0
                }
737
0
                keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
738
0
                pos++;
739
0
            } else {
740
0
                i = (int32_t)uprv_strlen(equalSign);
741
0
                while(i && equalSign[i-1] == ' ') {
742
0
                    i--;
743
0
                }
744
0
                keywordList[numKeywords].valueLen = i;
745
0
            }
746
            /* If this is a duplicate keyword, then ignore it */
747
0
            for (j=0; j<numKeywords; ++j) {
748
0
                if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
749
0
                    duplicate = TRUE;
750
0
                    break;
751
0
                }
752
0
            }
753
0
            if (!duplicate) {
754
0
                ++numKeywords;
755
0
            }
756
0
        } while(pos);
757
758
        /* Handle addKeyword/addValue. */
759
0
        if (addKeyword != NULL) {
760
0
            UBool duplicate = FALSE;
761
0
            U_ASSERT(addValue != NULL);
762
            /* Search for duplicate; if found, do nothing. Explicit keyword
763
               overrides addKeyword. */
764
0
            for (j=0; j<numKeywords; ++j) {
765
0
                if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
766
0
                    duplicate = TRUE;
767
0
                    break;
768
0
                }
769
0
            }
770
0
            if (!duplicate) {
771
0
                if (numKeywords == maxKeywords) {
772
0
                    *status = U_INTERNAL_PROGRAM_ERROR;
773
0
                    return 0;
774
0
                }
775
0
                uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
776
0
                keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
777
0
                keywordList[numKeywords].valueStart = addValue;
778
0
                keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
779
0
                ++numKeywords;
780
0
            }
781
0
        } else {
782
0
            U_ASSERT(addValue == NULL);
783
0
        }
784
785
        /* now we have a list of keywords */
786
        /* we need to sort it */
787
0
        uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
788
789
        /* Now construct the keyword part */
790
0
        for(i = 0; i < numKeywords; i++) {
791
0
            if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
792
0
                uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
793
0
                if(valuesToo) {
794
0
                    keywords[keywordsLen + keywordList[i].keywordLen] = '=';
795
0
                } else {
796
0
                    keywords[keywordsLen + keywordList[i].keywordLen] = 0;
797
0
                }
798
0
            }
799
0
            keywordsLen += keywordList[i].keywordLen + 1;
800
0
            if(valuesToo) {
801
0
                if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
802
0
                    uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
803
0
                }
804
0
                keywordsLen += keywordList[i].valueLen;
805
806
0
                if(i < numKeywords - 1) {
807
0
                    if(keywordsLen < keywordCapacity) {
808
0
                        keywords[keywordsLen] = ';';
809
0
                    }
810
0
                    keywordsLen++;
811
0
                }
812
0
            }
813
0
            if(values) {
814
0
                if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
815
0
                    uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
816
0
                    values[valuesLen + keywordList[i].valueLen] = 0;
817
0
                }
818
0
                valuesLen += keywordList[i].valueLen + 1;
819
0
            }
820
0
        }
821
0
        if(values) {
822
0
            values[valuesLen] = 0;
823
0
            if(valLen) {
824
0
                *valLen = valuesLen;
825
0
            }
826
0
        }
827
0
        return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
828
0
    } else {
829
0
        return 0;
830
0
    }
831
0
}
832
833
U_CFUNC int32_t
834
locale_getKeywords(const char *localeID,
835
                   char prev,
836
                   char *keywords, int32_t keywordCapacity,
837
                   char *values, int32_t valuesCapacity, int32_t *valLen,
838
                   UBool valuesToo,
839
0
                   UErrorCode *status) {
840
0
    return _getKeywords(localeID, prev, keywords, keywordCapacity,
841
0
                        values, valuesCapacity, valLen, valuesToo,
842
0
                        NULL, NULL, status);
843
0
}
844
845
U_CAPI int32_t U_EXPORT2
846
uloc_getKeywordValue(const char* localeID,
847
                     const char* keywordName,
848
                     char* buffer, int32_t bufferCapacity,
849
                     UErrorCode* status)
850
0
{
851
0
    const char* startSearchHere = NULL;
852
0
    const char* nextSeparator = NULL;
853
0
    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
854
0
    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
855
0
    int32_t result = 0;
856
857
0
    if(status && U_SUCCESS(*status) && localeID) {
858
0
      char tempBuffer[ULOC_FULLNAME_CAPACITY];
859
0
      const char* tmpLocaleID;
860
861
0
      if (keywordName == NULL || keywordName[0] == 0) {
862
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
863
0
        return 0;
864
0
      }
865
866
0
      locale_canonKeywordName(keywordNameBuffer, keywordName, status);
867
0
      if(U_FAILURE(*status)) {
868
0
        return 0;
869
0
      }
870
871
0
      if (_hasBCP47Extension(localeID)) {
872
0
          _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
873
0
      } else {
874
0
          tmpLocaleID=localeID;
875
0
      }
876
877
0
      startSearchHere = locale_getKeywordsStart(tmpLocaleID);
878
0
      if(startSearchHere == NULL) {
879
          /* no keywords, return at once */
880
0
          return 0;
881
0
      }
882
883
      /* find the first keyword */
884
0
      while(startSearchHere) {
885
0
          const char* keyValueTail;
886
0
          int32_t keyValueLen;
887
888
0
          startSearchHere++; /* skip @ or ; */
889
0
          nextSeparator = uprv_strchr(startSearchHere, '=');
890
0
          if(!nextSeparator) {
891
0
              *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
892
0
              return 0;
893
0
          }
894
          /* strip leading & trailing spaces (TC decided to tolerate these) */
895
0
          while(*startSearchHere == ' ') {
896
0
              startSearchHere++;
897
0
          }
898
0
          keyValueTail = nextSeparator;
899
0
          while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
900
0
              keyValueTail--;
901
0
          }
902
          /* now keyValueTail points to first char after the keyName */
903
          /* copy & normalize keyName from locale */
904
0
          if (startSearchHere == keyValueTail) {
905
0
              *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
906
0
              return 0;
907
0
          }
908
0
          keyValueLen = 0;
909
0
          while (startSearchHere < keyValueTail) {
910
0
            if (!UPRV_ISALPHANUM(*startSearchHere)) {
911
0
              *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
912
0
              return 0;
913
0
            }
914
0
            if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
915
0
              localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
916
0
            } else {
917
              /* keyword name too long for internal buffer */
918
0
              *status = U_INTERNAL_PROGRAM_ERROR;
919
0
              return 0;
920
0
            }
921
0
          }
922
0
          localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
923
924
0
          startSearchHere = uprv_strchr(nextSeparator, ';');
925
926
0
          if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
927
               /* current entry matches the keyword. */
928
0
             nextSeparator++; /* skip '=' */
929
              /* First strip leading & trailing spaces (TC decided to tolerate these) */
930
0
              while(*nextSeparator == ' ') {
931
0
                nextSeparator++;
932
0
              }
933
0
              keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
934
0
              while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
935
0
                keyValueTail--;
936
0
              }
937
              /* Now copy the value, but check well-formedness */
938
0
              if (nextSeparator == keyValueTail) {
939
0
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
940
0
                return 0;
941
0
              }
942
0
              keyValueLen = 0;
943
0
              while (nextSeparator < keyValueTail) {
944
0
                if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
945
0
                  *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
946
0
                  return 0;
947
0
                }
948
0
                if (keyValueLen < bufferCapacity) {
949
                  /* Should we lowercase value to return here? Tests expect as-is. */
950
0
                  buffer[keyValueLen++] = *nextSeparator++;
951
0
                } else { /* keep advancing so we return correct length in case of overflow */
952
0
                  keyValueLen++;
953
0
                  nextSeparator++;
954
0
                }
955
0
              }
956
0
              result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
957
0
              return result;
958
0
          }
959
0
      }
960
0
    }
961
0
    return 0;
962
0
}
963
964
U_CAPI int32_t U_EXPORT2
965
uloc_setKeywordValue(const char* keywordName,
966
                     const char* keywordValue,
967
                     char* buffer, int32_t bufferCapacity,
968
                     UErrorCode* status)
969
0
{
970
    /* TODO: sorting. removal. */
971
0
    int32_t keywordNameLen;
972
0
    int32_t keywordValueLen;
973
0
    int32_t bufLen;
974
0
    int32_t needLen = 0;
975
0
    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
976
0
    char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
977
0
    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
978
0
    int32_t rc;
979
0
    char* nextSeparator = NULL;
980
0
    char* nextEqualsign = NULL;
981
0
    char* startSearchHere = NULL;
982
0
    char* keywordStart = NULL;
983
0
    CharString updatedKeysAndValues;
984
0
    int32_t updatedKeysAndValuesLen;
985
0
    UBool handledInputKeyAndValue = FALSE;
986
0
    char keyValuePrefix = '@';
987
988
0
    if(U_FAILURE(*status)) {
989
0
        return -1;
990
0
    }
991
0
    if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
992
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
993
0
        return 0;
994
0
    }
995
0
    bufLen = (int32_t)uprv_strlen(buffer);
996
0
    if(bufferCapacity<bufLen) {
997
        /* The capacity is less than the length?! Is this NULL terminated? */
998
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
999
0
        return 0;
1000
0
    }
1001
0
    keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
1002
0
    if(U_FAILURE(*status)) {
1003
0
        return 0;
1004
0
    }
1005
1006
0
    keywordValueLen = 0;
1007
0
    if(keywordValue) {
1008
0
        while (*keywordValue != 0) {
1009
0
            if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
1010
0
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
1011
0
                return 0;
1012
0
            }
1013
0
            if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
1014
                /* Should we force lowercase in value to set? */
1015
0
                keywordValueBuffer[keywordValueLen++] = *keywordValue++;
1016
0
            } else {
1017
                /* keywordValue too long for internal buffer */
1018
0
                *status = U_INTERNAL_PROGRAM_ERROR;
1019
0
                return 0;
1020
0
            }
1021
0
        }
1022
0
    }
1023
0
    keywordValueBuffer[keywordValueLen] = 0; /* terminate */
1024
1025
0
    startSearchHere = (char*)locale_getKeywordsStart(buffer);
1026
0
    if(startSearchHere == NULL || (startSearchHere[1]==0)) {
1027
0
        if(keywordValueLen == 0) { /* no keywords = nothing to remove */
1028
0
            return bufLen;
1029
0
        }
1030
1031
0
        needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1032
0
        if(startSearchHere) { /* had a single @ */
1033
0
            needLen--; /* already had the @ */
1034
            /* startSearchHere points at the @ */
1035
0
        } else {
1036
0
            startSearchHere=buffer+bufLen;
1037
0
        }
1038
0
        if(needLen >= bufferCapacity) {
1039
0
            *status = U_BUFFER_OVERFLOW_ERROR;
1040
0
            return needLen; /* no change */
1041
0
        }
1042
0
        *startSearchHere++ = '@';
1043
0
        uprv_strcpy(startSearchHere, keywordNameBuffer);
1044
0
        startSearchHere += keywordNameLen;
1045
0
        *startSearchHere++ = '=';
1046
0
        uprv_strcpy(startSearchHere, keywordValueBuffer);
1047
0
        return needLen;
1048
0
    } /* end shortcut - no @ */
1049
1050
0
    keywordStart = startSearchHere;
1051
    /* search for keyword */
1052
0
    while(keywordStart) {
1053
0
        const char* keyValueTail;
1054
0
        int32_t keyValueLen;
1055
1056
0
        keywordStart++; /* skip @ or ; */
1057
0
        nextEqualsign = uprv_strchr(keywordStart, '=');
1058
0
        if (!nextEqualsign) {
1059
0
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
1060
0
            return 0;
1061
0
        }
1062
        /* strip leading & trailing spaces (TC decided to tolerate these) */
1063
0
        while(*keywordStart == ' ') {
1064
0
            keywordStart++;
1065
0
        }
1066
0
        keyValueTail = nextEqualsign;
1067
0
        while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
1068
0
            keyValueTail--;
1069
0
        }
1070
        /* now keyValueTail points to first char after the keyName */
1071
        /* copy & normalize keyName from locale */
1072
0
        if (keywordStart == keyValueTail) {
1073
0
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
1074
0
            return 0;
1075
0
        }
1076
0
        keyValueLen = 0;
1077
0
        while (keywordStart < keyValueTail) {
1078
0
            if (!UPRV_ISALPHANUM(*keywordStart)) {
1079
0
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
1080
0
                return 0;
1081
0
            }
1082
0
            if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
1083
0
                localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
1084
0
            } else {
1085
                /* keyword name too long for internal buffer */
1086
0
                *status = U_INTERNAL_PROGRAM_ERROR;
1087
0
                return 0;
1088
0
            }
1089
0
        }
1090
0
        localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
1091
1092
0
        nextSeparator = uprv_strchr(nextEqualsign, ';');
1093
1094
        /* start processing the value part */
1095
0
        nextEqualsign++; /* skip '=' */
1096
        /* First strip leading & trailing spaces (TC decided to tolerate these) */
1097
0
        while(*nextEqualsign == ' ') {
1098
0
            nextEqualsign++;
1099
0
        }
1100
0
        keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
1101
0
        while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
1102
0
            keyValueTail--;
1103
0
        }
1104
0
        if (nextEqualsign == keyValueTail) {
1105
0
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1106
0
            return 0;
1107
0
        }
1108
1109
0
        rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1110
0
        if(rc == 0) {
1111
            /* Current entry matches the input keyword. Update the entry */
1112
0
            if(keywordValueLen > 0) { /* updating a value */
1113
0
                updatedKeysAndValues.append(keyValuePrefix, *status);
1114
0
                keyValuePrefix = ';'; /* for any subsequent key-value pair */
1115
0
                updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1116
0
                updatedKeysAndValues.append('=', *status);
1117
0
                updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1118
0
            } /* else removing this entry, don't emit anything */
1119
0
            handledInputKeyAndValue = TRUE;
1120
0
        } else {
1121
           /* input keyword sorts earlier than current entry, add before current entry */
1122
0
            if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1123
                /* insert new entry at this location */
1124
0
                updatedKeysAndValues.append(keyValuePrefix, *status);
1125
0
                keyValuePrefix = ';'; /* for any subsequent key-value pair */
1126
0
                updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1127
0
                updatedKeysAndValues.append('=', *status);
1128
0
                updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1129
0
                handledInputKeyAndValue = TRUE;
1130
0
            }
1131
            /* copy the current entry */
1132
0
            updatedKeysAndValues.append(keyValuePrefix, *status);
1133
0
            keyValuePrefix = ';'; /* for any subsequent key-value pair */
1134
0
            updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1135
0
            updatedKeysAndValues.append('=', *status);
1136
0
            updatedKeysAndValues.append(nextEqualsign, keyValueTail-nextEqualsign, *status);
1137
0
        }
1138
0
        if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1139
            /* append new entry at the end, it sorts later than existing entries */
1140
0
            updatedKeysAndValues.append(keyValuePrefix, *status);
1141
            /* skip keyValuePrefix update, no subsequent key-value pair */
1142
0
            updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1143
0
            updatedKeysAndValues.append('=', *status);
1144
0
            updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1145
0
            handledInputKeyAndValue = TRUE;
1146
0
        }
1147
0
        keywordStart = nextSeparator;
1148
0
    } /* end loop searching */
1149
1150
    /* Any error from updatedKeysAndValues.append above would be internal and not due to
1151
     * problems with the passed-in locale. So if we did encounter problems with the
1152
     * passed-in locale above, those errors took precedence and overrode any error
1153
     * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1154
     * are errors here they are from updatedKeysAndValues.append; they do cause an
1155
     * error return but the passed-in locale is unmodified and the original bufLen is
1156
     * returned.
1157
     */
1158
0
    if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1159
        /* if input key/value specified removal of a keyword not present in locale, or
1160
         * there was an error in CharString.append, leave original locale alone. */
1161
0
        return bufLen;
1162
0
    }
1163
1164
0
    updatedKeysAndValuesLen = updatedKeysAndValues.length();
1165
    /* needLen = length of the part before '@' + length of updated key-value part including '@' */
1166
0
    needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
1167
0
    if(needLen >= bufferCapacity) {
1168
0
        *status = U_BUFFER_OVERFLOW_ERROR;
1169
0
        return needLen; /* no change */
1170
0
    }
1171
0
    if (updatedKeysAndValuesLen > 0) {
1172
0
        uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
1173
0
    }
1174
0
    buffer[needLen]=0;
1175
0
    return needLen;
1176
0
}
1177
1178
/* ### ID parsing implementation **************************************************/
1179
1180
92.7k
#define _isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))
1181
1182
/*returns TRUE if one of the special prefixes is here (s=string)
1183
  'x-' or 'i-' */
1184
46.3k
#define _isIDPrefix(s) (_isPrefixLetter(s[0])&&_isIDSeparator(s[1]))
1185
1186
/* Dot terminates it because of POSIX form  where dot precedes the codepage
1187
 * except for variant
1188
 */
1189
521k
#define _isTerminator(a)  ((a==0)||(a=='.')||(a=='@'))
1190
1191
0
static char* _strnchr(const char* str, int32_t len, char c) {
1192
0
    U_ASSERT(str != 0 && len >= 0);
1193
0
    while (len-- != 0) {
1194
0
        char d = *str;
1195
0
        if (d == c) {
1196
0
            return (char*) str;
1197
0
        } else if (d == 0) {
1198
0
            break;
1199
0
        }
1200
0
        ++str;
1201
0
    }
1202
0
    return NULL;
1203
0
}
1204
1205
/**
1206
 * Lookup 'key' in the array 'list'.  The array 'list' should contain
1207
 * a NULL entry, followed by more entries, and a second NULL entry.
1208
 *
1209
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1210
 * COUNTRIES_3.
1211
 */
1212
static int16_t _findIndex(const char* const* list, const char* key)
1213
1.55k
{
1214
1.55k
    const char* const* anchor = list;
1215
1.55k
    int32_t pass = 0;
1216
1217
    /* Make two passes through two NULL-terminated arrays at 'list' */
1218
2.44k
    while (pass++ < 2) {
1219
516k
        while (*list) {
1220
515k
            if (uprv_strcmp(key, *list) == 0) {
1221
1.11k
                return (int16_t)(list - anchor);
1222
1.11k
            }
1223
514k
            list++;
1224
514k
        }
1225
886
        ++list;     /* skip final NULL *CWB*/
1226
886
    }
1227
443
    return -1;
1228
1.55k
}
1229
1230
/* count the length of src while copying it to dest; return strlen(src) */
1231
static inline int32_t
1232
1.11k
_copyCount(char *dest, int32_t destCapacity, const char *src) {
1233
1.11k
    const char *anchor;
1234
1.11k
    char c;
1235
1236
1.11k
    anchor=src;
1237
2.22k
    for(;;) {
1238
2.22k
        if((c=*src)==0) {
1239
371
            return (int32_t)(src-anchor);
1240
371
        }
1241
1.85k
        if(destCapacity<=0) {
1242
742
            return (int32_t)((src-anchor)+uprv_strlen(src));
1243
742
        }
1244
1.11k
        ++src;
1245
1.11k
        *dest++=c;
1246
1.11k
        --destCapacity;
1247
1.11k
    }
1248
1.11k
}
1249
1250
U_CFUNC const char*
1251
0
uloc_getCurrentCountryID(const char* oldID){
1252
0
    int32_t offset = _findIndex(DEPRECATED_COUNTRIES, oldID);
1253
0
    if (offset >= 0) {
1254
0
        return REPLACEMENT_COUNTRIES[offset];
1255
0
    }
1256
0
    return oldID;
1257
0
}
1258
U_CFUNC const char*
1259
0
uloc_getCurrentLanguageID(const char* oldID){
1260
0
    int32_t offset = _findIndex(DEPRECATED_LANGUAGES, oldID);
1261
0
    if (offset >= 0) {
1262
0
        return REPLACEMENT_LANGUAGES[offset];
1263
0
    }
1264
0
    return oldID;
1265
0
}
1266
/*
1267
 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1268
 * avoid duplicating code to handle the earlier locale ID pieces
1269
 * in the functions for the later ones by
1270
 * setting the *pEnd pointer to where they stopped parsing
1271
 *
1272
 * TODO try to use this in Locale
1273
 */
1274
U_CFUNC int32_t
1275
ulocimp_getLanguage(const char *localeID,
1276
                    char *language, int32_t languageCapacity,
1277
46.3k
                    const char **pEnd) {
1278
46.3k
    int32_t i=0;
1279
46.3k
    int32_t offset;
1280
46.3k
    char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */
1281
1282
    /* if it starts with i- or x- then copy that prefix */
1283
46.3k
    if(_isIDPrefix(localeID)) {
1284
0
        if(i<languageCapacity) {
1285
0
            language[i]=(char)uprv_tolower(*localeID);
1286
0
        }
1287
0
        if(i<languageCapacity) {
1288
0
            language[i+1]='-';
1289
0
        }
1290
0
        i+=2;
1291
0
        localeID+=2;
1292
0
    }
1293
1294
    /* copy the language as far as possible and count its length */
1295
165k
    while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
1296
118k
        if(i<languageCapacity) {
1297
39.6k
            language[i]=(char)uprv_tolower(*localeID);
1298
39.6k
        }
1299
118k
        if(i<3) {
1300
106k
            U_ASSERT(i>=0);
1301
106k
            lang[i]=(char)uprv_tolower(*localeID);
1302
106k
        }
1303
118k
        i++;
1304
118k
        localeID++;
1305
118k
    }
1306
1307
46.3k
    if(i==3) {
1308
        /* convert 3 character code to 2 character code if possible *CWB*/
1309
1.39k
        offset=_findIndex(LANGUAGES_3, lang);
1310
1.39k
        if(offset>=0) {
1311
1.11k
            i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
1312
1.11k
        }
1313
1.39k
    }
1314
1315
46.3k
    if(pEnd!=NULL) {
1316
30.9k
        *pEnd=localeID;
1317
30.9k
    }
1318
46.3k
    return i;
1319
46.3k
}
1320
1321
U_CFUNC int32_t
1322
ulocimp_getScript(const char *localeID,
1323
                  char *script, int32_t scriptCapacity,
1324
                  const char **pEnd)
1325
20.9k
{
1326
20.9k
    int32_t idLen = 0;
1327
1328
20.9k
    if (pEnd != NULL) {
1329
10.4k
        *pEnd = localeID;
1330
10.4k
    }
1331
1332
    /* copy the second item as far as possible and count its length */
1333
63.9k
    while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])
1334
43.3k
            && uprv_isASCIILetter(localeID[idLen])) {
1335
43.0k
        idLen++;
1336
43.0k
    }
1337
1338
    /* If it's exactly 4 characters long, then it's a script and not a country. */
1339
20.9k
    if (idLen == 4) {
1340
906
        int32_t i;
1341
906
        if (pEnd != NULL) {
1342
453
            *pEnd = localeID+idLen;
1343
453
        }
1344
906
        if(idLen > scriptCapacity) {
1345
453
            idLen = scriptCapacity;
1346
453
        }
1347
906
        if (idLen >= 1) {
1348
453
            script[0]=(char)uprv_toupper(*(localeID++));
1349
453
        }
1350
2.26k
        for (i = 1; i < idLen; i++) {
1351
1.35k
            script[i]=(char)uprv_tolower(*(localeID++));
1352
1.35k
        }
1353
906
    }
1354
20.0k
    else {
1355
20.0k
        idLen = 0;
1356
20.0k
    }
1357
20.9k
    return idLen;
1358
20.9k
}
1359
1360
U_CFUNC int32_t
1361
ulocimp_getCountry(const char *localeID,
1362
                   char *country, int32_t countryCapacity,
1363
                   const char **pEnd)
1364
10.3k
{
1365
10.3k
    int32_t idLen=0;
1366
10.3k
    char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
1367
10.3k
    int32_t offset;
1368
1369
    /* copy the country as far as possible and count its length */
1370
31.3k
    while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
1371
20.9k
        if(idLen<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
1372
20.9k
            cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
1373
20.9k
        }
1374
20.9k
        idLen++;
1375
20.9k
    }
1376
1377
    /* the country should be either length 2 or 3 */
1378
10.3k
    if (idLen == 2 || idLen == 3) {
1379
10.3k
        UBool gotCountry = FALSE;
1380
        /* convert 3 character code to 2 character code if possible *CWB*/
1381
10.3k
        if(idLen==3) {
1382
161
            offset=_findIndex(COUNTRIES_3, cnty);
1383
161
            if(offset>=0) {
1384
0
                idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
1385
0
                gotCountry = TRUE;
1386
0
            }
1387
161
        }
1388
10.3k
        if (!gotCountry) {
1389
10.3k
            int32_t i = 0;
1390
31.3k
            for (i = 0; i < idLen; i++) {
1391
20.9k
                if (i < countryCapacity) {
1392
20.9k
                    country[i]=(char)uprv_toupper(localeID[i]);
1393
20.9k
                }
1394
20.9k
            }
1395
10.3k
        }
1396
10.3k
        localeID+=idLen;
1397
10.3k
    } else {
1398
0
        idLen = 0;
1399
0
    }
1400
1401
10.3k
    if(pEnd!=NULL) {
1402
0
        *pEnd=localeID;
1403
0
    }
1404
1405
10.3k
    return idLen;
1406
10.3k
}
1407
1408
/**
1409
 * @param needSeparator if true, then add leading '_' if any variants
1410
 * are added to 'variant'
1411
 */
1412
static int32_t
1413
_getVariantEx(const char *localeID,
1414
              char prev,
1415
              char *variant, int32_t variantCapacity,
1416
0
              UBool needSeparator) {
1417
0
    int32_t i=0;
1418
1419
    /* get one or more variant tags and separate them with '_' */
1420
0
    if(_isIDSeparator(prev)) {
1421
        /* get a variant string after a '-' or '_' */
1422
0
        while(!_isTerminator(*localeID)) {
1423
0
            if (needSeparator) {
1424
0
                if (i<variantCapacity) {
1425
0
                    variant[i] = '_';
1426
0
                }
1427
0
                ++i;
1428
0
                needSeparator = FALSE;
1429
0
            }
1430
0
            if(i<variantCapacity) {
1431
0
                variant[i]=(char)uprv_toupper(*localeID);
1432
0
                if(variant[i]=='-') {
1433
0
                    variant[i]='_';
1434
0
                }
1435
0
            }
1436
0
            i++;
1437
0
            localeID++;
1438
0
        }
1439
0
    }
1440
1441
    /* if there is no variant tag after a '-' or '_' then look for '@' */
1442
0
    if(i==0) {
1443
0
        if(prev=='@') {
1444
            /* keep localeID */
1445
0
        } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1446
0
            ++localeID; /* point after the '@' */
1447
0
        } else {
1448
0
            return 0;
1449
0
        }
1450
0
        while(!_isTerminator(*localeID)) {
1451
0
            if (needSeparator) {
1452
0
                if (i<variantCapacity) {
1453
0
                    variant[i] = '_';
1454
0
                }
1455
0
                ++i;
1456
0
                needSeparator = FALSE;
1457
0
            }
1458
0
            if(i<variantCapacity) {
1459
0
                variant[i]=(char)uprv_toupper(*localeID);
1460
0
                if(variant[i]=='-' || variant[i]==',') {
1461
0
                    variant[i]='_';
1462
0
                }
1463
0
            }
1464
0
            i++;
1465
0
            localeID++;
1466
0
        }
1467
0
    }
1468
1469
0
    return i;
1470
0
}
1471
1472
static int32_t
1473
_getVariant(const char *localeID,
1474
            char prev,
1475
0
            char *variant, int32_t variantCapacity) {
1476
0
    return _getVariantEx(localeID, prev, variant, variantCapacity, FALSE);
1477
0
}
1478
1479
/**
1480
 * Delete ALL instances of a variant from the given list of one or
1481
 * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1482
 * @param variants the source string of one or more variants,
1483
 * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
1484
 * terminated; if it is, trailing zero will NOT be maintained.
1485
 * @param variantsLen length of variants
1486
 * @param toDelete variant to delete, without separators, e.g.  "EURO"
1487
 * or "PREEURO"; not zero terminated
1488
 * @param toDeleteLen length of toDelete
1489
 * @return number of characters deleted from variants
1490
 */
1491
static int32_t
1492
_deleteVariant(char* variants, int32_t variantsLen,
1493
               const char* toDelete, int32_t toDeleteLen)
1494
0
{
1495
0
    int32_t delta = 0; /* number of chars deleted */
1496
0
    for (;;) {
1497
0
        UBool flag = FALSE;
1498
0
        if (variantsLen < toDeleteLen) {
1499
0
            return delta;
1500
0
        }
1501
0
        if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1502
0
            (variantsLen == toDeleteLen ||
1503
0
             (flag=(variants[toDeleteLen] == '_'))))
1504
0
        {
1505
0
            int32_t d = toDeleteLen + (flag?1:0);
1506
0
            variantsLen -= d;
1507
0
            delta += d;
1508
0
            if (variantsLen > 0) {
1509
0
                uprv_memmove(variants, variants+d, variantsLen);
1510
0
            }
1511
0
        } else {
1512
0
            char* p = _strnchr(variants, variantsLen, '_');
1513
0
            if (p == NULL) {
1514
0
                return delta;
1515
0
            }
1516
0
            ++p;
1517
0
            variantsLen -= (int32_t)(p - variants);
1518
0
            variants = p;
1519
0
        }
1520
0
    }
1521
0
}
1522
1523
/* Keyword enumeration */
1524
1525
typedef struct UKeywordsContext {
1526
    char* keywords;
1527
    char* current;
1528
} UKeywordsContext;
1529
1530
U_CDECL_BEGIN
1531
1532
static void U_CALLCONV
1533
0
uloc_kw_closeKeywords(UEnumeration *enumerator) {
1534
0
    uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1535
0
    uprv_free(enumerator->context);
1536
0
    uprv_free(enumerator);
1537
0
}
1538
1539
static int32_t U_CALLCONV
1540
0
uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
1541
0
    char *kw = ((UKeywordsContext *)en->context)->keywords;
1542
0
    int32_t result = 0;
1543
0
    while(*kw) {
1544
0
        result++;
1545
0
        kw += uprv_strlen(kw)+1;
1546
0
    }
1547
0
    return result;
1548
0
}
1549
1550
static const char * U_CALLCONV
1551
uloc_kw_nextKeyword(UEnumeration* en,
1552
                    int32_t* resultLength,
1553
0
                    UErrorCode* /*status*/) {
1554
0
    const char* result = ((UKeywordsContext *)en->context)->current;
1555
0
    int32_t len = 0;
1556
0
    if(*result) {
1557
0
        len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1558
0
        ((UKeywordsContext *)en->context)->current += len+1;
1559
0
    } else {
1560
0
        result = NULL;
1561
0
    }
1562
0
    if (resultLength) {
1563
0
        *resultLength = len;
1564
0
    }
1565
0
    return result;
1566
0
}
1567
1568
static void U_CALLCONV
1569
uloc_kw_resetKeywords(UEnumeration* en,
1570
0
                      UErrorCode* /*status*/) {
1571
0
    ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1572
0
}
1573
1574
U_CDECL_END
1575
1576
1577
static const UEnumeration gKeywordsEnum = {
1578
    NULL,
1579
    NULL,
1580
    uloc_kw_closeKeywords,
1581
    uloc_kw_countKeywords,
1582
    uenum_unextDefault,
1583
    uloc_kw_nextKeyword,
1584
    uloc_kw_resetKeywords
1585
};
1586
1587
U_CAPI UEnumeration* U_EXPORT2
1588
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
1589
0
{
1590
0
    UKeywordsContext *myContext = NULL;
1591
0
    UEnumeration *result = NULL;
1592
1593
0
    if(U_FAILURE(*status)) {
1594
0
        return NULL;
1595
0
    }
1596
0
    result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
1597
    /* Null pointer test */
1598
0
    if (result == NULL) {
1599
0
        *status = U_MEMORY_ALLOCATION_ERROR;
1600
0
        return NULL;
1601
0
    }
1602
0
    uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
1603
0
    myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
1604
0
    if (myContext == NULL) {
1605
0
        *status = U_MEMORY_ALLOCATION_ERROR;
1606
0
        uprv_free(result);
1607
0
        return NULL;
1608
0
    }
1609
0
    myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
1610
0
    uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
1611
0
    myContext->keywords[keywordListSize] = 0;
1612
0
    myContext->current = myContext->keywords;
1613
0
    result->context = myContext;
1614
0
    return result;
1615
0
}
1616
1617
U_CAPI UEnumeration* U_EXPORT2
1618
uloc_openKeywords(const char* localeID,
1619
                        UErrorCode* status)
1620
0
{
1621
0
    int32_t i=0;
1622
0
    char keywords[256];
1623
0
    int32_t keywordsCapacity = 256;
1624
0
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
1625
0
    const char* tmpLocaleID;
1626
1627
0
    if(status==NULL || U_FAILURE(*status)) {
1628
0
        return 0;
1629
0
    }
1630
1631
0
    if (_hasBCP47Extension(localeID)) {
1632
0
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
1633
0
    } else {
1634
0
        if (localeID==NULL) {
1635
0
           localeID=uloc_getDefault();
1636
0
        }
1637
0
        tmpLocaleID=localeID;
1638
0
    }
1639
1640
    /* Skip the language */
1641
0
    ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
1642
0
    if(_isIDSeparator(*tmpLocaleID)) {
1643
0
        const char *scriptID;
1644
        /* Skip the script if available */
1645
0
        ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
1646
0
        if(scriptID != tmpLocaleID+1) {
1647
            /* Found optional script */
1648
0
            tmpLocaleID = scriptID;
1649
0
        }
1650
        /* Skip the Country */
1651
0
        if (_isIDSeparator(*tmpLocaleID)) {
1652
0
            ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
1653
0
            if(_isIDSeparator(*tmpLocaleID)) {
1654
0
                _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
1655
0
            }
1656
0
        }
1657
0
    }
1658
1659
    /* keywords are located after '@' */
1660
0
    if((tmpLocaleID = locale_getKeywordsStart(tmpLocaleID)) != NULL) {
1661
0
        i=locale_getKeywords(tmpLocaleID+1, '@', keywords, keywordsCapacity, NULL, 0, NULL, FALSE, status);
1662
0
    }
1663
1664
0
    if(i) {
1665
0
        return uloc_openKeywordList(keywords, i, status);
1666
0
    } else {
1667
0
        return NULL;
1668
0
    }
1669
0
}
1670
1671
1672
/* bit-flags for 'options' parameter of _canonicalize */
1673
0
#define _ULOC_STRIP_KEYWORDS 0x2
1674
0
#define _ULOC_CANONICALIZE   0x1
1675
1676
0
#define OPTION_SET(options, mask) ((options & mask) != 0)
1677
1678
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
1679
0
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1680
1681
/**
1682
 * Canonicalize the given localeID, to level 1 or to level 2,
1683
 * depending on the options.  To specify level 1, pass in options=0.
1684
 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
1685
 *
1686
 * This is the code underlying uloc_getName and uloc_canonicalize.
1687
 */
1688
static int32_t
1689
_canonicalize(const char* localeID,
1690
              char* result,
1691
              int32_t resultCapacity,
1692
              uint32_t options,
1693
0
              UErrorCode* err) {
1694
0
    int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1695
0
    char localeBuffer[ULOC_FULLNAME_CAPACITY];
1696
0
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
1697
0
    const char* origLocaleID;
1698
0
    const char* tmpLocaleID;
1699
0
    const char* keywordAssign = NULL;
1700
0
    const char* separatorIndicator = NULL;
1701
0
    const char* addKeyword = NULL;
1702
0
    const char* addValue = NULL;
1703
0
    char* name;
1704
0
    char* variant = NULL; /* pointer into name, or NULL */
1705
1706
0
    if (U_FAILURE(*err)) {
1707
0
        return 0;
1708
0
    }
1709
1710
0
    if (_hasBCP47Extension(localeID)) {
1711
0
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1712
0
    } else {
1713
0
        if (localeID==NULL) {
1714
0
           localeID=uloc_getDefault();
1715
0
        }
1716
0
        tmpLocaleID=localeID;
1717
0
    }
1718
1719
0
    origLocaleID=tmpLocaleID;
1720
1721
    /* if we are doing a full canonicalization, then put results in
1722
       localeBuffer, if necessary; otherwise send them to result. */
1723
0
    if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1724
0
        (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
1725
0
        name = localeBuffer;
1726
0
        nameCapacity = (int32_t)sizeof(localeBuffer);
1727
0
    } else {
1728
0
        name = result;
1729
0
        nameCapacity = resultCapacity;
1730
0
    }
1731
1732
    /* get all pieces, one after another, and separate with '_' */
1733
0
    len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1734
1735
0
    if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1736
0
        const char *d = uloc_getDefault();
1737
1738
0
        len = (int32_t)uprv_strlen(d);
1739
1740
0
        if (name != NULL) {
1741
0
            uprv_strncpy(name, d, len);
1742
0
        }
1743
0
    } else if(_isIDSeparator(*tmpLocaleID)) {
1744
0
        const char *scriptID;
1745
1746
0
        ++fieldCount;
1747
0
        if(len<nameCapacity) {
1748
0
            name[len]='_';
1749
0
        }
1750
0
        ++len;
1751
1752
0
        scriptSize=ulocimp_getScript(tmpLocaleID+1,
1753
0
            (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1754
0
        if(scriptSize > 0) {
1755
            /* Found optional script */
1756
0
            tmpLocaleID = scriptID;
1757
0
            ++fieldCount;
1758
0
            len+=scriptSize;
1759
0
            if (_isIDSeparator(*tmpLocaleID)) {
1760
                /* If there is something else, then we add the _ */
1761
0
                if(len<nameCapacity) {
1762
0
                    name[len]='_';
1763
0
                }
1764
0
                ++len;
1765
0
            }
1766
0
        }
1767
1768
0
        if (_isIDSeparator(*tmpLocaleID)) {
1769
0
            const char *cntryID;
1770
0
            int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1771
0
                (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1772
0
            if (cntrySize > 0) {
1773
                /* Found optional country */
1774
0
                tmpLocaleID = cntryID;
1775
0
                len+=cntrySize;
1776
0
            }
1777
0
            if(_isIDSeparator(*tmpLocaleID)) {
1778
                /* If there is something else, then we add the _  if we found country before. */
1779
0
                if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
1780
0
                    ++fieldCount;
1781
0
                    if(len<nameCapacity) {
1782
0
                        name[len]='_';
1783
0
                    }
1784
0
                    ++len;
1785
0
                }
1786
1787
0
                variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1788
0
                    (len<nameCapacity ? name+len : NULL), nameCapacity-len);
1789
0
                if (variantSize > 0) {
1790
0
                    variant = len<nameCapacity ? name+len : NULL;
1791
0
                    len += variantSize;
1792
0
                    tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1793
0
                }
1794
0
            }
1795
0
        }
1796
0
    }
1797
1798
    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1799
0
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1800
0
        UBool done = FALSE;
1801
0
        do {
1802
0
            char c = *tmpLocaleID;
1803
0
            switch (c) {
1804
0
            case 0:
1805
0
            case '@':
1806
0
                done = TRUE;
1807
0
                break;
1808
0
            default:
1809
0
                if (len<nameCapacity) {
1810
0
                    name[len] = c;
1811
0
                }
1812
0
                ++len;
1813
0
                ++tmpLocaleID;
1814
0
                break;
1815
0
            }
1816
0
        } while (!done);
1817
0
    }
1818
1819
    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
1820
       After this, tmpLocaleID either points to '@' or is NULL */
1821
0
    if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1822
0
        keywordAssign = uprv_strchr(tmpLocaleID, '=');
1823
0
        separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1824
0
    }
1825
1826
    /* Copy POSIX-style variant, if any [mr@FOO] */
1827
0
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1828
0
        tmpLocaleID != NULL && keywordAssign == NULL) {
1829
0
        for (;;) {
1830
0
            char c = *tmpLocaleID;
1831
0
            if (c == 0) {
1832
0
                break;
1833
0
            }
1834
0
            if (len<nameCapacity) {
1835
0
                name[len] = c;
1836
0
            }
1837
0
            ++len;
1838
0
            ++tmpLocaleID;
1839
0
        }
1840
0
    }
1841
1842
0
    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1843
        /* Handle @FOO variant if @ is present and not followed by = */
1844
0
        if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1845
0
            int32_t posixVariantSize;
1846
            /* Add missing '_' if needed */
1847
0
            if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1848
0
                do {
1849
0
                    if(len<nameCapacity) {
1850
0
                        name[len]='_';
1851
0
                    }
1852
0
                    ++len;
1853
0
                    ++fieldCount;
1854
0
                } while(fieldCount<2);
1855
0
            }
1856
0
            posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1857
0
                                             (UBool)(variantSize > 0));
1858
0
            if (posixVariantSize > 0) {
1859
0
                if (variant == NULL) {
1860
0
                    variant = name+len;
1861
0
                }
1862
0
                len += posixVariantSize;
1863
0
                variantSize += posixVariantSize;
1864
0
            }
1865
0
        }
1866
1867
        /* Handle generic variants first */
1868
0
        if (variant) {
1869
0
            for (j=0; j<UPRV_LENGTHOF(VARIANT_MAP); j++) {
1870
0
                const char* variantToCompare = VARIANT_MAP[j].variant;
1871
0
                int32_t n = (int32_t)uprv_strlen(variantToCompare);
1872
0
                int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1873
0
                len -= variantLen;
1874
0
                if (variantLen > 0) {
1875
0
                    if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
1876
0
                        --len;
1877
0
                    }
1878
0
                    addKeyword = VARIANT_MAP[j].keyword;
1879
0
                    addValue = VARIANT_MAP[j].value;
1880
0
                    break;
1881
0
                }
1882
0
            }
1883
0
            if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
1884
0
                --len;
1885
0
            }
1886
0
        }
1887
1888
        /* Look up the ID in the canonicalization map */
1889
0
        for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1890
0
            const char* id = CANONICALIZE_MAP[j].id;
1891
0
            int32_t n = (int32_t)uprv_strlen(id);
1892
0
            if (len == n && uprv_strncmp(name, id, n) == 0) {
1893
0
                if (n == 0 && tmpLocaleID != NULL) {
1894
0
                    break; /* Don't remap "" if keywords present */
1895
0
                }
1896
0
                len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1897
0
                if (CANONICALIZE_MAP[j].keyword) {
1898
0
                    addKeyword = CANONICALIZE_MAP[j].keyword;
1899
0
                    addValue = CANONICALIZE_MAP[j].value;
1900
0
                }
1901
0
                break;
1902
0
            }
1903
0
        }
1904
0
    }
1905
1906
0
    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1907
0
        if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1908
0
            (!separatorIndicator || separatorIndicator > keywordAssign)) {
1909
0
            if(len<nameCapacity) {
1910
0
                name[len]='@';
1911
0
            }
1912
0
            ++len;
1913
0
            ++fieldCount;
1914
0
            len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1915
0
                                NULL, 0, NULL, TRUE, addKeyword, addValue, err);
1916
0
        } else if (addKeyword != NULL) {
1917
0
            U_ASSERT(addValue != NULL && len < nameCapacity);
1918
            /* inelegant but works -- later make _getKeywords do this? */
1919
0
            len += _copyCount(name+len, nameCapacity-len, "@");
1920
0
            len += _copyCount(name+len, nameCapacity-len, addKeyword);
1921
0
            len += _copyCount(name+len, nameCapacity-len, "=");
1922
0
            len += _copyCount(name+len, nameCapacity-len, addValue);
1923
0
        }
1924
0
    }
1925
1926
0
    if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1927
0
        uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1928
0
    }
1929
1930
0
    return u_terminateChars(result, resultCapacity, len, err);
1931
0
}
1932
1933
/* ### ID parsing API **************************************************/
1934
1935
U_CAPI int32_t  U_EXPORT2
1936
uloc_getParent(const char*    localeID,
1937
               char* parent,
1938
               int32_t parentCapacity,
1939
               UErrorCode* err)
1940
0
{
1941
0
    const char *lastUnderscore;
1942
0
    int32_t i;
1943
1944
0
    if (U_FAILURE(*err))
1945
0
        return 0;
1946
1947
0
    if (localeID == NULL)
1948
0
        localeID = uloc_getDefault();
1949
1950
0
    lastUnderscore=uprv_strrchr(localeID, '_');
1951
0
    if(lastUnderscore!=NULL) {
1952
0
        i=(int32_t)(lastUnderscore-localeID);
1953
0
    } else {
1954
0
        i=0;
1955
0
    }
1956
1957
0
    if(i>0 && parent != localeID) {
1958
0
        uprv_memcpy(parent, localeID, uprv_min(i, parentCapacity));
1959
0
    }
1960
0
    return u_terminateChars(parent, parentCapacity, i, err);
1961
0
}
1962
1963
U_CAPI int32_t U_EXPORT2
1964
uloc_getLanguage(const char*    localeID,
1965
         char* language,
1966
         int32_t languageCapacity,
1967
         UErrorCode* err)
1968
15.4k
{
1969
    /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
1970
15.4k
    int32_t i=0;
1971
1972
15.4k
    if (err==NULL || U_FAILURE(*err)) {
1973
0
        return 0;
1974
0
    }
1975
1976
15.4k
    if(localeID==NULL) {
1977
0
        localeID=uloc_getDefault();
1978
0
    }
1979
1980
15.4k
    i=ulocimp_getLanguage(localeID, language, languageCapacity, NULL);
1981
15.4k
    return u_terminateChars(language, languageCapacity, i, err);
1982
15.4k
}
1983
1984
U_CAPI int32_t U_EXPORT2
1985
uloc_getScript(const char*    localeID,
1986
         char* script,
1987
         int32_t scriptCapacity,
1988
         UErrorCode* err)
1989
15.4k
{
1990
15.4k
    int32_t i=0;
1991
1992
15.4k
    if(err==NULL || U_FAILURE(*err)) {
1993
0
        return 0;
1994
0
    }
1995
1996
15.4k
    if(localeID==NULL) {
1997
0
        localeID=uloc_getDefault();
1998
0
    }
1999
2000
    /* skip the language */
2001
15.4k
    ulocimp_getLanguage(localeID, NULL, 0, &localeID);
2002
15.4k
    if(_isIDSeparator(*localeID)) {
2003
10.4k
        i=ulocimp_getScript(localeID+1, script, scriptCapacity, NULL);
2004
10.4k
    }
2005
15.4k
    return u_terminateChars(script, scriptCapacity, i, err);
2006
15.4k
}
2007
2008
U_CAPI int32_t  U_EXPORT2
2009
uloc_getCountry(const char* localeID,
2010
            char* country,
2011
            int32_t countryCapacity,
2012
            UErrorCode* err)
2013
15.4k
{
2014
15.4k
    int32_t i=0;
2015
2016
15.4k
    if(err==NULL || U_FAILURE(*err)) {
2017
0
        return 0;
2018
0
    }
2019
2020
15.4k
    if(localeID==NULL) {
2021
0
        localeID=uloc_getDefault();
2022
0
    }
2023
2024
    /* Skip the language */
2025
15.4k
    ulocimp_getLanguage(localeID, NULL, 0, &localeID);
2026
15.4k
    if(_isIDSeparator(*localeID)) {
2027
10.4k
        const char *scriptID;
2028
        /* Skip the script if available */
2029
10.4k
        ulocimp_getScript(localeID+1, NULL, 0, &scriptID);
2030
10.4k
        if(scriptID != localeID+1) {
2031
            /* Found optional script */
2032
453
            localeID = scriptID;
2033
453
        }
2034
10.4k
        if(_isIDSeparator(*localeID)) {
2035
10.3k
            i=ulocimp_getCountry(localeID+1, country, countryCapacity, NULL);
2036
10.3k
        }
2037
10.4k
    }
2038
15.4k
    return u_terminateChars(country, countryCapacity, i, err);
2039
15.4k
}
2040
2041
U_CAPI int32_t  U_EXPORT2
2042
uloc_getVariant(const char* localeID,
2043
                char* variant,
2044
                int32_t variantCapacity,
2045
                UErrorCode* err)
2046
0
{
2047
0
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
2048
0
    const char* tmpLocaleID;
2049
0
    int32_t i=0;
2050
2051
0
    if(err==NULL || U_FAILURE(*err)) {
2052
0
        return 0;
2053
0
    }
2054
2055
0
    if (_hasBCP47Extension(localeID)) {
2056
0
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
2057
0
    } else {
2058
0
        if (localeID==NULL) {
2059
0
           localeID=uloc_getDefault();
2060
0
        }
2061
0
        tmpLocaleID=localeID;
2062
0
    }
2063
2064
    /* Skip the language */
2065
0
    ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);
2066
0
    if(_isIDSeparator(*tmpLocaleID)) {
2067
0
        const char *scriptID;
2068
        /* Skip the script if available */
2069
0
        ulocimp_getScript(tmpLocaleID+1, NULL, 0, &scriptID);
2070
0
        if(scriptID != tmpLocaleID+1) {
2071
            /* Found optional script */
2072
0
            tmpLocaleID = scriptID;
2073
0
        }
2074
        /* Skip the Country */
2075
0
        if (_isIDSeparator(*tmpLocaleID)) {
2076
0
            const char *cntryID;
2077
0
            ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &cntryID);
2078
0
            if (cntryID != tmpLocaleID+1) {
2079
                /* Found optional country */
2080
0
                tmpLocaleID = cntryID;
2081
0
            }
2082
0
            if(_isIDSeparator(*tmpLocaleID)) {
2083
                /* If there was no country ID, skip a possible extra IDSeparator */
2084
0
                if (tmpLocaleID != cntryID && _isIDSeparator(tmpLocaleID[1])) {
2085
0
                    tmpLocaleID++;
2086
0
                }
2087
0
                i=_getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
2088
0
            }
2089
0
        }
2090
0
    }
2091
2092
    /* removed by weiv. We don't want to handle POSIX variants anymore. Use canonicalization function */
2093
    /* if we do not have a variant tag yet then try a POSIX variant after '@' */
2094
/*
2095
    if(!haveVariant && (localeID=uprv_strrchr(localeID, '@'))!=NULL) {
2096
        i=_getVariant(localeID+1, '@', variant, variantCapacity);
2097
    }
2098
*/
2099
0
    return u_terminateChars(variant, variantCapacity, i, err);
2100
0
}
2101
2102
U_CAPI int32_t  U_EXPORT2
2103
uloc_getName(const char* localeID,
2104
             char* name,
2105
             int32_t nameCapacity,
2106
             UErrorCode* err)
2107
0
{
2108
0
    return _canonicalize(localeID, name, nameCapacity, 0, err);
2109
0
}
2110
2111
U_CAPI int32_t  U_EXPORT2
2112
uloc_getBaseName(const char* localeID,
2113
                 char* name,
2114
                 int32_t nameCapacity,
2115
                 UErrorCode* err)
2116
0
{
2117
0
    return _canonicalize(localeID, name, nameCapacity, _ULOC_STRIP_KEYWORDS, err);
2118
0
}
2119
2120
U_CAPI int32_t  U_EXPORT2
2121
uloc_canonicalize(const char* localeID,
2122
                  char* name,
2123
                  int32_t nameCapacity,
2124
                  UErrorCode* err)
2125
0
{
2126
0
    return _canonicalize(localeID, name, nameCapacity, _ULOC_CANONICALIZE, err);
2127
0
}
2128
2129
U_CAPI const char*  U_EXPORT2
2130
uloc_getISO3Language(const char* localeID)
2131
0
{
2132
0
    int16_t offset;
2133
0
    char lang[ULOC_LANG_CAPACITY];
2134
0
    UErrorCode err = U_ZERO_ERROR;
2135
2136
0
    if (localeID == NULL)
2137
0
    {
2138
0
        localeID = uloc_getDefault();
2139
0
    }
2140
0
    uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
2141
0
    if (U_FAILURE(err))
2142
0
        return "";
2143
0
    offset = _findIndex(LANGUAGES, lang);
2144
0
    if (offset < 0)
2145
0
        return "";
2146
0
    return LANGUAGES_3[offset];
2147
0
}
2148
2149
U_CAPI const char*  U_EXPORT2
2150
uloc_getISO3Country(const char* localeID)
2151
0
{
2152
0
    int16_t offset;
2153
0
    char cntry[ULOC_LANG_CAPACITY];
2154
0
    UErrorCode err = U_ZERO_ERROR;
2155
2156
0
    if (localeID == NULL)
2157
0
    {
2158
0
        localeID = uloc_getDefault();
2159
0
    }
2160
0
    uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
2161
0
    if (U_FAILURE(err))
2162
0
        return "";
2163
0
    offset = _findIndex(COUNTRIES, cntry);
2164
0
    if (offset < 0)
2165
0
        return "";
2166
2167
0
    return COUNTRIES_3[offset];
2168
0
}
2169
2170
U_CAPI uint32_t  U_EXPORT2
2171
uloc_getLCID(const char* localeID)
2172
0
{
2173
0
    UErrorCode status = U_ZERO_ERROR;
2174
0
    char       langID[ULOC_FULLNAME_CAPACITY];
2175
0
    uint32_t   lcid = 0;
2176
2177
    /* Check for incomplete id. */
2178
0
    if (!localeID || uprv_strlen(localeID) < 2) {
2179
0
        return 0;
2180
0
    }
2181
2182
    // Attempt platform lookup if available
2183
0
    lcid = uprv_convertToLCIDPlatform(localeID);
2184
0
    if (lcid > 0)
2185
0
    {
2186
        // Windows found an LCID, return that
2187
0
        return lcid;
2188
0
    }
2189
2190
0
    uloc_getLanguage(localeID, langID, sizeof(langID), &status);
2191
0
    if (U_FAILURE(status)) {
2192
0
        return 0;
2193
0
    }
2194
2195
0
    if (uprv_strchr(localeID, '@')) {
2196
        // uprv_convertToLCID does not support keywords other than collation.
2197
        // Remove all keywords except collation.
2198
0
        int32_t len;
2199
0
        char collVal[ULOC_KEYWORDS_CAPACITY];
2200
0
        char tmpLocaleID[ULOC_FULLNAME_CAPACITY];
2201
2202
0
        len = uloc_getKeywordValue(localeID, "collation", collVal,
2203
0
            UPRV_LENGTHOF(collVal) - 1, &status);
2204
2205
0
        if (U_SUCCESS(status) && len > 0) {
2206
0
            collVal[len] = 0;
2207
2208
0
            len = uloc_getBaseName(localeID, tmpLocaleID,
2209
0
                UPRV_LENGTHOF(tmpLocaleID) - 1, &status);
2210
2211
0
            if (U_SUCCESS(status) && len > 0) {
2212
0
                tmpLocaleID[len] = 0;
2213
2214
0
                len = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
2215
0
                    UPRV_LENGTHOF(tmpLocaleID) - len - 1, &status);
2216
2217
0
                if (U_SUCCESS(status) && len > 0) {
2218
0
                    tmpLocaleID[len] = 0;
2219
0
                    return uprv_convertToLCID(langID, tmpLocaleID, &status);
2220
0
                }
2221
0
            }
2222
0
        }
2223
2224
        // fall through - all keywords are simply ignored
2225
0
        status = U_ZERO_ERROR;
2226
0
    }
2227
2228
0
    return uprv_convertToLCID(langID, localeID, &status);
2229
0
}
2230
2231
U_CAPI int32_t U_EXPORT2
2232
uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
2233
                UErrorCode *status)
2234
17.5k
{
2235
17.5k
    return uprv_convertToPosix(hostid, locale, localeCapacity, status);
2236
17.5k
}
2237
2238
/* ### Default locale **************************************************/
2239
2240
U_CAPI const char*  U_EXPORT2
2241
uloc_getDefault()
2242
0
{
2243
0
    return locale_get_default();
2244
0
}
2245
2246
U_CAPI void  U_EXPORT2
2247
uloc_setDefault(const char*   newDefaultLocale,
2248
             UErrorCode* err)
2249
0
{
2250
0
    if (U_FAILURE(*err))
2251
0
        return;
2252
    /* the error code isn't currently used for anything by this function*/
2253
2254
    /* propagate change to C++ */
2255
0
    locale_set_default(newDefaultLocale);
2256
0
}
2257
2258
/**
2259
 * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
2260
 * to an array of pointers to arrays of char.  All of these pointers are owned
2261
 * by ICU-- do not delete them, and do not write through them.  The array is
2262
 * terminated with a null pointer.
2263
 */
2264
U_CAPI const char* const*  U_EXPORT2
2265
uloc_getISOLanguages()
2266
0
{
2267
0
    return LANGUAGES;
2268
0
}
2269
2270
/**
2271
 * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2272
 * pointer to an array of pointers to arrays of char.  All of these pointers are
2273
 * owned by ICU-- do not delete them, and do not write through them.  The array is
2274
 * terminated with a null pointer.
2275
 */
2276
U_CAPI const char* const*  U_EXPORT2
2277
uloc_getISOCountries()
2278
0
{
2279
0
    return COUNTRIES;
2280
0
}
2281
2282
2283
/* this function to be moved into cstring.c later */
2284
static char gDecimal = 0;
2285
2286
static /* U_CAPI */
2287
double
2288
/* U_EXPORT2 */
2289
0
_uloc_strtod(const char *start, char **end) {
2290
0
    char *decimal;
2291
0
    char *myEnd;
2292
0
    char buf[30];
2293
0
    double rv;
2294
0
    if (!gDecimal) {
2295
0
        char rep[5];
2296
        /* For machines that decide to change the decimal on you,
2297
        and try to be too smart with localization.
2298
        This normally should be just a '.'. */
2299
0
        sprintf(rep, "%+1.1f", 1.0);
2300
0
        gDecimal = rep[2];
2301
0
    }
2302
2303
0
    if(gDecimal == '.') {
2304
0
        return uprv_strtod(start, end); /* fall through to OS */
2305
0
    } else {
2306
0
        uprv_strncpy(buf, start, 29);
2307
0
        buf[29]=0;
2308
0
        decimal = uprv_strchr(buf, '.');
2309
0
        if(decimal) {
2310
0
            *decimal = gDecimal;
2311
0
        } else {
2312
0
            return uprv_strtod(start, end); /* no decimal point */
2313
0
        }
2314
0
        rv = uprv_strtod(buf, &myEnd);
2315
0
        if(end) {
2316
0
            *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2317
0
        }
2318
0
        return rv;
2319
0
    }
2320
0
}
2321
2322
typedef struct {
2323
    float q;
2324
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
2325
    char locale[ULOC_FULLNAME_CAPACITY+1];
2326
} _acceptLangItem;
2327
2328
static int32_t U_CALLCONV
2329
uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
2330
0
{
2331
0
    const _acceptLangItem *aa = (const _acceptLangItem*)a;
2332
0
    const _acceptLangItem *bb = (const _acceptLangItem*)b;
2333
2334
0
    int32_t rc = 0;
2335
0
    if(bb->q < aa->q) {
2336
0
        rc = -1;  /* A > B */
2337
0
    } else if(bb->q > aa->q) {
2338
0
        rc = 1;   /* A < B */
2339
0
    } else {
2340
0
        rc = 0;   /* A = B */
2341
0
    }
2342
2343
0
    if(rc==0) {
2344
0
        rc = uprv_stricmp(aa->locale, bb->locale);
2345
0
    }
2346
2347
#if defined(ULOC_DEBUG)
2348
    /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2349
    aa->locale, aa->q,
2350
    bb->locale, bb->q,
2351
    rc);*/
2352
#endif
2353
2354
0
    return rc;
2355
0
}
2356
2357
/*
2358
mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2359
*/
2360
2361
U_CAPI int32_t U_EXPORT2
2362
uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
2363
                            const char *httpAcceptLanguage,
2364
                            UEnumeration* availableLocales,
2365
                            UErrorCode *status)
2366
0
{
2367
0
  MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
2368
0
    char tmp[ULOC_FULLNAME_CAPACITY +1];
2369
0
    int32_t n = 0;
2370
0
    const char *itemEnd;
2371
0
    const char *paramEnd;
2372
0
    const char *s;
2373
0
    const char *t;
2374
0
    int32_t res;
2375
0
    int32_t i;
2376
0
    int32_t l = (int32_t)uprv_strlen(httpAcceptLanguage);
2377
2378
0
    if(U_FAILURE(*status)) {
2379
0
        return -1;
2380
0
    }
2381
2382
0
    for(s=httpAcceptLanguage;s&&*s;) {
2383
0
        while(isspace(*s)) /* eat space at the beginning */
2384
0
            s++;
2385
0
        itemEnd=uprv_strchr(s,',');
2386
0
        paramEnd=uprv_strchr(s,';');
2387
0
        if(!itemEnd) {
2388
0
            itemEnd = httpAcceptLanguage+l; /* end of string */
2389
0
        }
2390
0
        if(paramEnd && paramEnd<itemEnd) {
2391
            /* semicolon (;) is closer than end (,) */
2392
0
            t = paramEnd+1;
2393
0
            if(*t=='q') {
2394
0
                t++;
2395
0
            }
2396
0
            while(isspace(*t)) {
2397
0
                t++;
2398
0
            }
2399
0
            if(*t=='=') {
2400
0
                t++;
2401
0
            }
2402
0
            while(isspace(*t)) {
2403
0
                t++;
2404
0
            }
2405
0
            items[n].q = (float)_uloc_strtod(t,NULL);
2406
0
        } else {
2407
            /* no semicolon - it's 1.0 */
2408
0
            items[n].q = 1.0f;
2409
0
            paramEnd = itemEnd;
2410
0
        }
2411
0
        items[n].dummy=0;
2412
        /* eat spaces prior to semi */
2413
0
        for(t=(paramEnd-1);(paramEnd>s)&&isspace(*t);t--)
2414
0
            ;
2415
0
        int32_t slen = ((t+1)-s);
2416
0
        if(slen > ULOC_FULLNAME_CAPACITY) {
2417
0
          *status = U_BUFFER_OVERFLOW_ERROR;
2418
0
          return -1; // too big
2419
0
        }
2420
0
        uprv_strncpy(items[n].locale, s, slen);
2421
0
        items[n].locale[slen]=0; // terminate
2422
0
        int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
2423
0
        if(U_FAILURE(*status)) return -1;
2424
0
        if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {
2425
            // canonicalization had an effect- copy back
2426
0
            uprv_strncpy(items[n].locale, tmp, clen);
2427
0
            items[n].locale[clen] = 0; // terminate
2428
0
        }
2429
#if defined(ULOC_DEBUG)
2430
        /*fprintf(stderr,"%d: s <%s> q <%g>\n", n, j[n].locale, j[n].q);*/
2431
#endif
2432
0
        n++;
2433
0
        s = itemEnd;
2434
0
        while(*s==',') { /* eat duplicate commas */
2435
0
            s++;
2436
0
        }
2437
0
        if(n>=items.getCapacity()) { // If we need more items
2438
0
          if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
2439
0
              *status = U_MEMORY_ALLOCATION_ERROR;
2440
0
              return -1;
2441
0
          }
2442
#if defined(ULOC_DEBUG)
2443
          fprintf(stderr,"malloced at size %d\n", items.getCapacity());
2444
#endif
2445
0
        }
2446
0
    }
2447
0
    uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
2448
0
    if (U_FAILURE(*status)) {
2449
0
        return -1;
2450
0
    }
2451
0
    LocalMemory<const char*> strs(NULL);
2452
0
    if (strs.allocateInsteadAndReset(n) == NULL) {
2453
0
        *status = U_MEMORY_ALLOCATION_ERROR;
2454
0
        return -1;
2455
0
    }
2456
0
    for(i=0;i<n;i++) {
2457
#if defined(ULOC_DEBUG)
2458
        /*fprintf(stderr,"%d: s <%s> q <%g>\n", i, j[i].locale, j[i].q);*/
2459
#endif
2460
0
        strs[i]=items[i].locale;
2461
0
    }
2462
0
    res =  uloc_acceptLanguage(result, resultAvailable, outResult,
2463
0
                               strs.getAlias(), n, availableLocales, status);
2464
0
    return res;
2465
0
}
2466
2467
2468
U_CAPI int32_t U_EXPORT2
2469
uloc_acceptLanguage(char *result, int32_t resultAvailable,
2470
                    UAcceptResult *outResult, const char **acceptList,
2471
                    int32_t acceptListCount,
2472
                    UEnumeration* availableLocales,
2473
                    UErrorCode *status)
2474
0
{
2475
0
    int32_t i,j;
2476
0
    int32_t len;
2477
0
    int32_t maxLen=0;
2478
0
    char tmp[ULOC_FULLNAME_CAPACITY+1];
2479
0
    const char *l;
2480
0
    char **fallbackList;
2481
0
    if(U_FAILURE(*status)) {
2482
0
        return -1;
2483
0
    }
2484
0
    fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
2485
0
    if(fallbackList==NULL) {
2486
0
        *status = U_MEMORY_ALLOCATION_ERROR;
2487
0
        return -1;
2488
0
    }
2489
0
    for(i=0;i<acceptListCount;i++) {
2490
#if defined(ULOC_DEBUG)
2491
        fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
2492
#endif
2493
0
        while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
2494
#if defined(ULOC_DEBUG)
2495
            fprintf(stderr,"  %s\n", l);
2496
#endif
2497
0
            len = (int32_t)uprv_strlen(l);
2498
0
            if(!uprv_strcmp(acceptList[i], l)) {
2499
0
                if(outResult) {
2500
0
                    *outResult = ULOC_ACCEPT_VALID;
2501
0
                }
2502
#if defined(ULOC_DEBUG)
2503
                fprintf(stderr, "MATCH! %s\n", l);
2504
#endif
2505
0
                if(len>0) {
2506
0
                    uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2507
0
                }
2508
0
                for(j=0;j<i;j++) {
2509
0
                    uprv_free(fallbackList[j]);
2510
0
                }
2511
0
                uprv_free(fallbackList);
2512
0
                return u_terminateChars(result, resultAvailable, len, status);
2513
0
            }
2514
0
            if(len>maxLen) {
2515
0
                maxLen = len;
2516
0
            }
2517
0
        }
2518
0
        uenum_reset(availableLocales, status);
2519
        /* save off parent info */
2520
0
        if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
2521
0
            fallbackList[i] = uprv_strdup(tmp);
2522
0
        } else {
2523
0
            fallbackList[i]=0;
2524
0
        }
2525
0
    }
2526
2527
0
    for(maxLen--;maxLen>0;maxLen--) {
2528
0
        for(i=0;i<acceptListCount;i++) {
2529
0
            if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
2530
#if defined(ULOC_DEBUG)
2531
                fprintf(stderr,"Try: [%s]", fallbackList[i]);
2532
#endif
2533
0
                while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
2534
#if defined(ULOC_DEBUG)
2535
                    fprintf(stderr,"  %s\n", l);
2536
#endif
2537
0
                    len = (int32_t)uprv_strlen(l);
2538
0
                    if(!uprv_strcmp(fallbackList[i], l)) {
2539
0
                        if(outResult) {
2540
0
                            *outResult = ULOC_ACCEPT_FALLBACK;
2541
0
                        }
2542
#if defined(ULOC_DEBUG)
2543
                        fprintf(stderr, "fallback MATCH! %s\n", l);
2544
#endif
2545
0
                        if(len>0) {
2546
0
                            uprv_strncpy(result, l, uprv_min(len, resultAvailable));
2547
0
                        }
2548
0
                        for(j=0;j<acceptListCount;j++) {
2549
0
                            uprv_free(fallbackList[j]);
2550
0
                        }
2551
0
                        uprv_free(fallbackList);
2552
0
                        return u_terminateChars(result, resultAvailable, len, status);
2553
0
                    }
2554
0
                }
2555
0
                uenum_reset(availableLocales, status);
2556
2557
0
                if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
2558
0
                    uprv_free(fallbackList[i]);
2559
0
                    fallbackList[i] = uprv_strdup(tmp);
2560
0
                } else {
2561
0
                    uprv_free(fallbackList[i]);
2562
0
                    fallbackList[i]=0;
2563
0
                }
2564
0
            }
2565
0
        }
2566
0
        if(outResult) {
2567
0
            *outResult = ULOC_ACCEPT_FAILED;
2568
0
        }
2569
0
    }
2570
0
    for(i=0;i<acceptListCount;i++) {
2571
0
        uprv_free(fallbackList[i]);
2572
0
    }
2573
0
    uprv_free(fallbackList);
2574
0
    return -1;
2575
0
}
2576
2577
U_CAPI const char* U_EXPORT2
2578
uloc_toUnicodeLocaleKey(const char* keyword)
2579
0
{
2580
0
    const char* bcpKey = ulocimp_toBcpKey(keyword);
2581
0
    if (bcpKey == NULL && ultag_isUnicodeLocaleKey(keyword, -1)) {
2582
        // unknown keyword, but syntax is fine..
2583
0
        return keyword;
2584
0
    }
2585
0
    return bcpKey;
2586
0
}
2587
2588
U_CAPI const char* U_EXPORT2
2589
uloc_toUnicodeLocaleType(const char* keyword, const char* value)
2590
0
{
2591
0
    const char* bcpType = ulocimp_toBcpType(keyword, value, NULL, NULL);
2592
0
    if (bcpType == NULL && ultag_isUnicodeLocaleType(value, -1)) {
2593
        // unknown keyword, but syntax is fine..
2594
0
        return value;
2595
0
    }
2596
0
    return bcpType;
2597
0
}
2598
2599
static UBool
2600
isWellFormedLegacyKey(const char* legacyKey)
2601
0
{
2602
0
    const char* p = legacyKey;
2603
0
    while (*p) {
2604
0
        if (!UPRV_ISALPHANUM(*p)) {
2605
0
            return FALSE;
2606
0
        }
2607
0
        p++;
2608
0
    }
2609
0
    return TRUE;
2610
0
}
2611
2612
static UBool
2613
isWellFormedLegacyType(const char* legacyType)
2614
0
{
2615
0
    const char* p = legacyType;
2616
0
    int32_t alphaNumLen = 0;
2617
0
    while (*p) {
2618
0
        if (*p == '_' || *p == '/' || *p == '-') {
2619
0
            if (alphaNumLen == 0) {
2620
0
                return FALSE;
2621
0
            }
2622
0
            alphaNumLen = 0;
2623
0
        } else if (UPRV_ISALPHANUM(*p)) {
2624
0
            alphaNumLen++;
2625
0
        } else {
2626
0
            return FALSE;
2627
0
        }
2628
0
        p++;
2629
0
    }
2630
0
    return (alphaNumLen != 0);
2631
0
}
2632
2633
U_CAPI const char* U_EXPORT2
2634
uloc_toLegacyKey(const char* keyword)
2635
0
{
2636
0
    const char* legacyKey = ulocimp_toLegacyKey(keyword);
2637
0
    if (legacyKey == NULL) {
2638
        // Checks if the specified locale key is well-formed with the legacy locale syntax.
2639
        //
2640
        // Note:
2641
        //  LDML/CLDR provides some definition of keyword syntax in
2642
        //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2643
        //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2644
        //  Keys can only consist of [0-9a-zA-Z].
2645
0
        if (isWellFormedLegacyKey(keyword)) {
2646
0
            return keyword;
2647
0
        }
2648
0
    }
2649
0
    return legacyKey;
2650
0
}
2651
2652
U_CAPI const char* U_EXPORT2
2653
uloc_toLegacyType(const char* keyword, const char* value)
2654
0
{
2655
0
    const char* legacyType = ulocimp_toLegacyType(keyword, value, NULL, NULL);
2656
0
    if (legacyType == NULL) {
2657
        // Checks if the specified locale type is well-formed with the legacy locale syntax.
2658
        //
2659
        // Note:
2660
        //  LDML/CLDR provides some definition of keyword syntax in
2661
        //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
2662
        //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
2663
        //  Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
2664
        //  we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
2665
0
        if (isWellFormedLegacyType(value)) {
2666
0
            return value;
2667
0
        }
2668
0
    }
2669
0
    return legacyType;
2670
0
}
2671
2672
/*eof*/