Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/icu/source/common/uloc.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
**********************************************************************
5
*   Copyright (C) 1997-2016, International Business Machines
6
*   Corporation and others.  All Rights Reserved.
7
**********************************************************************
8
*
9
* File ULOC.CPP
10
*
11
* Modification History:
12
*
13
*   Date        Name        Description
14
*   04/01/97    aliu        Creation.
15
*   08/21/98    stephen     JDK 1.2 sync
16
*   12/08/98    rtg         New Locale implementation and C API
17
*   03/15/99    damiba      overhaul.
18
*   04/06/99    stephen     changed setDefault() to realloc and copy
19
*   06/14/99    stephen     Changed calls to ures_open for new params
20
*   07/21/99    stephen     Modified setDefault() to propagate to C++
21
*   05/14/04    alan        7 years later: refactored, cleaned up, fixed bugs,
22
*                           brought canonicalization code into line with spec
23
*****************************************************************************/
24
25
/*
26
   POSIX's locale format, from putil.c: [no spaces]
27
28
     ll [ _CC ] [ . MM ] [ @ VV]
29
30
     l = lang, C = ctry, M = charmap, V = variant
31
*/
32
33
#include "unicode/utypes.h"
34
#include "unicode/ustring.h"
35
#include "unicode/uloc.h"
36
37
#include "putilimp.h"
38
#include "ustr_imp.h"
39
#include "ulocimp.h"
40
#include "umutex.h"
41
#include "cstring.h"
42
#include "cmemory.h"
43
#include "locmap.h"
44
#include "uarrsort.h"
45
#include "uenumimp.h"
46
#include "uassert.h"
47
#include "charstr.h"
48
49
#include <stdio.h> /* for sprintf */
50
51
U_NAMESPACE_USE
52
53
/* ### Declarations **************************************************/
54
55
/* Locale stuff from locid.cpp */
56
U_CFUNC void locale_set_default(const char *id);
57
U_CFUNC const char *locale_get_default(void);
58
U_CFUNC int32_t
59
locale_getKeywords(const char *localeID,
60
            char prev,
61
            char *keywords, int32_t keywordCapacity,
62
            char *values, int32_t valuesCapacity, int32_t *valLen,
63
            UBool valuesToo,
64
            UErrorCode *status);
65
66
/* ### Data tables **************************************************/
67
68
/**
 * Table of language codes, both 2- and 3-letter, with preference
 * given to 2-letter codes where possible.  Includes 3-letter codes
 * that lack a 2-letter equivalent.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with LANGUAGES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Notes
 *
 * Tables updated per http://lcweb.loc.gov/standards/iso639-2/ to
 * include the revisions up to 2001/7/27 *CWB*
 *
 * The 3 character codes are the terminology codes like RFC 3066.  This
 * is compatible with prior ICU codes.
 *
 * "in" "iw" "ji" "jw" & "sh" have been withdrawn but are still in the
 * table, now at the end of the table because their 3 character codes
 * are duplicates.  This avoids bad searches going from 3 to 2 character
 * codes.
 *
 * The range qaa-qtz is reserved for local use.
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES[] = {
    "aa",  "ab",  "ace", "ach", "ada", "ady", "ae",  "aeb",
    "af",  "afh", "agq", "ain", "ak",  "akk", "akz", "ale",
    "aln", "alt", "am",  "an",  "ang", "anp", "ar",  "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "as",
    "asa", "ase", "ast", "av",  "avk", "awa", "ay",  "az",
    "ba",  "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "be",  "bej", "bem", "bew", "bez", "bfd", "bfq", "bg",
    "bgn", "bho", "bi",  "bik", "bin", "bjn", "bkm", "bla",
    "bm",  "bn",  "bo",  "bpy", "bqi", "br",  "bra", "brh",
    "brx", "bs",  "bss", "bua", "bug", "bum", "byn", "byv",
    "ca",  "cad", "car", "cay", "cch", "ccp", "ce",  "ceb", "cgg",
    "ch",  "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "co",  "cop", "cps", "cr",  "crh",
    "cs",  "csb", "cu",  "cv",  "cy",
    "da",  "dak", "dar", "dav", "de",  "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "dv",
    "dyo", "dyu", "dz",  "dzg",
    "ebu", "ee",  "efi", "egl", "egy", "eka", "el",  "elx",
    "en",  "enm", "eo",  "es",  "esu", "et",  "eu",  "ewo",
    "ext",
    "fa",  "fan", "fat", "ff",  "fi",  "fil", "fit", "fj",
    "fo",  "fon", "fr",  "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fy",
    "ga",  "gaa", "gag", "gan", "gay", "gba", "gbz", "gd",
    "gez", "gil", "gl",  "glk", "gmh", "gn",  "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "gu",  "guc",
    "gur", "guz", "gv",  "gwi",
    "ha",  "hai", "hak", "haw", "he",  "hi",  "hif", "hil",
    "hit", "hmn", "ho",  "hr",  "hsb", "hsn", "ht",  "hu",
    "hup", "hy",  "hz",
    "ia",  "iba", "ibb", "id",  "ie",  "ig",  "ii",  "ik",
    "ilo", "inh", "io",  "is",  "it",  "iu",  "izh",
    "ja",  "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jv",
    "ka",  "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kg",  "kgp",
    "kha", "kho", "khq", "khw", "ki",  "kiu", "kj",  "kk",
    "kkj", "kl",  "kln", "km",  "kmb", "kn",  "ko",  "koi",
    "kok", "kos", "kpe", "kr",  "krc", "kri", "krj", "krl",
    "kru", "ks",  "ksb", "ksf", "ksh", "ku",  "kum", "kut",
    "kv",  "kw",  "ky",
    "la",  "lad", "lag", "lah", "lam", "lb",  "lez", "lfn",
    "lg",  "li",  "lij", "liv", "lkt", "lmo", "ln",  "lo",
    "lol", "loz", "lrc", "lt",  "ltg", "lu",  "lua", "lui",
    "lun", "luo", "lus", "luy", "lv",  "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mg",  "mga",
    "mgh", "mgo", "mh",  "mi",  "mic", "min", "mis", "mk",
    "ml",  "mn",  "mnc", "mni", "moh", "mos", "mr",  "mrj",
    "ms",  "mt",  "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "my",  "mye", "myv", "mzn",
    "na",  "nan", "nap", "naq", "nb",  "nd",  "nds", "ne",
    "new", "ng",  "nia", "niu", "njo", "nl",  "nmg", "nn",
    "nnh", "no",  "nog", "non", "nov", "nqo", "nr",  "nso",
    "nus", "nv",  "nwc", "ny",  "nym", "nyn", "nyo", "nzi",
    "oc",  "oj",  "om",  "or",  "os",  "osa", "ota",
    "pa",  "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pi",  "pl",  "pms", "pnt",
    "pon", "prg", "pro", "ps",  "pt",
    "qu",  "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "rm",  "rn",  "ro",
    "rof", "rom", "rtm", "ru",  "rue", "rug", "rup",
    "rw",  "rwk",
    "sa",  "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "sc",  "scn", "sco", "sd",  "sdc", "sdh",
    "se",  "see", "seh", "sei", "sel", "ses", "sg",  "sga",
    "sgs", "shi", "shn", "shu", "si",  "sid", "sk",
    "sl",  "sli", "sly", "sm",  "sma", "smj", "smn", "sms",
    "sn",  "snk", "so",  "sog", "sq",  "sr",  "srn", "srr",
    "ss",  "ssy", "st",  "stq", "su",  "suk", "sus", "sux",
    "sv",  "sw",  "swb", "swc", "syc", "syr", "szl",
    "ta",  "tcy", "te",  "tem", "teo", "ter", "tet", "tg",
    "th",  "ti",  "tig", "tiv", "tk",  "tkl", "tkr", "tl",
    "tlh", "tli", "tly", "tmh", "tn",  "to",  "tog", "tpi",
    "tr",  "tru", "trv", "ts",  "tsd", "tsi", "tt",  "ttt",
    "tum", "tvl", "tw",  "twq", "ty",  "tyv", "tzm",
    "udm", "ug",  "uga", "uk",  "umb", "und", "ur",  "uz",
    "vai", "ve",  "vec", "vep", "vi",  "vls", "vmf", "vo",
    "vot", "vro", "vun",
    "wa",  "wae", "wal", "war", "was", "wbp", "wo",  "wuu",
    "xal", "xh",  "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yi",  "yo",  "yrl", "yue",
    "za",  "zap", "zbl", "zea", "zen", "zgh", "zh",  "zu",
    "zun", "zxx", "zza",
NULL,   /* end of the list exposed to user code */
    "in",  "iw",  "ji",  "jw",  "sh",    /* obsolete language codes */
NULL    /* end of the internal-only list */
};
191
192
/*
 * Withdrawn language codes.  Entry i lines up with REPLACEMENT_LANGUAGES[i]
 * ("in"/"id", "iw"/"he", "ji"/"yi", "jw"/"jv").
 * NOTE(review): the two trailing NULLs look like the same "list, NULL,
 * second list, NULL" layout used by LANGUAGES with an empty second list --
 * confirm against the lookup helper before changing the terminators.
 */
static const char* const DEPRECATED_LANGUAGES[]={
    "in", "iw", "ji", "jw", NULL, NULL
};
195
/*
 * Current codes for the withdrawn language codes; entry i is the
 * replacement for DEPRECATED_LANGUAGES[i], so both arrays must keep the
 * same order and the same NULL terminators.
 */
/*  "in", "iw", "ji", "jw" */
static const char* const REPLACEMENT_LANGUAGES[]={
    "id", "he", "yi", "jv", NULL, NULL
};
198
199
/**
 * Table of 3-letter language codes.
 *
 * This is a lookup table used to convert 3-letter language codes to
 * their 2-letter equivalent, where possible.  It must be kept in sync
 * with LANGUAGES.  For all valid i, LANGUAGES[i] must refer to the
 * same language as LANGUAGES_3[i].  The commented-out lines are
 * copied from LANGUAGES to make eyeballing this baby easier.
 *
 * Where a 3-letter language code has no 2-letter equivalent, the
 * 3-letter code occupies both LANGUAGES[i] and LANGUAGES_3[i].
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in LANGUAGES.
 */
/* Generated using org.unicode.cldr.icu.GenerateISO639LanguageTables */
/* ISO639 table version is 20150505 */
/* Subsequent hand addition of selected languages */
static const char * const LANGUAGES_3[] = {
    "aar", "abk", "ace", "ach", "ada", "ady", "ave", "aeb",
    "afr", "afh", "agq", "ain", "aka", "akk", "akz", "ale",
    "aln", "alt", "amh", "arg", "ang", "anp", "ara", "arc",
    "arn", "aro", "arp", "arq", "ars", "arw", "ary", "arz", "asm",
    "asa", "ase", "ast", "ava", "avk", "awa", "aym", "aze",
    "bak", "bal", "ban", "bar", "bas", "bax", "bbc", "bbj",
    "bel", "bej", "bem", "bew", "bez", "bfd", "bfq", "bul",
    "bgn", "bho", "bis", "bik", "bin", "bjn", "bkm", "bla",
    "bam", "ben", "bod", "bpy", "bqi", "bre", "bra", "brh",
    "brx", "bos", "bss", "bua", "bug", "bum", "byn", "byv",
    "cat", "cad", "car", "cay", "cch", "ccp", "che", "ceb", "cgg",
    "cha", "chb", "chg", "chk", "chm", "chn", "cho", "chp",
    "chr", "chy", "ckb", "cos", "cop", "cps", "cre", "crh",
    "ces", "csb", "chu", "chv", "cym",
    "dan", "dak", "dar", "dav", "deu", "del", "den", "dgr",
    "din", "dje", "doi", "dsb", "dtp", "dua", "dum", "div",
    "dyo", "dyu", "dzo", "dzg",
    "ebu", "ewe", "efi", "egl", "egy", "eka", "ell", "elx",
    "eng", "enm", "epo", "spa", "esu", "est", "eus", "ewo",
    "ext",
    "fas", "fan", "fat", "ful", "fin", "fil", "fit", "fij",
    "fao", "fon", "fra", "frc", "frm", "fro", "frp", "frr",
    "frs", "fur", "fry",
    "gle", "gaa", "gag", "gan", "gay", "gba", "gbz", "gla",
    "gez", "gil", "glg", "glk", "gmh", "grn", "goh", "gom",
    "gon", "gor", "got", "grb", "grc", "gsw", "guj", "guc",
    "gur", "guz", "glv", "gwi",
    "hau", "hai", "hak", "haw", "heb", "hin", "hif", "hil",
    "hit", "hmn", "hmo", "hrv", "hsb", "hsn", "hat", "hun",
    "hup", "hye", "her",
    "ina", "iba", "ibb", "ind", "ile", "ibo", "iii", "ipk",
    "ilo", "inh", "ido", "isl", "ita", "iku", "izh",
    "jpn", "jam", "jbo", "jgo", "jmc", "jpr", "jrb", "jut",
    "jav",
    "kat", "kaa", "kab", "kac", "kaj", "kam", "kaw", "kbd",
    "kbl", "kcg", "kde", "kea", "ken", "kfo", "kon", "kgp",
    "kha", "kho", "khq", "khw", "kik", "kiu", "kua", "kaz",
    "kkj", "kal", "kln", "khm", "kmb", "kan", "kor", "koi",
    "kok", "kos", "kpe", "kau", "krc", "kri", "krj", "krl",
    "kru", "kas", "ksb", "ksf", "ksh", "kur", "kum", "kut",
    "kom", "cor", "kir",
    "lat", "lad", "lag", "lah", "lam", "ltz", "lez", "lfn",
    "lug", "lim", "lij", "liv", "lkt", "lmo", "lin", "lao",
    "lol", "loz", "lrc", "lit", "ltg", "lub", "lua", "lui",
    "lun", "luo", "lus", "luy", "lav", "lzh", "lzz",
    "mad", "maf", "mag", "mai", "mak", "man", "mas", "mde",
    "mdf", "mdh", "mdr", "men", "mer", "mfe", "mlg", "mga",
    "mgh", "mgo", "mah", "mri", "mic", "min", "mis", "mkd",
    "mal", "mon", "mnc", "mni", "moh", "mos", "mar", "mrj",
    "msa", "mlt", "mua", "mul", "mus", "mwl", "mwr", "mwv",
    "mya", "mye", "myv", "mzn",
    "nau", "nan", "nap", "naq", "nob", "nde", "nds", "nep",
    "new", "ndo", "nia", "niu", "njo", "nld", "nmg", "nno",
    "nnh", "nor", "nog", "non", "nov", "nqo", "nbl", "nso",
    "nus", "nav", "nwc", "nya", "nym", "nyn", "nyo", "nzi",
    "oci", "oji", "orm", "ori", "oss", "osa", "ota",
    "pan", "pag", "pal", "pam", "pap", "pau", "pcd", "pdc",
    "pdt", "peo", "pfl", "phn", "pli", "pol", "pms", "pnt",
    "pon", "prg", "pro", "pus", "por",
    "que", "quc", "qug",
    "raj", "rap", "rar", "rgn", "rif", "roh", "run", "ron",
    "rof", "rom", "rtm", "rus", "rue", "rug", "rup",
    "kin", "rwk",
    "san", "sad", "sah", "sam", "saq", "sas", "sat", "saz",
    "sba", "sbp", "srd", "scn", "sco", "snd", "sdc", "sdh",
    "sme", "see", "seh", "sei", "sel", "ses", "sag", "sga",
    "sgs", "shi", "shn", "shu", "sin", "sid", "slk",
    "slv", "sli", "sly", "smo", "sma", "smj", "smn", "sms",
    "sna", "snk", "som", "sog", "sqi", "srp", "srn", "srr",
    "ssw", "ssy", "sot", "stq", "sun", "suk", "sus", "sux",
    "swe", "swa", "swb", "swc", "syc", "syr", "szl",
    "tam", "tcy", "tel", "tem", "teo", "ter", "tet", "tgk",
    "tha", "tir", "tig", "tiv", "tuk", "tkl", "tkr", "tgl",
    "tlh", "tli", "tly", "tmh", "tsn", "ton", "tog", "tpi",
    "tur", "tru", "trv", "tso", "tsd", "tsi", "tat", "ttt",
    "tum", "tvl", "twi", "twq", "tah", "tyv", "tzm",
    "udm", "uig", "uga", "ukr", "umb", "und", "urd", "uzb",
    "vai", "ven", "vec", "vep", "vie", "vls", "vmf", "vol",
    "vot", "vro", "vun",
    "wln", "wae", "wal", "war", "was", "wbp", "wol", "wuu",
    "xal", "xho", "xmf", "xog",
    "yao", "yap", "yav", "ybb", "yid", "yor", "yrl", "yue",
    "zha", "zap", "zbl", "zea", "zen", "zgh", "zho", "zul",
    "zun", "zxx", "zza",
NULL,   /* end of the list that mirrors the exposed part of LANGUAGES */
/*  "in",  "iw",  "ji",  "jw",  "sh",                          */
    "ind", "heb", "yid", "jaw", "srp",
NULL    /* end of the list that mirrors the obsolete codes */
};
308
309
/**
 * Table of 2-letter country codes.
 *
 * This list must be in sorted order.  This list is returned directly
 * to the user by some API.
 *
 * This list must be kept in sync with COUNTRIES_3, with corresponding
 * entries matched.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The first list is visible to
 * user code when this array is returned by API.  The second list
 * contains codes we support, but do not expose through user API.
 *
 * Notes:
 *
 * ZR(ZAR) is now CD(COD) and FX(FXX) is PS(PSE) as per
 * http://www.evertype.com/standards/iso3166/iso3166-1-en.html added
 * new codes keeping the old ones for compatibility updated to include
 * 1999/12/03 revisions *CWB*
 *
 * RO(ROM) is now RO(ROU) according to
 * http://www.iso.org/iso/en/prods-services/iso3166ma/03updates-on-iso-3166/nlv3e-rou.html
 */
static const char * const COUNTRIES[] = {
    "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",
    "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",
    "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",
    "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",
    "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",
    "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",
    "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",
    "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",
    "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",
    "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",
    "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",
    "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",
    "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS",
    "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",
    "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",
    "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",
    "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",
    "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",
    "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",
    "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",
    "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",
    "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",
    "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",
    "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",
    "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",
    "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",
    "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",
    "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",
    "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",
    "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",
NULL,   /* end of the list exposed to user code */
    "AN",  "BU", "CS", "FX", "RO", "SU", "TP", "YD", "YU", "ZR",   /* obsolete country codes */
NULL    /* end of the internal-only list */
};
368
369
/*
 * Deprecated country codes.  Entry i lines up with
 * REPLACEMENT_COUNTRIES[i], so the two arrays must keep the same order.
 * NOTE(review): the two trailing NULLs look like the "list, NULL, second
 * list, NULL" layout used by COUNTRIES with an empty second list --
 * confirm against the lookup helper before changing the terminators.
 */
static const char* const DEPRECATED_COUNTRIES[] = {
    "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR", NULL, NULL /* deprecated country list */
};
372
/*
 * Current codes for the deprecated country codes; entry i is the
 * replacement for DEPRECATED_COUNTRIES[i].  The commented-out row repeats
 * the deprecated codes so the pairing can be checked by eye.
 */
static const char* const REPLACEMENT_COUNTRIES[] = {
/*  "AN", "BU", "CS", "DD", "DY", "FX", "HV", "NH", "RH", "SU", "TP", "UK", "VD", "YD", "YU", "ZR" */
    "CW", "MM", "RS", "DE", "BJ", "FR", "BF", "VU", "ZW", "RU", "TL", "GB", "VN", "YE", "RS", "CD", NULL, NULL  /* replacement country codes */
};
376
377
/**
 * Table of 3-letter country codes.
 *
 * This is a lookup table used to convert 3-letter country codes to
 * their 2-letter equivalent.  It must be kept in sync with COUNTRIES.
 * For all valid i, COUNTRIES[i] must refer to the same country as
 * COUNTRIES_3[i].  The commented-out lines are copied from COUNTRIES
 * to make eyeballing this baby easier.
 *
 * This table should be terminated with a NULL entry, followed by a
 * second list, and another NULL entry.  The two lists correspond to
 * the two lists in COUNTRIES.
 */
static const char * const COUNTRIES_3[] = {
/*  "AD",  "AE",  "AF",  "AG",  "AI",  "AL",  "AM",      */
    "AND", "ARE", "AFG", "ATG", "AIA", "ALB", "ARM",
/*  "AO",  "AQ",  "AR",  "AS",  "AT",  "AU",  "AW",  "AX",  "AZ",     */
    "AGO", "ATA", "ARG", "ASM", "AUT", "AUS", "ABW", "ALA", "AZE",
/*  "BA",  "BB",  "BD",  "BE",  "BF",  "BG",  "BH",  "BI",     */
    "BIH", "BRB", "BGD", "BEL", "BFA", "BGR", "BHR", "BDI",
/*  "BJ",  "BL",  "BM",  "BN",  "BO",  "BQ",  "BR",  "BS",  "BT",  "BV",     */
    "BEN", "BLM", "BMU", "BRN", "BOL", "BES", "BRA", "BHS", "BTN", "BVT",
/*  "BW",  "BY",  "BZ",  "CA",  "CC",  "CD",  "CF",  "CG",     */
    "BWA", "BLR", "BLZ", "CAN", "CCK", "COD", "CAF", "COG",
/*  "CH",  "CI",  "CK",  "CL",  "CM",  "CN",  "CO",  "CR",     */
    "CHE", "CIV", "COK", "CHL", "CMR", "CHN", "COL", "CRI",
/*  "CU",  "CV",  "CW",  "CX",  "CY",  "CZ",  "DE",  "DJ",  "DK",     */
    "CUB", "CPV", "CUW", "CXR", "CYP", "CZE", "DEU", "DJI", "DNK",
/*  "DM",  "DO",  "DZ",  "EC",  "EE",  "EG",  "EH",  "ER",     */
    "DMA", "DOM", "DZA", "ECU", "EST", "EGY", "ESH", "ERI",
/*  "ES",  "ET",  "FI",  "FJ",  "FK",  "FM",  "FO",  "FR",     */
    "ESP", "ETH", "FIN", "FJI", "FLK", "FSM", "FRO", "FRA",
/*  "GA",  "GB",  "GD",  "GE",  "GF",  "GG",  "GH",  "GI",  "GL",     */
    "GAB", "GBR", "GRD", "GEO", "GUF", "GGY", "GHA", "GIB", "GRL",
/*  "GM",  "GN",  "GP",  "GQ",  "GR",  "GS",  "GT",  "GU",     */
    "GMB", "GIN", "GLP", "GNQ", "GRC", "SGS", "GTM", "GUM",
/*  "GW",  "GY",  "HK",  "HM",  "HN",  "HR",  "HT",  "HU",     */
    "GNB", "GUY", "HKG", "HMD", "HND", "HRV", "HTI", "HUN",
/*  "ID",  "IE",  "IL",  "IM",  "IN",  "IO",  "IQ",  "IR",  "IS" */
    "IDN", "IRL", "ISR", "IMN", "IND", "IOT", "IRQ", "IRN", "ISL",
/*  "IT",  "JE",  "JM",  "JO",  "JP",  "KE",  "KG",  "KH",  "KI",     */
    "ITA", "JEY", "JAM", "JOR", "JPN", "KEN", "KGZ", "KHM", "KIR",
/*  "KM",  "KN",  "KP",  "KR",  "KW",  "KY",  "KZ",  "LA",     */
    "COM", "KNA", "PRK", "KOR", "KWT", "CYM", "KAZ", "LAO",
/*  "LB",  "LC",  "LI",  "LK",  "LR",  "LS",  "LT",  "LU",     */
    "LBN", "LCA", "LIE", "LKA", "LBR", "LSO", "LTU", "LUX",
/*  "LV",  "LY",  "MA",  "MC",  "MD",  "ME",  "MF",  "MG",  "MH",  "MK",     */
    "LVA", "LBY", "MAR", "MCO", "MDA", "MNE", "MAF", "MDG", "MHL", "MKD",
/*  "ML",  "MM",  "MN",  "MO",  "MP",  "MQ",  "MR",  "MS",     */
    "MLI", "MMR", "MNG", "MAC", "MNP", "MTQ", "MRT", "MSR",
/*  "MT",  "MU",  "MV",  "MW",  "MX",  "MY",  "MZ",  "NA",     */
    "MLT", "MUS", "MDV", "MWI", "MEX", "MYS", "MOZ", "NAM",
/*  "NC",  "NE",  "NF",  "NG",  "NI",  "NL",  "NO",  "NP",     */
    "NCL", "NER", "NFK", "NGA", "NIC", "NLD", "NOR", "NPL",
/*  "NR",  "NU",  "NZ",  "OM",  "PA",  "PE",  "PF",  "PG",     */
    "NRU", "NIU", "NZL", "OMN", "PAN", "PER", "PYF", "PNG",
/*  "PH",  "PK",  "PL",  "PM",  "PN",  "PR",  "PS",  "PT",     */
    "PHL", "PAK", "POL", "SPM", "PCN", "PRI", "PSE", "PRT",
/*  "PW",  "PY",  "QA",  "RE",  "RO",  "RS",  "RU",  "RW",  "SA",     */
    "PLW", "PRY", "QAT", "REU", "ROU", "SRB", "RUS", "RWA", "SAU",
/*  "SB",  "SC",  "SD",  "SE",  "SG",  "SH",  "SI",  "SJ",     */
    "SLB", "SYC", "SDN", "SWE", "SGP", "SHN", "SVN", "SJM",
/*  "SK",  "SL",  "SM",  "SN",  "SO",  "SR",  "SS",  "ST",  "SV",     */
    "SVK", "SLE", "SMR", "SEN", "SOM", "SUR", "SSD", "STP", "SLV",
/*  "SX",  "SY",  "SZ",  "TC",  "TD",  "TF",  "TG",  "TH",  "TJ",     */
    "SXM", "SYR", "SWZ", "TCA", "TCD", "ATF", "TGO", "THA", "TJK",
/*  "TK",  "TL",  "TM",  "TN",  "TO",  "TR",  "TT",  "TV",     */
    "TKL", "TLS", "TKM", "TUN", "TON", "TUR", "TTO", "TUV",
/*  "TW",  "TZ",  "UA",  "UG",  "UM",  "US",  "UY",  "UZ",     */
    "TWN", "TZA", "UKR", "UGA", "UMI", "USA", "URY", "UZB",
/*  "VA",  "VC",  "VE",  "VG",  "VI",  "VN",  "VU",  "WF",     */
    "VAT", "VCT", "VEN", "VGB", "VIR", "VNM", "VUT", "WLF",
/*  "WS",  "YE",  "YT",  "ZA",  "ZM",  "ZW",          */
    "WSM", "YEM", "MYT", "ZAF", "ZMB", "ZWE",
NULL,   /* end of the list that mirrors the exposed part of COUNTRIES */
/*  "AN",  "BU",  "CS",  "FX",  "RO", "SU",  "TP",  "YD",  "YU",  "ZR" */
    "ANT", "BUR", "SCG", "FXX", "ROM", "SUN", "TMP", "YMD", "YUG", "ZAR",
NULL    /* end of the list that mirrors the obsolete codes */
};
456
457
/*
 * One row of the locale-ID canonicalization table: maps a legacy or
 * alias locale ID to its canonical spelling, optionally together with a
 * keyword/value pair.
 */
typedef struct CanonicalizationMap {
    const char *id;          /* input ID */
    const char *canonicalID; /* canonicalized output ID */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if keyword==NULL */
} CanonicalizationMap;
463
464
/**
465
 * A map to canonicalize locale IDs.  This handles a variety of
466
 * different semantic kinds of transformations.
467
 */
468
static const CanonicalizationMap CANONICALIZE_MAP[] = {
469
    { "",               "en_US_POSIX", NULL, NULL }, /* .NET name */
470
    { "c",              "en_US_POSIX", NULL, NULL }, /* POSIX name */
471
    { "posix",          "en_US_POSIX", NULL, NULL }, /* POSIX name (alias of C) */
472
    { "art_LOJBAN",     "jbo", NULL, NULL }, /* registered name */
473
    { "az_AZ_CYRL",     "az_Cyrl_AZ", NULL, NULL }, /* .NET name */
474
    { "az_AZ_LATN",     "az_Latn_AZ", NULL, NULL }, /* .NET name */
475
    { "ca_ES_PREEURO",  "ca_ES", "currency", "ESP" },
476
    { "de__PHONEBOOK",  "de", "collation", "phonebook" }, /* Old ICU name */
477
    { "de_AT_PREEURO",  "de_AT", "currency", "ATS" },
478
    { "de_DE_PREEURO",  "de_DE", "currency", "DEM" },
479
    { "de_LU_PREEURO",  "de_LU", "currency", "LUF" },
480
    { "el_GR_PREEURO",  "el_GR", "currency", "GRD" },
481
    { "en_BE_PREEURO",  "en_BE", "currency", "BEF" },
482
    { "en_IE_PREEURO",  "en_IE", "currency", "IEP" },
483
    { "es__TRADITIONAL", "es", "collation", "traditional" }, /* Old ICU name */
484
    { "es_ES_PREEURO",  "es_ES", "currency", "ESP" },
485
    { "eu_ES_PREEURO",  "eu_ES", "currency", "ESP" },
486
    { "fi_FI_PREEURO",  "fi_FI", "currency", "FIM" },
487
    { "fr_BE_PREEURO",  "fr_BE", "currency", "BEF" },
488
    { "fr_FR_PREEURO",  "fr_FR", "currency", "FRF" },
489
    { "fr_LU_PREEURO",  "fr_LU", "currency", "LUF" },
490
    { "ga_IE_PREEURO",  "ga_IE", "currency", "IEP" },
491
    { "gl_ES_PREEURO",  "gl_ES", "currency", "ESP" },
492
    { "hi__DIRECT",     "hi", "collation", "direct" }, /* Old ICU name */
493
    { "it_IT_PREEURO",  "it_IT", "currency", "ITL" },
494
    { "ja_JP_TRADITIONAL", "ja_JP", "calendar", "japanese" }, /* Old ICU name */
495
    { "nb_NO_NY",       "nn_NO", NULL, NULL },  /* "markus said this was ok" :-) */
496
    { "nl_BE_PREEURO",  "nl_BE", "currency", "BEF" },
497
    { "nl_NL_PREEURO",  "nl_NL", "currency", "NLG" },
498
    { "pt_PT_PREEURO",  "pt_PT", "currency", "PTE" },
499
    { "sr_SP_CYRL",     "sr_Cyrl_RS", NULL, NULL }, /* .NET name */
500
    { "sr_SP_LATN",     "sr_Latn_RS", NULL, NULL }, /* .NET name */
501
    { "sr_YU_CYRILLIC", "sr_Cyrl_RS", NULL, NULL }, /* Linux name */
502
    { "th_TH_TRADITIONAL", "th_TH", "calendar", "buddhist" }, /* Old ICU name */
503
    { "uz_UZ_CYRILLIC", "uz_Cyrl_UZ", NULL, NULL }, /* Linux name */
504
    { "uz_UZ_CYRL",     "uz_Cyrl_UZ", NULL, NULL }, /* .NET name */
505
    { "uz_UZ_LATN",     "uz_Latn_UZ", NULL, NULL }, /* .NET name */
506
    { "zh_CHS",         "zh_Hans", NULL, NULL }, /* .NET name */
507
    { "zh_CHT",         "zh_Hant", NULL, NULL }, /* .NET name */
508
    { "zh_GAN",         "gan", NULL, NULL }, /* registered name */
509
    { "zh_GUOYU",       "zh", NULL, NULL }, /* registered name */
510
    { "zh_HAKKA",       "hak", NULL, NULL }, /* registered name */
511
    { "zh_MIN_NAN",     "nan", NULL, NULL }, /* registered name */
512
    { "zh_WUU",         "wuu", NULL, NULL }, /* registered name */
513
    { "zh_XIANG",       "hsn", NULL, NULL }, /* registered name */
514
    { "zh_YUE",         "yue", NULL, NULL }, /* registered name */
515
};
516
517
/*
 * One row of the variant-to-keyword table: a legacy variant name and
 * the keyword/value pair it stands for.
 */
typedef struct VariantMap {
    const char *variant;     /* input variant name */
    const char *keyword;     /* keyword, or NULL if none */
    const char *value;       /* keyword value, or NULL if keyword==NULL */
} VariantMap;
522
523
/*
 * Legacy variant names and the keyword/value pair each one maps to.
 * Each row is { variant, keyword, keyword value }.
 */
static const VariantMap VARIANT_MAP[] = {
    { "EURO",   "currency",  "EUR" },
    { "PINYIN", "collation", "pinyin" }, /* Solaris variant */
    { "STROKE", "collation", "stroke" }  /* Solaris variant */
};
528
529
/* ### BCP47 Conversion *******************************************/
530
/*
 * Test if the locale id has a BCP47 extension and does not have '@':
 * true when id is non-NULL, contains no '@', and
 * getShortestSubtagLength(id) reports a one-character subtag.
 *
 * The macro now refers only to its own parameter; it previously named a
 * variable called localeID, which had to exist at the call site.
 * Note that id is evaluated up to three times.
 */
#define _hasBCP47Extension(id) ((id) && uprv_strstr((id), "@") == NULL && getShortestSubtagLength(id) == 1)
532
/*
 * Converts the BCP47 id to Unicode id. Does nothing to id if conversion fails.
 *
 * Calls uloc_forLanguageTag(id, buffer, length, NULL, err).
 *  - On success finalID is set to buffer.
 *  - If the call returns <= 0, *err is a failure, or *err is
 *    U_STRING_NOT_TERMINATED_WARNING, finalID is set to id; in the
 *    not-terminated case *err is also turned into U_BUFFER_OVERFLOW_ERROR.
 *
 * Wrapped in do { } while (0) so the macro behaves as one statement (it
 * can sit in an unbraced if/else); invoke it with a trailing semicolon.
 * id and err are evaluated more than once.
 */
#define _ConvertBCP47(finalID, id, buffer, length, err) \
        do { \
            if (uloc_forLanguageTag((id), (buffer), (length), NULL, (err)) <= 0 ||  \
                    U_FAILURE(*(err)) || *(err) == U_STRING_NOT_TERMINATED_WARNING) { \
                (finalID) = (id); \
                if (*(err) == U_STRING_NOT_TERMINATED_WARNING) { \
                    *(err) = U_BUFFER_OVERFLOW_ERROR; \
                } \
            } else { \
                (finalID) = (buffer); \
            } \
        } while (0)
541
/*
 * Gets the size of the shortest subtag in the given localeID.
 *
 * Subtags are runs of characters separated by '_' or '-'.  A subtag is
 * only measured when a separator follows it, so the final subtag of the
 * ID never counts.  If no subtag is followed by a separator, the length
 * of the whole string is returned (0 for an empty string).
 *
 * @param localeID zero-terminated locale ID; must not be NULL
 * @return length of the shortest separator-terminated subtag, or the
 *         string length if there is none
 */
static int32_t getShortestSubtagLength(const char *localeID) {
    int32_t shortest = -1;  /* -1: no separator-terminated subtag seen yet */
    int32_t runLength = 0;  /* characters in the subtag being scanned */
    int32_t pos = 0;

    for (; localeID[pos] != '\0'; ++pos) {
        const char ch = localeID[pos];
        if (ch == '_' || ch == '-') {
            /* runLength is 0 for leading or repeated separators: ignore those */
            if (runLength > 0 && (shortest < 0 || runLength < shortest)) {
                shortest = runLength;
            }
            runLength = 0;
        } else {
            ++runLength;
        }
    }

    /* pos is now the string length, the fallback when nothing was measured */
    return shortest < 0 ? pos : shortest;
}
566
567
/* ### Keywords **************************************************/
568
0
/* True when c is an ASCII decimal digit.  c is evaluated twice. */
#define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9'))
/* True when c passes uprv_isASCIILetter() or is an ASCII decimal digit. */
#define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) )
/* Punctuation/symbols allowed in legacy key values: '_', '-', '+' and '/' */
#define UPRV_OK_VALUE_PUNCTUATION(c) ((c) == '_' || (c) == '-' || (c) == '+' || (c) == '/')

/* Size in bytes of a canonicalized keyword-name buffer, terminator included */
#define ULOC_KEYWORD_BUFFER_LEN 25
/* Maximum number of keywords collected from a single locale ID */
#define ULOC_MAX_NO_KEYWORDS 25
575
576
/**
 * Finds where the keyword section of a locale ID begins.
 *
 * @param localeID zero-terminated locale ID; must not be NULL
 * @return pointer to the first '@' in localeID, or NULL if there is none.
 *         On EBCDIC builds, when no '@' is found, each byte of a fixed
 *         list of alternative '@' encodings is tried in order and the
 *         first occurrence of the first one present is returned.
 */
U_CAPI const char * U_EXPORT2
locale_getKeywordsStart(const char *localeID) {
    const char *atSign = uprv_strchr(localeID, '@');
    if (atSign != NULL) {
        return atSign;
    }
#if (U_CHARSET_FAMILY == U_EBCDIC_FAMILY)
    {
        /* We do this because the @ sign is variant, and the @ sign used on one
        EBCDIC machine won't be compiled the same way on other EBCDIC based
        machines. */
        static const uint8_t ebcdicSigns[] = { 0x7C, 0x44, 0x66, 0x80, 0xAC, 0xAE, 0xAF, 0xB5, 0xEC, 0xEF, 0x00 };
        const uint8_t *candidate;
        /* the list is terminated by its trailing 0x00 entry */
        for (candidate = ebcdicSigns; *candidate != 0; ++candidate) {
            const char *hit = uprv_strchr(localeID, *candidate);
            if (hit != NULL) {
                return hit;
            }
        }
    }
#endif
    return NULL;
}
599
600
/**
601
 * @param buf buffer of size [ULOC_KEYWORD_BUFFER_LEN]
602
 * @param keywordName incoming name to be canonicalized
603
 * @param status return status (keyword too long)
604
 * @return length of the keyword name
605
 */
606
static int32_t locale_canonKeywordName(char *buf, const char *keywordName, UErrorCode *status)
607
0
{
608
0
  int32_t keywordNameLen = 0;
609
0
610
0
  for (; *keywordName != 0; keywordName++) {
611
0
    if (!UPRV_ISALPHANUM(*keywordName)) {
612
0
      *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
613
0
      return 0;
614
0
    }
615
0
    if (keywordNameLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
616
0
      buf[keywordNameLen++] = uprv_tolower(*keywordName);
617
0
    } else {
618
0
      /* keyword name too long for internal buffer */
619
0
      *status = U_INTERNAL_PROGRAM_ERROR;
620
0
      return 0;
621
0
    }
622
0
  }
623
0
  if (keywordNameLen == 0) {
624
0
    *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name */
625
0
    return 0;
626
0
  }
627
0
  buf[keywordNameLen] = 0; /* terminate */
628
0
629
0
  return keywordNameLen;
630
0
}
631
632
typedef struct {
633
    char keyword[ULOC_KEYWORD_BUFFER_LEN];
634
    int32_t keywordLen;
635
    const char *valueStart;
636
    int32_t valueLen;
637
} KeywordStruct;
638
639
/**
 * Comparator passed to uprv_sortArray(): orders two KeywordStruct entries by
 * their keyword names using uprv_strcmp. The context argument is unused.
 */
static int32_t U_CALLCONV
compareKeywordStructs(const void * /*context*/, const void *left, const void *right) {
    const KeywordStruct *lhs = static_cast<const KeywordStruct *>(left);
    const KeywordStruct *rhs = static_cast<const KeywordStruct *>(right);
    return uprv_strcmp(lhs->keyword, rhs->keyword);
}
645
646
/**
647
 * Both addKeyword and addValue must already be in canonical form.
648
 * Either both addKeyword and addValue are NULL, or neither is NULL.
649
 * If they are not NULL they must be zero terminated.
650
 * If addKeyword is not NULL is must have length small enough to fit in KeywordStruct.keyword.
651
 */
652
static int32_t
653
_getKeywords(const char *localeID,
654
             char prev,
655
             char *keywords, int32_t keywordCapacity,
656
             char *values, int32_t valuesCapacity, int32_t *valLen,
657
             UBool valuesToo,
658
             const char* addKeyword,
659
             const char* addValue,
660
             UErrorCode *status)
661
0
{
662
0
    KeywordStruct keywordList[ULOC_MAX_NO_KEYWORDS];
663
0
664
0
    int32_t maxKeywords = ULOC_MAX_NO_KEYWORDS;
665
0
    int32_t numKeywords = 0;
666
0
    const char* pos = localeID;
667
0
    const char* equalSign = NULL;
668
0
    const char* semicolon = NULL;
669
0
    int32_t i = 0, j, n;
670
0
    int32_t keywordsLen = 0;
671
0
    int32_t valuesLen = 0;
672
0
673
0
    if(prev == '@') { /* start of keyword definition */
674
0
        /* we will grab pairs, trim spaces, lowercase keywords, sort and return */
675
0
        do {
676
0
            UBool duplicate = FALSE;
677
0
            /* skip leading spaces */
678
0
            while(*pos == ' ') {
679
0
                pos++;
680
0
            }
681
0
            if (!*pos) { /* handle trailing "; " */
682
0
                break;
683
0
            }
684
0
            if(numKeywords == maxKeywords) {
685
0
                *status = U_INTERNAL_PROGRAM_ERROR;
686
0
                return 0;
687
0
            }
688
0
            equalSign = uprv_strchr(pos, '=');
689
0
            semicolon = uprv_strchr(pos, ';');
690
0
            /* lack of '=' [foo@currency] is illegal */
691
0
            /* ';' before '=' [foo@currency;collation=pinyin] is illegal */
692
0
            if(!equalSign || (semicolon && semicolon<equalSign)) {
693
0
                *status = U_INVALID_FORMAT_ERROR;
694
0
                return 0;
695
0
            }
696
0
            /* need to normalize both keyword and keyword name */
697
0
            if(equalSign - pos >= ULOC_KEYWORD_BUFFER_LEN) {
698
0
                /* keyword name too long for internal buffer */
699
0
                *status = U_INTERNAL_PROGRAM_ERROR;
700
0
                return 0;
701
0
            }
702
0
            for(i = 0, n = 0; i < equalSign - pos; ++i) {
703
0
                if (pos[i] != ' ') {
704
0
                    keywordList[numKeywords].keyword[n++] = uprv_tolower(pos[i]);
705
0
                }
706
0
            }
707
0
708
0
            /* zero-length keyword is an error. */
709
0
            if (n == 0) {
710
0
                *status = U_INVALID_FORMAT_ERROR;
711
0
                return 0;
712
0
            }
713
0
714
0
            keywordList[numKeywords].keyword[n] = 0;
715
0
            keywordList[numKeywords].keywordLen = n;
716
0
            /* now grab the value part. First we skip the '=' */
717
0
            equalSign++;
718
0
            /* then we leading spaces */
719
0
            while(*equalSign == ' ') {
720
0
                equalSign++;
721
0
            }
722
0
723
0
            /* Premature end or zero-length value */
724
0
            if (!*equalSign || equalSign == semicolon) {
725
0
                *status = U_INVALID_FORMAT_ERROR;
726
0
                return 0;
727
0
            }
728
0
729
0
            keywordList[numKeywords].valueStart = equalSign;
730
0
731
0
            pos = semicolon;
732
0
            i = 0;
733
0
            if(pos) {
734
0
                while(*(pos - i - 1) == ' ') {
735
0
                    i++;
736
0
                }
737
0
                keywordList[numKeywords].valueLen = (int32_t)(pos - equalSign - i);
738
0
                pos++;
739
0
            } else {
740
0
                i = (int32_t)uprv_strlen(equalSign);
741
0
                while(i && equalSign[i-1] == ' ') {
742
0
                    i--;
743
0
                }
744
0
                keywordList[numKeywords].valueLen = i;
745
0
            }
746
0
            /* If this is a duplicate keyword, then ignore it */
747
0
            for (j=0; j<numKeywords; ++j) {
748
0
                if (uprv_strcmp(keywordList[j].keyword, keywordList[numKeywords].keyword) == 0) {
749
0
                    duplicate = TRUE;
750
0
                    break;
751
0
                }
752
0
            }
753
0
            if (!duplicate) {
754
0
                ++numKeywords;
755
0
            }
756
0
        } while(pos);
757
0
758
0
        /* Handle addKeyword/addValue. */
759
0
        if (addKeyword != NULL) {
760
0
            UBool duplicate = FALSE;
761
0
            U_ASSERT(addValue != NULL);
762
0
            /* Search for duplicate; if found, do nothing. Explicit keyword
763
0
               overrides addKeyword. */
764
0
            for (j=0; j<numKeywords; ++j) {
765
0
                if (uprv_strcmp(keywordList[j].keyword, addKeyword) == 0) {
766
0
                    duplicate = TRUE;
767
0
                    break;
768
0
                }
769
0
            }
770
0
            if (!duplicate) {
771
0
                if (numKeywords == maxKeywords) {
772
0
                    *status = U_INTERNAL_PROGRAM_ERROR;
773
0
                    return 0;
774
0
                }
775
0
                uprv_strcpy(keywordList[numKeywords].keyword, addKeyword);
776
0
                keywordList[numKeywords].keywordLen = (int32_t)uprv_strlen(addKeyword);
777
0
                keywordList[numKeywords].valueStart = addValue;
778
0
                keywordList[numKeywords].valueLen = (int32_t)uprv_strlen(addValue);
779
0
                ++numKeywords;
780
0
            }
781
0
        } else {
782
0
            U_ASSERT(addValue == NULL);
783
0
        }
784
0
785
0
        /* now we have a list of keywords */
786
0
        /* we need to sort it */
787
0
        uprv_sortArray(keywordList, numKeywords, sizeof(KeywordStruct), compareKeywordStructs, NULL, FALSE, status);
788
0
789
0
        /* Now construct the keyword part */
790
0
        for(i = 0; i < numKeywords; i++) {
791
0
            if(keywordsLen + keywordList[i].keywordLen + 1< keywordCapacity) {
792
0
                uprv_strcpy(keywords+keywordsLen, keywordList[i].keyword);
793
0
                if(valuesToo) {
794
0
                    keywords[keywordsLen + keywordList[i].keywordLen] = '=';
795
0
                } else {
796
0
                    keywords[keywordsLen + keywordList[i].keywordLen] = 0;
797
0
                }
798
0
            }
799
0
            keywordsLen += keywordList[i].keywordLen + 1;
800
0
            if(valuesToo) {
801
0
                if(keywordsLen + keywordList[i].valueLen < keywordCapacity) {
802
0
                    uprv_strncpy(keywords+keywordsLen, keywordList[i].valueStart, keywordList[i].valueLen);
803
0
                }
804
0
                keywordsLen += keywordList[i].valueLen;
805
0
806
0
                if(i < numKeywords - 1) {
807
0
                    if(keywordsLen < keywordCapacity) {
808
0
                        keywords[keywordsLen] = ';';
809
0
                    }
810
0
                    keywordsLen++;
811
0
                }
812
0
            }
813
0
            if(values) {
814
0
                if(valuesLen + keywordList[i].valueLen + 1< valuesCapacity) {
815
0
                    uprv_strcpy(values+valuesLen, keywordList[i].valueStart);
816
0
                    values[valuesLen + keywordList[i].valueLen] = 0;
817
0
                }
818
0
                valuesLen += keywordList[i].valueLen + 1;
819
0
            }
820
0
        }
821
0
        if(values) {
822
0
            values[valuesLen] = 0;
823
0
            if(valLen) {
824
0
                *valLen = valuesLen;
825
0
            }
826
0
        }
827
0
        return u_terminateChars(keywords, keywordCapacity, keywordsLen, status);
828
0
    } else {
829
0
        return 0;
830
0
    }
831
0
}
832
833
/**
 * Non-static entry point for _getKeywords() that supplies no additional
 * keyword/value pair; every other argument is forwarded unchanged and the
 * result of _getKeywords() is returned as-is.
 */
U_CFUNC int32_t
locale_getKeywords(const char *localeID,
                   char prev,
                   char *keywords, int32_t keywordCapacity,
                   char *values, int32_t valuesCapacity, int32_t *valLen,
                   UBool valuesToo,
                   UErrorCode *status) {
    const char *const noExtraKeyword = NULL;
    const char *const noExtraValue = NULL;

    return _getKeywords(localeID, prev,
                        keywords, keywordCapacity,
                        values, valuesCapacity, valLen,
                        valuesToo,
                        noExtraKeyword, noExtraValue,
                        status);
}
844
845
/**
 * Looks up the value of one keyword in a locale ID.
 *
 * The keyword name is canonicalized (lowercased) before comparison, and
 * spaces around keyword names and values in the locale ID are tolerated.
 * The value is copied as-is (no case change).
 *
 * @param localeID       locale ID to search; BCP47 input with an extension is
 *                       converted first
 * @param keywordName    keyword to look for; must be non-empty and alphanumeric
 * @param buffer         receives the value
 * @param bufferCapacity capacity of buffer
 * @param status         U_ILLEGAL_ARGUMENT_ERROR for a bad keywordName or a
 *                       malformed keyword list in localeID;
 *                       U_INTERNAL_PROGRAM_ERROR for an over-long keyword name
 * @return length of the value (may exceed bufferCapacity); 0 on error, when
 *         the locale has no keywords, or when the keyword is absent. In the
 *         latter two cases buffer is set to the empty string when it has room.
 */
U_CAPI int32_t U_EXPORT2
uloc_getKeywordValue(const char* localeID,
                     const char* keywordName,
                     char* buffer, int32_t bufferCapacity,
                     UErrorCode* status)
{
    const char* startSearchHere = NULL;
    const char* nextSeparator = NULL;
    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
    int32_t result = 0;

    if(status && U_SUCCESS(*status) && localeID) {
      char tempBuffer[ULOC_FULLNAME_CAPACITY];
      const char* tmpLocaleID;

      if (keywordName == NULL || keywordName[0] == 0) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;
      }

      locale_canonKeywordName(keywordNameBuffer, keywordName, status);
      if(U_FAILURE(*status)) {
        return 0;
      }

      if (_hasBCP47Extension(localeID)) {
          _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
      } else {
          tmpLocaleID=localeID;
      }

      startSearchHere = locale_getKeywordsStart(tmpLocaleID);
      if(startSearchHere == NULL) {
          /* no keywords: report an empty value instead of leaving stale
             buffer contents behind */
          if (buffer != NULL && bufferCapacity > 0) {
              buffer[0] = 0;
          }
          return 0;
      }

      /* walk the keyword=value entries one by one */
      while(startSearchHere) {
          const char* keyValueTail;
          int32_t keyValueLen;

          startSearchHere++; /* skip @ or ; */
          nextSeparator = uprv_strchr(startSearchHere, '=');
          if(!nextSeparator) {
              *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
              return 0;
          }
          /* strip leading & trailing spaces (TC decided to tolerate these) */
          while(*startSearchHere == ' ') {
              startSearchHere++;
          }
          keyValueTail = nextSeparator;
          while (keyValueTail > startSearchHere && *(keyValueTail-1) == ' ') {
              keyValueTail--;
          }
          /* now keyValueTail points to first char after the keyName */
          /* copy & normalize keyName from locale */
          if (startSearchHere == keyValueTail) {
              *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
              return 0;
          }
          keyValueLen = 0;
          while (startSearchHere < keyValueTail) {
            if (!UPRV_ISALPHANUM(*startSearchHere)) {
              *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
              return 0;
            }
            if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
              localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*startSearchHere++);
            } else {
              /* keyword name too long for internal buffer */
              *status = U_INTERNAL_PROGRAM_ERROR;
              return 0;
            }
          }
          localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */

          /* the next ';' (if any) both ends this value and starts the next entry */
          startSearchHere = uprv_strchr(nextSeparator, ';');

          if(uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer) == 0) {
              /* current entry matches the keyword. */
              nextSeparator++; /* skip '=' */
              /* First strip leading & trailing spaces (TC decided to tolerate these) */
              while(*nextSeparator == ' ') {
                nextSeparator++;
              }
              keyValueTail = (startSearchHere)? startSearchHere: nextSeparator + uprv_strlen(nextSeparator);
              while(keyValueTail > nextSeparator && *(keyValueTail-1) == ' ') {
                keyValueTail--;
              }
              /* Now copy the value, but check well-formedness */
              if (nextSeparator == keyValueTail) {
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value name in passed-in locale */
                return 0;
              }
              keyValueLen = 0;
              while (nextSeparator < keyValueTail) {
                if (!UPRV_ISALPHANUM(*nextSeparator) && !UPRV_OK_VALUE_PUNCTUATION(*nextSeparator)) {
                  *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
                  return 0;
                }
                if (keyValueLen < bufferCapacity) {
                  /* Should we lowercase value to return here? Tests expect as-is. */
                  buffer[keyValueLen++] = *nextSeparator++;
                } else { /* keep advancing so we return correct length in case of overflow */
                  keyValueLen++;
                  nextSeparator++;
                }
              }
              result = u_terminateChars(buffer, bufferCapacity, keyValueLen, status);
              return result;
          }
      }

      /* every entry was well-formed but none matched: empty value */
      if (buffer != NULL && bufferCapacity > 0) {
          buffer[0] = 0;
      }
    }
    return 0;
}
963
964
U_CAPI int32_t U_EXPORT2
965
uloc_setKeywordValue(const char* keywordName,
966
                     const char* keywordValue,
967
                     char* buffer, int32_t bufferCapacity,
968
                     UErrorCode* status)
969
0
{
970
0
    /* TODO: sorting. removal. */
971
0
    int32_t keywordNameLen;
972
0
    int32_t keywordValueLen;
973
0
    int32_t bufLen;
974
0
    int32_t needLen = 0;
975
0
    char keywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
976
0
    char keywordValueBuffer[ULOC_KEYWORDS_CAPACITY+1];
977
0
    char localeKeywordNameBuffer[ULOC_KEYWORD_BUFFER_LEN];
978
0
    int32_t rc;
979
0
    char* nextSeparator = NULL;
980
0
    char* nextEqualsign = NULL;
981
0
    char* startSearchHere = NULL;
982
0
    char* keywordStart = NULL;
983
0
    CharString updatedKeysAndValues;
984
0
    int32_t updatedKeysAndValuesLen;
985
0
    UBool handledInputKeyAndValue = FALSE;
986
0
    char keyValuePrefix = '@';
987
0
988
0
    if(U_FAILURE(*status)) {
989
0
        return -1;
990
0
    }
991
0
    if (keywordName == NULL || keywordName[0] == 0 || bufferCapacity <= 1) {
992
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
993
0
        return 0;
994
0
    }
995
0
    bufLen = (int32_t)uprv_strlen(buffer);
996
0
    if(bufferCapacity<bufLen) {
997
0
        /* The capacity is less than the length?! Is this NULL terminated? */
998
0
        *status = U_ILLEGAL_ARGUMENT_ERROR;
999
0
        return 0;
1000
0
    }
1001
0
    keywordNameLen = locale_canonKeywordName(keywordNameBuffer, keywordName, status);
1002
0
    if(U_FAILURE(*status)) {
1003
0
        return 0;
1004
0
    }
1005
0
1006
0
    keywordValueLen = 0;
1007
0
    if(keywordValue) {
1008
0
        while (*keywordValue != 0) {
1009
0
            if (!UPRV_ISALPHANUM(*keywordValue) && !UPRV_OK_VALUE_PUNCTUATION(*keywordValue)) {
1010
0
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed key value */
1011
0
                return 0;
1012
0
            }
1013
0
            if (keywordValueLen < ULOC_KEYWORDS_CAPACITY) {
1014
0
                /* Should we force lowercase in value to set? */
1015
0
                keywordValueBuffer[keywordValueLen++] = *keywordValue++;
1016
0
            } else {
1017
0
                /* keywordValue too long for internal buffer */
1018
0
                *status = U_INTERNAL_PROGRAM_ERROR;
1019
0
                return 0;
1020
0
            }
1021
0
        }
1022
0
    }
1023
0
    keywordValueBuffer[keywordValueLen] = 0; /* terminate */
1024
0
1025
0
    startSearchHere = (char*)locale_getKeywordsStart(buffer);
1026
0
    if(startSearchHere == NULL || (startSearchHere[1]==0)) {
1027
0
        if(keywordValueLen == 0) { /* no keywords = nothing to remove */
1028
0
            return bufLen;
1029
0
        }
1030
0
1031
0
        needLen = bufLen+1+keywordNameLen+1+keywordValueLen;
1032
0
        if(startSearchHere) { /* had a single @ */
1033
0
            needLen--; /* already had the @ */
1034
0
            /* startSearchHere points at the @ */
1035
0
        } else {
1036
0
            startSearchHere=buffer+bufLen;
1037
0
        }
1038
0
        if(needLen >= bufferCapacity) {
1039
0
            *status = U_BUFFER_OVERFLOW_ERROR;
1040
0
            return needLen; /* no change */
1041
0
        }
1042
0
        *startSearchHere++ = '@';
1043
0
        uprv_strcpy(startSearchHere, keywordNameBuffer);
1044
0
        startSearchHere += keywordNameLen;
1045
0
        *startSearchHere++ = '=';
1046
0
        uprv_strcpy(startSearchHere, keywordValueBuffer);
1047
0
        return needLen;
1048
0
    } /* end shortcut - no @ */
1049
0
1050
0
    keywordStart = startSearchHere;
1051
0
    /* search for keyword */
1052
0
    while(keywordStart) {
1053
0
        const char* keyValueTail;
1054
0
        int32_t keyValueLen;
1055
0
1056
0
        keywordStart++; /* skip @ or ; */
1057
0
        nextEqualsign = uprv_strchr(keywordStart, '=');
1058
0
        if (!nextEqualsign) {
1059
0
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* key must have =value */
1060
0
            return 0;
1061
0
        }
1062
0
        /* strip leading & trailing spaces (TC decided to tolerate these) */
1063
0
        while(*keywordStart == ' ') {
1064
0
            keywordStart++;
1065
0
        }
1066
0
        keyValueTail = nextEqualsign;
1067
0
        while (keyValueTail > keywordStart && *(keyValueTail-1) == ' ') {
1068
0
            keyValueTail--;
1069
0
        }
1070
0
        /* now keyValueTail points to first char after the keyName */
1071
0
        /* copy & normalize keyName from locale */
1072
0
        if (keywordStart == keyValueTail) {
1073
0
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty keyword name in passed-in locale */
1074
0
            return 0;
1075
0
        }
1076
0
        keyValueLen = 0;
1077
0
        while (keywordStart < keyValueTail) {
1078
0
            if (!UPRV_ISALPHANUM(*keywordStart)) {
1079
0
                *status = U_ILLEGAL_ARGUMENT_ERROR; /* malformed keyword name */
1080
0
                return 0;
1081
0
            }
1082
0
            if (keyValueLen < ULOC_KEYWORD_BUFFER_LEN - 1) {
1083
0
                localeKeywordNameBuffer[keyValueLen++] = uprv_tolower(*keywordStart++);
1084
0
            } else {
1085
0
                /* keyword name too long for internal buffer */
1086
0
                *status = U_INTERNAL_PROGRAM_ERROR;
1087
0
                return 0;
1088
0
            }
1089
0
        }
1090
0
        localeKeywordNameBuffer[keyValueLen] = 0; /* terminate */
1091
0
1092
0
        nextSeparator = uprv_strchr(nextEqualsign, ';');
1093
0
1094
0
        /* start processing the value part */
1095
0
        nextEqualsign++; /* skip '=' */
1096
0
        /* First strip leading & trailing spaces (TC decided to tolerate these) */
1097
0
        while(*nextEqualsign == ' ') {
1098
0
            nextEqualsign++;
1099
0
        }
1100
0
        keyValueTail = (nextSeparator)? nextSeparator: nextEqualsign + uprv_strlen(nextEqualsign);
1101
0
        while(keyValueTail > nextEqualsign && *(keyValueTail-1) == ' ') {
1102
0
            keyValueTail--;
1103
0
        }
1104
0
        if (nextEqualsign == keyValueTail) {
1105
0
            *status = U_ILLEGAL_ARGUMENT_ERROR; /* empty key value in passed-in locale */
1106
0
            return 0;
1107
0
        }
1108
0
1109
0
        rc = uprv_strcmp(keywordNameBuffer, localeKeywordNameBuffer);
1110
0
        if(rc == 0) {
1111
0
            /* Current entry matches the input keyword. Update the entry */
1112
0
            if(keywordValueLen > 0) { /* updating a value */
1113
0
                updatedKeysAndValues.append(keyValuePrefix, *status);
1114
0
                keyValuePrefix = ';'; /* for any subsequent key-value pair */
1115
0
                updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1116
0
                updatedKeysAndValues.append('=', *status);
1117
0
                updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1118
0
            } /* else removing this entry, don't emit anything */
1119
0
            handledInputKeyAndValue = TRUE;
1120
0
        } else {
1121
0
           /* input keyword sorts earlier than current entry, add before current entry */
1122
0
            if (rc < 0 && keywordValueLen > 0 && !handledInputKeyAndValue) {
1123
0
                /* insert new entry at this location */
1124
0
                updatedKeysAndValues.append(keyValuePrefix, *status);
1125
0
                keyValuePrefix = ';'; /* for any subsequent key-value pair */
1126
0
                updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1127
0
                updatedKeysAndValues.append('=', *status);
1128
0
                updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1129
0
                handledInputKeyAndValue = TRUE;
1130
0
            }
1131
0
            /* copy the current entry */
1132
0
            updatedKeysAndValues.append(keyValuePrefix, *status);
1133
0
            keyValuePrefix = ';'; /* for any subsequent key-value pair */
1134
0
            updatedKeysAndValues.append(localeKeywordNameBuffer, keyValueLen, *status);
1135
0
            updatedKeysAndValues.append('=', *status);
1136
0
            updatedKeysAndValues.append(nextEqualsign, keyValueTail-nextEqualsign, *status);
1137
0
        }
1138
0
        if (!nextSeparator && keywordValueLen > 0 && !handledInputKeyAndValue) {
1139
0
            /* append new entry at the end, it sorts later than existing entries */
1140
0
            updatedKeysAndValues.append(keyValuePrefix, *status);
1141
0
            /* skip keyValuePrefix update, no subsequent key-value pair */
1142
0
            updatedKeysAndValues.append(keywordNameBuffer, keywordNameLen, *status);
1143
0
            updatedKeysAndValues.append('=', *status);
1144
0
            updatedKeysAndValues.append(keywordValueBuffer, keywordValueLen, *status);
1145
0
            handledInputKeyAndValue = TRUE;
1146
0
        }
1147
0
        keywordStart = nextSeparator;
1148
0
    } /* end loop searching */
1149
0
1150
0
    /* Any error from updatedKeysAndValues.append above would be internal and not due to
1151
0
     * problems with the passed-in locale. So if we did encounter problems with the
1152
0
     * passed-in locale above, those errors took precedence and overrode any error
1153
0
     * status from updatedKeysAndValues.append, and also caused a return of 0. If there
1154
0
     * are errors here they are from updatedKeysAndValues.append; they do cause an
1155
0
     * error return but the passed-in locale is unmodified and the original bufLen is
1156
0
     * returned.
1157
0
     */
1158
0
    if (!handledInputKeyAndValue || U_FAILURE(*status)) {
1159
0
        /* if input key/value specified removal of a keyword not present in locale, or
1160
0
         * there was an error in CharString.append, leave original locale alone. */
1161
0
        return bufLen;
1162
0
    }
1163
0
1164
0
    updatedKeysAndValuesLen = updatedKeysAndValues.length();
1165
0
    /* needLen = length of the part before '@' + length of updated key-value part including '@' */
1166
0
    needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
1167
0
    if(needLen >= bufferCapacity) {
1168
0
        *status = U_BUFFER_OVERFLOW_ERROR;
1169
0
        return needLen; /* no change */
1170
0
    }
1171
0
    if (updatedKeysAndValuesLen > 0) {
1172
0
        uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
1173
0
    }
1174
0
    buffer[needLen]=0;
1175
0
    return needLen;
1176
0
}
1177
1178
/* ### ID parsing implementation **************************************************/
1179
1180
132
/* TRUE if a is one of the special prefix letters 'x', 'X', 'i' or 'I'.
 * The argument is evaluated more than once, so it must have no side effects. */
#define _isPrefixLetter(a) (((a)=='x')||((a)=='X')||((a)=='i')||((a)=='I'))

/*returns TRUE if one of the special prefixes is here (s=string)
  'x-' or 'i-' */
#define _isIDPrefix(s) (_isPrefixLetter((s)[0])&&_isIDSeparator((s)[1]))

/* Dot terminates it because of POSIX form  where dot precedes the codepage
 * except for variant
 */
#define _isTerminator(a)  (((a)==0)||((a)=='.')||((a)=='@'))
1190
1191
0
static char* _strnchr(const char* str, int32_t len, char c) {
1192
0
    U_ASSERT(str != 0 && len >= 0);
1193
0
    while (len-- != 0) {
1194
0
        char d = *str;
1195
0
        if (d == c) {
1196
0
            return (char*) str;
1197
0
        } else if (d == 0) {
1198
0
            break;
1199
0
        }
1200
0
        ++str;
1201
0
    }
1202
0
    return NULL;
1203
0
}
1204
1205
/**
1206
 * Lookup 'key' in the array 'list'.  The array 'list' should contain
1207
 * a NULL entry, followed by more entries, and a second NULL entry.
1208
 *
1209
 * The 'list' param should be LANGUAGES, LANGUAGES_3, COUNTRIES, or
1210
 * COUNTRIES_3.
1211
 */
1212
static int16_t _findIndex(const char* const* list, const char* key)
1213
0
{
1214
0
    const char* const* anchor = list;
1215
0
    int32_t pass = 0;
1216
0
1217
0
    /* Make two passes through two NULL-terminated arrays at 'list' */
1218
0
    while (pass++ < 2) {
1219
0
        while (*list) {
1220
0
            if (uprv_strcmp(key, *list) == 0) {
1221
0
                return (int16_t)(list - anchor);
1222
0
            }
1223
0
            list++;
1224
0
        }
1225
0
        ++list;     /* skip final NULL *CWB*/
1226
0
    }
1227
0
    return -1;
1228
0
}
1229
1230
/* count the length of src while copying it to dest; return strlen(src) */
1231
static inline int32_t
1232
0
_copyCount(char *dest, int32_t destCapacity, const char *src) {
1233
0
    const char *anchor;
1234
0
    char c;
1235
0
1236
0
    anchor=src;
1237
0
    for(;;) {
1238
0
        if((c=*src)==0) {
1239
0
            return (int32_t)(src-anchor);
1240
0
        }
1241
0
        if(destCapacity<=0) {
1242
0
            return (int32_t)((src-anchor)+uprv_strlen(src));
1243
0
        }
1244
0
        ++src;
1245
0
        *dest++=c;
1246
0
        --destCapacity;
1247
0
    }
1248
0
}
1249
1250
/**
 * Maps a country ID listed in DEPRECATED_COUNTRIES to the entry at the same
 * index of REPLACEMENT_COUNTRIES; any other ID is returned unchanged.
 */
U_CFUNC const char*
uloc_getCurrentCountryID(const char* oldID){
    const int32_t index = _findIndex(DEPRECATED_COUNTRIES, oldID);
    return (index >= 0) ? REPLACEMENT_COUNTRIES[index] : oldID;
}
1258
/**
 * Maps a language ID listed in DEPRECATED_LANGUAGES to the entry at the same
 * index of REPLACEMENT_LANGUAGES; any other ID is returned unchanged.
 */
U_CFUNC const char*
uloc_getCurrentLanguageID(const char* oldID){
    const int32_t index = _findIndex(DEPRECATED_LANGUAGES, oldID);
    return (index >= 0) ? REPLACEMENT_LANGUAGES[index] : oldID;
}
1266
/*
1267
 * the internal functions _getLanguage(), _getCountry(), _getVariant()
1268
 * avoid duplicating code to handle the earlier locale ID pieces
1269
 * in the functions for the later ones by
1270
 * setting the *pEnd pointer to where they stopped parsing
1271
 *
1272
 * TODO try to use this in Locale
1273
 */
1274
/**
 * Extracts the language subtag from the start of a locale ID.
 *
 * An "x-"/"i-" prefix is copied (lowercased, with '-') ahead of the language.
 * The language is lowercased, and a 3-letter code found in LANGUAGES_3 is
 * replaced by the corresponding LANGUAGES entry.
 *
 * @param localeID         locale ID to parse
 * @param language         output buffer; not NUL-terminated by this function
 * @param languageCapacity capacity of language; characters beyond it are
 *                         counted but not written
 * @param pEnd             if not NULL, receives the position where parsing stopped
 * @return length of the language subtag (may exceed languageCapacity)
 */
U_CFUNC int32_t
ulocimp_getLanguage(const char *localeID,
                    char *language, int32_t languageCapacity,
                    const char **pEnd) {
    int32_t i=0;
    int32_t offset;
    char lang[4]={ 0, 0, 0, 0 }; /* temporary buffer to hold language code for searching */

    /* if it starts with i- or x- then copy that prefix */
    if(_isIDPrefix(localeID)) {
        if(i<languageCapacity) {
            language[i]=(char)uprv_tolower(*localeID);
        }
        /* the separator goes one slot further, so that slot must fit too */
        if(i+1<languageCapacity) {
            language[i+1]='-';
        }
        i+=2;
        localeID+=2;
    }

    /* copy the language as far as possible and count its length */
    while(!_isTerminator(*localeID) && !_isIDSeparator(*localeID)) {
        if(i<languageCapacity) {
            language[i]=(char)uprv_tolower(*localeID);
        }
        if(i<3) {
            U_ASSERT(i>=0);
            lang[i]=(char)uprv_tolower(*localeID);
        }
        i++;
        localeID++;
    }

    if(i==3) {
        /* convert 3 character code to 2 character code if possible *CWB*/
        offset=_findIndex(LANGUAGES_3, lang);
        if(offset>=0) {
            i=_copyCount(language, languageCapacity, LANGUAGES[offset]);
        }
    }

    if(pEnd!=NULL) {
        *pEnd=localeID;
    }
    return i;
}
1320
1321
/**
 * Extracts a script subtag from the start of localeID.
 *
 * A run of exactly four ASCII letters is treated as a script; it is written
 * to 'script' in title case (first letter upper, rest lower), truncated to
 * scriptCapacity. Any other run length means there is no script.
 *
 * @param pEnd if not NULL, receives the position after the four letters when
 *             a script is found, otherwise the unchanged localeID
 * @return number of characters written (at most 4), or 0 if there is no script
 */
U_CFUNC int32_t
ulocimp_getScript(const char *localeID,
                  char *script, int32_t scriptCapacity,
                  const char **pEnd)
{
    int32_t letters = 0;

    if (pEnd != NULL) {
        *pEnd = localeID;
    }

    /* measure the leading run of ASCII letters */
    for (;;) {
        const char c = localeID[letters];
        if (_isTerminator(c) || _isIDSeparator(c) || !uprv_isASCIILetter(c)) {
            break;
        }
        ++letters;
    }

    /* If it's exactly 4 characters long, then it's a script and not a country. */
    if (letters != 4) {
        return 0;
    }

    if (pEnd != NULL) {
        *pEnd = localeID + letters;
    }

    const int32_t copyLen = (letters > scriptCapacity) ? scriptCapacity : letters;
    for (int32_t i = 0; i < copyLen; ++i) {
        const char c = localeID[i];
        script[i] = (i == 0) ? (char)uprv_toupper(c) : (char)uprv_tolower(c);
    }
    return copyLen;
}
1359
1360
/**
 * Extracts the country subtag from the start of localeID.
 *
 * Only subtags of length 2 or 3 are accepted. The result is uppercased, and a
 * 3-letter code found in COUNTRIES_3 is replaced by the corresponding
 * COUNTRIES entry.
 *
 * @param localeID        text positioned at the country subtag
 * @param country         output buffer; not NUL-terminated by this function
 * @param countryCapacity capacity of country; characters beyond it are
 *                        counted but not written
 * @param pEnd            if not NULL, receives the position after the subtag
 *                        that was consumed from localeID, or the unchanged
 *                        localeID when there is no country
 * @return length of the country written to 'country' (may exceed
 *         countryCapacity), or 0 if there is no country
 */
U_CFUNC int32_t
ulocimp_getCountry(const char *localeID,
                   char *country, int32_t countryCapacity,
                   const char **pEnd)
{
    int32_t idLen=0;
    char cnty[ULOC_COUNTRY_CAPACITY]={ 0, 0, 0, 0 };
    int32_t offset;

    /* copy the country as far as possible and count its length */
    while(!_isTerminator(localeID[idLen]) && !_isIDSeparator(localeID[idLen])) {
        if(idLen<(ULOC_COUNTRY_CAPACITY-1)) {   /*CWB*/
            cnty[idLen]=(char)uprv_toupper(localeID[idLen]);
        }
        idLen++;
    }

    /* the country should be either length 2 or 3 */
    if (idLen == 2 || idLen == 3) {
        /* number of input characters consumed; idLen below may shrink when a
           3-letter code is replaced by its 2-letter equivalent */
        const int32_t consumed = idLen;
        UBool gotCountry = FALSE;
        /* convert 3 character code to 2 character code if possible *CWB*/
        if(idLen==3) {
            offset=_findIndex(COUNTRIES_3, cnty);
            if(offset>=0) {
                idLen=_copyCount(country, countryCapacity, COUNTRIES[offset]);
                gotCountry = TRUE;
            }
        }
        if (!gotCountry) {
            int32_t i = 0;
            for (i = 0; i < idLen; i++) {
                if (i < countryCapacity) {
                    country[i]=(char)uprv_toupper(localeID[i]);
                }
            }
        }
        /* advance past the whole subtag as it appeared in the input */
        localeID+=consumed;
    } else {
        idLen = 0;
    }

    if(pEnd!=NULL) {
        *pEnd=localeID;
    }

    return idLen;
}
1407
1408
/**
1409
 * @param needSeparator if true, then add leading '_' if any variants
1410
 * are added to 'variant'
1411
 */
1412
static int32_t
1413
_getVariantEx(const char *localeID,
1414
              char prev,
1415
              char *variant, int32_t variantCapacity,
1416
0
              UBool needSeparator) {
1417
0
    int32_t i=0;
1418
0
1419
0
    /* get one or more variant tags and separate them with '_' */
1420
0
    if(_isIDSeparator(prev)) {
1421
0
        /* get a variant string after a '-' or '_' */
1422
0
        while(!_isTerminator(*localeID)) {
1423
0
            if (needSeparator) {
1424
0
                if (i<variantCapacity) {
1425
0
                    variant[i] = '_';
1426
0
                }
1427
0
                ++i;
1428
0
                needSeparator = FALSE;
1429
0
            }
1430
0
            if(i<variantCapacity) {
1431
0
                variant[i]=(char)uprv_toupper(*localeID);
1432
0
                if(variant[i]=='-') {
1433
0
                    variant[i]='_';
1434
0
                }
1435
0
            }
1436
0
            i++;
1437
0
            localeID++;
1438
0
        }
1439
0
    }
1440
0
1441
0
    /* if there is no variant tag after a '-' or '_' then look for '@' */
1442
0
    if(i==0) {
1443
0
        if(prev=='@') {
1444
0
            /* keep localeID */
1445
0
        } else if((localeID=locale_getKeywordsStart(localeID))!=NULL) {
1446
0
            ++localeID; /* point after the '@' */
1447
0
        } else {
1448
0
            return 0;
1449
0
        }
1450
0
        while(!_isTerminator(*localeID)) {
1451
0
            if (needSeparator) {
1452
0
                if (i<variantCapacity) {
1453
0
                    variant[i] = '_';
1454
0
                }
1455
0
                ++i;
1456
0
                needSeparator = FALSE;
1457
0
            }
1458
0
            if(i<variantCapacity) {
1459
0
                variant[i]=(char)uprv_toupper(*localeID);
1460
0
                if(variant[i]=='-' || variant[i]==',') {
1461
0
                    variant[i]='_';
1462
0
                }
1463
0
            }
1464
0
            i++;
1465
0
            localeID++;
1466
0
        }
1467
0
    }
1468
0
1469
0
    return i;
1470
0
}
1471
1472
/**
 * Convenience form of _getVariantEx() that never emits a leading '_'.
 */
static int32_t
_getVariant(const char *localeID,
            char prev,
            char *variant, int32_t variantCapacity) {
    const UBool withLeadingSeparator = FALSE;
    return _getVariantEx(localeID, prev, variant, variantCapacity, withLeadingSeparator);
}
1478
1479
/**
1480
 * Delete ALL instances of a variant from the given list of one or
1481
 * more variants.  Example: "FOO_EURO_BAR_EURO" => "FOO_BAR".
1482
 * @param variants the source string of one or more variants,
1483
 * separated by '_'.  This will be MODIFIED IN PLACE.  Not zero
1484
 * terminated; if it is, trailing zero will NOT be maintained.
1485
 * @param variantsLen length of variants
1486
 * @param toDelete variant to delete, without separators, e.g.  "EURO"
1487
 * or "PREEURO"; not zero terminated
1488
 * @param toDeleteLen length of toDelete
1489
 * @return number of characters deleted from variants
1490
 */
1491
static int32_t
1492
_deleteVariant(char* variants, int32_t variantsLen,
1493
               const char* toDelete, int32_t toDeleteLen)
1494
0
{
1495
0
    int32_t delta = 0; /* number of chars deleted */
1496
0
    for (;;) {
1497
0
        UBool flag = FALSE;
1498
0
        if (variantsLen < toDeleteLen) {
1499
0
            return delta;
1500
0
        }
1501
0
        if (uprv_strncmp(variants, toDelete, toDeleteLen) == 0 &&
1502
0
            (variantsLen == toDeleteLen ||
1503
0
             (flag=(variants[toDeleteLen] == '_'))))
1504
0
        {
1505
0
            int32_t d = toDeleteLen + (flag?1:0);
1506
0
            variantsLen -= d;
1507
0
            delta += d;
1508
0
            if (variantsLen > 0) {
1509
0
                uprv_memmove(variants, variants+d, variantsLen);
1510
0
            }
1511
0
        } else {
1512
0
            char* p = _strnchr(variants, variantsLen, '_');
1513
0
            if (p == NULL) {
1514
0
                return delta;
1515
0
            }
1516
0
            ++p;
1517
0
            variantsLen -= (int32_t)(p - variants);
1518
0
            variants = p;
1519
0
        }
1520
0
    }
1521
0
}
1522
1523
/* Keyword enumeration */
1524
1525
/* Per-enumeration state stored in UEnumeration::context by uloc_openKeywordList(). */
typedef struct UKeywordsContext {
1526
    /* Heap copy of the keyword list: zero-terminated strings stored back to
       back; an empty string ends the list.  Freed by uloc_kw_closeKeywords(). */
    char* keywords;
1527
    /* Next keyword to hand out; points into 'keywords'. */
    char* current;
1528
} UKeywordsContext;
1529
1530
U_CDECL_BEGIN
1531
1532
static void U_CALLCONV
1533
0
uloc_kw_closeKeywords(UEnumeration *enumerator) {
1534
0
    uprv_free(((UKeywordsContext *)enumerator->context)->keywords);
1535
0
    uprv_free(enumerator->context);
1536
0
    uprv_free(enumerator);
1537
0
}
1538
1539
/* Count callback: walks the whole keyword list (not just the unread part)
   and returns the number of strings before the terminating empty string. */
static int32_t U_CALLCONV
uloc_kw_countKeywords(UEnumeration *en, UErrorCode * /*status*/) {
    int32_t count = 0;
    const char *item = ((UKeywordsContext *)en->context)->keywords;

    for (; *item != 0; item += uprv_strlen(item)+1) {
        ++count;
    }
    return count;
}
1549
1550
static const char * U_CALLCONV
1551
uloc_kw_nextKeyword(UEnumeration* en,
1552
                    int32_t* resultLength,
1553
0
                    UErrorCode* /*status*/) {
1554
0
    const char* result = ((UKeywordsContext *)en->context)->current;
1555
0
    int32_t len = 0;
1556
0
    if(*result) {
1557
0
        len = (int32_t)uprv_strlen(((UKeywordsContext *)en->context)->current);
1558
0
        ((UKeywordsContext *)en->context)->current += len+1;
1559
0
    } else {
1560
0
        result = NULL;
1561
0
    }
1562
0
    if (resultLength) {
1563
0
        *resultLength = len;
1564
0
    }
1565
0
    return result;
1566
0
}
1567
1568
static void U_CALLCONV
1569
uloc_kw_resetKeywords(UEnumeration* en,
1570
0
                      UErrorCode* /*status*/) {
1571
0
    ((UKeywordsContext *)en->context)->current = ((UKeywordsContext *)en->context)->keywords;
1572
0
}
1573
1574
U_CDECL_END
1575
1576
1577
/*
 * Template for keyword enumerations.  uloc_openKeywordList() copies it into
 * a freshly allocated UEnumeration with uprv_memcpy and then sets the copy's
 * 'context' member, so the two NULL members are placeholders here.
 * NOTE(review): the meaning of each slot follows the member order of
 * UEnumeration, which is declared outside this file -- confirm there.
 */
static const UEnumeration gKeywordsEnum = {
1578
    NULL,
1579
    NULL,
1580
    uloc_kw_closeKeywords,
1581
    uloc_kw_countKeywords,
1582
    uenum_unextDefault,
1583
    uloc_kw_nextKeyword,
1584
    uloc_kw_resetKeywords
1585
};
1586
1587
/**
 * Create an enumeration over a keyword list.
 *
 * The list is copied, so the caller's buffer need not outlive the result.
 * One extra zero byte is appended to the copy.
 *
 * @param keywordList     zero-terminated keywords stored back to back
 * @param keywordListSize number of bytes of keywordList to copy
 * @param status          in/out error code; set to U_MEMORY_ALLOCATION_ERROR
 *                        when any allocation fails
 * @return the new enumeration, or NULL on failure (nothing is leaked)
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status)
{
    UKeywordsContext *myContext = NULL;
    UEnumeration *result = NULL;

    if(U_FAILURE(*status)) {
        return NULL;
    }
    result = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
    /* Null pointer test */
    if (result == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return NULL;
    }
    uprv_memcpy(result, &gKeywordsEnum, sizeof(UEnumeration));
    myContext = static_cast<UKeywordsContext *>(uprv_malloc(sizeof(UKeywordsContext)));
    if (myContext == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(result);
        return NULL;
    }
    myContext->keywords = (char *)uprv_malloc(keywordListSize+1);
    /* The copy below would write through NULL if this allocation failed. */
    if (myContext->keywords == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        uprv_free(myContext);
        uprv_free(result);
        return NULL;
    }
    uprv_memcpy(myContext->keywords, keywordList, keywordListSize);
    myContext->keywords[keywordListSize] = 0;
    myContext->current = myContext->keywords;
    result->context = myContext;
    return result;
}
1616
1617
/**
 * Open an enumeration over the keywords of a locale ID.
 *
 * The language, optional script, country and variant are skipped, then the
 * keywords after '@' are collected with locale_getKeywords() and handed to
 * uloc_openKeywordList().
 *
 * @param localeID locale ID; NULL selects the default locale
 * @param status   in/out error code; must not be NULL
 * @return the enumeration, or NULL if status is NULL or already a failure,
 *         or if no keywords were collected
 */
U_CAPI UEnumeration* U_EXPORT2
uloc_openKeywords(const char* localeID,
                        UErrorCode* status)
{
    char keywords[256];
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char* tmpLocaleID;
    const char* keywordsStart;
    int32_t keywordsLen = 0;

    if(status==NULL || U_FAILURE(*status)) {
        return NULL;
    }

    if (_hasBCP47Extension(localeID)) {
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), status);
    } else {
        tmpLocaleID = (localeID != NULL) ? localeID : uloc_getDefault();
    }

    /* Skip the language */
    ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);

    /* Skip the script if available */
    if(_isIDSeparator(*tmpLocaleID)) {
        const char *afterScript;
        ulocimp_getScript(tmpLocaleID+1, NULL, 0, &afterScript);
        if(afterScript != tmpLocaleID+1) {
            tmpLocaleID = afterScript;
        }
    }

    /* Skip the country; the variant call only measures and stores nothing */
    if(_isIDSeparator(*tmpLocaleID)) {
        ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &tmpLocaleID);
        if(_isIDSeparator(*tmpLocaleID)) {
            _getVariant(tmpLocaleID+1, *tmpLocaleID, NULL, 0);
        }
    }

    /* keywords are located after '@' */
    keywordsStart = locale_getKeywordsStart(tmpLocaleID);
    if(keywordsStart != NULL) {
        keywordsLen = locale_getKeywords(keywordsStart+1, '@', keywords, (int32_t)sizeof(keywords),
                                         NULL, 0, NULL, FALSE, status);
    }

    if(keywordsLen == 0) {
        return NULL;
    }
    return uloc_openKeywordList(keywords, keywordsLen, status);
}
1670
1671
1672
/* bit-flags for 'options' parameter of _canonicalize */
#define _ULOC_STRIP_KEYWORDS 0x2
#define _ULOC_CANONICALIZE   0x1

/*
 * True if any bit of 'mask' is set in 'options'.  Both arguments are
 * parenthesized so that compound expressions such as (A | B) are tested as
 * a whole instead of being split by operator precedence.
 */
#define OPTION_SET(options, mask) (((options) & (mask)) != 0)

/* The language tag "i-default", deliberately stored without a terminating zero. */
static const char i_default[] = {'i', '-', 'd', 'e', 'f', 'a', 'u', 'l', 't'};
#define I_DEFAULT_LENGTH UPRV_LENGTHOF(i_default)
1680
1681
/**
 * Canonicalize the given localeID, to level 1 or to level 2,
 * depending on the options.  To specify level 1, pass in options=0.
 * To specify level 2, pass in options=_ULOC_CANONICALIZE.
 * Pass _ULOC_STRIP_KEYWORDS to leave out the keyword section.
 *
 * This is the code underlying uloc_getName, uloc_getBaseName and
 * uloc_canonicalize.
 *
 * The ID is assembled piece by piece (language, script, country, variant,
 * then keywords) in 'name'.  'len' counts every character of the full
 * result, including those that did not fit, so writes are guarded by
 * len<nameCapacity.
 *
 * @param localeID       ID to process; NULL selects the default locale
 * @param result         output buffer, may be NULL
 * @param resultCapacity size of result
 * @param options        combination of the _ULOC_* bit-flags
 * @param err            in/out error code
 * @return the value of u_terminateChars(result, resultCapacity, len, err),
 *         or 0 if err already indicated a failure
 */
1688
static int32_t
1689
_canonicalize(const char* localeID,
1690
              char* result,
1691
              int32_t resultCapacity,
1692
              uint32_t options,
1693
12
              UErrorCode* err) {
1694
12
    int32_t j, len, fieldCount=0, scriptSize=0, variantSize=0, nameCapacity;
1695
12
    char localeBuffer[ULOC_FULLNAME_CAPACITY];
1696
12
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
1697
12
    const char* origLocaleID;
1698
12
    const char* tmpLocaleID;
1699
12
    const char* keywordAssign = NULL;
1700
12
    const char* separatorIndicator = NULL;
1701
12
    const char* addKeyword = NULL;
1702
12
    const char* addValue = NULL;
1703
12
    char* name;
1704
12
    char* variant = NULL; /* pointer into name, or NULL */
1705
12
1706
12
    if (U_FAILURE(*err)) {
1707
0
        return 0;
1708
0
    }
1709
12
1710
12
    /* NOTE(review): _ConvertBCP47 is defined outside this chunk; presumably
       it assigns tmpLocaleID (it is not set anywhere else on this path). */
    if (_hasBCP47Extension(localeID)) {
1711
0
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
1712
12
    } else {
1713
12
        if (localeID==NULL) {
1714
0
           localeID=uloc_getDefault();
1715
0
        }
1716
12
        tmpLocaleID=localeID;
1717
12
    }
1718
12
1719
12
    origLocaleID=tmpLocaleID;
1720
12
1721
12
    /* Work in localeBuffer when the caller's buffer is missing or smaller
       than localeBuffer; otherwise write straight into result.  (The check
       for _ULOC_CANONICALIZE is commented out, so this applies to every
       option value.) */
    if (/*OPTION_SET(options, _ULOC_CANONICALIZE) &&*/
1724
12
        (result == NULL || resultCapacity < (int32_t)sizeof(localeBuffer))) {
1725
0
        name = localeBuffer;
1726
0
        nameCapacity = (int32_t)sizeof(localeBuffer);
1727
12
    } else {
1728
12
        name = result;
1729
12
        nameCapacity = resultCapacity;
1730
12
    }
1731
12
1732
12
    /* get all pieces, one after another, and separate with '_' */
1733
12
    len=ulocimp_getLanguage(tmpLocaleID, name, nameCapacity, &tmpLocaleID);
1734
12
1735
12
    /* A language of the same length as "i-default" whose input text starts
       with "i-default" is replaced by the default locale ID. */
    if(len == I_DEFAULT_LENGTH && uprv_strncmp(origLocaleID, i_default, len) == 0) {
1736
0
        const char *d = uloc_getDefault();
1737
0
1738
0
        len = (int32_t)uprv_strlen(d);
1739
0
1740
0
        if (name != NULL) {
1741
0
            /* NOTE(review): len is not compared with nameCapacity here. */
            uprv_strncpy(name, d, len);
1742
0
        }
1743
12
    } else if(_isIDSeparator(*tmpLocaleID)) {
1744
12
        const char *scriptID;
1745
12
1746
12
        ++fieldCount;
1747
12
        if(len<nameCapacity) {
1748
12
            name[len]='_';
1749
12
        }
1750
12
        ++len;
1751
12
1752
12
        scriptSize=ulocimp_getScript(tmpLocaleID+1,
1753
12
            (len<nameCapacity ? name+len : NULL), nameCapacity-len, &scriptID);
1754
12
        if(scriptSize > 0) {
1755
3
            /* Found optional script */
1756
3
            tmpLocaleID = scriptID;
1757
3
            ++fieldCount;
1758
3
            len+=scriptSize;
1759
3
            if (_isIDSeparator(*tmpLocaleID)) {
1760
0
                /* If there is something else, then we add the _ */
1761
0
                if(len<nameCapacity) {
1762
0
                    name[len]='_';
1763
0
                }
1764
0
                ++len;
1765
0
            }
1766
3
        }
1767
12
1768
12
        if (_isIDSeparator(*tmpLocaleID)) {
1769
9
            const char *cntryID;
1770
9
            int32_t cntrySize = ulocimp_getCountry(tmpLocaleID+1,
1771
9
                (len<nameCapacity ? name+len : NULL), nameCapacity-len, &cntryID);
1772
9
            if (cntrySize > 0) {
1773
9
                /* Found optional country */
1774
9
                tmpLocaleID = cntryID;
1775
9
                len+=cntrySize;
1776
9
            }
1777
9
            if(_isIDSeparator(*tmpLocaleID)) {
1778
0
                /* If there is something else, then we add the _  if we found country before. */
1779
0
                /* The separator is not added when another separator follows
                   immediately (empty country, as in "xx__VARIANT"). */
                if (cntrySize >= 0 && ! _isIDSeparator(*(tmpLocaleID+1)) ) {
1780
0
                    ++fieldCount;
1781
0
                    if(len<nameCapacity) {
1782
0
                        name[len]='_';
1783
0
                    }
1784
0
                    ++len;
1785
0
                }
1786
0
1787
0
                variantSize = _getVariant(tmpLocaleID+1, *tmpLocaleID,
1788
0
                    (len<nameCapacity ? name+len : NULL), nameCapacity-len);
1789
0
                if (variantSize > 0) {
1790
0
                    variant = len<nameCapacity ? name+len : NULL;
1791
0
                    len += variantSize;
1792
0
                    tmpLocaleID += variantSize + 1; /* skip '_' and variant */
1793
0
                }
1794
0
            }
1795
9
        }
1796
12
    }
1797
12
1798
12
    /* Copy POSIX-style charset specifier, if any [mr.utf8] */
1799
12
    /* Level 1 only: everything from '.' up to '@' or the end is copied as is. */
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) && *tmpLocaleID == '.') {
1800
0
        UBool done = FALSE;
1801
0
        do {
1802
0
            char c = *tmpLocaleID;
1803
0
            switch (c) {
1804
0
            case 0:
1805
0
            case '@':
1806
0
                done = TRUE;
1807
0
                break;
1808
0
            default:
1809
0
                if (len<nameCapacity) {
1810
0
                    name[len] = c;
1811
0
                }
1812
0
                ++len;
1813
0
                ++tmpLocaleID;
1814
0
                break;
1815
0
            }
1816
0
        } while (!done);
1817
0
    }
1818
12
1819
12
    /* Scan ahead to next '@' and determine if it is followed by '=' and/or ';'
       After this, tmpLocaleID either points to '@' or is NULL */
1821
12
    if ((tmpLocaleID=locale_getKeywordsStart(tmpLocaleID))!=NULL) {
1822
0
        keywordAssign = uprv_strchr(tmpLocaleID, '=');
1823
0
        separatorIndicator = uprv_strchr(tmpLocaleID, ';');
1824
0
    }
1825
12
1826
12
    /* Copy POSIX-style variant, if any [mr@FOO] */
1827
12
    /* Level 1 only: with no '=' after the '@', the rest (including '@') is copied as is. */
    if (!OPTION_SET(options, _ULOC_CANONICALIZE) &&
1828
12
        tmpLocaleID != NULL && keywordAssign == NULL) {
1829
0
        for (;;) {
1830
0
            char c = *tmpLocaleID;
1831
0
            if (c == 0) {
1832
0
                break;
1833
0
            }
1834
0
            if (len<nameCapacity) {
1835
0
                name[len] = c;
1836
0
            }
1837
0
            ++len;
1838
0
            ++tmpLocaleID;
1839
0
        }
1840
0
    }
1841
12
1842
12
    if (OPTION_SET(options, _ULOC_CANONICALIZE)) {
1843
12
        /* Handle @FOO variant if @ is present and not followed by = */
1844
12
        if (tmpLocaleID!=NULL && keywordAssign==NULL) {
1845
0
            int32_t posixVariantSize;
1846
0
            /* Add missing '_' if needed */
1847
0
            if (fieldCount < 2 || (fieldCount < 3 && scriptSize > 0)) {
1848
0
                do {
1849
0
                    if(len<nameCapacity) {
1850
0
                        name[len]='_';
1851
0
                    }
1852
0
                    ++len;
1853
0
                    ++fieldCount;
1854
0
                } while(fieldCount<2);
1855
0
            }
1856
0
            /* A leading '_' is requested only when a variant was already copied above. */
            posixVariantSize = _getVariantEx(tmpLocaleID+1, '@', name+len, nameCapacity-len,
1857
0
                                             (UBool)(variantSize > 0));
1858
0
            if (posixVariantSize > 0) {
1859
0
                if (variant == NULL) {
1860
0
                    variant = name+len;
1861
0
                }
1862
0
                len += posixVariantSize;
1863
0
                variantSize += posixVariantSize;
1864
0
            }
1865
0
        }
1866
12
1867
12
        /* Handle generic variants first */
1868
12
        /* The first VARIANT_MAP entry whose variant gets deleted supplies the
           keyword/value pair that is appended further down. */
        if (variant) {
1869
0
            for (j=0; j<UPRV_LENGTHOF(VARIANT_MAP); j++) {
1870
0
                const char* variantToCompare = VARIANT_MAP[j].variant;
1871
0
                int32_t n = (int32_t)uprv_strlen(variantToCompare);
1872
0
                int32_t variantLen = _deleteVariant(variant, uprv_min(variantSize, (nameCapacity-len)), variantToCompare, n);
1873
0
                len -= variantLen;
1874
0
                if (variantLen > 0) {
1875
0
                    if (len > 0 && name[len-1] == '_') { /* delete trailing '_' */
1876
0
                        --len;
1877
0
                    }
1878
0
                    addKeyword = VARIANT_MAP[j].keyword;
1879
0
                    addValue = VARIANT_MAP[j].value;
1880
0
                    break;
1881
0
                }
1882
0
            }
1883
0
            if (len > 0 && len <= nameCapacity && name[len-1] == '_') { /* delete trailing '_' */
1884
0
                --len;
1885
0
            }
1886
0
        }
1887
12
1888
12
        /* Look up the ID in the canonicalization map */
1889
564
        /* Only an exact match of the whole name built so far is remapped. */
        for (j=0; j<UPRV_LENGTHOF(CANONICALIZE_MAP); j++) {
1890
552
            const char* id = CANONICALIZE_MAP[j].id;
1891
552
            int32_t n = (int32_t)uprv_strlen(id);
1892
552
            if (len == n && uprv_strncmp(name, id, n) == 0) {
1893
0
                if (n == 0 && tmpLocaleID != NULL) {
1894
0
                    break; /* Don't remap "" if keywords present */
1895
0
                }
1896
0
                len = _copyCount(name, nameCapacity, CANONICALIZE_MAP[j].canonicalID);
1897
0
                if (CANONICALIZE_MAP[j].keyword) {
1898
0
                    addKeyword = CANONICALIZE_MAP[j].keyword;
1899
0
                    addValue = CANONICALIZE_MAP[j].value;
1900
0
                }
1901
0
                break;
1902
0
            }
1903
552
        }
1904
12
    }
1905
12
1906
12
    if (!OPTION_SET(options, _ULOC_STRIP_KEYWORDS)) {
1907
12
        /* Real keywords: there is an '=' after the '@', and no ';' before that '='. */
        if (tmpLocaleID!=NULL && keywordAssign!=NULL &&
1908
12
            (!separatorIndicator || separatorIndicator > keywordAssign)) {
1909
0
            if(len<nameCapacity) {
1910
0
                name[len]='@';
1911
0
            }
1912
0
            ++len;
1913
0
            ++fieldCount;
1914
0
            len += _getKeywords(tmpLocaleID+1, '@', (len<nameCapacity ? name+len : NULL), nameCapacity-len,
1915
0
                                NULL, 0, NULL, TRUE, addKeyword, addValue, err);
1916
12
        } else if (addKeyword != NULL) {
1917
0
            U_ASSERT(addValue != NULL && len < nameCapacity);
1918
0
            /* inelegant but works -- later make _getKeywords do this? */
1919
0
            len += _copyCount(name+len, nameCapacity-len, "@");
1920
0
            len += _copyCount(name+len, nameCapacity-len, addKeyword);
1921
0
            len += _copyCount(name+len, nameCapacity-len, "=");
1922
0
            len += _copyCount(name+len, nameCapacity-len, addValue);
1923
0
        }
1924
12
    }
1925
12
1926
12
    /* The work was done in localeBuffer: copy as much as fits into result. */
    if (U_SUCCESS(*err) && result != NULL && name == localeBuffer) {
1927
0
        uprv_strncpy(result, localeBuffer, (len > resultCapacity) ? resultCapacity : len);
1928
0
    }
1929
12
1930
12
    return u_terminateChars(result, resultCapacity, len, err);
1931
12
}
1932
1933
/* ### ID parsing API **************************************************/
1934
1935
/**
 * Truncate a locale ID at its last '_' to obtain the parent ID.
 *
 * @param localeID       ID to truncate; NULL selects the default locale
 * @param parent         output buffer; may be the same pointer as localeID
 * @param parentCapacity size of parent
 * @param err            in/out error code
 * @return length of the parent ID (0 when there is no '_'), as passed
 *         through u_terminateChars
 */
U_CAPI int32_t  U_EXPORT2
uloc_getParent(const char*    localeID,
               char* parent,
               int32_t parentCapacity,
               UErrorCode* err)
{
    if (U_FAILURE(*err)) {
        return 0;
    }

    if (localeID == NULL) {
        localeID = uloc_getDefault();
    }

    const char *lastUnderscore = uprv_strrchr(localeID, '_');
    int32_t parentLen = (lastUnderscore != NULL) ? (int32_t)(lastUnderscore-localeID) : 0;

    /* nothing to copy when truncating in place */
    if (parentLen > 0 && parent != localeID) {
        uprv_memcpy(parent, localeID, uprv_min(parentLen, parentCapacity));
    }
    return u_terminateChars(parent, parentCapacity, parentLen, err);
}
1962
1963
/**
 * Extract the language code of a locale ID.
 *
 * @param localeID         ID to read; NULL selects the default locale
 * @param language         output buffer
 * @param languageCapacity size of language
 * @param err              in/out error code; NULL makes the call return 0
 * @return length of the language code, as passed through u_terminateChars
 */
U_CAPI int32_t U_EXPORT2
uloc_getLanguage(const char*    localeID,
         char* language,
         int32_t languageCapacity,
         UErrorCode* err)
{
    /* uloc_getLanguage will return a 2 character iso-639 code if one exists. *CWB*/
    if (err==NULL || U_FAILURE(*err)) {
        return 0;
    }

    const char *id = (localeID != NULL) ? localeID : uloc_getDefault();
    int32_t languageLen = ulocimp_getLanguage(id, language, languageCapacity, NULL);

    return u_terminateChars(language, languageCapacity, languageLen, err);
}
1983
1984
/**
 * Extract the script code of a locale ID.
 *
 * @param localeID       ID to read; NULL selects the default locale
 * @param script         output buffer
 * @param scriptCapacity size of script
 * @param err            in/out error code; NULL makes the call return 0
 * @return length of the script code (0 if the ID has none), as passed
 *         through u_terminateChars
 */
U_CAPI int32_t U_EXPORT2
uloc_getScript(const char*    localeID,
         char* script,
         int32_t scriptCapacity,
         UErrorCode* err)
{
    int32_t scriptLen = 0;

    if(err==NULL || U_FAILURE(*err)) {
        return 0;
    }

    const char *cursor = (localeID != NULL) ? localeID : uloc_getDefault();

    /* skip the language */
    ulocimp_getLanguage(cursor, NULL, 0, &cursor);

    /* a script can only follow a separator */
    if(_isIDSeparator(*cursor)) {
        scriptLen = ulocimp_getScript(cursor+1, script, scriptCapacity, NULL);
    }
    return u_terminateChars(script, scriptCapacity, scriptLen, err);
}
2007
2008
/**
 * Extract the country code of a locale ID.
 *
 * @param localeID        ID to read; NULL selects the default locale
 * @param country         output buffer
 * @param countryCapacity size of country
 * @param err             in/out error code; NULL makes the call return 0
 * @return length of the country code (0 if the ID has none), as passed
 *         through u_terminateChars
 */
U_CAPI int32_t  U_EXPORT2
uloc_getCountry(const char* localeID,
            char* country,
            int32_t countryCapacity,
            UErrorCode* err)
{
    int32_t countryLen = 0;

    if(err==NULL || U_FAILURE(*err)) {
        return 0;
    }

    const char *cursor = (localeID != NULL) ? localeID : uloc_getDefault();

    /* Skip the language */
    ulocimp_getLanguage(cursor, NULL, 0, &cursor);

    /* Skip the script if available */
    if(_isIDSeparator(*cursor)) {
        const char *afterScript;
        ulocimp_getScript(cursor+1, NULL, 0, &afterScript);
        if(afterScript != cursor+1) {
            cursor = afterScript;
        }
    }

    /* Still at a separator: the country comes next */
    if(_isIDSeparator(*cursor)) {
        countryLen = ulocimp_getCountry(cursor+1, country, countryCapacity, NULL);
    }
    return u_terminateChars(country, countryCapacity, countryLen, err);
}
2040
2041
/**
 * Extract the variant of a locale ID.
 *
 * Only variants that follow the language/script/country fields are
 * returned.  POSIX-style "@VARIANT" suffixes are intentionally not handled
 * here any more; use the canonicalization function for those.
 *
 * @param localeID        ID to read; NULL selects the default locale
 * @param variant         output buffer
 * @param variantCapacity size of variant
 * @param err             in/out error code; NULL makes the call return 0
 * @return length of the variant (0 if the ID has none), as passed through
 *         u_terminateChars
 */
U_CAPI int32_t  U_EXPORT2
uloc_getVariant(const char* localeID,
                char* variant,
                int32_t variantCapacity,
                UErrorCode* err)
{
    char tempBuffer[ULOC_FULLNAME_CAPACITY];
    const char* tmpLocaleID;
    int32_t variantLen = 0;

    if(err==NULL || U_FAILURE(*err)) {
        return 0;
    }

    if (_hasBCP47Extension(localeID)) {
        _ConvertBCP47(tmpLocaleID, localeID, tempBuffer, sizeof(tempBuffer), err);
    } else {
        tmpLocaleID = (localeID != NULL) ? localeID : uloc_getDefault();
    }

    /* Skip the language */
    ulocimp_getLanguage(tmpLocaleID, NULL, 0, &tmpLocaleID);

    /* Skip the script if available */
    if(_isIDSeparator(*tmpLocaleID)) {
        const char *afterScript;
        ulocimp_getScript(tmpLocaleID+1, NULL, 0, &afterScript);
        if(afterScript != tmpLocaleID+1) {
            tmpLocaleID = afterScript;
        }
    }

    /* Skip the Country */
    if (_isIDSeparator(*tmpLocaleID)) {
        const char *afterCountry;
        ulocimp_getCountry(tmpLocaleID+1, NULL, 0, &afterCountry);
        const UBool foundCountry = (UBool)(afterCountry != tmpLocaleID+1);
        if (foundCountry) {
            tmpLocaleID = afterCountry;
        }

        if(_isIDSeparator(*tmpLocaleID)) {
            /* If there was no country ID, skip a possible extra IDSeparator */
            if (!foundCountry && _isIDSeparator(tmpLocaleID[1])) {
                tmpLocaleID++;
            }
            variantLen = _getVariant(tmpLocaleID+1, *tmpLocaleID, variant, variantCapacity);
        }
    }

    return u_terminateChars(variant, variantCapacity, variantLen, err);
}
2101
2102
/* Level 1 canonicalization: _canonicalize() with no option bits set. */
U_CAPI int32_t  U_EXPORT2
uloc_getName(const char* localeID,
             char* name,
             int32_t nameCapacity,
             UErrorCode* err)
{
    const uint32_t noOptions = 0;
    return _canonicalize(localeID, name, nameCapacity, noOptions, err);
}
2110
2111
/* Like uloc_getName(), but with _ULOC_STRIP_KEYWORDS so no keywords are appended. */
U_CAPI int32_t  U_EXPORT2
uloc_getBaseName(const char* localeID,
                 char* name,
                 int32_t nameCapacity,
                 UErrorCode* err)
{
    const uint32_t stripKeywords = _ULOC_STRIP_KEYWORDS;
    return _canonicalize(localeID, name, nameCapacity, stripKeywords, err);
}
2119
2120
/* Level 2 canonicalization: _canonicalize() with _ULOC_CANONICALIZE set. */
U_CAPI int32_t  U_EXPORT2
uloc_canonicalize(const char* localeID,
                  char* name,
                  int32_t nameCapacity,
                  UErrorCode* err)
{
    const uint32_t fullCanonicalization = _ULOC_CANONICALIZE;
    return _canonicalize(localeID, name, nameCapacity, fullCanonicalization, err);
}
2128
2129
/**
 * Map the language of a locale ID to the entry of LANGUAGES_3 that sits at
 * the same index as the language in LANGUAGES.
 *
 * @param localeID ID to read; NULL selects the default locale
 * @return the LANGUAGES_3 entry, or "" if the language cannot be extracted
 *         or is not listed in LANGUAGES
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Language(const char* localeID)
{
    int16_t offset;
    char lang[ULOC_LANG_CAPACITY];
    UErrorCode err = U_ZERO_ERROR;

    if (localeID == NULL)
    {
        localeID = uloc_getDefault();
    }
    uloc_getLanguage(localeID, lang, ULOC_LANG_CAPACITY, &err);
    /*
     * A language that fills the buffer exactly is reported with
     * U_STRING_NOT_TERMINATED_WARNING, which is not a failure; lang then has
     * no terminating zero and must not be used as a C string.
     */
    if (U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING)
        return "";
    offset = _findIndex(LANGUAGES, lang);
    if (offset < 0)
        return "";
    return LANGUAGES_3[offset];
}
2148
2149
/**
 * Map the country of a locale ID to the entry of COUNTRIES_3 that sits at
 * the same index as the country in COUNTRIES.
 *
 * @param localeID ID to read; NULL selects the default locale
 * @return the COUNTRIES_3 entry, or "" if the country cannot be extracted
 *         or is not listed in COUNTRIES
 */
U_CAPI const char*  U_EXPORT2
uloc_getISO3Country(const char* localeID)
{
    char cntry[ULOC_LANG_CAPACITY];
    UErrorCode err = U_ZERO_ERROR;

    if (localeID == NULL) {
        localeID = uloc_getDefault();
    }

    uloc_getCountry(localeID, cntry, ULOC_LANG_CAPACITY, &err);
    if (U_FAILURE(err)) {
        return "";
    }

    int16_t offset = _findIndex(COUNTRIES, cntry);
    return (offset < 0) ? "" : COUNTRIES_3[offset];
}
2169
2170
/**
 * Look up the LCID for a locale ID.
 *
 * A platform lookup (uprv_convertToLCIDPlatform) is tried first.  Otherwise
 * the ID is passed to uprv_convertToLCID; if it carries keywords, it is
 * first reduced to its base name plus the "collation" keyword.  When that
 * reduction does not work out, the original ID is used unchanged.
 *
 * @param localeID ID to look up
 * @return the LCID; 0 if localeID is NULL or shorter than 2 characters, or
 *         if its language cannot be extracted
 */
U_CAPI uint32_t  U_EXPORT2
uloc_getLCID(const char* localeID)
{
    UErrorCode status = U_ZERO_ERROR;
    char       langID[ULOC_FULLNAME_CAPACITY];

    /* Check for incomplete id. */
    if (!localeID || uprv_strlen(localeID) < 2) {
        return 0;
    }

    // Attempt platform lookup if available
    uint32_t platformLcid = uprv_convertToLCIDPlatform(localeID);
    if (platformLcid > 0) {
        // Windows found an LCID, return that
        return platformLcid;
    }

    uloc_getLanguage(localeID, langID, sizeof(langID), &status);
    if (U_FAILURE(status)) {
        return 0;
    }

    if (uprv_strchr(localeID, '@')) {
        // uprv_convertToLCID does not support keywords other than collation.
        // Remove all keywords except collation.
        char collVal[ULOC_KEYWORDS_CAPACITY];
        char tmpLocaleID[ULOC_FULLNAME_CAPACITY];

        int32_t collLen = uloc_getKeywordValue(localeID, "collation", collVal,
            UPRV_LENGTHOF(collVal) - 1, &status);

        if (U_SUCCESS(status) && collLen > 0) {
            collVal[collLen] = 0;

            int32_t baseLen = uloc_getBaseName(localeID, tmpLocaleID,
                UPRV_LENGTHOF(tmpLocaleID) - 1, &status);

            if (U_SUCCESS(status) && baseLen > 0) {
                tmpLocaleID[baseLen] = 0;

                int32_t fullLen = uloc_setKeywordValue("collation", collVal, tmpLocaleID,
                    UPRV_LENGTHOF(tmpLocaleID) - baseLen - 1, &status);

                if (U_SUCCESS(status) && fullLen > 0) {
                    tmpLocaleID[fullLen] = 0;
                    return uprv_convertToLCID(langID, tmpLocaleID, &status);
                }
            }
        }

        // fall through - all keywords are simply ignored
        status = U_ZERO_ERROR;
    }

    return uprv_convertToLCID(langID, localeID, &status);
}
2230
2231
/* Forwards to uprv_convertToPosix() and returns its result unchanged. */
U_CAPI int32_t U_EXPORT2
uloc_getLocaleForLCID(uint32_t hostid, char *locale, int32_t localeCapacity,
                UErrorCode *status)
{
    const int32_t localeLen = uprv_convertToPosix(hostid, locale, localeCapacity, status);
    return localeLen;
}
2237
2238
/* ### Default locale **************************************************/
2239
2240
/* Returns whatever locale_get_default() reports. */
U_CAPI const char*  U_EXPORT2
uloc_getDefault()
{
    const char* defaultID = locale_get_default();
    return defaultID;
}
2245
2246
/*
 * Sets the default locale by forwarding to locale_set_default() (the C++
 * side).  Nothing happens if *err already indicates a failure; the error
 * code is not written by this function.
 */
U_CAPI void  U_EXPORT2
uloc_setDefault(const char*   newDefaultLocale,
             UErrorCode* err)
{
    if (U_SUCCESS(*err)) {
        /* propagate change to C++ */
        locale_set_default(newDefaultLocale);
    }
}
2257
2258
/**
2259
 * Returns a list of all 2-letter language codes defined in ISO 639.  This is a pointer
2260
 * to an array of pointers to arrays of char.  All of these pointers are owned
2261
 * by ICU-- do not delete them, and do not write through them.  The array is
2262
 * terminated with a null pointer.
2263
 */
2264
U_CAPI const char* const*  U_EXPORT2
2265
uloc_getISOLanguages()
2266
0
{
2267
0
    return LANGUAGES;
2268
0
}
2269
2270
/**
2271
 * Returns a list of all 2-letter country codes defined in ISO 639.  This is a
2272
 * pointer to an array of pointers to arrays of char.  All of these pointers are
2273
 * owned by ICU-- do not delete them, and do not write through them.  The array is
2274
 * terminated with a null pointer.
2275
 */
2276
U_CAPI const char* const*  U_EXPORT2
2277
uloc_getISOCountries()
2278
0
{
2279
0
    return COUNTRIES;
2280
0
}
2281
2282
2283
/* this function to be moved into cstring.c later */
2284
static char gDecimal = 0;
2285
2286
static /* U_CAPI */
2287
double
2288
/* U_EXPORT2 */
2289
0
_uloc_strtod(const char *start, char **end) {
2290
0
    char *decimal;
2291
0
    char *myEnd;
2292
0
    char buf[30];
2293
0
    double rv;
2294
0
    if (!gDecimal) {
2295
0
        char rep[5];
2296
0
        /* For machines that decide to change the decimal on you,
2297
0
        and try to be too smart with localization.
2298
0
        This normally should be just a '.'. */
2299
0
        sprintf(rep, "%+1.1f", 1.0);
2300
0
        gDecimal = rep[2];
2301
0
    }
2302
0
2303
0
    if(gDecimal == '.') {
2304
0
        return uprv_strtod(start, end); /* fall through to OS */
2305
0
    } else {
2306
0
        uprv_strncpy(buf, start, 29);
2307
0
        buf[29]=0;
2308
0
        decimal = uprv_strchr(buf, '.');
2309
0
        if(decimal) {
2310
0
            *decimal = gDecimal;
2311
0
        } else {
2312
0
            return uprv_strtod(start, end); /* no decimal point */
2313
0
        }
2314
0
        rv = uprv_strtod(buf, &myEnd);
2315
0
        if(end) {
2316
0
            *end = (char*)(start+(myEnd-buf)); /* cast away const (to follow uprv_strtod API.) */
2317
0
        }
2318
0
        return rv;
2319
0
    }
2320
0
}
2321
2322
typedef struct {
2323
    float q;
2324
    int32_t dummy;  /* to avoid uninitialized memory copy from qsort */
2325
    char locale[ULOC_FULLNAME_CAPACITY+1];
2326
} _acceptLangItem;
2327
2328
static int32_t U_CALLCONV
2329
uloc_acceptLanguageCompare(const void * /*context*/, const void *a, const void *b)
2330
0
{
2331
0
    const _acceptLangItem *aa = (const _acceptLangItem*)a;
2332
0
    const _acceptLangItem *bb = (const _acceptLangItem*)b;
2333
0
2334
0
    int32_t rc = 0;
2335
0
    if(bb->q < aa->q) {
2336
0
        rc = -1;  /* A > B */
2337
0
    } else if(bb->q > aa->q) {
2338
0
        rc = 1;   /* A < B */
2339
0
    } else {
2340
0
        rc = 0;   /* A = B */
2341
0
    }
2342
0
2343
0
    if(rc==0) {
2344
0
        rc = uprv_stricmp(aa->locale, bb->locale);
2345
0
    }
2346
0
2347
#if defined(ULOC_DEBUG)
2348
    /*  fprintf(stderr, "a:[%s:%g], b:[%s:%g] -> %d\n",
2349
    aa->locale, aa->q,
2350
    bb->locale, bb->q,
2351
    rc);*/
2352
#endif
2353
2354
0
    return rc;
2355
0
}
2356
2357
/*
2358
mt-mt, ja;q=0.76, en-us;q=0.95, en;q=0.92, en-gb;q=0.89, fr;q=0.87, iu-ca;q=0.84, iu;q=0.82, ja-jp;q=0.79, mt;q=0.97, de-de;q=0.74, de;q=0.71, es;q=0.68, it-it;q=0.66, it;q=0.63, vi-vn;q=0.61, vi;q=0.58, nl-nl;q=0.55, nl;q=0.53
2359
*/
2360
2361
/**
 * Picks the best available locale for an HTTP Accept-Language header value.
 *
 * The header is split on ',' into items.  Each item is a locale name optionally
 * followed by ";q=<weight>" (whitespace is tolerated around ';', 'q' and '=');
 * an item without a parameter gets weight 1.0.  Every locale name is passed
 * through uloc_canonicalize(), the items are sorted with
 * uloc_acceptLanguageCompare(), and the sorted names are handed to
 * uloc_acceptLanguage().
 *
 * @param result              output buffer, forwarded to uloc_acceptLanguage()
 * @param resultAvailable     capacity of `result`
 * @param outResult           forwarded to uloc_acceptLanguage()
 * @param httpAcceptLanguage  NUL-terminated header value; NULL is rejected with
 *                            U_ILLEGAL_ARGUMENT_ERROR
 * @param availableLocales    forwarded to uloc_acceptLanguage()
 * @param status              ICU error code; nothing is done if it already
 *                            signals a failure.  Set to U_BUFFER_OVERFLOW_ERROR
 *                            if a locale name exceeds ULOC_FULLNAME_CAPACITY and
 *                            to U_MEMORY_ALLOCATION_ERROR if allocation fails.
 * @return -1 on any failure detected here, otherwise the return value of
 *         uloc_acceptLanguage()
 */
U_CAPI int32_t U_EXPORT2
uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable, UAcceptResult *outResult,
                            const char *httpAcceptLanguage,
                            UEnumeration* availableLocales,
                            UErrorCode *status)
{
    MaybeStackArray<_acceptLangItem, 4> items; // Struct for collecting items.
    char tmp[ULOC_FULLNAME_CAPACITY +1];
    int32_t n = 0;
    const char *itemEnd;
    const char *paramEnd;
    const char *s;
    const char *t;
    int32_t res;
    int32_t i;
    int32_t l;

    if(U_FAILURE(*status)) {
        return -1;
    }
    if(httpAcceptLanguage == NULL) {
        /* Reject NULL before measuring the string. */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }
    l = (int32_t)uprv_strlen(httpAcceptLanguage);

    /* Header bytes are untrusted: isspace() needs an unsigned char value. */
    for(s=httpAcceptLanguage;*s;) {
        while(isspace((unsigned char)*s)) { /* eat space at the beginning */
            s++;
        }
        itemEnd=uprv_strchr(s,',');
        paramEnd=uprv_strchr(s,';');
        if(!itemEnd) {
            itemEnd = httpAcceptLanguage+l; /* end of string */
        }
        if(paramEnd && paramEnd<itemEnd) {
            /* semicolon (;) is closer than end (,) */
            t = paramEnd+1;
            /* Optional whitespace between ';' and 'q' (e.g. "en; q=0.8"). */
            while(isspace((unsigned char)*t)) {
                t++;
            }
            if(*t=='q') {
                t++;
            }
            while(isspace((unsigned char)*t)) {
                t++;
            }
            if(*t=='=') {
                t++;
            }
            while(isspace((unsigned char)*t)) {
                t++;
            }
            items[n].q = (float)_uloc_strtod(t,NULL);
        } else {
            /* no semicolon - it's 1.0 */
            items[n].q = 1.0f;
            paramEnd = itemEnd;
        }
        items[n].dummy=0;
        /* eat spaces prior to semi; t ends one past the last kept character
           and never moves before s */
        for(t=paramEnd;(t>s)&&isspace((unsigned char)t[-1]);t--)
            ;
        int32_t slen = (int32_t)(t-s);
        if(slen > ULOC_FULLNAME_CAPACITY) {
            *status = U_BUFFER_OVERFLOW_ERROR;
            return -1; // too big
        }
        uprv_strncpy(items[n].locale, s, slen);
        items[n].locale[slen]=0; // terminate
        int32_t clen = uloc_canonicalize(items[n].locale, tmp, UPRV_LENGTHOF(tmp)-1, status);
        if(U_FAILURE(*status)) {
            return -1;
        }
        if((clen!=slen) || (uprv_strncmp(items[n].locale, tmp, slen))) {
            // canonicalization had an effect- copy back
            uprv_strncpy(items[n].locale, tmp, clen);
            items[n].locale[clen] = 0; // terminate
        }
        n++;
        s = itemEnd;
        while(*s==',') { /* eat duplicate commas */
            s++;
        }
        if(n>=items.getCapacity()) { // If we need more items
            if(NULL == items.resize(items.getCapacity()*2, items.getCapacity())) {
                *status = U_MEMORY_ALLOCATION_ERROR;
                return -1;
            }
#if defined(ULOC_DEBUG)
            fprintf(stderr,"malloced at size %d\n", items.getCapacity());
#endif
        }
    }
    uprv_sortArray(items.getAlias(), n, sizeof(items[0]), uloc_acceptLanguageCompare, NULL, TRUE, status);
    if (U_FAILURE(*status)) {
        return -1;
    }
    /* Array of pointers into items[]; items must outlive the call below. */
    LocalMemory<const char*> strs(NULL);
    if (strs.allocateInsteadAndReset(n) == NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return -1;
    }
    for(i=0;i<n;i++) {
        strs[i]=items[i].locale;
    }
    res =  uloc_acceptLanguage(result, resultAvailable, outResult,
                               strs.getAlias(), n, availableLocales, status);
    return res;
}
2466
2467
2468
/**
 * Picks the first entry of `acceptList` that is also in `availableLocales`.
 *
 * Pass 1 walks the accept list in order and looks for an exact string match
 * among the available locales (ULOC_ACCEPT_VALID).  While doing so it records
 * the parent of every accept-list entry (via uloc_getParent()) and the length
 * of the longest available locale.
 *
 * Pass 2 tries the recorded parents, longest first, replacing each tried
 * parent with its own parent, until one matches (ULOC_ACCEPT_FALLBACK) or the
 * candidate length reaches zero.
 *
 * @param result            output buffer receiving the matched locale
 * @param resultAvailable   capacity of `result`
 * @param outResult         if non-NULL, receives ULOC_ACCEPT_VALID,
 *                          ULOC_ACCEPT_FALLBACK, or ULOC_ACCEPT_FAILED when no
 *                          match is found.  It is left untouched when the
 *                          function bails out early on an incoming failure
 *                          status or an allocation failure.
 * @param acceptList        array of acceptable locale IDs, most preferred first
 * @param acceptListCount   number of entries in `acceptList`
 * @param availableLocales  enumeration of candidate locales; it is iterated
 *                          and reset repeatedly
 * @param status            ICU error code; set to U_MEMORY_ALLOCATION_ERROR if
 *                          the fallback list cannot be allocated
 * @return -1 if nothing matched or on error, otherwise the value returned by
 *         u_terminateChars() for the matched locale
 */
U_CAPI int32_t U_EXPORT2
uloc_acceptLanguage(char *result, int32_t resultAvailable,
                    UAcceptResult *outResult, const char **acceptList,
                    int32_t acceptListCount,
                    UEnumeration* availableLocales,
                    UErrorCode *status)
{
    int32_t i,j;
    int32_t len;
    int32_t maxLen=0;
    char tmp[ULOC_FULLNAME_CAPACITY+1];
    const char *l;
    char **fallbackList;
    if(U_FAILURE(*status)) {
        return -1;
    }
    fallbackList = static_cast<char **>(uprv_malloc((size_t)(sizeof(fallbackList[0])*acceptListCount)));
    if(fallbackList==NULL) {
        *status = U_MEMORY_ALLOCATION_ERROR;
        return -1;
    }
    for(i=0;i<acceptListCount;i++) {
#if defined(ULOC_DEBUG)
        fprintf(stderr,"%02d: %s\n", i, acceptList[i]);
#endif
        while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
#if defined(ULOC_DEBUG)
            fprintf(stderr,"  %s\n", l);
#endif
            len = (int32_t)uprv_strlen(l);
            if(!uprv_strcmp(acceptList[i], l)) {
                if(outResult) {
                    *outResult = ULOC_ACCEPT_VALID;
                }
#if defined(ULOC_DEBUG)
                fprintf(stderr, "MATCH! %s\n", l);
#endif
                if(len>0) {
                    uprv_strncpy(result, l, uprv_min(len, resultAvailable));
                }
                /* Only entries [0, i) have been filled in so far. */
                for(j=0;j<i;j++) {
                    uprv_free(fallbackList[j]);
                }
                uprv_free(fallbackList);
                return u_terminateChars(result, resultAvailable, len, status);
            }
            if(len>maxLen) {
                maxLen = len;
            }
        }
        uenum_reset(availableLocales, status);
        /* save off parent info */
        if(uloc_getParent(acceptList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
            fallbackList[i] = uprv_strdup(tmp);
        } else {
            fallbackList[i]=0;
        }
    }

    /* Try fallbacks from the longest possible length downwards. */
    for(maxLen--;maxLen>0;maxLen--) {
        for(i=0;i<acceptListCount;i++) {
            if(fallbackList[i] && ((int32_t)uprv_strlen(fallbackList[i])==maxLen)) {
#if defined(ULOC_DEBUG)
                fprintf(stderr,"Try: [%s]", fallbackList[i]);
#endif
                while((l=uenum_next(availableLocales, NULL, status)) != NULL) {
#if defined(ULOC_DEBUG)
                    fprintf(stderr,"  %s\n", l);
#endif
                    len = (int32_t)uprv_strlen(l);
                    if(!uprv_strcmp(fallbackList[i], l)) {
                        if(outResult) {
                            *outResult = ULOC_ACCEPT_FALLBACK;
                        }
#if defined(ULOC_DEBUG)
                        fprintf(stderr, "fallback MATCH! %s\n", l);
#endif
                        if(len>0) {
                            uprv_strncpy(result, l, uprv_min(len, resultAvailable));
                        }
                        for(j=0;j<acceptListCount;j++) {
                            uprv_free(fallbackList[j]);
                        }
                        uprv_free(fallbackList);
                        return u_terminateChars(result, resultAvailable, len, status);
                    }
                }
                uenum_reset(availableLocales, status);

                /* No match at this level: step this entry up to its parent. */
                if(uloc_getParent(fallbackList[i], tmp, UPRV_LENGTHOF(tmp), status)!=0) {
                    uprv_free(fallbackList[i]);
                    fallbackList[i] = uprv_strdup(tmp);
                } else {
                    uprv_free(fallbackList[i]);
                    fallbackList[i]=0;
                }
            }
        }
    }

    /* Report failure even when the fallback loop above never ran. */
    if(outResult) {
        *outResult = ULOC_ACCEPT_FAILED;
    }
    for(i=0;i<acceptListCount;i++) {
        uprv_free(fallbackList[i]);
    }
    uprv_free(fallbackList);
    return -1;
}
2576
2577
/**
 * Maps a locale keyword to its Unicode locale (BCP 47) key.
 *
 * @param keyword  keyword to convert
 * @return the key found by ulocimp_toBcpKey(); if none is found and `keyword`
 *         passes ultag_isUnicodeLocaleKey(), `keyword` itself; otherwise NULL
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleKey(const char* keyword)
{
    const char* mapped = ulocimp_toBcpKey(keyword);
    if (mapped != NULL) {
        return mapped;
    }
    // No known mapping: accept the input as-is when its syntax is valid.
    if (ultag_isUnicodeLocaleKey(keyword, -1)) {
        return keyword;
    }
    return mapped;
}
2587
2588
/**
 * Maps a locale keyword value to its Unicode locale (BCP 47) type.
 *
 * @param keyword  keyword the value belongs to
 * @param value    value to convert
 * @return the type found by ulocimp_toBcpType(); if none is found and `value`
 *         passes ultag_isUnicodeLocaleType(), `value` itself; otherwise NULL
 */
U_CAPI const char* U_EXPORT2
uloc_toUnicodeLocaleType(const char* keyword, const char* value)
{
    const char* mapped = ulocimp_toBcpType(keyword, value, NULL, NULL);
    if (mapped != NULL) {
        return mapped;
    }
    // No known mapping: accept the input value as-is when its syntax is valid.
    if (ultag_isUnicodeLocaleType(value, -1)) {
        return value;
    }
    return mapped;
}
2598
2599
/**
 * Checks that a legacy locale key consists solely of characters accepted by
 * UPRV_ISALPHANUM.  An empty string is accepted.
 *
 * @param legacyKey  NUL-terminated key to check
 * @return TRUE if every character passes UPRV_ISALPHANUM, otherwise FALSE
 */
static UBool
isWellFormedLegacyKey(const char* legacyKey)
{
    for (const char* cursor = legacyKey; *cursor != 0; ++cursor) {
        if (!UPRV_ISALPHANUM(*cursor)) {
            return FALSE;
        }
    }
    return TRUE;
}
2611
2612
/**
 * Checks that a legacy locale type is one or more non-empty runs of
 * UPRV_ISALPHANUM characters separated by single '_', '/' or '-' characters.
 * Leading, trailing and doubled separators are rejected, as is the empty
 * string and any other character.
 *
 * @param legacyType  NUL-terminated type to check
 * @return TRUE if well-formed, otherwise FALSE
 */
static UBool
isWellFormedLegacyType(const char* legacyType)
{
    int32_t segmentLen = 0;  /* length of the alphanumeric run in progress */

    for (const char* cursor = legacyType; *cursor != 0; ++cursor) {
        switch (*cursor) {
        case '_':
        case '/':
        case '-':
            /* A separator must follow at least one alphanumeric character. */
            if (segmentLen == 0) {
                return FALSE;
            }
            segmentLen = 0;
            break;
        default:
            if (!UPRV_ISALPHANUM(*cursor)) {
                return FALSE;
            }
            segmentLen++;
            break;
        }
    }

    /* The string must not be empty or end on a separator. */
    return (segmentLen != 0);
}
2632
2633
/**
 * Maps a locale keyword to its legacy key.
 *
 * @param keyword  keyword to convert
 * @return the key found by ulocimp_toLegacyKey(); if none is found and
 *         `keyword` is well-formed legacy syntax, `keyword` itself;
 *         otherwise NULL
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyKey(const char* keyword)
{
    const char* mapped = ulocimp_toLegacyKey(keyword);
    if (mapped != NULL) {
        return mapped;
    }

    // No known mapping: fall back to a syntax check of the input itself.
    //
    // LDML/CLDR defines keyword syntax in
    //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
    //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
    // Keys can only consist of [0-9a-zA-Z].
    return isWellFormedLegacyKey(keyword) ? keyword : mapped;
}
2651
2652
/**
 * Maps a locale keyword value to its legacy type.
 *
 * @param keyword  keyword the value belongs to
 * @param value    value to convert
 * @return the type found by ulocimp_toLegacyType(); if none is found and
 *         `value` is accepted by isWellFormedLegacyType(), `value` itself;
 *         otherwise NULL
 */
U_CAPI const char* U_EXPORT2
uloc_toLegacyType(const char* keyword, const char* value)
{
    const char* mapped = ulocimp_toLegacyType(keyword, value, NULL, NULL);
    if (mapped != NULL) {
        return mapped;
    }

    // No known mapping: fall back to a syntax check of the input itself.
    //
    // LDML/CLDR defines keyword syntax in
    //  * http://www.unicode.org/reports/tr35/#Unicode_locale_identifier and
    //  * http://www.unicode.org/reports/tr35/#Old_Locale_Extension_Syntax
    // Values (types) can only consist of [0-9a-zA-Z], plus for legacy values
    // we allow [/_-+] in the middle (e.g. "Etc/GMT+1", "Asia/Tel_Aviv")
    return isWellFormedLegacyType(value) ? value : mapped;
}
2671
2672
/*eof*/