Coverage Report

Created: 2025-07-11 06:23

/src/icu/source/common/locmap.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
 **********************************************************************
5
 *   Copyright (C) 1996-2016, International Business Machines
6
 *   Corporation and others.  All Rights Reserved.
7
 **********************************************************************
8
 *
9
 * Provides functionality for mapping between
10
 * LCID and Posix IDs or ICU locale to codepage
11
 *
12
 * Note: All classes and code in this file are
13
 *       intended for internal use only.
14
 *
15
 * Methods of interest:
16
 *   unsigned long convertToLCID(const char*);
17
 *   const char* convertToPosix(unsigned long);
18
 *
19
 * Kathleen Wilson, 4/30/96
20
 *
21
 *  Date        Name        Description
22
 *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added
23
 *                          setId() method and safety check against 
24
 *                          MAX_ID_LENGTH.
25
 * 04/23/99     stephen     Added C wrapper for convertToPosix.
26
 * 09/18/00     george      Removed the memory leaks.
27
 * 08/23/01     george      Convert to C
28
 */
29
30
#include "locmap.h"
31
#include "cstring.h"
32
#include "cmemory.h"
33
#include "unicode/uloc.h"
34
35
#if U_PLATFORM == U_PF_WINDOWS && defined(_MSC_VER) && (_MSC_VER >= 1500)
36
/*
37
 * TODO: It seems like we should widen this to
38
 * either U_PLATFORM_USES_ONLY_WIN32_API (includes MinGW)
39
 * or U_PLATFORM_HAS_WIN32_API (includes MinGW and Cygwin)
40
 * but those use gcc and won't have defined(_MSC_VER).
41
 * We might need to #include some Windows header and test for some version macro from there.
42
 * Or call some Windows function and see what it returns.
43
 */
44
#define USE_WINDOWS_LCID_MAPPING_API
45
#include <windows.h>
46
#include <winnls.h>
47
#endif
48
49
/*
50
 * Note:
51
 * The mapping from Win32 locale ID numbers to POSIX locale strings should
52
 * be the faster one.
53
 *
54
 * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
55
 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
56
 */
57
58
/*
59
////////////////////////////////////////////////
60
//
61
// Internal Classes for LCID <--> POSIX Mapping
62
//
63
/////////////////////////////////////////////////
64
*/
65
66
/*
 * One row of an LCID <--> POSIX mapping table: a host locale ID paired
 * with the POSIX locale ID string listed for it. Both members are const,
 * so a row can only be given its values through an initializer.
 */
typedef struct ILcidPosixElement
67
{
68
    const uint32_t hostID;          /* host LCID value, e.g. 0x0409 in the tables below */
69
    const char * const posixID;     /* POSIX locale ID string, e.g. "en_US" */
70
} ILcidPosixElement;
71
72
/*
 * One group of mapping rows: a pointer to an array of ILcidPosixElement
 * together with the number of rows in that array. The ILCID_POSIX_MAP
 * macro in this file produces initializers of this shape.
 */
typedef struct ILcidPosixMap
73
{
74
    const uint32_t numRegions;                          /* number of rows in regionMaps */
75
    const struct ILcidPosixElement* const regionMaps;   /* the rows of this group */
76
} ILcidPosixMap;
77
78
79
/*
80
/////////////////////////////////////////////////
81
//
82
// Easy macros to make the LCID <--> POSIX Mapping
83
//
84
/////////////////////////////////////////////////
85
*/
86
87
/**
88
 * The standard one language/one country mapping for LCID.
89
 * Defines a static two-row table named locmap_<languageID>. The first row
90
 * is the language alone, keyed by LANGUAGE_LCID(hostID) (LANGUAGE_LCID is
 * defined outside this section); the second row is the language with the
 * country, keyed by hostID itself.
 * languageID and posixID are stringized by the macro, so pass them unquoted.
 * The expansion already ends with "};", so an invocation takes no trailing
 * semicolon.
91
 * @param hostID LCID in host format such as 0x044d
92
 * @param languageID posix ID of just the language such as 'de'; also used
 *                   as the suffix of the generated table name
93
 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
94
 */
95
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
96
static const ILcidPosixElement locmap_ ## languageID [] = { \
97
    {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
98
    {hostID, #posixID}, \
99
};
100
101
/**
102
 * Define a subtable by ID.
 * Expands to the declaration of a static table named locmap_<id> up to and
 * including the "=", so each invocation must be followed by a brace-enclosed
 * list of {hostID, "posixID"} rows and a terminating semicolon.
103
 * @param id the POSIX ID, either a language or language_TERRITORY; it is
 *           pasted into the table name, so it must be a valid identifier suffix
104
 */
105
#define ILCID_POSIX_SUBTABLE(id) \
106
static const ILcidPosixElement locmap_ ## id [] =
107
108
109
/**
110
 * Create the map entry for one locmap_ table. Expands to a brace-enclosed
111
 * initializer for an ILcidPosixMap: UPRV_LENGTHOF of the table fills
112
 * numRegions and the table itself fills regionMaps.
 * This macro supposes that the argument is the same as the suffix of the
 * table's variable name, and that the first row of the table is just the
 * language.
113
 * @param _posixID the suffix of the locmap_ table to reference. This is
 *                 usually the language (e.g. de), but some tables use a
 *                 longer name (e.g. or_IN, en_US_POSIX).
114
 */
115
#define ILCID_POSIX_MAP(_posixID) \
116
    {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
117
118
/*
119
////////////////////////////////////////////
120
//
121
// Create the table of LCID to POSIX Mapping
122
// None of it should be dynamically created.
123
//
124
// Keep static locale variables inside the function so that
125
// it can be created properly during static init.
126
//
127
// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier 
128
//       (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
129
//
130
//       Microsoft is moving away from LCID in favor of locale name as of Vista.  This table needs to be
131
//       maintained for support of older Windows versions.
132
//       Update: Windows 7 (091130)
133
//
134
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
135
//       @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
136
//       called from uloc_getLCID(), keywords other than collation are already removed. If we really need
137
//       to support other keywords in this mapping data, we must update the implementation.
138
////////////////////////////////////////////
139
*/
140
141
// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as 
142
// LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
143
144
ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
145
146
/* Arabic: the bare language row followed by one row per region. */
ILCID_POSIX_SUBTABLE(ar) {
147
    {0x01,   "ar"},
148
    {0x3801, "ar_AE"},
149
    {0x3c01, "ar_BH"},
150
    {0x1401, "ar_DZ"},
151
    {0x0c01, "ar_EG"},
152
    {0x0801, "ar_IQ"},
153
    {0x2c01, "ar_JO"},
154
    {0x3401, "ar_KW"},
155
    {0x3001, "ar_LB"},
156
    {0x1001, "ar_LY"},
157
    {0x1801, "ar_MA"},
158
    {0x1801, "ar_MO"},  /* same LCID as ar_MA above. NOTE(review): presumably kept as an alias for ar_MA; confirm */
159
    {0x2001, "ar_OM"},
160
    {0x4001, "ar_QA"},
161
    {0x0401, "ar_SA"},
162
    {0x2801, "ar_SY"},
163
    {0x1c01, "ar_TN"},
164
    {0x2401, "ar_YE"}
165
};
166
167
ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
168
ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
169
ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
170
171
ILCID_POSIX_SUBTABLE(az) {
172
    {0x2c,   "az"},
173
    {0x082c, "az_Cyrl_AZ"},  /* Cyrillic based */
174
    {0x742c, "az_Cyrl"},  /* Cyrillic based */
175
    {0x042c, "az_Latn_AZ"}, /* Latin based */
176
    {0x782c, "az_Latn"}, /* Latin based */
177
    {0x042c, "az_AZ"} /* Latin based */
178
};
179
180
ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
181
ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
182
183
/*ILCID_POSIX_SUBTABLE(ber) {
184
    {0x5f,   "ber"},
185
    {0x045f, "ber_Arab_DZ"},
186
    {0x045f, "ber_Arab"},
187
    {0x085f, "ber_Latn_DZ"},
188
    {0x085f, "ber_Latn"}
189
};*/
190
191
ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
192
193
ILCID_POSIX_ELEMENT_ARRAY(0x0466, bin, bin_NG)
194
195
ILCID_POSIX_SUBTABLE(bn) {
196
    {0x45,   "bn"},
197
    {0x0845, "bn_BD"},
198
    {0x0445, "bn_IN"}
199
};
200
201
/*
 * Tibetan. This group also carries a row for a different language code,
 * dz_BT, whose LCID 0x0c51 shares the low byte 0x51 with the bo rows.
 */
ILCID_POSIX_SUBTABLE(bo) {
202
    {0x51,   "bo"},
203
    {0x0851, "bo_BT"},
204
    {0x0451, "bo_CN"},
205
    {0x0c51, "dz_BT"}   /* listed under bo because of the shared 0x51 low byte */
206
};
207
208
ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
209
210
ILCID_POSIX_SUBTABLE(ca) {
211
    {0x03,   "ca"},
212
    {0x0403, "ca_ES"},
213
    {0x0803, "ca_ES_VALENCIA"}
214
};
215
216
ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
217
ILCID_POSIX_ELEMENT_ARRAY(0x045c, chr,chr_US)
218
219
// ICU has chosen different names for these.
220
ILCID_POSIX_SUBTABLE(ckb) {
221
    {0x92,   "ckb"},
222
    {0x7c92, "ckb_Arab"},
223
    {0x0492, "ckb_Arab_IQ"}
224
};
225
226
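/* z/OS defines cs as a function; the table was declared as cs_CZ to get around the resulting compiler errors. NOTE(review): the macro below names the table locmap_cs, so this workaround may no longer apply. */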
227
ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
228
229
ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
230
ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
231
232
// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
233
ILCID_POSIX_SUBTABLE(de) {
234
    {0x07,   "de"},
235
    {0x0c07, "de_AT"},
236
    {0x0807, "de_CH"},
237
    {0x0407, "de_DE"},
238
    {0x1407, "de_LI"},
239
    {0x1007, "de_LU"},
240
    {0x10407,"de_DE@collation=phonebook"},  /*This is really de_DE_PHONEBOOK on Windows*/
241
    {0x10407,"de@collation=phonebook"}  /*This is really de_DE_PHONEBOOK on Windows*/
242
};
243
244
ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
245
ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
246
247
// Windows uses an empty string for 'invariant'
248
ILCID_POSIX_SUBTABLE(en) {
249
    {0x09,   "en"},
250
    {0x0c09, "en_AU"},
251
    {0x2809, "en_BZ"},
252
    {0x1009, "en_CA"},
253
    {0x0809, "en_GB"},
254
    {0x3c09, "en_HK"},
255
    {0x3809, "en_ID"},
256
    {0x1809, "en_IE"},
257
    {0x4009, "en_IN"},
258
    {0x2009, "en_JM"},
259
    {0x4409, "en_MY"},
260
    {0x1409, "en_NZ"},
261
    {0x3409, "en_PH"},
262
    {0x4809, "en_SG"},
263
    {0x2C09, "en_TT"},
264
    {0x0409, "en_US"},
265
    {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
266
    {0x2409, "en_VI"},  /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
267
    {0x1c09, "en_ZA"},
268
    {0x3009, "en_ZW"},
269
    {0x2409, "en_029"},
270
    {0x0409, "en_AS"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
271
    {0x0409, "en_GU"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
272
    {0x0409, "en_MH"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
273
    {0x0409, "en_MP"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
274
    {0x0409, "en_UM"}   /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
275
};
276
277
ILCID_POSIX_SUBTABLE(en_US_POSIX) {
278
    {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
279
};
280
281
// Windows doesn't know POSIX or BCP47 Unicode traditional sort names
282
ILCID_POSIX_SUBTABLE(es) {
283
    {0x0a,   "es"},
284
    {0x2c0a, "es_AR"},
285
    {0x400a, "es_BO"},
286
    {0x340a, "es_CL"},
287
    {0x240a, "es_CO"},
288
    {0x140a, "es_CR"},
289
    {0x5c0a, "es_CU"},
290
    {0x1c0a, "es_DO"},
291
    {0x300a, "es_EC"},
292
    {0x0c0a, "es_ES"},      /*Modern sort.*/
293
    {0x100a, "es_GT"},
294
    {0x480a, "es_HN"},
295
    {0x080a, "es_MX"},
296
    {0x4c0a, "es_NI"},
297
    {0x180a, "es_PA"},
298
    {0x280a, "es_PE"},
299
    {0x500a, "es_PR"},
300
    {0x3c0a, "es_PY"},
301
    {0x440a, "es_SV"},
302
    {0x540a, "es_US"},
303
    {0x380a, "es_UY"},
304
    {0x200a, "es_VE"},
305
    {0x580a, "es_419"},
306
    {0x040a, "es_ES@collation=traditional"},
307
    {0x040a, "es@collation=traditional"}        // Windows will treat this as es-ES@collation=traditional
308
};
309
310
ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
311
ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
312
313
/* ISO-639 doesn't distinguish between Persian and Dari.*/
314
ILCID_POSIX_SUBTABLE(fa) {
315
    {0x29,   "fa"},
316
    {0x0429, "fa_IR"},  /* Persian/Farsi (Iran) */
317
    {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
318
};
319
320
321
/* duplicate for roundtripping */
322
ILCID_POSIX_SUBTABLE(fa_AF) {
323
    {0x8c,   "fa_AF"},  /* Persian/Dari (Afghanistan) */
324
    {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
325
};
326
327
ILCID_POSIX_SUBTABLE(ff) {
328
    {0x67,   "ff"},
329
    {0x7c67, "ff_Latn"},
330
    {0x0867, "ff_Latn_SN"},
331
    {0x0467, "ff_NG"}
332
};
333
334
ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
335
ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
336
ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
337
338
ILCID_POSIX_SUBTABLE(fr) {
339
    {0x0c,   "fr"},
340
    {0x080c, "fr_BE"},
341
    {0x0c0c, "fr_CA"},
342
    {0x240c, "fr_CD"},
343
    {0x240c, "fr_CG"},
344
    {0x100c, "fr_CH"},
345
    {0x300c, "fr_CI"},
346
    {0x2c0c, "fr_CM"},
347
    {0x040c, "fr_FR"},
348
    {0x3c0c, "fr_HT"},
349
    {0x140c, "fr_LU"},
350
    {0x380c, "fr_MA"},
351
    {0x180c, "fr_MC"},
352
    {0x340c, "fr_ML"},
353
    {0x200c, "fr_RE"},
354
    {0x280c, "fr_SN"},
355
    {0xe40c, "fr_015"},
356
    {0x1c0c, "fr_029"}
357
};
358
359
ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
360
361
ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
362
363
/*
 * Group 0x3c. Besides the ga rows it holds a gd_GB row under 0x043c;
 * gd_GB is listed a second time, under 0x0491, in the gd table that follows.
 */
ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
364
    {0x3c,   "ga"},
365
    {0x083c, "ga_IE"},
366
    {0x043c, "gd_GB"}   /* a gd (Scotland) ID inside the ga group */
367
};
368
369
ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
370
    {0x91,   "gd"},
371
    {0x0491, "gd_GB"}
372
};
373
374
ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
375
ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
376
ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
377
ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
378
379
ILCID_POSIX_SUBTABLE(ha) {
380
    {0x68,   "ha"},
381
    {0x7c68, "ha_Latn"},
382
    {0x0468, "ha_Latn_NG"},
383
};
384
385
ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
386
ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
387
ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
388
389
/* This LCID is really four different locales.*/
390
ILCID_POSIX_SUBTABLE(hr) {
391
    {0x1a,   "hr"},
392
    {0x141a, "bs_Latn_BA"},  /* Bosnian, Bosnia and Herzegovina */
393
    {0x681a, "bs_Latn"},  /* Bosnian, Bosnia and Herzegovina */
394
    {0x141a, "bs_BA"},  /* Bosnian, Bosnia and Herzegovina */
395
    {0x781a, "bs"},     /* Bosnian */
396
    {0x201a, "bs_Cyrl_BA"},  /* Bosnian, Bosnia and Herzegovina */
397
    {0x641a, "bs_Cyrl"},  /* Bosnian, Bosnia and Herzegovina */
398
    {0x101a, "hr_BA"},  /* Croatian in Bosnia */
399
    {0x041a, "hr_HR"},  /* Croatian*/
400
    {0x2c1a, "sr_Latn_ME"},
401
    {0x241a, "sr_Latn_RS"},
402
    {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
403
    {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
404
    {0x701a, "sr_Latn"},    /* It's 0x1a or 0x081a, pick one to make the test program happy. */
405
    {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
406
    {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
407
    {0x301a, "sr_Cyrl_ME"},
408
    {0x281a, "sr_Cyrl_RS"},
409
    {0x6c1a, "sr_Cyrl"},    /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
410
    {0x7c1a, "sr"}          /* In CLDR sr is sr_Cyrl. */
411
};
412
413
ILCID_POSIX_SUBTABLE(hsb) {
414
    {0x2E,   "hsb"},
415
    {0x042E, "hsb_DE"},
416
    {0x082E, "dsb_DE"},
417
    {0x7C2E, "dsb"},
418
};
419
420
ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
421
ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
422
ILCID_POSIX_ELEMENT_ARRAY(0x0469, ibb, ibb_NG)
423
ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
424
ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
425
ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
426
ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
427
428
ILCID_POSIX_SUBTABLE(it) {
429
    {0x10,   "it"},
430
    {0x0810, "it_CH"},
431
    {0x0410, "it_IT"}
432
};
433
434
ILCID_POSIX_SUBTABLE(iu) {
435
    {0x5d,   "iu"},
436
    {0x045d, "iu_Cans_CA"},
437
    {0x785d, "iu_Cans"},
438
    {0x085d, "iu_Latn_CA"},
439
    {0x7c5d, "iu_Latn"}
440
};
441
442
ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL)    /*Left in for compatibility*/
443
ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
444
ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
445
ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
446
ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
447
ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
448
ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
449
450
ILCID_POSIX_SUBTABLE(ko) {
451
    {0x12,   "ko"},
452
    {0x0812, "ko_KP"},
453
    {0x0412, "ko_KR"}
454
};
455
456
ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
457
ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr,  kr_NG)
458
459
ILCID_POSIX_SUBTABLE(ks) {         /* We could add PK and CN too */
460
    {0x60,   "ks"},
461
    {0x0860, "ks_IN"},              /* Documentation doesn't mention script */
462
    {0x0460, "ks_Arab_IN"},
463
    {0x0860, "ks_Deva_IN"}
464
};
465
466
ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG)   /* Kyrgyz is spoken in Kyrgyzstan */
467
ILCID_POSIX_ELEMENT_ARRAY(0x0476, la, la_IT)   /* TODO: Verify the country */
468
ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
469
ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
470
ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
471
ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
472
ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
473
ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
474
ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
475
476
ILCID_POSIX_SUBTABLE(mn) {
477
    {0x50,   "mn"},
478
    {0x0450, "mn_MN"},
479
    {0x7c50, "mn_Mong"},
480
    {0x0850, "mn_Mong_CN"},
481
    {0x0850, "mn_CN"},
482
    {0x7850, "mn_Cyrl"},
483
    {0x0c50, "mn_Mong_MN"}
484
};
485
486
ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
487
ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
488
ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
489
490
ILCID_POSIX_SUBTABLE(ms) {
491
    {0x3e,   "ms"},
492
    {0x083e, "ms_BN"},   /* Brunei Darussalam*/
493
    {0x043e, "ms_MY"}    /* Malaysia*/
494
};
495
496
ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
497
ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
498
499
ILCID_POSIX_SUBTABLE(ne) {
500
    {0x61,   "ne"},
501
    {0x0861, "ne_IN"},   /* India*/
502
    {0x0461, "ne_NP"}    /* Nepal*/
503
};
504
505
ILCID_POSIX_SUBTABLE(nl) {
506
    {0x13,   "nl"},
507
    {0x0813, "nl_BE"},
508
    {0x0413, "nl_NL"}
509
};
510
511
/* The "no" locale split into nb and nn.  By default in ICU, "no" is nb.*/
512
// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
513
ILCID_POSIX_SUBTABLE(no) {
514
    {0x14,   "no"},     /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */ 
515
    {0x7c14, "nb"},     /* really nb */
516
    {0x0414, "nb_NO"},  /* really nb_NO. Keep first in the 414 list. */
517
    {0x0414, "no_NO"},  /* really nb_NO */
518
    {0x0814, "nn_NO"},  /* really nn_NO. Keep first in the 814 list.  */
519
    {0x7814, "nn"},     /* It's 0x14 or 0x814, pick one to make the test program happy. */
520
    {0x0814, "no_NO_NY"}/* really nn_NO */
521
};
522
523
ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA)   /* TODO: Verify the ISO-639 code */
524
ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
525
526
ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
527
    {0x72,   "om"},
528
    {0x0472, "om_ET"},
529
    {0x0472, "gaz_ET"}
530
};
531
532
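/* Declared as or_IN (table locmap_or_IN) to get around compiler errors: "or" is an alternative operator token in C++, so it cannot be pasted into an identifier. */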
533
ILCID_POSIX_SUBTABLE(or_IN) {
534
    {0x48,   "or"},
535
    {0x0448, "or_IN"},
536
};
537
538
539
ILCID_POSIX_SUBTABLE(pa) {
540
    {0x46,   "pa"},
541
    {0x0446, "pa_IN"},
542
    {0x0846, "pa_PK"},
543
    {0x0846, "pa_Arab_PK"}
544
};
545
546
ILCID_POSIX_ELEMENT_ARRAY(0x0479, pap, pap_AN)
547
ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
548
ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
549
550
ILCID_POSIX_SUBTABLE(pt) {
551
    {0x16,   "pt"},
552
    {0x0416, "pt_BR"},
553
    {0x0816, "pt_PT"}
554
};
555
556
ILCID_POSIX_SUBTABLE(qu) {
557
    {0x6b,   "qu"},
558
    {0x046b, "qu_BO"},
559
    {0x086b, "qu_EC"},
560
    {0x0C6b, "qu_PE"},
561
    {0x046b, "quz_BO"},
562
    {0x086b, "quz_EC"},
563
    {0x0C6b, "quz_PE"}
564
};
565
566
ILCID_POSIX_SUBTABLE(quc) {
567
    {0x93,   "quc"},
568
    {0x0493, "quc_CO"},
569
    /*
570
        "quc_Latn_GT" is an exceptional case. The language ID of "quc"
571
        is 0x93, but the LCID of "quc_Latn_GT" is 0x486, which belongs
572
        to the "qut" group. "qut" is a retired ISO 639-3 language
573
        code for West Central Quiche that was merged into "quc".
574
        It looks as if Windows previously reserved "qut" for K'iche' but
575
        decided to use "quc" when adding a locale for K'iche' (Guatemala).
576

577
        The data structure used here assumes that the language ID bits in
578
        an LCID are unique to one alphabetic language code. That is not true
579
        for "quc_Latn_GT". Without the row below, an LCID lookup
580
        by alphabetic locale ID (POSIX) would fail. The same row is also
581
        listed under "qut" below, where it is required for the reverse lookup.
582
    */
583
    {0x0486, "quc_Latn_GT"}
584
};
585
586
ILCID_POSIX_SUBTABLE(qut) {
587
    {0x86,   "qut"},
588
    {0x0486, "qut_GT"},
589
    /*
590
        See the note in "quc" above.
591
    */
592
    {0x0486, "quc_Latn_GT"}
593
};
594
595
ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
596
597
ILCID_POSIX_SUBTABLE(ro) {
598
    {0x18,   "ro"},
599
    {0x0418, "ro_RO"},
600
    {0x0818, "ro_MD"}
601
};
602
603
// TODO: This is almost certainly 'wrong'.  0 in Windows is a synonym for LOCALE_USER_DEFAULT.
604
// More likely this is a similar concept to the Windows 0x7f Invariant locale ""
605
// (Except that it's not invariant in ICU)
606
ILCID_POSIX_SUBTABLE(root) {
607
    {0x00,   "root"}
608
};
609
610
ILCID_POSIX_SUBTABLE(ru) {
611
    {0x19,   "ru"},
612
    {0x0419, "ru_RU"},
613
    {0x0819, "ru_MD"}
614
};
615
616
ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
617
ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
618
ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
619
620
ILCID_POSIX_SUBTABLE(sd) {
621
    {0x59,   "sd"},
622
    {0x0459, "sd_IN"},
623
    {0x0459, "sd_Deva_IN"},
624
    {0x0859, "sd_PK"}
625
};
626
627
ILCID_POSIX_SUBTABLE(se) {
628
    {0x3b,   "se"},
629
    {0x0c3b, "se_FI"},
630
    {0x043b, "se_NO"},
631
    {0x083b, "se_SE"},
632
    {0x783b, "sma"},
633
    {0x183b, "sma_NO"},
634
    {0x1c3b, "sma_SE"},
635
    {0x7c3b, "smj"},
636
    {0x703b, "smn"},
637
    {0x743b, "sms"},
638
    {0x103b, "smj_NO"},
639
    {0x143b, "smj_SE"},
640
    {0x243b, "smn_FI"},
641
    {0x203b, "sms_FI"},
642
};
643
644
ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
645
ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
646
ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
647
648
ILCID_POSIX_SUBTABLE(so) { /* TODO: Verify the country */
649
    {0x77,   "so"},
650
    {0x0477, "so_ET"},
651
    {0x0477, "so_SO"}
652
};
653
654
ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
655
ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
656
657
ILCID_POSIX_SUBTABLE(sv) {
658
    {0x1d,   "sv"},
659
    {0x081d, "sv_FI"},
660
    {0x041d, "sv_SE"}
661
};
662
663
ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
664
ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
665
666
ILCID_POSIX_SUBTABLE(ta) {
667
    {0x49,   "ta"},
668
    {0x0449, "ta_IN"},
669
    {0x0849, "ta_LK"}
670
};
671
672
ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
673
674
/* Cyrillic based by default */
675
ILCID_POSIX_SUBTABLE(tg) {
676
    {0x28,   "tg"},
677
    {0x7c28, "tg_Cyrl"},
678
    {0x0428, "tg_Cyrl_TJ"}
679
};
680
681
ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
682
683
ILCID_POSIX_SUBTABLE(ti) {
684
    {0x73,   "ti"},
685
    {0x0873, "ti_ER"},
686
    {0x0473, "ti_ET"}
687
};
688
689
ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
690
691
ILCID_POSIX_SUBTABLE(tn) {
692
    {0x32,   "tn"},
693
    {0x0832, "tn_BW"},
694
    {0x0432, "tn_ZA"}
695
};
696
697
ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
698
ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
699
ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
700
701
/* Group 0x5f. The last two rows share the LCID 0x045f. */
ILCID_POSIX_SUBTABLE(tzm) {
702
    {0x5f,   "tzm"},
703
    {0x7c5f, "tzm_Latn"},
704
    {0x085f, "tzm_Latn_DZ"},
705
    {0x105f, "tzm_Tfng_MA"},
706
    {0x045f, "tzm_Arab_MA"},
707
    {0x045f, "tmz"}     /* NOTE(review): spelled "tmz", not "tzm"; confirm whether this is a deliberate alias or a transposition */
708
};
709
710
ILCID_POSIX_SUBTABLE(ug) {
711
    {0x80,   "ug"},
712
    {0x0480, "ug_CN"},
713
    {0x0480, "ug_Arab_CN"}
714
};
715
716
ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
717
718
ILCID_POSIX_SUBTABLE(ur) {
719
    {0x20,   "ur"},
720
    {0x0820, "ur_IN"},
721
    {0x0420, "ur_PK"}
722
};
723
724
ILCID_POSIX_SUBTABLE(uz) {
725
    {0x43,   "uz"},
726
    {0x0843, "uz_Cyrl_UZ"},  /* Cyrillic based */
727
    {0x7843, "uz_Cyrl"},  /* Cyrillic based */
728
    {0x0843, "uz_UZ"},  /* Cyrillic based */
729
    {0x0443, "uz_Latn_UZ"}, /* Latin based */
730
    {0x7c43, "uz_Latn"} /* Latin based */
731
};
732
733
ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
734
    {0x33,   "ve"},
735
    {0x0433, "ve_ZA"},
736
    {0x0433, "ven_ZA"}
737
};
738
739
ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
740
ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
741
ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
742
ILCID_POSIX_ELEMENT_ARRAY(0x043d, yi, yi)   /* no territory given: both generated rows stringize to "yi" */
743
ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
744
745
// Windows & ICU tend to use different names for some of these
746
// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
747
ILCID_POSIX_SUBTABLE(zh) {
748
    {0x0004, "zh_Hans"},
749
    {0x7804, "zh"},
750
    {0x0804, "zh_CN"},
751
    {0x0804, "zh_Hans_CN"},
752
    {0x0c04, "zh_Hant_HK"},
753
    {0x0c04, "zh_HK"},
754
    {0x1404, "zh_Hant_MO"},
755
    {0x1404, "zh_MO"},
756
    {0x1004, "zh_Hans_SG"},
757
    {0x1004, "zh_SG"},
758
    {0x0404, "zh_Hant_TW"},
759
    {0x7c04, "zh_Hant"},
760
    {0x0404, "zh_TW"},
761
    {0x30404,"zh_Hant_TW"},     /* Bopomofo order */
762
    {0x30404,"zh_TW"},          /* Bopomofo order */
763
    {0x20004,"zh@collation=stroke"},
764
    {0x20404,"zh_Hant@collation=stroke"},
765
    {0x20404,"zh_Hant_TW@collation=stroke"},
766
    {0x20404,"zh_TW@collation=stroke"},
767
    {0x20804,"zh_Hans@collation=stroke"},
768
    {0x20804,"zh_Hans_CN@collation=stroke"},
769
    {0x20804,"zh_CN@collation=stroke"}
770
    // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
771
};
772
773
ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
774
775
/* This must be static and grouped by LCID. */
776
static const ILcidPosixMap gPosixIDmap[] = {
777
    ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
778
    ILCID_POSIX_MAP(am),    /*  am  Amharic                   0x5e */
779
    ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
780
    ILCID_POSIX_MAP(arn),   /*  arn Araucanian/Mapudungun     0x7a */
781
    ILCID_POSIX_MAP(as),    /*  as  Assamese                  0x4d */
782
    ILCID_POSIX_MAP(az),    /*  az  Azerbaijani               0x2c */
783
    ILCID_POSIX_MAP(ba),    /*  ba  Bashkir                   0x6d */
784
    ILCID_POSIX_MAP(be),    /*  be  Belarusian                0x23 */
785
/*    ILCID_POSIX_MAP(ber),     ber Berber/Tamazight          0x5f */
786
    ILCID_POSIX_MAP(bg),    /*  bg  Bulgarian                 0x02 */
787
    ILCID_POSIX_MAP(bin),   /*  bin Edo                       0x66 */
788
    ILCID_POSIX_MAP(bn),    /*  bn  Bengali; Bangla           0x45 */
789
    ILCID_POSIX_MAP(bo),    /*  bo  Tibetan                   0x51 */
790
    ILCID_POSIX_MAP(br),    /*  br  Breton                    0x7e */
791
    ILCID_POSIX_MAP(ca),    /*  ca  Catalan                   0x03 */
792
    ILCID_POSIX_MAP(chr),   /*  chr Cherokee                  0x5c */
793
    ILCID_POSIX_MAP(ckb),   /*  ckb Sorani (Central Kurdish)  0x92 */
794
    ILCID_POSIX_MAP(co),    /*  co  Corsican                  0x83 */
795
    ILCID_POSIX_MAP(cs),    /*  cs  Czech                     0x05 */
796
    ILCID_POSIX_MAP(cy),    /*  cy  Welsh                     0x52 */
797
    ILCID_POSIX_MAP(da),    /*  da  Danish                    0x06 */
798
    ILCID_POSIX_MAP(de),    /*  de  German                    0x07 */
799
    ILCID_POSIX_MAP(dv),    /*  dv  Divehi                    0x65 */
800
    ILCID_POSIX_MAP(el),    /*  el  Greek                     0x08 */
801
    ILCID_POSIX_MAP(en),    /*  en  English                   0x09 */
802
    ILCID_POSIX_MAP(en_US_POSIX), /*    invariant             0x7f */
803
    ILCID_POSIX_MAP(es),    /*  es  Spanish                   0x0a */
804
    ILCID_POSIX_MAP(et),    /*  et  Estonian                  0x25 */
805
    ILCID_POSIX_MAP(eu),    /*  eu  Basque                    0x2d */
806
    ILCID_POSIX_MAP(fa),    /*  fa  Persian/Farsi             0x29 */
807
    ILCID_POSIX_MAP(fa_AF), /*  fa  Persian/Dari              0x8c */
808
    ILCID_POSIX_MAP(ff),    /*  ff  Fula                      0x67 */
809
    ILCID_POSIX_MAP(fi),    /*  fi  Finnish                   0x0b */
810
    ILCID_POSIX_MAP(fil),   /*  fil Filipino                  0x64 */
811
    ILCID_POSIX_MAP(fo),    /*  fo  Faroese                   0x38 */
812
    ILCID_POSIX_MAP(fr),    /*  fr  French                    0x0c */
813
    ILCID_POSIX_MAP(fuv),   /*  fuv Fulfulde - Nigeria        0x67 */
814
    ILCID_POSIX_MAP(fy),    /*  fy  Frisian                   0x62 */
815
    ILCID_POSIX_MAP(ga),    /*  *   Gaelic (Ireland,Scotland) 0x3c */
816
    ILCID_POSIX_MAP(gd),    /*  gd  Gaelic (United Kingdom)   0x91 */
817
    ILCID_POSIX_MAP(gl),    /*  gl  Galician                  0x56 */
818
    ILCID_POSIX_MAP(gn),    /*  gn  Guarani                   0x74 */
819
    ILCID_POSIX_MAP(gsw),   /*  gsw Alemanic/Alsatian/Swiss German 0x84 */
820
    ILCID_POSIX_MAP(gu),    /*  gu  Gujarati                  0x47 */
821
    ILCID_POSIX_MAP(ha),    /*  ha  Hausa                     0x68 */
822
    ILCID_POSIX_MAP(haw),   /*  haw Hawaiian                  0x75 */
823
    ILCID_POSIX_MAP(he),    /*  he  Hebrew (formerly iw)      0x0d */
824
    ILCID_POSIX_MAP(hi),    /*  hi  Hindi                     0x39 */
825
    ILCID_POSIX_MAP(hr),    /*  *   Croatian and others       0x1a */
826
    ILCID_POSIX_MAP(hsb),   /*  hsb Upper Sorbian             0x2e */
827
    ILCID_POSIX_MAP(hu),    /*  hu  Hungarian                 0x0e */
828
    ILCID_POSIX_MAP(hy),    /*  hy  Armenian                  0x2b */
829
    ILCID_POSIX_MAP(ibb),   /*  ibb Ibibio - Nigeria          0x69 */
830
    ILCID_POSIX_MAP(id),    /*  id  Indonesian (formerly in)  0x21 */
831
    ILCID_POSIX_MAP(ig),    /*  ig  Igbo                      0x70 */
832
    ILCID_POSIX_MAP(ii),    /*  ii  Sichuan Yi                0x78 */
833
    ILCID_POSIX_MAP(is),    /*  is  Icelandic                 0x0f */
834
    ILCID_POSIX_MAP(it),    /*  it  Italian                   0x10 */
835
    ILCID_POSIX_MAP(iu),    /*  iu  Inuktitut                 0x5d */
836
    ILCID_POSIX_MAP(iw),    /*  iw  Hebrew                    0x0d */
837
    ILCID_POSIX_MAP(ja),    /*  ja  Japanese                  0x11 */
838
    ILCID_POSIX_MAP(ka),    /*  ka  Georgian                  0x37 */
839
    ILCID_POSIX_MAP(kk),    /*  kk  Kazakh                    0x3f */
840
    ILCID_POSIX_MAP(kl),    /*  kl  Kalaallisut               0x6f */
841
    ILCID_POSIX_MAP(km),    /*  km  Khmer                     0x53 */
842
    ILCID_POSIX_MAP(kn),    /*  kn  Kannada                   0x4b */
843
    ILCID_POSIX_MAP(ko),    /*  ko  Korean                    0x12 */
844
    ILCID_POSIX_MAP(kok),   /*  kok Konkani                   0x57 */
845
    ILCID_POSIX_MAP(kr),    /*  kr  Kanuri                    0x71 */
846
    ILCID_POSIX_MAP(ks),    /*  ks  Kashmiri                  0x60 */
847
    ILCID_POSIX_MAP(ky),    /*  ky  Kyrgyz                    0x40 */
848
    ILCID_POSIX_MAP(lb),    /*  lb  Luxembourgish             0x6e */
849
    ILCID_POSIX_MAP(la),    /*  la  Latin                     0x76 */
850
    ILCID_POSIX_MAP(lo),    /*  lo  Lao                       0x54 */
851
    ILCID_POSIX_MAP(lt),    /*  lt  Lithuanian                0x27 */
852
    ILCID_POSIX_MAP(lv),    /*  lv  Latvian, Lettish          0x26 */
853
    ILCID_POSIX_MAP(mi),    /*  mi  Maori                     0x81 */
854
    ILCID_POSIX_MAP(mk),    /*  mk  Macedonian                0x2f */
855
    ILCID_POSIX_MAP(ml),    /*  ml  Malayalam                 0x4c */
856
    ILCID_POSIX_MAP(mn),    /*  mn  Mongolian                 0x50 */
857
    ILCID_POSIX_MAP(mni),   /*  mni Manipuri                  0x58 */
858
    ILCID_POSIX_MAP(moh),   /*  moh Mohawk                    0x7c */
859
    ILCID_POSIX_MAP(mr),    /*  mr  Marathi                   0x4e */
860
    ILCID_POSIX_MAP(ms),    /*  ms  Malay                     0x3e */
861
    ILCID_POSIX_MAP(mt),    /*  mt  Maltese                   0x3a */
862
    ILCID_POSIX_MAP(my),    /*  my  Burmese                   0x55 */
863
/*    ILCID_POSIX_MAP(nb),    //  no  Norwegian                 0x14 */
864
    ILCID_POSIX_MAP(ne),    /*  ne  Nepali                    0x61 */
865
    ILCID_POSIX_MAP(nl),    /*  nl  Dutch                     0x13 */
866
/*    ILCID_POSIX_MAP(nn),    //  no  Norwegian                 0x14 */
867
    ILCID_POSIX_MAP(no),    /*  *   Norwegian                 0x14 */
868
    ILCID_POSIX_MAP(nso),   /*  nso Sotho, Northern (Sepedi dialect) 0x6c */
869
    ILCID_POSIX_MAP(oc),    /*  oc  Occitan                   0x82 */
870
    ILCID_POSIX_MAP(om),    /*  om  Oromo                     0x72 */
871
    ILCID_POSIX_MAP(or_IN), /*  or  Oriya                     0x48 */
872
    ILCID_POSIX_MAP(pa),    /*  pa  Punjabi                   0x46 */
873
    ILCID_POSIX_MAP(pap),   /*  pap Papiamentu                0x79 */
874
    ILCID_POSIX_MAP(pl),    /*  pl  Polish                    0x15 */
875
    ILCID_POSIX_MAP(ps),    /*  ps  Pashto                    0x63 */
876
    ILCID_POSIX_MAP(pt),    /*  pt  Portuguese                0x16 */
877
    ILCID_POSIX_MAP(qu),    /*  qu  Quechua                   0x6B */
878
    ILCID_POSIX_MAP(quc),   /*  quc K'iche                    0x93 */
879
    ILCID_POSIX_MAP(qut),   /*  qut K'iche                    0x86 */
880
    ILCID_POSIX_MAP(rm),    /*  rm  Raeto-Romance/Romansh     0x17 */
881
    ILCID_POSIX_MAP(ro),    /*  ro  Romanian                  0x18 */
882
    ILCID_POSIX_MAP(root),  /*  root                          0x00 */
883
    ILCID_POSIX_MAP(ru),    /*  ru  Russian                   0x19 */
884
    ILCID_POSIX_MAP(rw),    /*  rw  Kinyarwanda               0x87 */
885
    ILCID_POSIX_MAP(sa),    /*  sa  Sanskrit                  0x4f */
886
    ILCID_POSIX_MAP(sah),   /*  sah Yakut                     0x85 */
887
    ILCID_POSIX_MAP(sd),    /*  sd  Sindhi                    0x59 */
888
    ILCID_POSIX_MAP(se),    /*  se  Sami                      0x3b */
889
/*    ILCID_POSIX_MAP(sh),    //  sh  Serbo-Croatian            0x1a */
890
    ILCID_POSIX_MAP(si),    /*  si  Sinhalese                 0x5b */
891
    ILCID_POSIX_MAP(sk),    /*  sk  Slovak                    0x1b */
892
    ILCID_POSIX_MAP(sl),    /*  sl  Slovenian                 0x24 */
893
    ILCID_POSIX_MAP(so),    /*  so  Somali                    0x77 */
894
    ILCID_POSIX_MAP(sq),    /*  sq  Albanian                  0x1c */
895
/*    ILCID_POSIX_MAP(sr),    //  sr  Serbian                   0x1a */
896
    ILCID_POSIX_MAP(st),    /*  st  Sutu                      0x30 */
897
    ILCID_POSIX_MAP(sv),    /*  sv  Swedish                   0x1d */
898
    ILCID_POSIX_MAP(sw),    /*  sw  Swahili                   0x41 */
899
    ILCID_POSIX_MAP(syr),   /*  syr Syriac                    0x5A */
900
    ILCID_POSIX_MAP(ta),    /*  ta  Tamil                     0x49 */
901
    ILCID_POSIX_MAP(te),    /*  te  Telugu                    0x4a */
902
    ILCID_POSIX_MAP(tg),    /*  tg  Tajik                     0x28 */
903
    ILCID_POSIX_MAP(th),    /*  th  Thai                      0x1e */
904
    ILCID_POSIX_MAP(ti),    /*  ti  Tigrigna                  0x73 */
905
    ILCID_POSIX_MAP(tk),    /*  tk  Turkmen                   0x42 */
906
    ILCID_POSIX_MAP(tn),    /*  tn  Tswana                    0x32 */
907
    ILCID_POSIX_MAP(tr),    /*  tr  Turkish                   0x1f */
908
    ILCID_POSIX_MAP(ts),    /*  ts  Tsonga                    0x31 */
909
    ILCID_POSIX_MAP(tt),    /*  tt  Tatar                     0x44 */
910
    ILCID_POSIX_MAP(tzm),   /*  tzm Tamazight                 0x5f */
911
    ILCID_POSIX_MAP(ug),    /*  ug  Uighur                    0x80 */
912
    ILCID_POSIX_MAP(uk),    /*  uk  Ukrainian                 0x22 */
913
    ILCID_POSIX_MAP(ur),    /*  ur  Urdu                      0x20 */
914
    ILCID_POSIX_MAP(uz),    /*  uz  Uzbek                     0x43 */
915
    ILCID_POSIX_MAP(ve),    /*  ve  Venda                     0x33 */
916
    ILCID_POSIX_MAP(vi),    /*  vi  Vietnamese                0x2a */
917
    ILCID_POSIX_MAP(wo),    /*  wo  Wolof                     0x88 */
918
    ILCID_POSIX_MAP(xh),    /*  xh  Xhosa                     0x34 */
919
    ILCID_POSIX_MAP(yi),    /*  yi  Yiddish                   0x3d */
920
    ILCID_POSIX_MAP(yo),    /*  yo  Yoruba                    0x6a */
921
    ILCID_POSIX_MAP(zh),    /*  zh  Chinese                   0x04 */
922
    ILCID_POSIX_MAP(zu),    /*  zu  Zulu                      0x35 */
923
};
924
925
/* Number of language entries in gPosixIDmap; used below as the bound for
   the table scans and as the initial upper limit of the binary search. */
static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
926
927
/**
 * Measures the common prefix of two NUL-terminated IDs.
 *
 * Despite the name this is not an ordering comparison: the result is a
 * length, never a negative/zero/positive ordering value.
 *
 * @param id1 first ID string.
 * @param id2 second ID string.
 * @return the number of leading characters that are identical in both
 *         strings, not counting the terminating NUL.
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched = 0;

    /* Advance while id1 has not ended and both strings still agree. */
    for (; id1[matched] != 0 && id1[matched] == id2[matched]; ++matched) {
    }
    return matched;
}
943
944
/**
945
 * Searches for a Windows LCID
946
 *
947
 * @param posixid the Posix style locale id.
948
 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
949
 *               no equivalent Windows LCID.
950
 * @return the LCID
951
 */
952
static uint32_t
953
getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
954
0
{
955
0
    int32_t bestIdx = 0;
956
0
    int32_t bestIdxDiff = 0;
957
0
    int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
958
0
    uint32_t idx;
959
960
0
    for (idx = 0; idx < this_0->numRegions; idx++ ) {
961
0
        int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
962
0
        if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
963
0
            if (posixIDlen == sameChars) {
964
                /* Exact match */
965
0
                return this_0->regionMaps[idx].hostID;
966
0
            }
967
0
            bestIdxDiff = sameChars;
968
0
            bestIdx = idx;
969
0
        }
970
0
    }
971
    /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
972
    /* We also have to make sure that sid and si and similar string subsets don't match. */
973
0
    if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
974
0
        && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
975
0
    {
976
0
        *status = U_USING_FALLBACK_WARNING;
977
0
        return this_0->regionMaps[bestIdx].hostID;
978
0
    }
979
980
    /*no match found */
981
0
    *status = U_ILLEGAL_ARGUMENT_ERROR;
982
0
    return this_0->regionMaps->hostID;
983
0
}
984
985
static const char*
986
getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
987
0
{
988
0
    uint32_t i;
989
0
    for (i = 0; i <= this_0->numRegions; i++)
990
0
    {
991
0
        if (this_0->regionMaps[i].hostID == hostID)
992
0
        {
993
0
            return this_0->regionMaps[i].posixID;
994
0
        }
995
0
    }
996
997
    /* If you get here, then no matching region was found,
998
       so return the language id with the wild card region. */
999
0
    return this_0->regionMaps[0].posixID;
1000
0
}
1001
1002
/*
1003
//////////////////////////////////////
1004
//
1005
// LCID --> POSIX
1006
//
1007
/////////////////////////////////////
1008
*/
1009
#ifdef USE_WINDOWS_LCID_MAPPING_API
/*
 * Rewrites, in place, the leading language subtag of a locale name when
 * Windows and ICU spell it differently:
 * quz -> qu
 * prs -> fa
 *
 * buffer must be a writable, NUL-terminated char array; len is the length
 * the caller has for it, and nothing is changed when len < 3.
 *
 * The remainder of the string is shifted left by one character with
 * uprv_memmove(): source and destination overlap inside the same buffer,
 * which strcat() does not permit. The shifted range includes the
 * terminating NUL, so the result stays terminated.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)

#endif
1027
/**
 * Converts a Windows LCID into a POSIX/ICU style locale ID.
 *
 * When USE_WINDOWS_LCID_MAPPING_API is defined the name is first requested
 * from Windows; the hardcoded table is consulted when Windows gives no
 * usable name, when the Windows name carries a sort variant ("_..."), or for
 * primary language 0x92. Otherwise only the hardcoded table is used.
 *
 * @param hostid          the Windows LCID to convert.
 * @param posixID         destination buffer; may be NULL only when
 *                        posixIDCapacity is 0.
 * @param posixIDCapacity size of posixID in chars; must not be negative.
 * @param status          set to U_ILLEGAL_ARGUMENT_ERROR when the arguments
 *                        are invalid or no mapping exists, to
 *                        U_STRING_NOT_TERMINATED_WARNING when the result
 *                        exactly fills the buffer, and to
 *                        U_BUFFER_OVERFLOW_ERROR when it does not fit. A
 *                        pre-existing U_STRING_NOT_TERMINATED_WARNING is
 *                        cleared when the result fits with its terminator.
 * @return the full length of the locale ID (even when truncated), or -1 on
 *         U_ILLEGAL_ARGUMENT_ERROR.
 */
U_CAPI int32_t
uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
{
    uint16_t langID;
    uint32_t localeIndex;
    UBool bLookup = TRUE;
    const char *pPosixID = NULL;

    /* A negative capacity would otherwise reach uprv_memcpy() as a huge
       unsigned size, and a NULL buffer cannot receive any characters. */
    if (posixIDCapacity < 0 || (posixID == NULL && posixIDCapacity > 0)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }

#ifdef USE_WINDOWS_LCID_MAPPING_API
    // Declared at function scope on purpose: pPosixID may point into this
    // buffer and is still read after the Windows-specific block has ended.
    char locName[LOCALE_NAME_MAX_LENGTH] = {};            // ICU name can't be longer than Windows name

    // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
    // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
    // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
    // use the Windows API to resolve locale ID for this specific case.
    if ((hostid & 0x3FF) != 0x92) {
        int32_t tmpLen = 0;
        UChar windowsLocaleName[LOCALE_NAME_MAX_LENGTH];  // ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH

        // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
        tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
        if (tmpLen > 1) {
            int32_t i = 0;
            // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
            bLookup = FALSE;
            for (i = 0; i < UPRV_LENGTHOF(locName); i++)
            {
                locName[i] = (char)(windowsLocaleName[i]);

                // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
                // In such cases, we need special mapping data found in the hardcoded table
                // in this source file.
                if (windowsLocaleName[i] == L'_')
                {
                    // Keep the base locale, without variant
                    // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
                    locName[i] = '\0';
                    tmpLen = i;
                    bLookup = TRUE;
                    break;
                }
                else if (windowsLocaleName[i] == L'-')
                {
                    // Windows names use -, ICU uses _
                    locName[i] = '_';
                }
                else if (windowsLocaleName[i] == L'\0')
                {
                    // No point in doing more work than necessary
                    break;
                }
            }
            // TODO: Need to understand this better, why isn't it an alias?
            FIX_LANGUAGE_ID_TAG(locName, tmpLen);
            pPosixID = locName;
        }
    }
#endif // USE_WINDOWS_LCID_MAPPING_API

    if (bLookup) {
        const char *pCandidate = NULL;
        langID = LANGUAGE_LCID(hostid);

        /* Find the language entry whose first region carries this language
           ID, then resolve the full LCID within that entry. */
        for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
            if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
                pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
                break;
            }
        }

        /* On Windows, when locale name has a variant, we still look up the hardcoded table.
           If a match in the hardcoded table is longer than the Windows locale name without
           variant, we use the one as the result */
        if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
            pPosixID = pCandidate;
        }
    }

    if (pPosixID) {
        int32_t resLen = (int32_t)uprv_strlen(pPosixID);
        int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;

        /* Nothing to copy when the caller is only asking for the length
           (capacity 0, possibly with a NULL buffer). */
        if (copyLen > 0) {
            uprv_memcpy(posixID, pPosixID, copyLen);
        }
        if (resLen < posixIDCapacity) {
            posixID[resLen] = 0;
            if (*status == U_STRING_NOT_TERMINATED_WARNING) {
                *status = U_ZERO_ERROR;
            }
        } else if (resLen == posixIDCapacity) {
            *status = U_STRING_NOT_TERMINATED_WARNING;
        } else {
            *status = U_BUFFER_OVERFLOW_ERROR;
        }
        return resLen;
    }

    /* no match found */
    *status = U_ILLEGAL_ARGUMENT_ERROR;
    return -1;
}
1125
1126
/*
1127
//////////////////////////////////////
1128
//
1129
// POSIX --> LCID
1130
// This should only be called from uloc_getLCID.
1131
// The locale ID must be in canonical form.
1132
//
1133
/////////////////////////////////////
1134
*/
1135
/**
 * Asks the native platform to convert a locale ID into an LCID.
 *
 * Only does real work when USE_WINDOWS_LCID_MAPPING_API is defined; in every
 * other configuration it returns 0.
 *
 * @param localeID the locale ID to convert.
 * @return the LCID reported by Windows, or 0 when the platform has no
 *         answer, the ID carries a collation keyword, or native conversion
 *         is not available. A result of 0 is the cue for the caller to use
 *         the table lookup instead.
 */
U_CAPI uint32_t
uprv_convertToLCIDPlatform(const char* localeID)
{
#ifdef USE_WINDOWS_LCID_MAPPING_API
    UErrorCode myStatus = U_ZERO_ERROR;
    DWORD nameLCIDFlags = 0;

    // Windows may already know the name; ICU special cases are caught by the
    // caller's table lookup when this function returns 0.
#if LOCALE_ALLOW_NEUTRAL_NAMES
    nameLCIDFlags = LOCALE_ALLOW_NEUTRAL_NAMES;
#endif /* LOCALE_ALLOW_NEUTRAL_NAMES */

    char collVal[ULOC_KEYWORDS_CAPACITY] = {};
    char baseName[ULOC_FULLNAME_CAPACITY] = {};
    const char *mylocaleID = localeID;

    // Keywords need special treatment.
    if (uprv_strchr(localeID, '@') != NULL)
    {
        int32_t len = uloc_getKeywordValue(localeID, "collation", collVal, UPRV_LENGTHOF(collVal) - 1, &myStatus);
        if (U_SUCCESS(myStatus) && len > 0)
        {
            // A collation keyword is present: return 0 so that the LCID lookup table will be used.
            return 0;
        }

        // Keywords other than collation are ignored: continue with the base name only.
        len = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, &myStatus);
        if (U_SUCCESS(myStatus) && len > 0)
        {
            baseName[len] = 0;
            mylocaleID = baseName;
        }
    }

    // Changes the ID from de_DE@collation=phonebook to de-DE-u-co-phonebk form.
    char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
    uloc_toLanguageTag(mylocaleID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, &myStatus);
    if (U_FAILURE(myStatus))
    {
        return 0;
    }

    // Windows wants UTF-16, not 8-bit: widen character by character.
    wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
    int32_t i = 0;
    while (i < UPRV_LENGTHOF(bcp47Tag) && asciiBCP47Tag[i] != '\0')
    {
        bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);
        ++i;
    }

    // Give up when there is no room left for the terminator.
    if (i >= (UPRV_LENGTHOF(bcp47Tag) - 1))
    {
        return 0;
    }
    bcp47Tag[i] = L'\0';

    // Accept what Windows found unless it is completely ambiguous.
    // LOCALE_USER_DEFAULT and transients are OK because they will round trip
    // for this process.
    LCID lcid = LocaleNameToLCID(bcp47Tag, nameLCIDFlags);
    if (lcid > 0 && lcid != LOCALE_CUSTOM_UNSPECIFIED)
    {
        return lcid;
    }
#endif /* USE_WINDOWS_LCID_MAPPING_API */

    // Not found, or not implemented on platforms without native name->lcid conversion
    return 0;
}
1221
1222
/**
 * Converts a POSIX locale ID into a Windows LCID using the hardcoded table.
 *
 * This is the table lookup used when native platform name->lcid conversion
 * isn't available, or for locales that don't follow the patterns the
 * platform expects.
 *
 * @param langID  the language part of the locale; used as the key for the
 *                binary search over gPosixIDmap.
 * @param posixID the full POSIX locale ID to resolve.
 * @param status  on a binary-search hit, whatever getHostID() reports; set
 *                to U_USING_FALLBACK_WARNING when only a fallback entry was
 *                found by the linear scan; set to U_ILLEGAL_ARGUMENT_ERROR
 *                when nothing matched. Left untouched when an argument is
 *                NULL or shorter than two characters.
 * @return the LCID, or 0 when the arguments are incomplete or no entry
 *         matched.
 */
U_CAPI uint32_t
uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
{
    const uint32_t noFallback = (uint32_t)-1;
    uint32_t lower = 0;
    uint32_t upper = gLocaleCount;
    uint32_t prevMid = 0;
    uint32_t fallbackValue = noFallback;
    uint32_t idx;

    /* Check for incomplete id. */
    if (!langID || !posixID || uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
        return 0;
    }

    /* Binary search for the map entry; this handles the normal cases. */
    while (lower < upper) {
        const uint32_t mid = (lower + upper) / 2;
        int32_t order;

        /* The probe position stopped moving: the key is not where a sorted
           table would have it. */
        if (mid == prevMid) {
            break;
        }

        order = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);
        if (order == 0) {
            /* we found it */
            return getHostID(&gPosixIDmap[mid], posixID, status);
        }
        if (order < 0) {
            upper = mid;
        } else {
            lower = mid;
        }
        prevMid = mid;
    }

    /*
     * Sometimes we can't do a binary search on posixID because some LCIDs
     * go to different locales.  We hit one of those special cases, so try
     * every entry in turn.
     */
    for (idx = 0; idx < gLocaleCount; idx++) {
        UErrorCode entryStatus = U_ZERO_ERROR;
        const uint32_t lcid = getHostID(&gPosixIDmap[idx], posixID, &entryStatus);

        if (entryStatus == U_ZERO_ERROR) {
            return lcid;
        }
        if (entryStatus == U_USING_FALLBACK_WARNING) {
            /* Remember it, but keep looking for an exact match. */
            fallbackValue = lcid;
        }
    }

    if (fallbackValue != noFallback) {
        *status = U_USING_FALLBACK_WARNING;
        return fallbackValue;
    }

    /* no match found */
    *status = U_ILLEGAL_ARGUMENT_ERROR;
    return 0;   /* return international (root) */
}