Coverage Report

Created: 2025-06-24 06:43

/src/icu/source/common/locmap.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
 **********************************************************************
5
 *   Copyright (C) 1996-2016, International Business Machines
6
 *   Corporation and others.  All Rights Reserved.
7
 **********************************************************************
8
 *
9
 * Provides functionality for mapping between
10
 * LCID and Posix IDs or ICU locale to codepage
11
 *
12
 * Note: All classes and code in this file are
13
 *       intended for internal use only.
14
 *
15
 * Methods of interest:
16
 *   unsigned long convertToLCID(const char*);
17
 *   const char* convertToPosix(unsigned long);
18
 *
19
 * Kathleen Wilson, 4/30/96
20
 *
21
 *  Date        Name        Description
22
 *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added
23
 *                          setId() method and safety check against 
24
 *                          MAX_ID_LENGTH.
25
 * 04/23/99     stephen     Added C wrapper for convertToPosix.
26
 * 09/18/00     george      Removed the memory leaks.
27
 * 08/23/01     george      Convert to C
28
 */
29
30
#include "locmap.h"
31
#include "bytesinkutil.h"
32
#include "charstr.h"
33
#include "cstring.h"
34
#include "cmemory.h"
35
#include "ulocimp.h"
36
#include "unicode/uloc.h"
37
38
#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
39
#include <windows.h>
40
#include <winnls.h> // LCIDToLocaleName and LocaleNameToLCID
41
#endif
42
43
/*
44
 * Note:
45
 * The mapping from Win32 locale ID numbers to POSIX locale strings should
46
 * be the faster one.
47
 *
48
 * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
49
 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
50
 */
51
52
/*
53
////////////////////////////////////////////////
54
//
55
// Internal Classes for LCID <--> POSIX Mapping
56
//
57
/////////////////////////////////////////////////
58
*/
59
60
/**
 * One row of an LCID/POSIX mapping table: a host LCID paired with the
 * POSIX locale ID string it corresponds to.
 */
typedef struct ILcidPosixElement
{
    const uint32_t hostID;       /* LCID in host format, such as 0x044d */
    const char * const posixID;  /* POSIX ID, such as "de" or "de_CH" */
} ILcidPosixElement;
65
66
/**
 * Descriptor for one subtable of ILcidPosixElement rows: the number of
 * rows and a pointer to the first row.
 */
typedef struct ILcidPosixMap
{
    const uint32_t numRegions;                          /* number of rows in regionMaps */
    const struct ILcidPosixElement* const regionMaps;   /* first row of the subtable */
} ILcidPosixMap;
71
72
73
/*
74
/////////////////////////////////////////////////
75
//
76
// Easy macros to make the LCID <--> POSIX Mapping
77
//
78
/////////////////////////////////////////////////
79
*/
80
81
/**
 * The standard one language/one country mapping for LCID.
 * Defines a static table named locmap_ followed by languageID that holds
 * exactly two rows: the first element is the language (its LCID computed
 * with LANGUAGE_LCID(hostID)), and the second is the language with the
 * country. The expansion already ends in ';', so invocations are written
 * without a trailing semicolon.
 * @param hostID LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'
 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
static const ILcidPosixElement locmap_ ## languageID [] = { \
    {LANGUAGE_LCID(hostID), #languageID},     /* parent locale */ \
    {hostID, #posixID}, \
};
94
95
/**
 * Define a subtable by ID.
 * Expands to the declaration head of a static ILcidPosixElement array named
 * locmap_ followed by id; the caller supplies the brace-enclosed rows and
 * the terminating ';' right after the invocation.
 * @param id the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(id) \
static const ILcidPosixElement locmap_ ## id [] =
101
102
103
/**
 * Create the map for the posixID. This macro supposes that the language string
 * name is the same as the global variable name, and that the first element
 * in the ILcidPosixElement is just the language.
 * Expands to a brace initializer of two values: the element count of the
 * locmap_ table for _posixID (via UPRV_LENGTHOF) and the table itself.
 * @param _posixID the full POSIX ID for this entry.
 */
#define ILCID_POSIX_MAP(_posixID) \
    {UPRV_LENGTHOF(locmap_ ## _posixID), locmap_ ## _posixID}
111
112
/*
113
////////////////////////////////////////////
114
//
115
// Create the table of LCID to POSIX Mapping
116
// None of it should be dynamically created.
117
//
118
// Keep static locale variables inside the function so that
119
// it can be created properly during static init.
120
//
121
// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier 
122
//       (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
123
//
124
//       Microsoft is moving away from LCID in favor of locale name as of Vista.  This table needs to be
125
//       maintained for support of older Windows versions.
126
//       Update: Windows 7 (091130)
127
//
128
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
129
//       @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
130
//       called from uloc_getLCID(), keywords other than collation are already removed. If we really need
131
//       to support other keywords in this mapping data, we must update the implementation.
132
////////////////////////////////////////////
133
*/
134
135
// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as 
136
// LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
137
138
ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)   /* af  Afrikaans: rows "af" and "af_ZA" */
139
140
/* ar  Arabic: the bare language first, then its regional LCIDs.
   Note that 0x1801 is listed for both ar_MA and ar_MO. */
ILCID_POSIX_SUBTABLE(ar) {
    {0x01,   "ar"},
    {0x3801, "ar_AE"},
    {0x3c01, "ar_BH"},
    {0x1401, "ar_DZ"},
    {0x0c01, "ar_EG"},
    {0x0801, "ar_IQ"},
    {0x2c01, "ar_JO"},
    {0x3401, "ar_KW"},
    {0x3001, "ar_LB"},
    {0x1001, "ar_LY"},
    {0x1801, "ar_MA"},
    {0x1801, "ar_MO"},
    {0x2001, "ar_OM"},
    {0x4001, "ar_QA"},
    {0x0401, "ar_SA"},
    {0x2801, "ar_SY"},
    {0x1c01, "ar_TN"},
    {0x2401, "ar_YE"}
};
160
161
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)    /* as  Assamese */
ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)    /* am  Amharic */
ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)   /* arn Araucanian/Mapudungun */
164
165
/* az  Azerbaijani: Cyrillic and Latin script variants.
   0x042c is listed for both az_Latn_AZ and az_AZ. */
ILCID_POSIX_SUBTABLE(az) {
    {0x2c,   "az"},
    {0x082c, "az_Cyrl_AZ"},  /* Cyrillic based */
    {0x742c, "az_Cyrl"},  /* Cyrillic based */
    {0x042c, "az_Latn_AZ"}, /* Latin based */
    {0x782c, "az_Latn"}, /* Latin based */
    {0x042c, "az_AZ"} /* Latin based */
};
173
174
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)    /* ba  Bashkir */
ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)    /* be  Belarusian */
176
177
/*ILCID_POSIX_SUBTABLE(ber) {
178
    {0x5f,   "ber"},
179
    {0x045f, "ber_Arab_DZ"},
180
    {0x045f, "ber_Arab"},
181
    {0x085f, "ber_Latn_DZ"},
182
    {0x085f, "ber_Latn"}
183
};*/
184
185
ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)   /* bg  Bulgarian: rows "bg" and "bg_BG" */
186
187
/* bin  Edo */
ILCID_POSIX_SUBTABLE(bin) {
    {0x66, "bin"},
    {0x0466, "bin_NG"}
};
191
192
/* bn  Bengali; Bangla */
ILCID_POSIX_SUBTABLE(bn) {
    {0x45,   "bn"},
    {0x0845, "bn_BD"},
    {0x0445, "bn_IN"}
};
197
198
/* bo  Tibetan. The dz_BT row is also kept in this table, under the 0x51 language ID. */
ILCID_POSIX_SUBTABLE(bo) {
    {0x51,   "bo"},
    {0x0851, "bo_BT"},
    {0x0451, "bo_CN"},
    {0x0c51, "dz_BT"}
};
204
205
ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)   /* br  Breton: rows "br" and "br_FR" */
206
207
/* ca  Catalan, including the ca_ES_VALENCIA variant. */
ILCID_POSIX_SUBTABLE(ca) {
    {0x03,   "ca"},
    {0x0403, "ca_ES"},
    {0x0803, "ca_ES_VALENCIA"}
};
212
213
ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)   /* co  Corsican: rows "co" and "co_FR" */
214
215
/* chr  Cherokee. 0x045c is listed for both chr_Cher_US and chr_US. */
ILCID_POSIX_SUBTABLE(chr) {
    {0x05c,  "chr"},
    {0x7c5c, "chr_Cher"},
    {0x045c, "chr_Cher_US"},
    {0x045c, "chr_US"}
};
221
222
// ICU has chosen different names for these.
223
ILCID_POSIX_SUBTABLE(ckb) {
224
    {0x92,   "ckb"},
225
    {0x7c92, "ckb_Arab"},
226
    {0x0492, "ckb_Arab_IQ"}
227
};
228
229
/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
230
ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
231
232
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)    /* cy  Welsh */
ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)    /* da  Danish */
234
235
// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
236
ILCID_POSIX_SUBTABLE(de) {
237
    {0x07,   "de"},
238
    {0x0c07, "de_AT"},
239
    {0x0807, "de_CH"},
240
    {0x0407, "de_DE"},
241
    {0x1407, "de_LI"},
242
    {0x1007, "de_LU"},
243
    {0x10407,"de_DE@collation=phonebook"},  /*This is really de_DE_PHONEBOOK on Windows*/
244
    {0x10407,"de@collation=phonebook"}  /*This is really de_DE_PHONEBOOK on Windows*/
245
};
246
247
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)    /* dv  Divehi */
ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)    /* el  Greek */
249
250
// Windows uses an empty string for 'invariant'
251
ILCID_POSIX_SUBTABLE(en) {
252
    {0x09,   "en"},
253
    {0x0c09, "en_AU"},
254
    {0x2809, "en_BZ"},
255
    {0x1009, "en_CA"},
256
    {0x0809, "en_GB"},
257
    {0x3c09, "en_HK"},
258
    {0x3809, "en_ID"},
259
    {0x1809, "en_IE"},
260
    {0x4009, "en_IN"},
261
    {0x2009, "en_JM"},
262
    {0x4409, "en_MY"},
263
    {0x1409, "en_NZ"},
264
    {0x3409, "en_PH"},
265
    {0x4809, "en_SG"},
266
    {0x2C09, "en_TT"},
267
    {0x0409, "en_US"},
268
    {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
269
    {0x2409, "en_029"},
270
    {0x1c09, "en_ZA"},
271
    {0x3009, "en_ZW"},
272
    {0x2409, "en_VI"},  /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
273
    {0x0409, "en_AS"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
274
    {0x0409, "en_GU"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
275
    {0x0409, "en_MH"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
276
    {0x0409, "en_MP"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
277
    {0x0409, "en_UM"}   /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
278
};
279
280
/* Single-row table for 0x007f (the invariant locale); the same row also
   appears in locmap_en. */
ILCID_POSIX_SUBTABLE(en_US_POSIX) {
    {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
};
283
284
// Windows doesn't know POSIX or BCP47 Unicode traditional sort names
285
ILCID_POSIX_SUBTABLE(es) {
286
    {0x0a,   "es"},
287
    {0x2c0a, "es_AR"},
288
    {0x400a, "es_BO"},
289
    {0x340a, "es_CL"},
290
    {0x240a, "es_CO"},
291
    {0x140a, "es_CR"},
292
    {0x5c0a, "es_CU"},
293
    {0x1c0a, "es_DO"},
294
    {0x300a, "es_EC"},
295
    {0x0c0a, "es_ES"},      /*Modern sort.*/
296
    {0x100a, "es_GT"},
297
    {0x480a, "es_HN"},
298
    {0x080a, "es_MX"},
299
    {0x4c0a, "es_NI"},
300
    {0x180a, "es_PA"},
301
    {0x280a, "es_PE"},
302
    {0x500a, "es_PR"},
303
    {0x3c0a, "es_PY"},
304
    {0x440a, "es_SV"},
305
    {0x540a, "es_US"},
306
    {0x380a, "es_UY"},
307
    {0x200a, "es_VE"},
308
    {0x580a, "es_419"},
309
    {0x040a, "es_ES@collation=traditional"},
310
    {0x040a, "es@collation=traditional"}        // Windows will treat this as es-ES@collation=traditional
311
};
312
313
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)    /* et  Estonian */
ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)    /* eu  Basque */
315
316
/* ISO-639 doesn't distinguish between Persian and Dari.*/
317
ILCID_POSIX_SUBTABLE(fa) {
318
    {0x29,   "fa"},
319
    {0x0429, "fa_IR"},  /* Persian/Farsi (Iran) */
320
    {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
321
};
322
323
324
/* duplicate for roundtripping */
325
ILCID_POSIX_SUBTABLE(fa_AF) {
326
    {0x8c,   "fa_AF"},  /* Persian/Dari (Afghanistan) */
327
    {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
328
};
329
330
/* ff  Fula. 0x0467 (ff_NG) is also the LCID used by the fuv_NG table. */
ILCID_POSIX_SUBTABLE(ff) {
    {0x67,   "ff"},
    {0x7c67, "ff_Latn"},
    {0x0867, "ff_Latn_SN"},
    {0x0467, "ff_NG"}
};
336
337
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)    /* fi  Finnish */
ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)   /* fil Filipino */
ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)    /* fo  Faroese */
340
341
/* fr  French. 0x240c is listed for both fr_CD and fr_CG. */
ILCID_POSIX_SUBTABLE(fr) {
    {0x0c,   "fr"},
    {0x080c, "fr_BE"},
    {0x0c0c, "fr_CA"},
    {0x240c, "fr_CD"},
    {0x240c, "fr_CG"},
    {0x100c, "fr_CH"},
    {0x300c, "fr_CI"},
    {0x2c0c, "fr_CM"},
    {0x040c, "fr_FR"},
    {0x3c0c, "fr_HT"},
    {0x140c, "fr_LU"},
    {0x380c, "fr_MA"},
    {0x180c, "fr_MC"},
    {0x340c, "fr_ML"},
    {0x200c, "fr_RE"},
    {0x280c, "fr_SN"},
    {0xe40c, "fr_015"},
    {0x1c0c, "fr_029"}
};
361
362
ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)   /* fuv Fulfulde - Nigeria; 0x0467 is also listed as ff_NG in locmap_ff */
363
364
ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)   /* fy  Frisian: rows "fy" and "fy_NL" */
365
366
/* Language ID 0x3c. Besides ga_IE this table also carries a gd_GB row
   (0x043c); gd has its own table under 0x91. */
ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
    {0x3c,   "ga"},
    {0x083c, "ga_IE"},
    {0x043c, "gd_GB"}
};
371
372
/* Language ID 0x91. gd_GB is additionally listed as 0x043c in locmap_ga. */
ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
    {0x91,   "gd"},
    {0x0491, "gd_GB"}
};
376
377
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)    /* gl  Galician */
ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)    /* gu  Gujarati */
ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)    /* gn  Guarani */
ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)   /* gsw Alemanic/Alsatian/Swiss German */
381
382
/* ha  Hausa */
ILCID_POSIX_SUBTABLE(ha) {
    {0x68,   "ha"},
    {0x7c68, "ha_Latn"},
    {0x0468, "ha_Latn_NG"},
};
387
388
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)   /* haw Hawaiian */
ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)    /* he  Hebrew (formerly iw); the iw table uses the same LCID */
ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)    /* hi  Hindi */
391
392
/* This LCID is really four different locales.*/
393
ILCID_POSIX_SUBTABLE(hr) {
394
    {0x1a,   "hr"},
395
    {0x141a, "bs_Latn_BA"},  /* Bosnian, Bosnia and Herzegovina */
396
    {0x681a, "bs_Latn"},  /* Bosnian, Bosnia and Herzegovina */
397
    {0x141a, "bs_BA"},  /* Bosnian, Bosnia and Herzegovina */
398
    {0x781a, "bs"},     /* Bosnian */
399
    {0x201a, "bs_Cyrl_BA"},  /* Bosnian, Bosnia and Herzegovina */
400
    {0x641a, "bs_Cyrl"},  /* Bosnian, Bosnia and Herzegovina */
401
    {0x101a, "hr_BA"},  /* Croatian in Bosnia */
402
    {0x041a, "hr_HR"},  /* Croatian*/
403
    {0x2c1a, "sr_Latn_ME"},
404
    {0x241a, "sr_Latn_RS"},
405
    {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
406
    {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
407
    {0x701a, "sr_Latn"},    /* It's 0x1a or 0x081a, pick one to make the test program happy. */
408
    {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
409
    {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
410
    {0x301a, "sr_Cyrl_ME"},
411
    {0x281a, "sr_Cyrl_RS"},
412
    {0x6c1a, "sr_Cyrl"},    /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
413
    {0x7c1a, "sr"}          /* In CLDR sr is sr_Cyrl. */
414
};
415
416
/* hsb  Upper Sorbian; the dsb rows share language ID 0x2E and live here too. */
ILCID_POSIX_SUBTABLE(hsb) {
    {0x2E,   "hsb"},
    {0x042E, "hsb_DE"},
    {0x082E, "dsb_DE"},
    {0x7C2E, "dsb"},
};
422
423
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)    /* hu  Hungarian */
ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)    /* hy  Armenian */
425
426
/* ibb  Ibibio - Nigeria */
ILCID_POSIX_SUBTABLE(ibb) {
    {0x69, "ibb"},
    {0x0469, "ibb_NG"}
};
430
431
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)    /* id  Indonesian (formerly in) */
ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)    /* ig  Igbo */
ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)    /* ii  Sichuan Yi */
ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)    /* is  Icelandic */
435
436
/* it  Italian */
ILCID_POSIX_SUBTABLE(it) {
    {0x10,   "it"},
    {0x0810, "it_CH"},
    {0x0410, "it_IT"}
};
441
442
/* iu  Inuktitut: Cans and Latn script variants. */
ILCID_POSIX_SUBTABLE(iu) {
    {0x5d,   "iu"},
    {0x045d, "iu_Cans_CA"},
    {0x785d, "iu_Cans"},
    {0x085d, "iu_Latn_CA"},
    {0x7c5d, "iu_Latn"}
};
449
450
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL)    /*Left in for compatibility; same LCID as he_IL*/
ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)    /* ja  Japanese */
ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)    /* ka  Georgian */
ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)    /* kk  Kazakh */
ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)    /* kl  Kalaallisut */
ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)    /* km  Khmer */
ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)    /* kn  Kannada */
457
458
/* ko  Korean */
ILCID_POSIX_SUBTABLE(ko) {
    {0x12,   "ko"},
    {0x0812, "ko_KP"},
    {0x0412, "ko_KR"}
};
463
464
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)  /* kok Konkani */
ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr,  kr_NG)   /* kr  Kanuri */
466
467
/* ks  Kashmiri: Arab and Deva script rows for IN. */
ILCID_POSIX_SUBTABLE(ks) {         /* We could add PK and CN too */
    {0x60,   "ks"},
    {0x0460, "ks_Arab_IN"},
    {0x0860, "ks_Deva_IN"}
};
472
473
ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG)   /* ky  Kyrgyz: rows "ky" and "ky_KG". Kyrgyz is spoken in Kyrgyzstan */
474
475
/* la  Latin. 0x0476 is listed for both la_001 and la_IT. */
ILCID_POSIX_SUBTABLE(la) {
    {0x76,   "la"},
    {0x0476, "la_001"},
    {0x0476, "la_IT"}       /*Left in for compatibility*/
};
480
481
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)    /* lb  Luxembourgish */
ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)    /* lo  Lao */
ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)    /* lt  Lithuanian */
ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)    /* lv  Latvian, Lettish */
ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)    /* mi  Maori */
ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)    /* mk  Macedonian */
ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)    /* ml  Malayalam */
488
489
/* mn  Mongolian: Cyrl and Mong script variants.
   0x0850 is listed for both mn_Mong_CN and mn_CN. */
ILCID_POSIX_SUBTABLE(mn) {
    {0x50,   "mn"},
    {0x0450, "mn_MN"},
    {0x7c50, "mn_Mong"},
    {0x0850, "mn_Mong_CN"},
    {0x0850, "mn_CN"},
    {0x7850, "mn_Cyrl"},
    {0x0c50, "mn_Mong_MN"}
};
498
499
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)   /* mni Manipuri */
ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)   /* moh Mohawk */
ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)    /* mr  Marathi */
502
503
/* ms  Malay */
ILCID_POSIX_SUBTABLE(ms) {
    {0x3e,   "ms"},
    {0x083e, "ms_BN"},   /* Brunei Darussalam*/
    {0x043e, "ms_MY"}    /* Malaysia*/
};
508
509
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)    /* mt  Maltese */
ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)    /* my  Burmese */
511
512
/* ne  Nepali */
ILCID_POSIX_SUBTABLE(ne) {
    {0x61,   "ne"},
    {0x0861, "ne_IN"},   /* India*/
    {0x0461, "ne_NP"}    /* Nepal*/
};
517
518
/* nl  Dutch */
ILCID_POSIX_SUBTABLE(nl) {
    {0x13,   "nl"},
    {0x0813, "nl_BE"},
    {0x0413, "nl_NL"}
};
523
524
/* The "no" locale split into nb and nn.  By default in ICU, "no" is nb.*/
525
// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
526
ILCID_POSIX_SUBTABLE(no) {
527
    {0x14,   "no"},     /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */ 
528
    {0x7c14, "nb"},     /* really nb */
529
    {0x0414, "nb_NO"},  /* really nb_NO. Keep first in the 414 list. */
530
    {0x0414, "no_NO"},  /* really nb_NO */
531
    {0x0814, "nn_NO"},  /* really nn_NO. Keep first in the 814 list.  */
532
    {0x7814, "nn"},     /* It's 0x14 or 0x814, pick one to make the test program happy. */
533
    {0x0814, "no_NO_NY"}/* really nn_NO */
534
};
535
536
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA)   /* nso Sotho, Northern (Sepedi dialect). TODO: Verify the ISO-639 code */
ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)    /* oc  Occitan */
538
539
/* om  Oromo. 0x0472 is listed for both om_ET and gaz_ET. */
ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
    {0x72,   "om"},
    {0x0472, "om_ET"},
    {0x0472, "gaz_ET"}
};
544
545
/* Declared as or_IN to get around compiler errors*/
546
ILCID_POSIX_SUBTABLE(or_IN) {
547
    {0x48,   "or"},
548
    {0x0448, "or_IN"},
549
};
550
551
/* pa  Punjabi. 0x0846 is listed for both pa_Arab_PK and pa_PK. */
ILCID_POSIX_SUBTABLE(pa) {
    {0x46,   "pa"},
    {0x0446, "pa_IN"},
    {0x0846, "pa_Arab_PK"},
    {0x0846, "pa_PK"}
};
557
558
/* pap  Papiamentu. 0x0479 is listed for both pap_029 and pap_AN. */
ILCID_POSIX_SUBTABLE(pap) {
    {0x79, "pap"},
    {0x0479, "pap_029"},
    {0x0479, "pap_AN"}     /*Left in for compatibility*/
};
563
564
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)    /* pl  Polish */
ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)    /* ps  Pashto */
566
567
/* pt  Portuguese */
ILCID_POSIX_SUBTABLE(pt) {
    {0x16,   "pt"},
    {0x0416, "pt_BR"},
    {0x0816, "pt_PT"}
};
572
573
/* qu  Quechua. Each regional LCID is listed twice: once with the qu
   prefix and once with the quz prefix. */
ILCID_POSIX_SUBTABLE(qu) {
    {0x6b,   "qu"},
    {0x046b, "qu_BO"},
    {0x086b, "qu_EC"},
    {0x0C6b, "qu_PE"},
    {0x046b, "quz_BO"},
    {0x086b, "quz_EC"},
    {0x0C6b, "quz_PE"}
};
582
583
/* quc  K'iche. Contains one row (0x0486) whose language ID bits belong to
   the qut table; see the explanation inside the table. */
ILCID_POSIX_SUBTABLE(quc) {
    {0x93,   "quc"},
    {0x0493, "quc_CO"},
    /*
        "quc_Latn_GT" is an exceptional case. The language ID of "quc"
        is 0x93, but the LCID of "quc_Latn_GT" is 0x486, which should be
        under the group of "qut". "qut" is a retired ISO 639-3 language
        code for West Central Quiche, which was merged into "quc".
        It looks like Windows previously reserved "qut" for K'iche', but
        decided to use "quc" when adding a locale for K'iche' (Guatemala).

        The data structure used here assumes that the language ID bits in
        an LCID are unique per alphabetic language code. But this is not
        true for "quc_Latn_GT". If we don't have the data below, LCID lookup
        by alphabetic locale ID (POSIX) will fail. The same entry is found
        under "qut" below, which is required for reverse lookup.
    */
    {0x0486, "quc_Latn_GT"}
};
602
603
/* qut  K'iche (language ID 0x86). 0x0486 is listed for both qut_GT and
   quc_Latn_GT. */
ILCID_POSIX_SUBTABLE(qut) {
    {0x86,   "qut"},
    {0x0486, "qut_GT"},
    /*
        See the note in "quc" above.
    */
    {0x0486, "quc_Latn_GT"}
};
611
612
ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)   /* rm  Raeto-Romance/Romansh: rows "rm" and "rm_CH" */
613
614
/* ro  Romanian */
ILCID_POSIX_SUBTABLE(ro) {
    {0x18,   "ro"},
    {0x0418, "ro_RO"},
    {0x0818, "ro_MD"}
};
619
620
// TODO: This is almost certainly 'wrong'.  0 in Windows is a synonym for LOCALE_USER_DEFAULT.
621
// More likely this is a similar concept to the Windows 0x7f Invariant locale ""
622
// (Except that it's not invariant in ICU)
623
ILCID_POSIX_SUBTABLE(root) {
624
    {0x00,   "root"}
625
};
626
627
/* ru  Russian */
ILCID_POSIX_SUBTABLE(ru) {
    {0x19,   "ru"},
    {0x0419, "ru_RU"},
    {0x0819, "ru_MD"}
};
632
633
/* Two-row tables (language, then language_REGION). */
ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)    /* rw  Kinyarwanda */
ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)    /* sa  Sanskrit */
ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)   /* sah Yakut */
636
637
/* sd  Sindhi. 0x0459 and 0x0859 are each listed twice, with and without
   the script subtag. */
ILCID_POSIX_SUBTABLE(sd) {
    {0x59,   "sd"},
    {0x0459, "sd_Deva_IN"},
    {0x0459, "sd_IN"},
    {0x0859, "sd_Arab_PK"},
    {0x0859, "sd_PK"},
    {0x7c59, "sd_Arab"}
};
645
646
/* Language ID 0x3b: the se rows plus the sma, smj, smn and sms rows that
   share the same language ID bits. */
ILCID_POSIX_SUBTABLE(se) {
    {0x3b,   "se"},
    {0x0c3b, "se_FI"},
    {0x043b, "se_NO"},
    {0x083b, "se_SE"},
    {0x783b, "sma"},
    {0x183b, "sma_NO"},
    {0x1c3b, "sma_SE"},
    {0x7c3b, "smj"},
    {0x703b, "smn"},
    {0x743b, "sms"},
    {0x103b, "smj_NO"},
    {0x143b, "smj_SE"},
    {0x243b, "smn_FI"},
    {0x203b, "sms_FI"},
};
662
663
/* Two-row tables (language, then language_REGION) for si, sk and sl. */
ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
666
667
/* so: the bare language, then so_SO. */
ILCID_POSIX_SUBTABLE(so) {
    {0x77,   "so"},
    {0x0477, "so_SO"}
};
671
672
/* Two-row tables (language, then language_REGION) for sq and st. */
ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
674
675
/* sv: the bare language, then its regional LCIDs. */
ILCID_POSIX_SUBTABLE(sv) {
    {0x1d,   "sv"},
    {0x081d, "sv_FI"},
    {0x041d, "sv_SE"}
};
680
681
/* Two-row tables (language, then language_REGION) for sw and syr. */
ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
683
684
/* ta: the bare language, then its regional LCIDs. */
ILCID_POSIX_SUBTABLE(ta) {
    {0x49,   "ta"},
    {0x0449, "ta_IN"},
    {0x0849, "ta_LK"}
};
689
690
ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)   /* locmap_te: rows "te" and "te_IN" */
691
692
/* Cyrillic based by default */
693
ILCID_POSIX_SUBTABLE(tg) {
694
    {0x28,   "tg"},
695
    {0x7c28, "tg_Cyrl"},
696
    {0x0428, "tg_Cyrl_TJ"}
697
};
698
699
ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)   /* locmap_th: rows "th" and "th_TH" */
700
701
/* ti: the bare language, then its regional LCIDs. */
ILCID_POSIX_SUBTABLE(ti) {
    {0x73,   "ti"},
    {0x0873, "ti_ER"},
    {0x0473, "ti_ET"}
};
706
707
ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)   /* locmap_tk: rows "tk" and "tk_TM" */
708
709
/* tn: the bare language, then its regional LCIDs. */
ILCID_POSIX_SUBTABLE(tn) {
    {0x32,   "tn"},
    {0x0832, "tn_BW"},
    {0x0432, "tn_ZA"}
};
714
715
/* Two-row tables (language, then language_REGION) for tr, ts and tt. */
ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
718
719
/* tzm (language ID 0x5f): Latn, Tfng and Arab script rows.
   0x045f is listed for both tzm_Arab_MA and "tmz".
   NOTE(review): "tmz" looks like a transposition of "tzm" but may be a
   deliberate legacy alias - confirm before changing. */
ILCID_POSIX_SUBTABLE(tzm) {
    {0x5f,   "tzm"},
    {0x7c5f, "tzm_Latn"},
    {0x085f, "tzm_Latn_DZ"},
    {0x105f, "tzm_Tfng_MA"},
    {0x045f, "tzm_Arab_MA"},
    {0x045f, "tmz"}
};
727
728
/* ug: 0x0480 is listed for both ug_CN and ug_Arab_CN. */
ILCID_POSIX_SUBTABLE(ug) {
    {0x80,   "ug"},
    {0x0480, "ug_CN"},
    {0x0480, "ug_Arab_CN"}
};
733
734
ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)   /* locmap_uk: rows "uk" and "uk_UA" */
735
736
/* ur: the bare language, then its regional LCIDs. */
ILCID_POSIX_SUBTABLE(ur) {
    {0x20,   "ur"},
    {0x0820, "ur_IN"},
    {0x0420, "ur_PK"}
};
741
742
/* uz: Cyrillic and Latin script variants.
   0x0843 is listed for both uz_Cyrl_UZ and uz_UZ. */
ILCID_POSIX_SUBTABLE(uz) {
    {0x43,   "uz"},
    {0x0843, "uz_Cyrl_UZ"},  /* Cyrillic based */
    {0x7843, "uz_Cyrl"},  /* Cyrillic based */
    {0x0843, "uz_UZ"},  /* Cyrillic based */
    {0x0443, "uz_Latn_UZ"}, /* Latin based */
    {0x7c43, "uz_Latn"} /* Latin based */
};
750
751
/* ve: 0x0433 is listed for both ve_ZA and ven_ZA. */
ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
    {0x33,   "ve"},
    {0x0433, "ve_ZA"},
    {0x0433, "ven_ZA"}
};
756
757
/* Two-row tables (language, then language_REGION) for vi, wo and xh. */
ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
760
761
/* yi: the bare language, then yi_001. */
ILCID_POSIX_SUBTABLE(yi) {
    {0x003d, "yi"},
    {0x043d, "yi_001"}
};
765
766
ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)   /* locmap_yo: rows "yo" and "yo_NG" */
767
768
// Windows & ICU tend to different names for some of these
769
// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
770
ILCID_POSIX_SUBTABLE(zh) {
771
    {0x0004, "zh_Hans"},
772
    {0x7804, "zh"},
773
    {0x0804, "zh_CN"},
774
    {0x0804, "zh_Hans_CN"},
775
    {0x0c04, "zh_Hant_HK"},
776
    {0x0c04, "zh_HK"},
777
    {0x1404, "zh_Hant_MO"},
778
    {0x1404, "zh_MO"},
779
    {0x1004, "zh_Hans_SG"},
780
    {0x1004, "zh_SG"},
781
    {0x0404, "zh_Hant_TW"},
782
    {0x7c04, "zh_Hant"},
783
    {0x0404, "zh_TW"},
784
    {0x30404,"zh_Hant_TW"},     /* Bopomofo order */
785
    {0x30404,"zh_TW"},          /* Bopomofo order */
786
    {0x20004,"zh@collation=stroke"},
787
    {0x20404,"zh_Hant@collation=stroke"},
788
    {0x20404,"zh_Hant_TW@collation=stroke"},
789
    {0x20404,"zh_TW@collation=stroke"},
790
    {0x20804,"zh_Hans@collation=stroke"},
791
    {0x20804,"zh_Hans_CN@collation=stroke"},
792
    {0x20804,"zh_CN@collation=stroke"}
793
    // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
794
};
795
796
ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)   /* locmap_zu: rows "zu" and "zu_ZA" */
797
798
/* This must be static and grouped by LCID. */
799
static const ILcidPosixMap gPosixIDmap[] = {
800
    ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
801
    ILCID_POSIX_MAP(am),    /*  am  Amharic                   0x5e */
802
    ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
803
    ILCID_POSIX_MAP(arn),   /*  arn Araucanian/Mapudungun     0x7a */
804
    ILCID_POSIX_MAP(as),    /*  as  Assamese                  0x4d */
805
    ILCID_POSIX_MAP(az),    /*  az  Azerbaijani               0x2c */
806
    ILCID_POSIX_MAP(ba),    /*  ba  Bashkir                   0x6d */
807
    ILCID_POSIX_MAP(be),    /*  be  Belarusian                0x23 */
808
/*    ILCID_POSIX_MAP(ber),     ber Berber/Tamazight          0x5f */
809
    ILCID_POSIX_MAP(bg),    /*  bg  Bulgarian                 0x02 */
810
    ILCID_POSIX_MAP(bin),   /*  bin Edo                       0x66 */
811
    ILCID_POSIX_MAP(bn),    /*  bn  Bengali; Bangla           0x45 */
812
    ILCID_POSIX_MAP(bo),    /*  bo  Tibetan                   0x51 */
813
    ILCID_POSIX_MAP(br),    /*  br  Breton                    0x7e */
814
    ILCID_POSIX_MAP(ca),    /*  ca  Catalan                   0x03 */
815
    ILCID_POSIX_MAP(chr),   /*  chr Cherokee                  0x5c */
816
    ILCID_POSIX_MAP(ckb),   /*  ckb Sorani (Central Kurdish)  0x92 */
817
    ILCID_POSIX_MAP(co),    /*  co  Corsican                  0x83 */
818
    ILCID_POSIX_MAP(cs),    /*  cs  Czech                     0x05 */
819
    ILCID_POSIX_MAP(cy),    /*  cy  Welsh                     0x52 */
820
    ILCID_POSIX_MAP(da),    /*  da  Danish                    0x06 */
821
    ILCID_POSIX_MAP(de),    /*  de  German                    0x07 */
822
    ILCID_POSIX_MAP(dv),    /*  dv  Divehi                    0x65 */
823
    ILCID_POSIX_MAP(el),    /*  el  Greek                     0x08 */
824
    ILCID_POSIX_MAP(en),    /*  en  English                   0x09 */
825
    ILCID_POSIX_MAP(en_US_POSIX), /*    invariant             0x7f */
826
    ILCID_POSIX_MAP(es),    /*  es  Spanish                   0x0a */
827
    ILCID_POSIX_MAP(et),    /*  et  Estonian                  0x25 */
828
    ILCID_POSIX_MAP(eu),    /*  eu  Basque                    0x2d */
829
    ILCID_POSIX_MAP(fa),    /*  fa  Persian/Farsi             0x29 */
830
    ILCID_POSIX_MAP(fa_AF), /*  fa  Persian/Dari              0x8c */
831
    ILCID_POSIX_MAP(ff),    /*  ff  Fula                      0x67 */
832
    ILCID_POSIX_MAP(fi),    /*  fi  Finnish                   0x0b */
833
    ILCID_POSIX_MAP(fil),   /*  fil Filipino                  0x64 */
834
    ILCID_POSIX_MAP(fo),    /*  fo  Faroese                   0x38 */
835
    ILCID_POSIX_MAP(fr),    /*  fr  French                    0x0c */
836
    ILCID_POSIX_MAP(fuv),   /*  fuv Fulfulde - Nigeria        0x67 */
837
    ILCID_POSIX_MAP(fy),    /*  fy  Frisian                   0x62 */
838
    ILCID_POSIX_MAP(ga),    /*  *   Gaelic (Ireland,Scotland) 0x3c */
839
    ILCID_POSIX_MAP(gd),    /*  gd  Gaelic (United Kingdom)   0x91 */
840
    ILCID_POSIX_MAP(gl),    /*  gl  Galician                  0x56 */
841
    ILCID_POSIX_MAP(gn),    /*  gn  Guarani                   0x74 */
842
    ILCID_POSIX_MAP(gsw),   /*  gsw Alemanic/Alsatian/Swiss German 0x84 */
843
    ILCID_POSIX_MAP(gu),    /*  gu  Gujarati                  0x47 */
844
    ILCID_POSIX_MAP(ha),    /*  ha  Hausa                     0x68 */
845
    ILCID_POSIX_MAP(haw),   /*  haw Hawaiian                  0x75 */
846
    ILCID_POSIX_MAP(he),    /*  he  Hebrew (formerly iw)      0x0d */
847
    ILCID_POSIX_MAP(hi),    /*  hi  Hindi                     0x39 */
848
    ILCID_POSIX_MAP(hr),    /*  *   Croatian and others       0x1a */
849
    ILCID_POSIX_MAP(hsb),   /*  hsb Upper Sorbian             0x2e */
850
    ILCID_POSIX_MAP(hu),    /*  hu  Hungarian                 0x0e */
851
    ILCID_POSIX_MAP(hy),    /*  hy  Armenian                  0x2b */
852
    ILCID_POSIX_MAP(ibb),   /*  ibb Ibibio - Nigeria          0x69 */
853
    ILCID_POSIX_MAP(id),    /*  id  Indonesian (formerly in)  0x21 */
854
    ILCID_POSIX_MAP(ig),    /*  ig  Igbo                      0x70 */
855
    ILCID_POSIX_MAP(ii),    /*  ii  Sichuan Yi                0x78 */
856
    ILCID_POSIX_MAP(is),    /*  is  Icelandic                 0x0f */
857
    ILCID_POSIX_MAP(it),    /*  it  Italian                   0x10 */
858
    ILCID_POSIX_MAP(iu),    /*  iu  Inuktitut                 0x5d */
859
    ILCID_POSIX_MAP(iw),    /*  iw  Hebrew                    0x0d */
860
    ILCID_POSIX_MAP(ja),    /*  ja  Japanese                  0x11 */
861
    ILCID_POSIX_MAP(ka),    /*  ka  Georgian                  0x37 */
862
    ILCID_POSIX_MAP(kk),    /*  kk  Kazakh                    0x3f */
863
    ILCID_POSIX_MAP(kl),    /*  kl  Kalaallisut               0x6f */
864
    ILCID_POSIX_MAP(km),    /*  km  Khmer                     0x53 */
865
    ILCID_POSIX_MAP(kn),    /*  kn  Kannada                   0x4b */
866
    ILCID_POSIX_MAP(ko),    /*  ko  Korean                    0x12 */
867
    ILCID_POSIX_MAP(kok),   /*  kok Konkani                   0x57 */
868
    ILCID_POSIX_MAP(kr),    /*  kr  Kanuri                    0x71 */
869
    ILCID_POSIX_MAP(ks),    /*  ks  Kashmiri                  0x60 */
870
    ILCID_POSIX_MAP(ky),    /*  ky  Kyrgyz                    0x40 */
871
    ILCID_POSIX_MAP(lb),    /*  lb  Luxembourgish             0x6e */
872
    ILCID_POSIX_MAP(la),    /*  la  Latin                     0x76 */
873
    ILCID_POSIX_MAP(lo),    /*  lo  Lao                       0x54 */
874
    ILCID_POSIX_MAP(lt),    /*  lt  Lithuanian                0x27 */
875
    ILCID_POSIX_MAP(lv),    /*  lv  Latvian, Lettish          0x26 */
876
    ILCID_POSIX_MAP(mi),    /*  mi  Maori                     0x81 */
877
    ILCID_POSIX_MAP(mk),    /*  mk  Macedonian                0x2f */
878
    ILCID_POSIX_MAP(ml),    /*  ml  Malayalam                 0x4c */
879
    ILCID_POSIX_MAP(mn),    /*  mn  Mongolian                 0x50 */
880
    ILCID_POSIX_MAP(mni),   /*  mni Manipuri                  0x58 */
881
    ILCID_POSIX_MAP(moh),   /*  moh Mohawk                    0x7c */
882
    ILCID_POSIX_MAP(mr),    /*  mr  Marathi                   0x4e */
883
    ILCID_POSIX_MAP(ms),    /*  ms  Malay                     0x3e */
884
    ILCID_POSIX_MAP(mt),    /*  mt  Maltese                   0x3a */
885
    ILCID_POSIX_MAP(my),    /*  my  Burmese                   0x55 */
886
/*    ILCID_POSIX_MAP(nb),    //  no  Norwegian                 0x14 */
887
    ILCID_POSIX_MAP(ne),    /*  ne  Nepali                    0x61 */
888
    ILCID_POSIX_MAP(nl),    /*  nl  Dutch                     0x13 */
889
/*    ILCID_POSIX_MAP(nn),    //  no  Norwegian                 0x14 */
890
    ILCID_POSIX_MAP(no),    /*  *   Norwegian                 0x14 */
891
    ILCID_POSIX_MAP(nso),   /*  nso Sotho, Northern (Sepedi dialect) 0x6c */
892
    ILCID_POSIX_MAP(oc),    /*  oc  Occitan                   0x82 */
893
    ILCID_POSIX_MAP(om),    /*  om  Oromo                     0x72 */
894
    ILCID_POSIX_MAP(or_IN), /*  or  Oriya                     0x48 */
895
    ILCID_POSIX_MAP(pa),    /*  pa  Punjabi                   0x46 */
896
    ILCID_POSIX_MAP(pap),   /*  pap Papiamentu                0x79 */
897
    ILCID_POSIX_MAP(pl),    /*  pl  Polish                    0x15 */
898
    ILCID_POSIX_MAP(ps),    /*  ps  Pashto                    0x63 */
899
    ILCID_POSIX_MAP(pt),    /*  pt  Portuguese                0x16 */
900
    ILCID_POSIX_MAP(qu),    /*  qu  Quechua                   0x6B */
901
    ILCID_POSIX_MAP(quc),   /*  quc K'iche                    0x93 */
902
    ILCID_POSIX_MAP(qut),   /*  qut K'iche                    0x86 */
903
    ILCID_POSIX_MAP(rm),    /*  rm  Raeto-Romance/Romansh     0x17 */
904
    ILCID_POSIX_MAP(ro),    /*  ro  Romanian                  0x18 */
905
    ILCID_POSIX_MAP(root),  /*  root                          0x00 */
906
    ILCID_POSIX_MAP(ru),    /*  ru  Russian                   0x19 */
907
    ILCID_POSIX_MAP(rw),    /*  rw  Kinyarwanda               0x87 */
908
    ILCID_POSIX_MAP(sa),    /*  sa  Sanskrit                  0x4f */
909
    ILCID_POSIX_MAP(sah),   /*  sah Yakut                     0x85 */
910
    ILCID_POSIX_MAP(sd),    /*  sd  Sindhi                    0x59 */
911
    ILCID_POSIX_MAP(se),    /*  se  Sami                      0x3b */
912
/*    ILCID_POSIX_MAP(sh),    //  sh  Serbo-Croatian            0x1a */
913
    ILCID_POSIX_MAP(si),    /*  si  Sinhalese                 0x5b */
914
    ILCID_POSIX_MAP(sk),    /*  sk  Slovak                    0x1b */
915
    ILCID_POSIX_MAP(sl),    /*  sl  Slovenian                 0x24 */
916
    ILCID_POSIX_MAP(so),    /*  so  Somali                    0x77 */
917
    ILCID_POSIX_MAP(sq),    /*  sq  Albanian                  0x1c */
918
/*    ILCID_POSIX_MAP(sr),    //  sr  Serbian                   0x1a */
919
    ILCID_POSIX_MAP(st),    /*  st  Sutu                      0x30 */
920
    ILCID_POSIX_MAP(sv),    /*  sv  Swedish                   0x1d */
921
    ILCID_POSIX_MAP(sw),    /*  sw  Swahili                   0x41 */
922
    ILCID_POSIX_MAP(syr),   /*  syr Syriac                    0x5A */
923
    ILCID_POSIX_MAP(ta),    /*  ta  Tamil                     0x49 */
924
    ILCID_POSIX_MAP(te),    /*  te  Telugu                    0x4a */
925
    ILCID_POSIX_MAP(tg),    /*  tg  Tajik                     0x28 */
926
    ILCID_POSIX_MAP(th),    /*  th  Thai                      0x1e */
927
    ILCID_POSIX_MAP(ti),    /*  ti  Tigrigna                  0x73 */
928
    ILCID_POSIX_MAP(tk),    /*  tk  Turkmen                   0x42 */
929
    ILCID_POSIX_MAP(tn),    /*  tn  Tswana                    0x32 */
930
    ILCID_POSIX_MAP(tr),    /*  tr  Turkish                   0x1f */
931
    ILCID_POSIX_MAP(ts),    /*  ts  Tsonga                    0x31 */
932
    ILCID_POSIX_MAP(tt),    /*  tt  Tatar                     0x44 */
933
    ILCID_POSIX_MAP(tzm),   /*  tzm Tamazight                 0x5f */
934
    ILCID_POSIX_MAP(ug),    /*  ug  Uighur                    0x80 */
935
    ILCID_POSIX_MAP(uk),    /*  uk  Ukrainian                 0x22 */
936
    ILCID_POSIX_MAP(ur),    /*  ur  Urdu                      0x20 */
937
    ILCID_POSIX_MAP(uz),    /*  uz  Uzbek                     0x43 */
938
    ILCID_POSIX_MAP(ve),    /*  ve  Venda                     0x33 */
939
    ILCID_POSIX_MAP(vi),    /*  vi  Vietnamese                0x2a */
940
    ILCID_POSIX_MAP(wo),    /*  wo  Wolof                     0x88 */
941
    ILCID_POSIX_MAP(xh),    /*  xh  Xhosa                     0x34 */
942
    ILCID_POSIX_MAP(yi),    /*  yi  Yiddish                   0x3d */
943
    ILCID_POSIX_MAP(yo),    /*  yo  Yoruba                    0x6a */
944
    ILCID_POSIX_MAP(zh),    /*  zh  Chinese                   0x04 */
945
    ILCID_POSIX_MAP(zu),    /*  zu  Zulu                      0x35 */
946
};
947
948
/* Number of entries in gPosixIDmap; used as the upper bound for the table scans and the binary search in this file. */
static const uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
949
950
/**
 * Counts how many leading characters two locale ID strings share.
 * Internal helper for getHostID; it is not meant to be called from elsewhere.
 *
 * @param id1 the first NUL-terminated ID.
 * @param id2 the second NUL-terminated ID.
 * @return the length of the common prefix of id1 and id2.
 */
static int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched = 0;
    /* Stop at the first mismatch or at the end of id1, whichever comes first. */
    for (; id1[matched] != 0 && id1[matched] == id2[matched]; ++matched) {
    }
    return matched;
}
966
967
/**
968
 * Searches for a Windows LCID
969
 *
970
 * @param posixID the Posix style locale id.
971
 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
972
 *               no equivalent Windows LCID.
973
 * @return the LCID
974
 */
975
static uint32_t
976
getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode* status)
977
0
{
978
0
    int32_t bestIdx = 0;
979
0
    int32_t bestIdxDiff = 0;
980
0
    int32_t posixIDlen = (int32_t)uprv_strlen(posixID);
981
0
    uint32_t idx;
982
983
0
    for (idx = 0; idx < this_0->numRegions; idx++ ) {
984
0
        int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
985
0
        if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
986
0
            if (posixIDlen == sameChars) {
987
                /* Exact match */
988
0
                return this_0->regionMaps[idx].hostID;
989
0
            }
990
0
            bestIdxDiff = sameChars;
991
0
            bestIdx = idx;
992
0
        }
993
0
    }
994
    /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
995
    /* We also have to make sure that sid and si and similar string subsets don't match. */
996
0
    if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
997
0
        && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
998
0
    {
999
0
        *status = U_USING_FALLBACK_WARNING;
1000
0
        return this_0->regionMaps[bestIdx].hostID;
1001
0
    }
1002
1003
    /*no match found */
1004
0
    *status = U_ILLEGAL_ARGUMENT_ERROR;
1005
0
    return this_0->regionMaps->hostID;
1006
0
}
1007
1008
static const char*
1009
getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
1010
0
{
1011
0
    uint32_t i;
1012
0
    for (i = 0; i < this_0->numRegions; i++)
1013
0
    {
1014
0
        if (this_0->regionMaps[i].hostID == hostID)
1015
0
        {
1016
0
            return this_0->regionMaps[i].posixID;
1017
0
        }
1018
0
    }
1019
1020
    /* If you get here, then no matching region was found,
1021
       so return the language id with the wild card region. */
1022
0
    return this_0->regionMaps[0].posixID;
1023
0
}
1024
1025
/*
1026
//////////////////////////////////////
1027
//
1028
// LCID --> POSIX
1029
//
1030
/////////////////////////////////////
1031
*/
1032
#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
/*
 * Rewrites, in place, the language tags that need to be changed:
 * quz -> qu
 * prs -> fa
 *
 * buffer must be a writable, NUL-terminated char string; nothing is done
 * unless len is at least 3. Both arguments are evaluated more than once,
 * so they must not have side effects.
 *
 * The remainder of the string is shifted left by one character with
 * uprv_memmove: source and destination overlap inside the same buffer,
 * which string-concatenation functions do not support.
 */
#define FIX_LANGUAGE_ID_TAG(buffer, len) \
    do { \
        if ((len) >= 3) { \
            if ((buffer)[0] == 'q' && (buffer)[1] == 'u' && (buffer)[2] == 'z') { \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } else if ((buffer)[0] == 'p' && (buffer)[1] == 'r' && (buffer)[2] == 's') { \
                (buffer)[0] = 'f'; (buffer)[1] = 'a'; \
                uprv_memmove((buffer) + 2, (buffer) + 3, uprv_strlen((buffer) + 3) + 1); \
            } \
        } \
    } while (0)

#endif
1050
1051
/**
 * Converts a Windows LCID into a POSIX style locale ID.
 *
 * When the Windows LCID mapping API is compiled in, the platform is asked for
 * the locale name first; the hardcoded table in this file is consulted when
 * the platform gives no usable name, when the name carries a sorting variant,
 * or for primary language ID 0x92. Otherwise only the table is used.
 *
 * @param hostid          the Windows LCID.
 * @param posixID         the output buffer; may be NULL only when
 *                        posixIDCapacity is 0.
 * @param posixIDCapacity the size of posixID in chars; must not be negative.
 * @param status          set to U_ILLEGAL_ARGUMENT_ERROR for invalid buffer
 *                        arguments or when no mapping exists, to
 *                        U_STRING_NOT_TERMINATED_WARNING when the result
 *                        exactly fills the buffer, and to
 *                        U_BUFFER_OVERFLOW_ERROR when it does not fit. An
 *                        incoming U_STRING_NOT_TERMINATED_WARNING is reset to
 *                        U_ZERO_ERROR when the result fits with its terminator.
 * @return the full length of the POSIX ID (even when it was truncated),
 *         or -1 when status is set to U_ILLEGAL_ARGUMENT_ERROR.
 */
U_CAPI int32_t
uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
{
    UBool bLookup = TRUE;
    const char *pPosixID = NULL;

    /* A negative capacity would be converted to a huge size by the copy below,
       and a NULL buffer with a positive capacity would be written through. */
    if (posixIDCapacity < 0 || (posixID == NULL && posixIDCapacity > 0)) {
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return -1;
    }

#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
    static_assert(ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH, "Windows locale names have smaller length than ICU locale names.");

    char locName[LOCALE_NAME_MAX_LENGTH] = {};

    // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
    // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
    // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
    // use the Windows API to resolve locale ID for this specific case.
    if ((hostid & 0x3FF) != 0x92) {
        int32_t tmpLen = 0;
        char16_t windowsLocaleName[LOCALE_NAME_MAX_LENGTH] = {};

        // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
        tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
        if (tmpLen > 1) {
            int32_t i = 0;
            // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
            bLookup = FALSE;
            for (i = 0; i < UPRV_LENGTHOF(locName); i++)
            {
                locName[i] = (char)(windowsLocaleName[i]);

                // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
                // In such cases, we need special mapping data found in the hardcoded table
                // in this source file.
                if (windowsLocaleName[i] == L'_')
                {
                    // Keep the base locale, without variant
                    // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
                    locName[i] = '\0';
                    tmpLen = i;
                    bLookup = TRUE;
                    break;
                }
                else if (windowsLocaleName[i] == L'-')
                {
                    // Windows names use -, ICU uses _
                    locName[i] = '_';
                }
                else if (windowsLocaleName[i] == L'\0')
                {
                    // No point in doing more work than necessary
                    break;
                }
            }
            // TODO: Need to understand this better, why isn't it an alias?
            FIX_LANGUAGE_ID_TAG(locName, tmpLen);
            pPosixID = locName;
        }
    }
#endif

    if (bLookup) {
        const char *pCandidate = NULL;
        const uint16_t langID = LANGUAGE_LCID(hostid);

        /* Find the table entry for the language part of the LCID, then the
           best region within that entry. */
        for (uint32_t localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
            if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
                pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
                break;
            }
        }

        /* On Windows, when locale name has a variant, we still look up the hardcoded table.
           If a match in the hardcoded table is longer than the Windows locale name without
           variant, we use the one as the result */
        if (pCandidate && (pPosixID == NULL || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
            pPosixID = pCandidate;
        }
    }

    if (pPosixID) {
        int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
        int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
        /* Nothing to copy when only the length is being queried; this also
           keeps a NULL buffer away from the copy routine. */
        if (copyLen > 0) {
            uprv_memcpy(posixID, pPosixID, copyLen);
        }
        if (resLen < posixIDCapacity) {
            posixID[resLen] = 0;
            if (*status == U_STRING_NOT_TERMINATED_WARNING) {
                *status = U_ZERO_ERROR;
            }
        } else if (resLen == posixIDCapacity) {
            *status = U_STRING_NOT_TERMINATED_WARNING;
        } else {
            *status = U_BUFFER_OVERFLOW_ERROR;
        }
        return resLen;
    }

    /* no match found */
    *status = U_ILLEGAL_ARGUMENT_ERROR;
    return -1;
}
1152
1153
/*
1154
//////////////////////////////////////
1155
//
1156
// POSIX --> LCID
1157
// This should only be called from uloc_getLCID.
1158
// The locale ID must be in canonical form.
1159
//
1160
/////////////////////////////////////
1161
*/
1162
/**
 * Asks the host platform for the LCID of a locale ID, when the Windows
 * name->lcid conversion is compiled in. On every other configuration this
 * function only checks status and returns 0.
 *
 * @param localeID the locale ID to convert.
 * @param status   checked on entry; the function returns 0 immediately when it
 *                 already indicates a failure. It may also be set by the ICU
 *                 locale functions called along the way.
 * @return the platform LCID, or 0 when nothing was found, when the locale
 *         has a collation keyword (so that the lookup table gets used),
 *         or when the platform conversion is not available.
 */
U_CAPI uint32_t
uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
{
    if (U_FAILURE(*status)) {
        return 0;
    }

#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
    char baseName[ULOC_FULLNAME_CAPACITY] = {};
    const char *lookupID = localeID;

    // Keywords need to be dealt with before the name is handed to Windows.
    if (uprv_strchr(localeID, '@') != NULL) {
        icu::CharString collation;
        {
            icu::CharStringByteSink sink(&collation);
            ulocimp_getKeywordValue(localeID, "collation", sink, status);
        }
        if (U_SUCCESS(*status) && !collation.isEmpty()) {
            // A collation keyword is present: return 0 so that the LCID lookup table will be used.
            return 0;
        }

        // Keywords other than collation are simply dropped by using the base name.
        const int32_t baseLen = uloc_getBaseName(localeID, baseName, UPRV_LENGTHOF(baseName) - 1, status);
        if (U_SUCCESS(*status) && baseLen > 0) {
            baseName[baseLen] = 0;
            lookupID = baseName;
        }
    }

    // Convert to the BCP 47 form, e.g. de_DE@collation=phonebook becomes de-DE-u-co-phonebk.
    char asciiBCP47Tag[LOCALE_NAME_MAX_LENGTH] = {};
    (void)uloc_toLanguageTag(lookupID, asciiBCP47Tag, UPRV_LENGTHOF(asciiBCP47Tag), FALSE, status);
    if (U_FAILURE(*status)) {
        return 0;
    }

    // The Windows API wants wide characters, so widen the 8-bit tag one character at a time.
    wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
    int32_t tagLen = 0;
    while (tagLen < UPRV_LENGTHOF(bcp47Tag) && asciiBCP47Tag[tagLen] != '\0') {
        bcp47Tag[tagLen] = static_cast<wchar_t>(asciiBCP47Tag[tagLen]);
        tagLen++;
    }

    if (tagLen < (UPRV_LENGTHOF(bcp47Tag) - 1)) {
        // Ensure it's null terminated
        bcp47Tag[tagLen] = L'\0';
        const LCID lcid = LocaleNameToLCID(bcp47Tag, LOCALE_ALLOW_NEUTRAL_NAMES);
        // Use the LCID from Windows unless it is completely ambiguous.
        // LOCALE_USER_DEFAULT and transients are OK because they will round trip
        // for this process.
        if (lcid > 0 && lcid != LOCALE_CUSTOM_UNSPECIFIED) {
            return lcid;
        }
    }
#else
    (void) localeID; // Suppress unused variable warning.
#endif

    // Nothing found, or not implemented.
    return 0;
}
1248
1249
/**
 * Looks up the Windows LCID for a POSIX locale ID in the hardcoded table.
 * This is the table lookup used when native platform name->lcid conversion
 * isn't available, or for locales that don't follow patterns the platform expects.
 *
 * @param langID  the language part of the locale; selects the table entry.
 * @param posixID the full POSIX locale ID to match inside the entries.
 * @param status  set by getHostID when the language entry is found directly;
 *                otherwise set to U_USING_FALLBACK_WARNING when only a
 *                fallback match exists, or to U_ILLEGAL_ARGUMENT_ERROR when
 *                nothing matches. Left untouched for incomplete IDs.
 * @return the LCID, or 0 for an incomplete ID or when no match is found.
 */
U_CAPI uint32_t
uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
{
    /* Check for incomplete id. */
    if (langID == NULL || posixID == NULL) {
        return 0;
    }
    if (uprv_strlen(langID) < 2 || uprv_strlen(posixID) < 2) {
        return 0;
    }

    /* Binary search for the map entry for normal cases. The lower bound is
       moved onto the probe itself (not past it), so the loop also stops as
       soon as the same index would be probed twice in a row. */
    uint32_t lower = 0;
    uint32_t upper = gLocaleCount;
    uint32_t previousProbe = 0;
    while (upper > lower) {
        const uint32_t probe = (upper + lower) >> 1;
        if (probe == previousProbe) {
            break;
        }

        const int32_t order = uprv_strcmp(langID, gPosixIDmap[probe].regionMaps->posixID);
        if (order == 0) {
            /* we found it */
            return getHostID(&gPosixIDmap[probe], posixID, status);
        }
        if (order < 0) {
            upper = probe;
        } else {
            lower = probe;
        }
        previousProbe = probe;
    }

    /*
     * Sometimes we can't do a binary search on posixID because some LCIDs
     * go to different locales.  We hit one of those special cases, so every
     * entry is tried in turn; the last fallback match is remembered in case
     * no exact match turns up.
     */
    uint32_t fallbackValue = (uint32_t)-1;
    for (uint32_t entry = 0; entry < gLocaleCount; entry++) {
        UErrorCode entryStatus = U_ZERO_ERROR;
        const uint32_t value = getHostID(&gPosixIDmap[entry], posixID, &entryStatus);
        if (entryStatus == U_ZERO_ERROR) {
            return value;
        }
        if (entryStatus == U_USING_FALLBACK_WARNING) {
            fallbackValue = value;
        }
    }

    if (fallbackValue == (uint32_t)-1) {
        /* no match found */
        *status = U_ILLEGAL_ARGUMENT_ERROR;
        return 0;   /* return international (root) */
    }

    *status = U_USING_FALLBACK_WARNING;
    return fallbackValue;
}