Coverage Report

Created: 2025-06-13 06:34

/src/icu/icu4c/source/common/locmap.cpp
Line
Count
Source (jump to first uncovered line)
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
 **********************************************************************
5
 *   Copyright (C) 1996-2016, International Business Machines
6
 *   Corporation and others.  All Rights Reserved.
7
 **********************************************************************
8
 *
9
 * Provides functionality for mapping between
10
 * LCID and Posix IDs or ICU locale to codepage
11
 *
12
 * Note: All classes and code in this file are
13
 *       intended for internal use only.
14
 *
15
 * Methods of interest:
16
 *   unsigned long convertToLCID(const char*);
17
 *   const char* convertToPosix(unsigned long);
18
 *
19
 * Kathleen Wilson, 4/30/96
20
 *
21
 *  Date        Name        Description
22
 *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added
23
 *                          setId() method and safety check against 
24
 *                          MAX_ID_LENGTH.
25
 * 04/23/99     stephen     Added C wrapper for convertToPosix.
26
 * 09/18/00     george      Removed the memory leaks.
27
 * 08/23/01     george      Convert to C
28
 */
29
30
#include "locmap.h"
31
#include "charstr.h"
32
#include "cstring.h"
33
#include "cmemory.h"
34
#include "ulocimp.h"
35
#include "unicode/uloc.h"
36
37
#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
38
#include <windows.h>
39
#include <winnls.h> // LCIDToLocaleName and LocaleNameToLCID
40
#endif
41
42
/*
43
 * Note:
44
 * The mapping from Win32 locale ID numbers to POSIX locale strings should
45
 * be the faster one.
46
 *
47
 * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
48
 * [MS-LCID] Windows Language Code Identifier (LCID) Reference
49
 */
50
51
namespace {
52
53
/*
54
////////////////////////////////////////////////
55
//
56
// Internal Classes for LCID <--> POSIX Mapping
57
//
58
/////////////////////////////////////////////////
59
*/
60
61
/**
 * One row of an LCID <--> POSIX mapping table: a Windows locale ID together
 * with the POSIX locale ID string it maps to.
 */
struct ILcidPosixElement
{
    /* LCID in host format, such as 0x044d. */
    const uint32_t hostID;
    /* POSIX ID: either a bare language ("de") or language_TERRITORY ("de_CH"). */
    const char * const posixID;
};
66
67
/**
 * Describes one locmap_ table: how many rows it has and where they start.
 * Instances are produced by the ILCID_POSIX_MAP macro.
 */
struct ILcidPosixMap
{
    /* Number of entries in the array that regionMaps points to. */
    const uint32_t numRegions;
    /* First row of the table; by convention the language-only entry. */
    const struct ILcidPosixElement* const regionMaps;
};
72
73
74
/*
75
/////////////////////////////////////////////////
76
//
77
// Easy macros to make the LCID <--> POSIX Mapping
78
//
79
/////////////////////////////////////////////////
80
*/
81
82
/**
 * Defines the table for the standard one language/one country LCID case.
 * Expands to a two-row constexpr array named locmap_<languageID>:
 *   row 0 - the parent (language-only) entry, keyed by LANGUAGE_LCID(hostID);
 *   row 1 - the language_TERRITORY entry, keyed by hostID itself.
 * The expansion already ends with a semicolon, so invocations need none.
 * @param hostID LCID in host format such as 0x044d
 * @param languageID posix ID of just the language such as 'de'
 * @param posixID posix ID of the language_TERRITORY such as 'de_CH'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(hostID, languageID, posixID) \
constexpr ILcidPosixElement locmap_##languageID[] = { \
    { LANGUAGE_LCID(hostID), #languageID }, /* parent locale */ \
    { hostID, #posixID }, \
};
95
96
/**
 * Opens the definition of a hand-written subtable. Expands to the declarator
 * of a constexpr ILcidPosixElement array named locmap_<id>, up to and
 * including the '='; the caller supplies the braced row list and the closing
 * semicolon.
 * @param id the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(id) constexpr ILcidPosixElement locmap_##id[] =
102
103
104
/**
 * Builds the ILcidPosixMap initializer {row count, row array} for the table
 * named locmap_<_posixID>. This relies on the table's variable name suffix
 * matching the language string, and on the first ILcidPosixElement of that
 * table being the language-only entry.
 * @param _posixID the POSIX ID used as the locmap_ table suffix (e.g. af).
 */
#define ILCID_POSIX_MAP(_posixID) { UPRV_LENGTHOF(locmap_##_posixID), locmap_##_posixID }
112
113
/*
114
////////////////////////////////////////////
115
//
116
// Create the table of LCID to POSIX Mapping
117
// None of it should be dynamically created.
118
//
119
// Keep static locale variables inside the function so that
120
// it can be created properly during static init.
121
//
122
// Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier 
123
//       (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx
124
//
125
//       Microsoft is moving away from LCID in favor of locale name as of Vista.  This table needs to be
126
//       maintained for support of older Windows versions.
127
//       Update: Windows 7 (091130)
128
//
129
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
130
//       @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is
131
//       called from uloc_getLCID(), keywords other than collation are already removed. If we really need
132
//       to support other keywords in this mapping data, we must update the implementation.
133
////////////////////////////////////////////
134
*/
135
136
// TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as 
137
// LocaleNameToLCID and LCIDToLocaleName provide 90% of these.
138
139
ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
140
141
ILCID_POSIX_SUBTABLE(ar) {
142
    {0x01,   "ar"},
143
    {0x3801, "ar_AE"},
144
    {0x3c01, "ar_BH"},
145
    {0x1401, "ar_DZ"},
146
    {0x0c01, "ar_EG"},
147
    {0x0801, "ar_IQ"},
148
    {0x2c01, "ar_JO"},
149
    {0x3401, "ar_KW"},
150
    {0x3001, "ar_LB"},
151
    {0x1001, "ar_LY"},
152
    {0x1801, "ar_MA"},
153
    {0x1801, "ar_MO"},
154
    {0x2001, "ar_OM"},
155
    {0x4001, "ar_QA"},
156
    {0x0401, "ar_SA"},
157
    {0x2801, "ar_SY"},
158
    {0x1c01, "ar_TN"},
159
    {0x2401, "ar_YE"}
160
};
161
162
ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
163
ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
164
ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
165
166
ILCID_POSIX_SUBTABLE(az) {
167
    {0x2c,   "az"},
168
    {0x082c, "az_Cyrl_AZ"},  /* Cyrillic based */
169
    {0x742c, "az_Cyrl"},  /* Cyrillic based */
170
    {0x042c, "az_Latn_AZ"}, /* Latin based */
171
    {0x782c, "az_Latn"}, /* Latin based */
172
    {0x042c, "az_AZ"} /* Latin based */
173
};
174
175
ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
176
ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
177
178
/*ILCID_POSIX_SUBTABLE(ber) {
179
    {0x5f,   "ber"},
180
    {0x045f, "ber_Arab_DZ"},
181
    {0x045f, "ber_Arab"},
182
    {0x085f, "ber_Latn_DZ"},
183
    {0x085f, "ber_Latn"}
184
};*/
185
186
ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
187
188
ILCID_POSIX_SUBTABLE(bin) {
189
    {0x66, "bin"},
190
    {0x0466, "bin_NG"}
191
};
192
193
ILCID_POSIX_SUBTABLE(bn) {
194
    {0x45,   "bn"},
195
    {0x0845, "bn_BD"},
196
    {0x0445, "bn_IN"}
197
};
198
199
ILCID_POSIX_SUBTABLE(bo) {
200
    {0x51,   "bo"},
201
    {0x0851, "bo_BT"},
202
    {0x0451, "bo_CN"},
203
    {0x0c51, "dz_BT"}
204
};
205
206
ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
207
208
ILCID_POSIX_SUBTABLE(ca) {
209
    {0x03,   "ca"},
210
    {0x0403, "ca_ES"},
211
    {0x0803, "ca_ES_VALENCIA"}
212
};
213
214
ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
215
216
ILCID_POSIX_SUBTABLE(chr) {
217
    {0x05c,  "chr"},
218
    {0x7c5c, "chr_Cher"},
219
    {0x045c, "chr_Cher_US"},
220
    {0x045c, "chr_US"}
221
};
222
223
// ICU has chosen different names for these.
224
ILCID_POSIX_SUBTABLE(ckb) {
225
    {0x92,   "ckb"},
226
    {0x7c92, "ckb_Arab"},
227
    {0x0492, "ckb_Arab_IQ"}
228
};
229
230
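/* NOTE(review): this comment originally read "Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function", but the table below is declared as plain cs - confirm whether the z/OS workaround is still required. */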
231
ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)
232
233
ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
234
ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
235
236
// Windows doesn't know POSIX or BCP47 Unicode phonebook sort names
237
ILCID_POSIX_SUBTABLE(de) {
238
    {0x07,   "de"},
239
    {0x0c07, "de_AT"},
240
    {0x0807, "de_CH"},
241
    {0x0407, "de_DE"},
242
    {0x1407, "de_LI"},
243
    {0x1007, "de_LU"},
244
    {0x10407,"de_DE@collation=phonebook"},  /*This is really de_DE_PHONEBOOK on Windows*/
245
    {0x10407,"de@collation=phonebook"}  /*This is really de_DE_PHONEBOOK on Windows*/
246
};
247
248
ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
249
ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
250
251
// Windows uses an empty string for 'invariant'
252
ILCID_POSIX_SUBTABLE(en) {
253
    {0x09,   "en"},
254
    {0x0c09, "en_AU"},
255
    {0x2809, "en_BZ"},
256
    {0x1009, "en_CA"},
257
    {0x0809, "en_GB"},
258
    {0x3c09, "en_HK"},
259
    {0x3809, "en_ID"},
260
    {0x1809, "en_IE"},
261
    {0x4009, "en_IN"},
262
    {0x2009, "en_JM"},
263
    {0x4409, "en_MY"},
264
    {0x1409, "en_NZ"},
265
    {0x3409, "en_PH"},
266
    {0x4809, "en_SG"},
267
    {0x2C09, "en_TT"},
268
    {0x0409, "en_US"},
269
    {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */
270
    {0x2409, "en_029"},
271
    {0x1c09, "en_ZA"},
272
    {0x3009, "en_ZW"},
273
    {0x2409, "en_VI"},  /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */
274
    {0x0409, "en_AS"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
275
    {0x0409, "en_GU"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
276
    {0x0409, "en_MH"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
277
    {0x0409, "en_MP"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
278
    {0x0409, "en_UM"}   /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */
279
};
280
281
ILCID_POSIX_SUBTABLE(en_US_POSIX) {
282
    {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */
283
};
284
285
// Windows doesn't know POSIX or BCP47 Unicode traditional sort names
286
ILCID_POSIX_SUBTABLE(es) {
287
    {0x0a,   "es"},
288
    {0x2c0a, "es_AR"},
289
    {0x400a, "es_BO"},
290
    {0x340a, "es_CL"},
291
    {0x240a, "es_CO"},
292
    {0x140a, "es_CR"},
293
    {0x5c0a, "es_CU"},
294
    {0x1c0a, "es_DO"},
295
    {0x300a, "es_EC"},
296
    {0x0c0a, "es_ES"},      /*Modern sort.*/
297
    {0x100a, "es_GT"},
298
    {0x480a, "es_HN"},
299
    {0x080a, "es_MX"},
300
    {0x4c0a, "es_NI"},
301
    {0x180a, "es_PA"},
302
    {0x280a, "es_PE"},
303
    {0x500a, "es_PR"},
304
    {0x3c0a, "es_PY"},
305
    {0x440a, "es_SV"},
306
    {0x540a, "es_US"},
307
    {0x380a, "es_UY"},
308
    {0x200a, "es_VE"},
309
    {0x580a, "es_419"},
310
    {0x040a, "es_ES@collation=traditional"},
311
    {0x040a, "es@collation=traditional"}        // Windows will treat this as es-ES@collation=traditional
312
};
313
314
ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
315
ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
316
317
/* ISO-639 doesn't distinguish between Persian and Dari.*/
318
ILCID_POSIX_SUBTABLE(fa) {
319
    {0x29,   "fa"},
320
    {0x0429, "fa_IR"},  /* Persian/Farsi (Iran) */
321
    {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
322
};
323
324
325
/* duplicate for roundtripping */
326
ILCID_POSIX_SUBTABLE(fa_AF) {
327
    {0x8c,   "fa_AF"},  /* Persian/Dari (Afghanistan) */
328
    {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */
329
};
330
331
ILCID_POSIX_SUBTABLE(ff) {
332
    {0x67,   "ff"},
333
    {0x7c67, "ff_Latn"},
334
    {0x0867, "ff_Latn_SN"},
335
    {0x0467, "ff_NG"}
336
};
337
338
ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
339
ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
340
ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
341
342
ILCID_POSIX_SUBTABLE(fr) {
343
    {0x0c,   "fr"},
344
    {0x080c, "fr_BE"},
345
    {0x0c0c, "fr_CA"},
346
    {0x240c, "fr_CD"},
347
    {0x240c, "fr_CG"},
348
    {0x100c, "fr_CH"},
349
    {0x300c, "fr_CI"},
350
    {0x2c0c, "fr_CM"},
351
    {0x040c, "fr_FR"},
352
    {0x3c0c, "fr_HT"},
353
    {0x140c, "fr_LU"},
354
    {0x380c, "fr_MA"},
355
    {0x180c, "fr_MC"},
356
    {0x340c, "fr_ML"},
357
    {0x200c, "fr_RE"},
358
    {0x280c, "fr_SN"},
359
    {0xe40c, "fr_015"},
360
    {0x1c0c, "fr_029"}
361
};
362
363
ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)
364
365
ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
366
367
ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */
368
    {0x3c,   "ga"},
369
    {0x083c, "ga_IE"},
370
    {0x043c, "gd_GB"}
371
};
372
373
ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */
374
    {0x91,   "gd"},
375
    {0x0491, "gd_GB"}
376
};
377
378
ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
379
ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
380
ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
381
ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
382
383
ILCID_POSIX_SUBTABLE(ha) {
384
    {0x68,   "ha"},
385
    {0x7c68, "ha_Latn"},
386
    {0x0468, "ha_Latn_NG"},
387
};
388
389
ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
390
ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
391
ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
392
393
/* This LCID is really four different locales.*/
394
ILCID_POSIX_SUBTABLE(hr) {
395
    {0x1a,   "hr"},
396
    {0x141a, "bs_Latn_BA"},  /* Bosnian, Bosnia and Herzegovina */
397
    {0x681a, "bs_Latn"},  /* Bosnian, Bosnia and Herzegovina */
398
    {0x141a, "bs_BA"},  /* Bosnian, Bosnia and Herzegovina */
399
    {0x781a, "bs"},     /* Bosnian */
400
    {0x201a, "bs_Cyrl_BA"},  /* Bosnian, Bosnia and Herzegovina */
401
    {0x641a, "bs_Cyrl"},  /* Bosnian, Bosnia and Herzegovina */
402
    {0x101a, "hr_BA"},  /* Croatian in Bosnia */
403
    {0x041a, "hr_HR"},  /* Croatian*/
404
    {0x2c1a, "sr_Latn_ME"},
405
    {0x241a, "sr_Latn_RS"},
406
    {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */
407
    {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/
408
    {0x701a, "sr_Latn"},    /* It's 0x1a or 0x081a, pick one to make the test program happy. */
409
    {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */
410
    {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/
411
    {0x301a, "sr_Cyrl_ME"},
412
    {0x281a, "sr_Cyrl_RS"},
413
    {0x6c1a, "sr_Cyrl"},    /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */
414
    {0x7c1a, "sr"}          /* In CLDR sr is sr_Cyrl. */
415
};
416
417
ILCID_POSIX_SUBTABLE(hsb) {
418
    {0x2E,   "hsb"},
419
    {0x042E, "hsb_DE"},
420
    {0x082E, "dsb_DE"},
421
    {0x7C2E, "dsb"},
422
};
423
424
ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
425
ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
426
427
ILCID_POSIX_SUBTABLE(ibb) {
428
    {0x69, "ibb"},
429
    {0x0469, "ibb_NG"}
430
};
431
432
ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
433
ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
434
ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
435
ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
436
437
ILCID_POSIX_SUBTABLE(it) {
438
    {0x10,   "it"},
439
    {0x0810, "it_CH"},
440
    {0x0410, "it_IT"}
441
};
442
443
ILCID_POSIX_SUBTABLE(iu) {
444
    {0x5d,   "iu"},
445
    {0x045d, "iu_Cans_CA"},
446
    {0x785d, "iu_Cans"},
447
    {0x085d, "iu_Latn_CA"},
448
    {0x7c5d, "iu_Latn"}
449
};
450
451
ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL)    /*Left in for compatibility*/
452
ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
453
ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
454
ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
455
ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
456
ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
457
ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
458
459
ILCID_POSIX_SUBTABLE(ko) {
460
    {0x12,   "ko"},
461
    {0x0812, "ko_KP"},
462
    {0x0412, "ko_KR"}
463
};
464
465
ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
466
ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr,  kr_NG)
467
468
ILCID_POSIX_SUBTABLE(ks) {         /* We could add PK and CN too */
469
    {0x60,   "ks"},
470
    {0x0460, "ks_Arab_IN"},
471
    {0x0860, "ks_Deva_IN"}
472
};
473
474
ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG)   /* Kyrgyz is spoken in Kyrgyzstan */
475
476
ILCID_POSIX_SUBTABLE(la) {
477
    {0x76,   "la"},
478
    {0x0476, "la_001"},
479
    {0x0476, "la_IT"}       /*Left in for compatibility*/
480
};
481
482
ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
483
ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
484
ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
485
ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
486
ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
487
ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
488
ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
489
490
ILCID_POSIX_SUBTABLE(mn) {
491
    {0x50,   "mn"},
492
    {0x0450, "mn_MN"},
493
    {0x7c50, "mn_Mong"},
494
    {0x0850, "mn_Mong_CN"},
495
    {0x0850, "mn_CN"},
496
    {0x7850, "mn_Cyrl"},
497
    {0x0c50, "mn_Mong_MN"}
498
};
499
500
ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
501
ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
502
ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
503
504
ILCID_POSIX_SUBTABLE(ms) {
505
    {0x3e,   "ms"},
506
    {0x083e, "ms_BN"},   /* Brunei Darussalam*/
507
    {0x043e, "ms_MY"}    /* Malaysia*/
508
};
509
510
ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
511
ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
512
513
ILCID_POSIX_SUBTABLE(ne) {
514
    {0x61,   "ne"},
515
    {0x0861, "ne_IN"},   /* India*/
516
    {0x0461, "ne_NP"}    /* Nepal*/
517
};
518
519
ILCID_POSIX_SUBTABLE(nl) {
520
    {0x13,   "nl"},
521
    {0x0813, "nl_BE"},
522
    {0x0413, "nl_NL"}
523
};
524
525
/* The "no" locale split into nb and nn.  By default in ICU, "no" is nb.*/
526
// TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.
527
ILCID_POSIX_SUBTABLE(no) {
528
    {0x14,   "no"},     /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */ 
529
    {0x7c14, "nb"},     /* really nb */
530
    {0x0414, "nb_NO"},  /* really nb_NO. Keep first in the 414 list. */
531
    {0x0414, "no_NO"},  /* really nb_NO */
532
    {0x0814, "nn_NO"},  /* really nn_NO. Keep first in the 814 list.  */
533
    {0x7814, "nn"},     /* It's 0x14 or 0x814, pick one to make the test program happy. */
534
    {0x0814, "no_NO_NY"}/* really nn_NO */
535
};
536
537
ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA)   /* TODO: Verify the ISO-639 code */
538
ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
539
540
ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */
541
    {0x72,   "om"},
542
    {0x0472, "om_ET"},
543
    {0x0472, "gaz_ET"}
544
};
545
546
/* Declared as or_IN to get around compiler errors*/
547
ILCID_POSIX_SUBTABLE(or_IN) {
548
    {0x48,   "or"},
549
    {0x0448, "or_IN"},
550
};
551
552
ILCID_POSIX_SUBTABLE(pa) {
553
    {0x46,   "pa"},
554
    {0x0446, "pa_IN"},
555
    {0x0846, "pa_Arab_PK"},
556
    {0x0846, "pa_PK"}
557
};
558
559
ILCID_POSIX_SUBTABLE(pap) {
560
    {0x79, "pap"},
561
    {0x0479, "pap_029"},
562
    {0x0479, "pap_AN"}     /*Left in for compatibility*/
563
};
564
565
ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
566
ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
567
568
ILCID_POSIX_SUBTABLE(pt) {
569
    {0x16,   "pt"},
570
    {0x0416, "pt_BR"},
571
    {0x0816, "pt_PT"}
572
};
573
574
ILCID_POSIX_SUBTABLE(qu) {
575
    {0x6b,   "qu"},
576
    {0x046b, "qu_BO"},
577
    {0x086b, "qu_EC"},
578
    {0x0C6b, "qu_PE"},
579
    {0x046b, "quz_BO"},
580
    {0x086b, "quz_EC"},
581
    {0x0C6b, "quz_PE"}
582
};
583
584
ILCID_POSIX_SUBTABLE(quc) {
585
    {0x93,   "quc"},
586
    {0x0493, "quc_CO"},
587
    /*
588
        "quc_Latn_GT" is an exceptional case. Language ID of "quc"
589
        is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be
590
        under the group of "qut". "qut" is a retired ISO 639-3 language
591
        code for West Central Quiche, and merged to "quc".
592
        It looks like Windows previously reserved "qut" for K'iche' but
593
        decided to use "quc" when adding a locale for K'iche' (Guatemala).
594
595
        The data structure used here assumes that the language ID bits in
596
        an LCID are unique to one alphabetic language code. But this is not true
597
        for "quc_Latn_GT". If we don't have the data below, LCID look up
598
        by alphabetic locale ID (POSIX) will fail. The same entry is found
599
        under "qut" below, which is required for reverse look up.
600
    */
601
    {0x0486, "quc_Latn_GT"}
602
};
603
604
ILCID_POSIX_SUBTABLE(qut) {
605
    {0x86,   "qut"},
606
    {0x0486, "qut_GT"},
607
    /*
608
        See the note in "quc" above.
609
    */
610
    {0x0486, "quc_Latn_GT"}
611
};
612
613
ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
614
615
ILCID_POSIX_SUBTABLE(ro) {
616
    {0x18,   "ro"},
617
    {0x0418, "ro_RO"},
618
    {0x0818, "ro_MD"}
619
};
620
621
// TODO: This is almost certainly 'wrong'.  0 in Windows is a synonym for LOCALE_USER_DEFAULT.
622
// More likely this is a similar concept to the Windows 0x7f Invariant locale ""
623
// (Except that it's not invariant in ICU)
624
ILCID_POSIX_SUBTABLE(root) {
625
    {0x00,   "root"}
626
};
627
628
ILCID_POSIX_SUBTABLE(ru) {
629
    {0x19,   "ru"},
630
    {0x0419, "ru_RU"},
631
    {0x0819, "ru_MD"}
632
};
633
634
ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
635
ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
636
ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
637
638
ILCID_POSIX_SUBTABLE(sd) {
639
    {0x59,   "sd"},
640
    {0x0459, "sd_Deva_IN"},
641
    {0x0459, "sd_IN"},
642
    {0x0859, "sd_Arab_PK"},
643
    {0x0859, "sd_PK"},
644
    {0x7c59, "sd_Arab"}
645
};
646
647
ILCID_POSIX_SUBTABLE(se) {
648
    {0x3b,   "se"},
649
    {0x0c3b, "se_FI"},
650
    {0x043b, "se_NO"},
651
    {0x083b, "se_SE"},
652
    {0x783b, "sma"},
653
    {0x183b, "sma_NO"},
654
    {0x1c3b, "sma_SE"},
655
    {0x7c3b, "smj"},
656
    {0x703b, "smn"},
657
    {0x743b, "sms"},
658
    {0x103b, "smj_NO"},
659
    {0x143b, "smj_SE"},
660
    {0x243b, "smn_FI"},
661
    {0x203b, "sms_FI"},
662
};
663
664
ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
665
ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
666
ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
667
668
ILCID_POSIX_SUBTABLE(so) {
669
    {0x77,   "so"},
670
    {0x0477, "so_SO"}
671
};
672
673
ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
674
ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
675
676
ILCID_POSIX_SUBTABLE(sv) {
677
    {0x1d,   "sv"},
678
    {0x081d, "sv_FI"},
679
    {0x041d, "sv_SE"}
680
};
681
682
ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
683
ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
684
685
ILCID_POSIX_SUBTABLE(ta) {
686
    {0x49,   "ta"},
687
    {0x0449, "ta_IN"},
688
    {0x0849, "ta_LK"}
689
};
690
691
ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
692
693
/* Cyrillic based by default */
694
ILCID_POSIX_SUBTABLE(tg) {
695
    {0x28,   "tg"},
696
    {0x7c28, "tg_Cyrl"},
697
    {0x0428, "tg_Cyrl_TJ"}
698
};
699
700
ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
701
702
ILCID_POSIX_SUBTABLE(ti) {
703
    {0x73,   "ti"},
704
    {0x0873, "ti_ER"},
705
    {0x0473, "ti_ET"}
706
};
707
708
ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
709
710
ILCID_POSIX_SUBTABLE(tn) {
711
    {0x32,   "tn"},
712
    {0x0832, "tn_BW"},
713
    {0x0432, "tn_ZA"}
714
};
715
716
ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
717
ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
718
ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
719
720
ILCID_POSIX_SUBTABLE(tzm) {
721
    {0x5f,   "tzm"},
722
    {0x7c5f, "tzm_Latn"},
723
    {0x085f, "tzm_Latn_DZ"},
724
    {0x105f, "tzm_Tfng_MA"},
725
    {0x045f, "tzm_Arab_MA"},
726
    {0x045f, "tmz"}
727
};
728
729
ILCID_POSIX_SUBTABLE(ug) {
730
    {0x80,   "ug"},
731
    {0x0480, "ug_CN"},
732
    {0x0480, "ug_Arab_CN"}
733
};
734
735
ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
736
737
ILCID_POSIX_SUBTABLE(ur) {
738
    {0x20,   "ur"},
739
    {0x0820, "ur_IN"},
740
    {0x0420, "ur_PK"}
741
};
742
743
ILCID_POSIX_SUBTABLE(uz) {
744
    {0x43,   "uz"},
745
    {0x0843, "uz_Cyrl_UZ"},  /* Cyrillic based */
746
    {0x7843, "uz_Cyrl"},  /* Cyrillic based */
747
    {0x0843, "uz_UZ"},  /* Cyrillic based */
748
    {0x0443, "uz_Latn_UZ"}, /* Latin based */
749
    {0x7c43, "uz_Latn"} /* Latin based */
750
};
751
752
ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */
753
    {0x33,   "ve"},
754
    {0x0433, "ve_ZA"},
755
    {0x0433, "ven_ZA"}
756
};
757
758
ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
759
ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
760
ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
761
762
ILCID_POSIX_SUBTABLE(yi) {
763
    {0x003d, "yi"},
764
    {0x043d, "yi_001"}
765
};
766
767
ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
768
769
// Windows & ICU tend to use different names for some of these
770
// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
771
ILCID_POSIX_SUBTABLE(zh) {
772
    {0x0004, "zh_Hans"},
773
    {0x7804, "zh"},
774
    {0x0804, "zh_CN"},
775
    {0x0804, "zh_Hans_CN"},
776
    {0x0c04, "zh_Hant_HK"},
777
    {0x0c04, "zh_HK"},
778
    {0x1404, "zh_Hant_MO"},
779
    {0x1404, "zh_MO"},
780
    {0x1004, "zh_Hans_SG"},
781
    {0x1004, "zh_SG"},
782
    {0x0404, "zh_Hant_TW"},
783
    {0x7c04, "zh_Hant"},
784
    {0x0404, "zh_TW"},
785
    {0x30404,"zh_Hant_TW"},     /* Bopomofo order */
786
    {0x30404,"zh_TW"},          /* Bopomofo order */
787
    {0x20004,"zh@collation=stroke"},
788
    {0x20404,"zh_Hant@collation=stroke"},
789
    {0x20404,"zh_Hant_TW@collation=stroke"},
790
    {0x20404,"zh_TW@collation=stroke"},
791
    {0x20804,"zh_Hans@collation=stroke"},
792
    {0x20804,"zh_Hans_CN@collation=stroke"},
793
    {0x20804,"zh_CN@collation=stroke"}
794
    // TODO: Alternate collations for other LCIDs are missing, eg: 0x50804
795
};
796
797
/* Zulu (language LCID 0x35): one regional LCID, 0x0435, paired with zu_ZA. */
ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
798
799
/* This must be static and grouped by LCID. */
800
constexpr ILcidPosixMap gPosixIDmap[] = {
801
    ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
802
    ILCID_POSIX_MAP(am),    /*  am  Amharic                   0x5e */
803
    ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
804
    ILCID_POSIX_MAP(arn),   /*  arn Araucanian/Mapudungun     0x7a */
805
    ILCID_POSIX_MAP(as),    /*  as  Assamese                  0x4d */
806
    ILCID_POSIX_MAP(az),    /*  az  Azerbaijani               0x2c */
807
    ILCID_POSIX_MAP(ba),    /*  ba  Bashkir                   0x6d */
808
    ILCID_POSIX_MAP(be),    /*  be  Belarusian                0x23 */
809
/*    ILCID_POSIX_MAP(ber),     ber Berber/Tamazight          0x5f */
810
    ILCID_POSIX_MAP(bg),    /*  bg  Bulgarian                 0x02 */
811
    ILCID_POSIX_MAP(bin),   /*  bin Edo                       0x66 */
812
    ILCID_POSIX_MAP(bn),    /*  bn  Bengali; Bangla           0x45 */
813
    ILCID_POSIX_MAP(bo),    /*  bo  Tibetan                   0x51 */
814
    ILCID_POSIX_MAP(br),    /*  br  Breton                    0x7e */
815
    ILCID_POSIX_MAP(ca),    /*  ca  Catalan                   0x03 */
816
    ILCID_POSIX_MAP(chr),   /*  chr Cherokee                  0x5c */
817
    ILCID_POSIX_MAP(ckb),   /*  ckb Sorani (Central Kurdish)  0x92 */
818
    ILCID_POSIX_MAP(co),    /*  co  Corsican                  0x83 */
819
    ILCID_POSIX_MAP(cs),    /*  cs  Czech                     0x05 */
820
    ILCID_POSIX_MAP(cy),    /*  cy  Welsh                     0x52 */
821
    ILCID_POSIX_MAP(da),    /*  da  Danish                    0x06 */
822
    ILCID_POSIX_MAP(de),    /*  de  German                    0x07 */
823
    ILCID_POSIX_MAP(dv),    /*  dv  Divehi                    0x65 */
824
    ILCID_POSIX_MAP(el),    /*  el  Greek                     0x08 */
825
    ILCID_POSIX_MAP(en),    /*  en  English                   0x09 */
826
    ILCID_POSIX_MAP(en_US_POSIX), /*    invariant             0x7f */
827
    ILCID_POSIX_MAP(es),    /*  es  Spanish                   0x0a */
828
    ILCID_POSIX_MAP(et),    /*  et  Estonian                  0x25 */
829
    ILCID_POSIX_MAP(eu),    /*  eu  Basque                    0x2d */
830
    ILCID_POSIX_MAP(fa),    /*  fa  Persian/Farsi             0x29 */
831
    ILCID_POSIX_MAP(fa_AF), /*  fa  Persian/Dari              0x8c */
832
    ILCID_POSIX_MAP(ff),    /*  ff  Fula                      0x67 */
833
    ILCID_POSIX_MAP(fi),    /*  fi  Finnish                   0x0b */
834
    ILCID_POSIX_MAP(fil),   /*  fil Filipino                  0x64 */
835
    ILCID_POSIX_MAP(fo),    /*  fo  Faroese                   0x38 */
836
    ILCID_POSIX_MAP(fr),    /*  fr  French                    0x0c */
837
    ILCID_POSIX_MAP(fuv),   /*  fuv Fulfulde - Nigeria        0x67 */
838
    ILCID_POSIX_MAP(fy),    /*  fy  Frisian                   0x62 */
839
    ILCID_POSIX_MAP(ga),    /*  *   Gaelic (Ireland,Scotland) 0x3c */
840
    ILCID_POSIX_MAP(gd),    /*  gd  Gaelic (United Kingdom)   0x91 */
841
    ILCID_POSIX_MAP(gl),    /*  gl  Galician                  0x56 */
842
    ILCID_POSIX_MAP(gn),    /*  gn  Guarani                   0x74 */
843
    ILCID_POSIX_MAP(gsw),   /*  gsw Alemanic/Alsatian/Swiss German 0x84 */
844
    ILCID_POSIX_MAP(gu),    /*  gu  Gujarati                  0x47 */
845
    ILCID_POSIX_MAP(ha),    /*  ha  Hausa                     0x68 */
846
    ILCID_POSIX_MAP(haw),   /*  haw Hawaiian                  0x75 */
847
    ILCID_POSIX_MAP(he),    /*  he  Hebrew (formerly iw)      0x0d */
848
    ILCID_POSIX_MAP(hi),    /*  hi  Hindi                     0x39 */
849
    ILCID_POSIX_MAP(hr),    /*  *   Croatian and others       0x1a */
850
    ILCID_POSIX_MAP(hsb),   /*  hsb Upper Sorbian             0x2e */
851
    ILCID_POSIX_MAP(hu),    /*  hu  Hungarian                 0x0e */
852
    ILCID_POSIX_MAP(hy),    /*  hy  Armenian                  0x2b */
853
    ILCID_POSIX_MAP(ibb),   /*  ibb Ibibio - Nigeria          0x69 */
854
    ILCID_POSIX_MAP(id),    /*  id  Indonesian (formerly in)  0x21 */
855
    ILCID_POSIX_MAP(ig),    /*  ig  Igbo                      0x70 */
856
    ILCID_POSIX_MAP(ii),    /*  ii  Sichuan Yi                0x78 */
857
    ILCID_POSIX_MAP(is),    /*  is  Icelandic                 0x0f */
858
    ILCID_POSIX_MAP(it),    /*  it  Italian                   0x10 */
859
    ILCID_POSIX_MAP(iu),    /*  iu  Inuktitut                 0x5d */
860
    ILCID_POSIX_MAP(iw),    /*  iw  Hebrew                    0x0d */
861
    ILCID_POSIX_MAP(ja),    /*  ja  Japanese                  0x11 */
862
    ILCID_POSIX_MAP(ka),    /*  ka  Georgian                  0x37 */
863
    ILCID_POSIX_MAP(kk),    /*  kk  Kazakh                    0x3f */
864
    ILCID_POSIX_MAP(kl),    /*  kl  Kalaallisut               0x6f */
865
    ILCID_POSIX_MAP(km),    /*  km  Khmer                     0x53 */
866
    ILCID_POSIX_MAP(kn),    /*  kn  Kannada                   0x4b */
867
    ILCID_POSIX_MAP(ko),    /*  ko  Korean                    0x12 */
868
    ILCID_POSIX_MAP(kok),   /*  kok Konkani                   0x57 */
869
    ILCID_POSIX_MAP(kr),    /*  kr  Kanuri                    0x71 */
870
    ILCID_POSIX_MAP(ks),    /*  ks  Kashmiri                  0x60 */
871
    ILCID_POSIX_MAP(ky),    /*  ky  Kyrgyz                    0x40 */
872
    ILCID_POSIX_MAP(lb),    /*  lb  Luxembourgish             0x6e */
873
    ILCID_POSIX_MAP(la),    /*  la  Latin                     0x76 */
874
    ILCID_POSIX_MAP(lo),    /*  lo  Lao                       0x54 */
875
    ILCID_POSIX_MAP(lt),    /*  lt  Lithuanian                0x27 */
876
    ILCID_POSIX_MAP(lv),    /*  lv  Latvian, Lettish          0x26 */
877
    ILCID_POSIX_MAP(mi),    /*  mi  Maori                     0x81 */
878
    ILCID_POSIX_MAP(mk),    /*  mk  Macedonian                0x2f */
879
    ILCID_POSIX_MAP(ml),    /*  ml  Malayalam                 0x4c */
880
    ILCID_POSIX_MAP(mn),    /*  mn  Mongolian                 0x50 */
881
    ILCID_POSIX_MAP(mni),   /*  mni Manipuri                  0x58 */
882
    ILCID_POSIX_MAP(moh),   /*  moh Mohawk                    0x7c */
883
    ILCID_POSIX_MAP(mr),    /*  mr  Marathi                   0x4e */
884
    ILCID_POSIX_MAP(ms),    /*  ms  Malay                     0x3e */
885
    ILCID_POSIX_MAP(mt),    /*  mt  Maltese                   0x3a */
886
    ILCID_POSIX_MAP(my),    /*  my  Burmese                   0x55 */
887
/*    ILCID_POSIX_MAP(nb),    //  no  Norwegian                 0x14 */
888
    ILCID_POSIX_MAP(ne),    /*  ne  Nepali                    0x61 */
889
    ILCID_POSIX_MAP(nl),    /*  nl  Dutch                     0x13 */
890
/*    ILCID_POSIX_MAP(nn),    //  no  Norwegian                 0x14 */
891
    ILCID_POSIX_MAP(no),    /*  *   Norwegian                 0x14 */
892
    ILCID_POSIX_MAP(nso),   /*  nso Sotho, Northern (Sepedi dialect) 0x6c */
893
    ILCID_POSIX_MAP(oc),    /*  oc  Occitan                   0x82 */
894
    ILCID_POSIX_MAP(om),    /*  om  Oromo                     0x72 */
895
    ILCID_POSIX_MAP(or_IN), /*  or  Oriya                     0x48 */
896
    ILCID_POSIX_MAP(pa),    /*  pa  Punjabi                   0x46 */
897
    ILCID_POSIX_MAP(pap),   /*  pap Papiamentu                0x79 */
898
    ILCID_POSIX_MAP(pl),    /*  pl  Polish                    0x15 */
899
    ILCID_POSIX_MAP(ps),    /*  ps  Pashto                    0x63 */
900
    ILCID_POSIX_MAP(pt),    /*  pt  Portuguese                0x16 */
901
    ILCID_POSIX_MAP(qu),    /*  qu  Quechua                   0x6B */
902
    ILCID_POSIX_MAP(quc),   /*  quc K'iche                    0x93 */
903
    ILCID_POSIX_MAP(qut),   /*  qut K'iche                    0x86 */
904
    ILCID_POSIX_MAP(rm),    /*  rm  Raeto-Romance/Romansh     0x17 */
905
    ILCID_POSIX_MAP(ro),    /*  ro  Romanian                  0x18 */
906
    ILCID_POSIX_MAP(root),  /*  root                          0x00 */
907
    ILCID_POSIX_MAP(ru),    /*  ru  Russian                   0x19 */
908
    ILCID_POSIX_MAP(rw),    /*  rw  Kinyarwanda               0x87 */
909
    ILCID_POSIX_MAP(sa),    /*  sa  Sanskrit                  0x4f */
910
    ILCID_POSIX_MAP(sah),   /*  sah Yakut                     0x85 */
911
    ILCID_POSIX_MAP(sd),    /*  sd  Sindhi                    0x59 */
912
    ILCID_POSIX_MAP(se),    /*  se  Sami                      0x3b */
913
/*    ILCID_POSIX_MAP(sh),    //  sh  Serbo-Croatian            0x1a */
914
    ILCID_POSIX_MAP(si),    /*  si  Sinhalese                 0x5b */
915
    ILCID_POSIX_MAP(sk),    /*  sk  Slovak                    0x1b */
916
    ILCID_POSIX_MAP(sl),    /*  sl  Slovenian                 0x24 */
917
    ILCID_POSIX_MAP(so),    /*  so  Somali                    0x77 */
918
    ILCID_POSIX_MAP(sq),    /*  sq  Albanian                  0x1c */
919
/*    ILCID_POSIX_MAP(sr),    //  sr  Serbian                   0x1a */
920
    ILCID_POSIX_MAP(st),    /*  st  Sutu                      0x30 */
921
    ILCID_POSIX_MAP(sv),    /*  sv  Swedish                   0x1d */
922
    ILCID_POSIX_MAP(sw),    /*  sw  Swahili                   0x41 */
923
    ILCID_POSIX_MAP(syr),   /*  syr Syriac                    0x5A */
924
    ILCID_POSIX_MAP(ta),    /*  ta  Tamil                     0x49 */
925
    ILCID_POSIX_MAP(te),    /*  te  Telugu                    0x4a */
926
    ILCID_POSIX_MAP(tg),    /*  tg  Tajik                     0x28 */
927
    ILCID_POSIX_MAP(th),    /*  th  Thai                      0x1e */
928
    ILCID_POSIX_MAP(ti),    /*  ti  Tigrigna                  0x73 */
929
    ILCID_POSIX_MAP(tk),    /*  tk  Turkmen                   0x42 */
930
    ILCID_POSIX_MAP(tn),    /*  tn  Tswana                    0x32 */
931
    ILCID_POSIX_MAP(tr),    /*  tr  Turkish                   0x1f */
932
    ILCID_POSIX_MAP(ts),    /*  ts  Tsonga                    0x31 */
933
    ILCID_POSIX_MAP(tt),    /*  tt  Tatar                     0x44 */
934
    ILCID_POSIX_MAP(tzm),   /*  tzm Tamazight                 0x5f */
935
    ILCID_POSIX_MAP(ug),    /*  ug  Uighur                    0x80 */
936
    ILCID_POSIX_MAP(uk),    /*  uk  Ukrainian                 0x22 */
937
    ILCID_POSIX_MAP(ur),    /*  ur  Urdu                      0x20 */
938
    ILCID_POSIX_MAP(uz),    /*  uz  Uzbek                     0x43 */
939
    ILCID_POSIX_MAP(ve),    /*  ve  Venda                     0x33 */
940
    ILCID_POSIX_MAP(vi),    /*  vi  Vietnamese                0x2a */
941
    ILCID_POSIX_MAP(wo),    /*  wo  Wolof                     0x88 */
942
    ILCID_POSIX_MAP(xh),    /*  xh  Xhosa                     0x34 */
943
    ILCID_POSIX_MAP(yi),    /*  yi  Yiddish                   0x3d */
944
    ILCID_POSIX_MAP(yo),    /*  yo  Yoruba                    0x6a */
945
    ILCID_POSIX_MAP(zh),    /*  zh  Chinese                   0x04 */
946
    ILCID_POSIX_MAP(zu),    /*  zu  Zulu                      0x35 */
947
};
948
949
/* Number of language entries in gPosixIDmap; upper bound for the table scans below. */
constexpr uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
950
951
/**
 * Internal helper for getHostID(); not meant to be called from anywhere else.
 *
 * Despite its name this is not a three-way comparison: it counts how many
 * leading characters the two NUL-terminated strings have in common.
 *
 * @param id1 first NUL-terminated ID.
 * @param id2 second NUL-terminated ID.
 * @return the length of the common prefix of id1 and id2 (0 if the first
 *         characters already differ or id1 is empty).
 */
int32_t
idCmp(const char* id1, const char* id2)
{
    const char* const start = id1;
    /* Advance both cursors while they agree and id1 has not ended. */
    for (; *id1 != 0 && *id1 == *id2; ++id1, ++id2) {
    }
    return static_cast<int32_t>(id1 - start);
}
967
968
/**
969
 * Searches for a Windows LCID
970
 *
971
 * @param posixID the Posix style locale id.
972
 * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has
973
 *               no equivalent Windows LCID.
974
 * @return the LCID
975
 */
976
uint32_t
977
getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode& status)
978
0
{
979
0
    if (U_FAILURE(status)) { return locmap_root->hostID; }
980
0
    int32_t bestIdx = 0;
981
0
    int32_t bestIdxDiff = 0;
982
0
    int32_t posixIDlen = static_cast<int32_t>(uprv_strlen(posixID));
983
0
    uint32_t idx;
984
985
0
    for (idx = 0; idx < this_0->numRegions; idx++ ) {
986
0
        int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);
987
0
        if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) {
988
0
            if (posixIDlen == sameChars) {
989
                /* Exact match */
990
0
                return this_0->regionMaps[idx].hostID;
991
0
            }
992
0
            bestIdxDiff = sameChars;
993
0
            bestIdx = idx;
994
0
        }
995
0
    }
996
    /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */
997
    /* We also have to make sure that sid and si and similar string subsets don't match. */
998
0
    if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')
999
0
        && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)
1000
0
    {
1001
0
        status = U_USING_FALLBACK_WARNING;
1002
0
        return this_0->regionMaps[bestIdx].hostID;
1003
0
    }
1004
1005
    /*no match found */
1006
0
    status = U_ILLEGAL_ARGUMENT_ERROR;
1007
0
    return locmap_root->hostID;
1008
0
}
1009
1010
const char*
1011
getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)
1012
0
{
1013
0
    uint32_t i;
1014
0
    for (i = 0; i < this_0->numRegions; i++)
1015
0
    {
1016
0
        if (this_0->regionMaps[i].hostID == hostID)
1017
0
        {
1018
0
            return this_0->regionMaps[i].posixID;
1019
0
        }
1020
0
    }
1021
1022
    /* If you get here, then no matching region was found,
1023
       so return the language id with the wild card region. */
1024
0
    return this_0->regionMaps[0].posixID;
1025
0
}
1026
1027
/*
1028
//////////////////////////////////////
1029
//
1030
// LCID --> POSIX
1031
//
1032
/////////////////////////////////////
1033
*/
1034
#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
/*
 * Rewrites, in place, language tags that need to be changed:
 * quz -> qu
 * prs -> fa
 *
 * Only the three-letter language prefix is replaced; whatever follows it in
 * the buffer (region, terminating NUL) is kept and shifted one position to
 * the left.
 *
 * @param buffer NUL-terminated locale name, modified in place.
 * @param len    length reported by the caller; nothing is done when it is
 *               smaller than 3.
 */
void FIX_LANGUAGE_ID_TAG(char* buffer, int32_t len) {
    if (len < 3) {
        return;
    }

    const bool isQuz = buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z';
    const bool isPrs = buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's';
    if (!isQuz && !isPrs) {
        return;
    }

    if (isPrs) {
        buffer[0] = 'f';
        buffer[1] = 'a';
    }

    // Drop the third letter by moving the tail (including its NUL) down by one.
    // Source and destination overlap, so this must be memmove: strcat on
    // overlapping strings is undefined behavior.
    uprv_memmove(buffer + 2, buffer + 3, uprv_strlen(buffer + 3) + 1);
}
#endif
1052
1053
}  // namespace
1054
1055
/**
 * Converts a Windows LCID into a POSIX/ICU style locale ID.
 *
 * Where the Windows LCID mapping API is compiled in, the name reported by
 * Windows is used (with '-' replaced by '_'), except for primary language
 * 0x92 and for names carrying a sort variant ("_..."), which additionally
 * consult the hardcoded table. Otherwise only the hardcoded table is used.
 *
 * @param hostid          the Windows LCID.
 * @param posixID         destination buffer; may be nullptr when
 *                        posixIDCapacity is 0 (pre-flighting).
 * @param posixIDCapacity size of the destination buffer in chars.
 * @param status          set to U_STRING_NOT_TERMINATED_WARNING when the
 *                        result fits exactly without its NUL,
 *                        U_BUFFER_OVERFLOW_ERROR when it does not fit, and
 *                        U_ILLEGAL_ARGUMENT_ERROR when no mapping exists.
 *                        Note that it is not inspected on entry.
 * @return the full length of the locale ID (even if truncated), or 0 when
 *         there is no match.
 */
U_CAPI int32_t
uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)
{
    UBool bLookup = true;
    const char *pPosixID = nullptr;

#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
    static_assert(ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH, "Windows locale names have smaller length than ICU locale names.");

    char locName[LOCALE_NAME_MAX_LENGTH] = {};

    // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and
    // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for
    // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot
    // use the Windows API to resolve locale ID for this specific case.
    if ((hostid & 0x3FF) != 0x92) {
        int32_t tmpLen = 0;
        char16_t windowsLocaleName[LOCALE_NAME_MAX_LENGTH] = {};

        // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.
        tmpLen = LCIDToLocaleName(hostid, reinterpret_cast<PWSTR>(windowsLocaleName), UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);
        if (tmpLen > 1) {
            int32_t i = 0;
            // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.
            bLookup = false;
            for (i = 0; i < UPRV_LENGTHOF(locName); i++)
            {
                locName[i] = static_cast<char>(windowsLocaleName[i]);

                // Windows locale name may contain sorting variant, such as "es-ES_tradnl".
                // In such cases, we need special mapping data found in the hardcoded table
                // in this source file.
                if (windowsLocaleName[i] == L'_')
                {
                    // Keep the base locale, without variant
                    // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?
                    locName[i] = '\0';
                    tmpLen = i;
                    bLookup = true;
                    break;
                }
                else if (windowsLocaleName[i] == L'-')
                {
                    // Windows names use -, ICU uses _
                    locName[i] = '_';
                }
                else if (windowsLocaleName[i] == L'\0')
                {
                    // No point in doing more work than necessary
                    break;
                }
            }
            // TODO: Need to understand this better, why isn't it an alias?
            FIX_LANGUAGE_ID_TAG(locName, tmpLen);
            pPosixID = locName;
        }
    }
#endif

    if (bLookup) {
        const char *pCandidate = nullptr;
        const uint16_t langID = LANGUAGE_LCID(hostid);

        // Find the language entry whose first row carries this language LCID,
        // then resolve the full LCID inside that language's region table.
        for (uint32_t localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) {
            if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) {
                pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);
                break;
            }
        }

        /* On Windows, when locale name has a variant, we still look up the hardcoded table.
           If a match in the hardcoded table is longer than the Windows locale name without
           variant, we use the one as the result */
        if (pCandidate && (pPosixID == nullptr || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) {
            pPosixID = pCandidate;
        }
    }

    if (pPosixID) {
        const int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));
        const int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;
        // Skip the copy when there is nothing to copy: this keeps pre-flighting
        // (nullptr buffer, capacity 0) away from memcpy and prevents a negative
        // capacity from turning into a huge unsigned length.
        if (copyLen > 0) {
            uprv_memcpy(posixID, pPosixID, copyLen);
        }
        if (resLen < posixIDCapacity) {
            posixID[resLen] = 0;
            if (*status == U_STRING_NOT_TERMINATED_WARNING) {
                *status = U_ZERO_ERROR;
            }
        } else if (resLen == posixIDCapacity) {
            *status = U_STRING_NOT_TERMINATED_WARNING;
        } else {
            *status = U_BUFFER_OVERFLOW_ERROR;
        }
        return resLen;
    }

    /* no match found */
    *status = U_ILLEGAL_ARGUMENT_ERROR;
    return 0;
}
1156
1157
/*
1158
//////////////////////////////////////
1159
//
1160
// POSIX --> LCID
1161
// This should only be called from uloc_getLCID.
1162
// The locale ID must be in canonical form.
1163
//
1164
/////////////////////////////////////
1165
*/
1166
/**
 * Asks the platform for the LCID of a locale ID.
 *
 * The purpose of this function is to leverage the Windows platform name->lcid
 * conversion functionality when available. In builds without it the function
 * always returns 0.
 *
 * @param localeID the ICU locale ID.
 * @param status   nothing is done if it already holds a failure; may be set
 *                 to a failure by the ICU helpers called here.
 * @return the LCID reported by Windows, or 0 when nothing was found, when the
 *         ID carries a collation keyword (so that the LCID lookup table will
 *         be used instead), or when the conversion is not implemented.
 */
U_CAPI uint32_t
uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)
{
    if (U_FAILURE(*status)) {
        return 0;
    }

#if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API
    icu::CharString baseName;
    const char * mylocaleID = localeID;

    // Check for any keywords.
    if (uprv_strchr(localeID, '@'))
    {
        icu::CharString collVal = ulocimp_getKeywordValue(localeID, "collation", *status);
        if (U_SUCCESS(*status) && !collVal.isEmpty())
        {
            // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.
            return 0;
        }

        // If the locale ID contains keywords other than collation, just use the base name.
        baseName = ulocimp_getBaseName(localeID, *status);
        if (U_SUCCESS(*status) && !baseName.isEmpty())
        {
            mylocaleID = baseName.data();
        }
    }

    // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form
    icu::CharString asciiBCP47Tag = ulocimp_toLanguageTag(mylocaleID, false, *status);

    if (U_SUCCESS(*status))
    {
        // Need it to be UTF-16, not 8-bit
        wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {};
        int32_t tagLen = 0;
        while (tagLen < UPRV_LENGTHOF(bcp47Tag) && asciiBCP47Tag[tagLen] != '\0')
        {
            // Copy the character
            bcp47Tag[tagLen] = static_cast<wchar_t>(asciiBCP47Tag[tagLen]);
            ++tagLen;
        }

        // Only usable if the whole tag plus its terminator fit into the buffer.
        if (tagLen < (UPRV_LENGTHOF(bcp47Tag) - 1))
        {
            // Ensure it's null terminated
            bcp47Tag[tagLen] = L'\0';
            LCID lcid = LocaleNameToLCID(bcp47Tag, LOCALE_ALLOW_NEUTRAL_NAMES);
            // Found LCID from windows, return that one, unless its completely ambiguous
            // LOCALE_USER_DEFAULT and transients are OK because they will round trip
            // for this process.
            if (lcid > 0 && lcid != LOCALE_CUSTOM_UNSPECIFIED)
            {
                return lcid;
            }
        }
    }
#else
    (void) localeID; // Suppress unused variable warning.
#endif

    // Nothing found, or not implemented.
    return 0;
}
1245
1246
/**
 * Table based POSIX -> LCID conversion, used when native platform name->lcid
 * conversion isn't available, or for locales that don't follow patterns the
 * platform expects.
 *
 * @param langID  the language part of the locale ID; must have at least two
 *                characters.
 * @param posixID the full locale ID; must have at least two characters.
 * @param status  nothing is looked up if it already holds a failure; set to
 *                U_USING_FALLBACK_WARNING when only a less specific entry
 *                matched and to U_ILLEGAL_ARGUMENT_ERROR when nothing did.
 * @return the LCID, or the root LCID for invalid arguments or no match.
 */
U_CAPI uint32_t
uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)
{
    const bool unusable =
            U_FAILURE(*status) ||
            langID == nullptr ||
            posixID == nullptr ||
            uprv_strlen(langID) < 2 ||
            uprv_strlen(posixID) < 2;
    if (unusable) {
        return locmap_root->hostID;
    }

    /* Binary search for the map entry for normal cases */
    uint32_t lower = 0;
    uint32_t upper = gLocaleCount;
    uint32_t previousProbe = 0;

    while (upper > lower) {
        const uint32_t probe = (upper + lower) >> 1;   /* median */

        /* The bounds are never moved past the probe, so a repeated probe
           means the search has stopped making progress. */
        if (probe == previousProbe) {
            break;
        }

        const int32_t order = uprv_strcmp(langID, gPosixIDmap[probe].regionMaps->posixID);
        if (order == 0) {
            /* we found it */
            return getHostID(&gPosixIDmap[probe], posixID, *status);
        }
        if (order < 0) {
            upper = probe;
        } else {
            lower = probe;
        }
        previousProbe = probe;
    }

    /*
     * Sometimes we can't do a binary search on posixID because some LCIDs
     * go to different locales.  We hit one of those special cases.
     */
    uint32_t fallbackValue = static_cast<uint32_t>(-1);   /* sentinel: no fallback seen */
    for (uint32_t idx = 0; idx < gLocaleCount; idx++) {
        UErrorCode myStatus = U_ZERO_ERROR;
        const uint32_t value = getHostID(&gPosixIDmap[idx], posixID, myStatus);
        if (myStatus == U_ZERO_ERROR) {
            return value;
        }
        if (myStatus == U_USING_FALLBACK_WARNING) {
            /* Remember it, but keep looking for an exact match. */
            fallbackValue = value;
        }
    }

    if (fallbackValue != static_cast<uint32_t>(-1)) {
        *status = U_USING_FALLBACK_WARNING;
        return fallbackValue;
    }

    /* no match found */
    *status = U_ILLEGAL_ARGUMENT_ERROR;
    return locmap_root->hostID;   /* return international (root) */
}