/src/icu/icu4c/source/common/locmap.cpp
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // © 2016 and later: Unicode, Inc. and others.  | 
2  |  | // License & terms of use: http://www.unicode.org/copyright.html  | 
3  |  | /*  | 
4  |  |  **********************************************************************  | 
5  |  |  *   Copyright (C) 1996-2016, International Business Machines  | 
6  |  |  *   Corporation and others.  All Rights Reserved.  | 
7  |  |  **********************************************************************  | 
8  |  |  *  | 
9  |  |  * Provides functionality for mapping between  | 
10  |  |  * LCID and Posix IDs or ICU locale to codepage  | 
11  |  |  *  | 
12  |  |  * Note: All classes and code in this file are  | 
13  |  |  *       intended for internal use only.  | 
14  |  |  *  | 
15  |  |  * Methods of interest:  | 
16  |  |  *   unsigned long convertToLCID(const char*);  | 
17  |  |  *   const char* convertToPosix(unsigned long);  | 
18  |  |  *  | 
19  |  |  * Kathleen Wilson, 4/30/96  | 
20  |  |  *  | 
21  |  |  *  Date        Name        Description  | 
22  |  |  *  3/11/97     aliu        Fixed off-by-one bug in assignment operator. Added  | 
23  |  |  *                          setId() method and safety check against   | 
24  |  |  *                          MAX_ID_LENGTH.  | 
25  |  |  * 04/23/99     stephen     Added C wrapper for convertToPosix.  | 
26  |  |  * 09/18/00     george      Removed the memory leaks.  | 
27  |  |  * 08/23/01     george      Convert to C  | 
28  |  |  */  | 
29  |  |  | 
30  |  | #include "locmap.h"  | 
31  |  | #include "charstr.h"  | 
32  |  | #include "cstring.h"  | 
33  |  | #include "cmemory.h"  | 
34  |  | #include "ulocimp.h"  | 
35  |  | #include "unicode/uloc.h"  | 
36  |  |  | 
37  |  | #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API  | 
38  |  | #include <windows.h>  | 
39  |  | #include <winnls.h> // LCIDToLocaleName and LocaleNameToLCID  | 
40  |  | #endif  | 
41  |  |  | 
42  |  | /*  | 
43  |  |  * Note:  | 
44  |  |  * The mapping from Win32 locale ID numbers to POSIX locale strings should  | 
45  |  |  * be the faster one.  | 
46  |  |  *  | 
47  |  |  * Windows LCIDs are defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx  | 
48  |  |  * [MS-LCID] Windows Language Code Identifier (LCID) Reference  | 
49  |  |  */  | 
50  |  |  | 
51  |  | namespace { | 
52  |  |  | 
53  |  | /*  | 
54  |  | ////////////////////////////////////////////////  | 
55  |  | //  | 
56  |  | // Internal Classes for LCID <--> POSIX Mapping  | 
57  |  | //  | 
58  |  | /////////////////////////////////////////////////  | 
59  |  | */  | 
60  |  |  | 
// One row of a mapping table: a host LCID paired with a POSIX locale ID.
struct ILcidPosixElement
{
    const uint32_t hostID;       // LCID in host format, e.g. 0x0409
    const char* const posixID;   // POSIX locale ID string, e.g. "en_US"
};
66  |  |  | 
// Descriptor for one table: how many rows it has and where they start.
struct ILcidPosixMap
{
    const uint32_t numRegions;                          // row count of regionMaps
    const struct ILcidPosixElement* const regionMaps;   // first row of the table
};
72  |  |  | 
73  |  |  | 
74  |  | /*  | 
75  |  | /////////////////////////////////////////////////  | 
76  |  | //  | 
77  |  | // Easy macros to make the LCID <--> POSIX Mapping  | 
78  |  | //  | 
79  |  | /////////////////////////////////////////////////  | 
80  |  | */  | 
81  |  |  | 
/**
 * Defines the table locmap_<lang> for a language with exactly one
 * language_TERRITORY locale. Two rows are emitted: the language-only
 * parent (its LCID derived with LANGUAGE_LCID) followed by the full locale.
 * Example: ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
 * @param lcid   LCID in host format such as 0x0436
 * @param lang   posix ID of just the language such as 'af'
 * @param locale posix ID of the language_TERRITORY such as 'af_ZA'
 */
#define ILCID_POSIX_ELEMENT_ARRAY(lcid, lang, locale) \
constexpr ILcidPosixElement locmap_ ## lang [] = { \
    { LANGUAGE_LCID(lcid), #lang },   /* parent locale */ \
    { lcid, #locale }, \
};
95  |  |  | 
/**
 * Opens the definition of the table locmap_<tag>. The invocation must be
 * followed by the brace-enclosed list of rows and a terminating semicolon.
 * @param tag the POSIX ID, either a language or language_TERRITORY
 */
#define ILCID_POSIX_SUBTABLE(tag) \
constexpr ILcidPosixElement locmap_ ## tag [] =
102  |  |  | 
103  |  |  | 
/**
 * Produces the ILcidPosixMap initializer for one table: the table's element
 * count followed by the table itself. This relies on the language string
 * being identical to the suffix of the locmap_ variable, and on the first
 * row of that table being the bare language.
 * @param name the full POSIX ID for this entry.
 */
#define ILCID_POSIX_MAP(name) \
    { UPRV_LENGTHOF(locmap_ ## name), locmap_ ## name }
112  |  |  | 
113  |  | /*  | 
114  |  | ////////////////////////////////////////////  | 
115  |  | //  | 
116  |  | // Create the table of LCID to POSIX Mapping  | 
117  |  | // None of it should be dynamically created.  | 
118  |  | //  | 
119  |  | // Keep static locale variables inside the function so that  | 
120  |  | // it can be created properly during static init.  | 
121  |  | //  | 
122  |  | // Note: This table should be updated periodically. Check the [MS-LCID] Windows Language Code Identifier   | 
123  |  | //       (LCID) Reference defined at https://msdn.microsoft.com/en-us/library/cc233965.aspx  | 
124  |  | //  | 
//       Microsoft is moving away from LCID in favor of locale name as of Vista.  This table needs to be
//       maintained for support of older Windows versions.
127  |  | //       Update: Windows 7 (091130)  | 
128  |  | //  | 
// Note: Microsoft assigns a different LCID if a locale has a sorting variant. POSIX IDs below may contain
130  |  | //       @collation=XXX, but no other keywords are allowed (at least for now). When uprv_convertToLCID() is  | 
131  |  | //       called from uloc_getLCID(), keywords other than collation are already removed. If we really need  | 
132  |  | //       to support other keywords in this mapping data, we must update the implementation.  | 
133  |  | ////////////////////////////////////////////  | 
134  |  | */  | 
135  |  |  | 
136  |  | // TODO: For Windows ideally this table would be a list of exceptions rather than a complete list as   | 
137  |  | // LocaleNameToLCID and LCIDToLocaleName provide 90% of these.  | 
138  |  |  | 
// Defines locmap_af: the "af" parent row followed by the single "af_ZA" row.
ILCID_POSIX_ELEMENT_ARRAY(0x0436, af, af_ZA)
140  |  |  | 
141  |  | ILCID_POSIX_SUBTABLE(ar) { | 
142  |  |     {0x01,   "ar"}, | 
143  |  |     {0x3801, "ar_AE"}, | 
144  |  |     {0x3c01, "ar_BH"}, | 
145  |  |     {0x1401, "ar_DZ"}, | 
146  |  |     {0x0c01, "ar_EG"}, | 
147  |  |     {0x0801, "ar_IQ"}, | 
148  |  |     {0x2c01, "ar_JO"}, | 
149  |  |     {0x3401, "ar_KW"}, | 
150  |  |     {0x3001, "ar_LB"}, | 
151  |  |     {0x1001, "ar_LY"}, | 
152  |  |     {0x1801, "ar_MA"}, | 
153  |  |     {0x1801, "ar_MO"}, | 
154  |  |     {0x2001, "ar_OM"}, | 
155  |  |     {0x4001, "ar_QA"}, | 
156  |  |     {0x0401, "ar_SA"}, | 
157  |  |     {0x2801, "ar_SY"}, | 
158  |  |     {0x1c01, "ar_TN"}, | 
159  |  |     {0x2401, "ar_YE"} | 
160  |  | };  | 
161  |  |  | 
// Defines locmap_as, locmap_am and locmap_arn (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x044d, as, as_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x045e, am, am_ET)
ILCID_POSIX_ELEMENT_ARRAY(0x047a, arn,arn_CL)
165  |  |  | 
166  |  | ILCID_POSIX_SUBTABLE(az) { | 
167  |  |     {0x2c,   "az"}, | 
168  |  |     {0x082c, "az_Cyrl_AZ"},  /* Cyrillic based */ | 
169  |  |     {0x742c, "az_Cyrl"},  /* Cyrillic based */ | 
170  |  |     {0x042c, "az_Latn_AZ"}, /* Latin based */ | 
171  |  |     {0x782c, "az_Latn"}, /* Latin based */ | 
172  |  |     {0x042c, "az_AZ"} /* Latin based */ | 
173  |  | };  | 
174  |  |  | 
// Defines locmap_ba and locmap_be (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x046d, ba, ba_RU)
ILCID_POSIX_ELEMENT_ARRAY(0x0423, be, be_BY)
177  |  |  | 
178  |  | /*ILCID_POSIX_SUBTABLE(ber) { | 
179  |  |     {0x5f,   "ber"}, | 
180  |  |     {0x045f, "ber_Arab_DZ"}, | 
181  |  |     {0x045f, "ber_Arab"}, | 
182  |  |     {0x085f, "ber_Latn_DZ"}, | 
183  |  |     {0x085f, "ber_Latn"} | 
184  |  | };*/  | 
185  |  |  | 
// Defines locmap_bg: the "bg" parent row followed by the single "bg_BG" row.
ILCID_POSIX_ELEMENT_ARRAY(0x0402, bg, bg_BG)
187  |  |  | 
188  |  | ILCID_POSIX_SUBTABLE(bin) { | 
189  |  |     {0x66, "bin"}, | 
190  |  |     {0x0466, "bin_NG"} | 
191  |  | };  | 
192  |  |  | 
193  |  | ILCID_POSIX_SUBTABLE(bn) { | 
194  |  |     {0x45,   "bn"}, | 
195  |  |     {0x0845, "bn_BD"}, | 
196  |  |     {0x0445, "bn_IN"} | 
197  |  | };  | 
198  |  |  | 
199  |  | ILCID_POSIX_SUBTABLE(bo) { | 
200  |  |     {0x51,   "bo"}, | 
201  |  |     {0x0851, "bo_BT"}, | 
202  |  |     {0x0451, "bo_CN"}, | 
203  |  |     {0x0c51, "dz_BT"} | 
204  |  | };  | 
205  |  |  | 
// Defines locmap_br: the "br" parent row followed by the single "br_FR" row.
ILCID_POSIX_ELEMENT_ARRAY(0x047e, br, br_FR)
207  |  |  | 
208  |  | ILCID_POSIX_SUBTABLE(ca) { | 
209  |  |     {0x03,   "ca"}, | 
210  |  |     {0x0403, "ca_ES"}, | 
211  |  |     {0x0803, "ca_ES_VALENCIA"} | 
212  |  | };  | 
213  |  |  | 
// Defines locmap_co: the "co" parent row followed by the single "co_FR" row.
ILCID_POSIX_ELEMENT_ARRAY(0x0483, co, co_FR)
215  |  |  | 
216  |  | ILCID_POSIX_SUBTABLE(chr) { | 
217  |  |     {0x05c,  "chr"}, | 
218  |  |     {0x7c5c, "chr_Cher"}, | 
219  |  |     {0x045c, "chr_Cher_US"}, | 
220  |  |     {0x045c, "chr_US"} | 
221  |  | };  | 
222  |  |  | 
223  |  | // ICU has chosen different names for these.  | 
224  |  | ILCID_POSIX_SUBTABLE(ckb) { | 
225  |  |     {0x92,   "ckb"}, | 
226  |  |     {0x7c92, "ckb_Arab"}, | 
227  |  |     {0x0492, "ckb_Arab_IQ"} | 
228  |  | };  | 
229  |  |  | 
/* Declared as cs_CZ to get around compiler errors on z/OS, which defines cs as a function */
/* NOTE(review): the invocation below passes `cs` as the language, so the table is named locmap_cs; confirm whether the remark above is still current. */
ILCID_POSIX_ELEMENT_ARRAY(0x0405, cs, cs_CZ)

// Defines locmap_cy and locmap_da (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x0452, cy, cy_GB)
ILCID_POSIX_ELEMENT_ARRAY(0x0406, da, da_DK)
235  |  |  | 
236  |  | // Windows doesn't know POSIX or BCP47 Unicode phonebook sort names  | 
237  |  | ILCID_POSIX_SUBTABLE(de) { | 
238  |  |     {0x07,   "de"}, | 
239  |  |     {0x0c07, "de_AT"}, | 
240  |  |     {0x0807, "de_CH"}, | 
241  |  |     {0x0407, "de_DE"}, | 
242  |  |     {0x1407, "de_LI"}, | 
243  |  |     {0x1007, "de_LU"}, | 
244  |  |     {0x10407,"de_DE@collation=phonebook"},  /*This is really de_DE_PHONEBOOK on Windows*/ | 
245  |  |     {0x10407,"de@collation=phonebook"}  /*This is really de_DE_PHONEBOOK on Windows*/ | 
246  |  | };  | 
247  |  |  | 
// Defines locmap_dv and locmap_el (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x0465, dv, dv_MV)
ILCID_POSIX_ELEMENT_ARRAY(0x0408, el, el_GR)
250  |  |  | 
251  |  | // Windows uses an empty string for 'invariant'  | 
252  |  | ILCID_POSIX_SUBTABLE(en) { | 
253  |  |     {0x09,   "en"}, | 
254  |  |     {0x0c09, "en_AU"}, | 
255  |  |     {0x2809, "en_BZ"}, | 
256  |  |     {0x1009, "en_CA"}, | 
257  |  |     {0x0809, "en_GB"}, | 
258  |  |     {0x3c09, "en_HK"}, | 
259  |  |     {0x3809, "en_ID"}, | 
260  |  |     {0x1809, "en_IE"}, | 
261  |  |     {0x4009, "en_IN"}, | 
262  |  |     {0x2009, "en_JM"}, | 
263  |  |     {0x4409, "en_MY"}, | 
264  |  |     {0x1409, "en_NZ"}, | 
265  |  |     {0x3409, "en_PH"}, | 
266  |  |     {0x4809, "en_SG"}, | 
267  |  |     {0x2C09, "en_TT"}, | 
268  |  |     {0x0409, "en_US"}, | 
269  |  |     {0x007f, "en_US_POSIX"}, /* duplicate for round-tripping */ | 
270  |  |     {0x2409, "en_029"}, | 
271  |  |     {0x1c09, "en_ZA"}, | 
272  |  |     {0x3009, "en_ZW"}, | 
273  |  |     {0x2409, "en_VI"},  /* Virgin Islands AKA Caribbean Islands (en_CB). On Windows8+ This is 0x1000 or dynamically assigned */ | 
274  |  |     {0x0409, "en_AS"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */ | 
275  |  |     {0x0409, "en_GU"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */ | 
276  |  |     {0x0409, "en_MH"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */ | 
277  |  |     {0x0409, "en_MP"},  /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */ | 
278  |  |     {0x0409, "en_UM"}   /* Alias for en_US. Leave last.  On Windows8+ This is 0x1000 or dynamically assigned */ | 
279  |  | };  | 
280  |  |  | 
281  |  | ILCID_POSIX_SUBTABLE(en_US_POSIX) { | 
282  |  |     {0x007f, "en_US_POSIX"} /* duplicate for roundtripping */ | 
283  |  | };  | 
284  |  |  | 
285  |  | // Windows doesn't know POSIX or BCP47 Unicode traditional sort names  | 
286  |  | ILCID_POSIX_SUBTABLE(es) { | 
287  |  |     {0x0a,   "es"}, | 
288  |  |     {0x2c0a, "es_AR"}, | 
289  |  |     {0x400a, "es_BO"}, | 
290  |  |     {0x340a, "es_CL"}, | 
291  |  |     {0x240a, "es_CO"}, | 
292  |  |     {0x140a, "es_CR"}, | 
293  |  |     {0x5c0a, "es_CU"}, | 
294  |  |     {0x1c0a, "es_DO"}, | 
295  |  |     {0x300a, "es_EC"}, | 
296  |  |     {0x0c0a, "es_ES"},      /*Modern sort.*/ | 
297  |  |     {0x100a, "es_GT"}, | 
298  |  |     {0x480a, "es_HN"}, | 
299  |  |     {0x080a, "es_MX"}, | 
300  |  |     {0x4c0a, "es_NI"}, | 
301  |  |     {0x180a, "es_PA"}, | 
302  |  |     {0x280a, "es_PE"}, | 
303  |  |     {0x500a, "es_PR"}, | 
304  |  |     {0x3c0a, "es_PY"}, | 
305  |  |     {0x440a, "es_SV"}, | 
306  |  |     {0x540a, "es_US"}, | 
307  |  |     {0x380a, "es_UY"}, | 
308  |  |     {0x200a, "es_VE"}, | 
309  |  |     {0x580a, "es_419"}, | 
310  |  |     {0x040a, "es_ES@collation=traditional"}, | 
311  |  |     {0x040a, "es@collation=traditional"}        // Windows will treat this as es-ES@collation=traditional | 
312  |  | };  | 
313  |  |  | 
// Defines locmap_et and locmap_eu (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x0425, et, et_EE)
ILCID_POSIX_ELEMENT_ARRAY(0x042d, eu, eu_ES)
316  |  |  | 
317  |  | /* ISO-639 doesn't distinguish between Persian and Dari.*/  | 
318  |  | ILCID_POSIX_SUBTABLE(fa) { | 
319  |  |     {0x29,   "fa"}, | 
320  |  |     {0x0429, "fa_IR"},  /* Persian/Farsi (Iran) */ | 
321  |  |     {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */ | 
322  |  | };  | 
323  |  |  | 
324  |  |  | 
325  |  | /* duplicate for roundtripping */  | 
326  |  | ILCID_POSIX_SUBTABLE(fa_AF) { | 
327  |  |     {0x8c,   "fa_AF"},  /* Persian/Dari (Afghanistan) */ | 
328  |  |     {0x048c, "fa_AF"}   /* Persian/Dari (Afghanistan) */ | 
329  |  | };  | 
330  |  |  | 
331  |  | ILCID_POSIX_SUBTABLE(ff) { | 
332  |  |     {0x67,   "ff"}, | 
333  |  |     {0x7c67, "ff_Latn"}, | 
334  |  |     {0x0867, "ff_Latn_SN"}, | 
335  |  |     {0x0467, "ff_NG"} | 
336  |  | };  | 
337  |  |  | 
// Defines locmap_fi, locmap_fil and locmap_fo (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x040b, fi, fi_FI)
ILCID_POSIX_ELEMENT_ARRAY(0x0464, fil,fil_PH)
ILCID_POSIX_ELEMENT_ARRAY(0x0438, fo, fo_FO)
341  |  |  | 
342  |  | ILCID_POSIX_SUBTABLE(fr) { | 
343  |  |     {0x0c,   "fr"}, | 
344  |  |     {0x080c, "fr_BE"}, | 
345  |  |     {0x0c0c, "fr_CA"}, | 
346  |  |     {0x240c, "fr_CD"}, | 
347  |  |     {0x240c, "fr_CG"}, | 
348  |  |     {0x100c, "fr_CH"}, | 
349  |  |     {0x300c, "fr_CI"}, | 
350  |  |     {0x2c0c, "fr_CM"}, | 
351  |  |     {0x040c, "fr_FR"}, | 
352  |  |     {0x3c0c, "fr_HT"}, | 
353  |  |     {0x140c, "fr_LU"}, | 
354  |  |     {0x380c, "fr_MA"}, | 
355  |  |     {0x180c, "fr_MC"}, | 
356  |  |     {0x340c, "fr_ML"}, | 
357  |  |     {0x200c, "fr_RE"}, | 
358  |  |     {0x280c, "fr_SN"}, | 
359  |  |     {0xe40c, "fr_015"}, | 
360  |  |     {0x1c0c, "fr_029"} | 
361  |  | };  | 
362  |  |  | 
// Defines locmap_fuv; its LCID 0x0467 is the same value listed for ff_NG in the ff table.
ILCID_POSIX_ELEMENT_ARRAY(0x0467, fuv, fuv_NG)

// Defines locmap_fy: the "fy" parent row followed by the single "fy_NL" row.
ILCID_POSIX_ELEMENT_ARRAY(0x0462, fy, fy_NL)
366  |  |  | 
367  |  | ILCID_POSIX_SUBTABLE(ga) { /* Gaelic (Ireland) */ | 
368  |  |     {0x3c,   "ga"}, | 
369  |  |     {0x083c, "ga_IE"}, | 
370  |  |     {0x043c, "gd_GB"} | 
371  |  | };  | 
372  |  |  | 
373  |  | ILCID_POSIX_SUBTABLE(gd) { /* Gaelic (Scotland) */ | 
374  |  |     {0x91,   "gd"}, | 
375  |  |     {0x0491, "gd_GB"} | 
376  |  | };  | 
377  |  |  | 
// Defines locmap_gl, locmap_gu, locmap_gn and locmap_gsw (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x0456, gl, gl_ES)
ILCID_POSIX_ELEMENT_ARRAY(0x0447, gu, gu_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0474, gn, gn_PY)
ILCID_POSIX_ELEMENT_ARRAY(0x0484, gsw,gsw_FR)
382  |  |  | 
383  |  | ILCID_POSIX_SUBTABLE(ha) { | 
384  |  |     {0x68,   "ha"}, | 
385  |  |     {0x7c68, "ha_Latn"}, | 
386  |  |     {0x0468, "ha_Latn_NG"}, | 
387  |  | };  | 
388  |  |  | 
// Defines locmap_haw, locmap_he and locmap_hi (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x0475, haw,haw_US)
ILCID_POSIX_ELEMENT_ARRAY(0x040d, he, he_IL)
ILCID_POSIX_ELEMENT_ARRAY(0x0439, hi, hi_IN)
392  |  |  | 
393  |  | /* This LCID is really four different locales.*/  | 
394  |  | ILCID_POSIX_SUBTABLE(hr) { | 
395  |  |     {0x1a,   "hr"}, | 
396  |  |     {0x141a, "bs_Latn_BA"},  /* Bosnian, Bosnia and Herzegovina */ | 
397  |  |     {0x681a, "bs_Latn"},  /* Bosnian, Bosnia and Herzegovina */ | 
398  |  |     {0x141a, "bs_BA"},  /* Bosnian, Bosnia and Herzegovina */ | 
399  |  |     {0x781a, "bs"},     /* Bosnian */ | 
400  |  |     {0x201a, "bs_Cyrl_BA"},  /* Bosnian, Bosnia and Herzegovina */ | 
401  |  |     {0x641a, "bs_Cyrl"},  /* Bosnian, Bosnia and Herzegovina */ | 
402  |  |     {0x101a, "hr_BA"},  /* Croatian in Bosnia */ | 
403  |  |     {0x041a, "hr_HR"},  /* Croatian*/ | 
404  |  |     {0x2c1a, "sr_Latn_ME"}, | 
405  |  |     {0x241a, "sr_Latn_RS"}, | 
406  |  |     {0x181a, "sr_Latn_BA"}, /* Serbo-Croatian in Bosnia */ | 
407  |  |     {0x081a, "sr_Latn_CS"}, /* Serbo-Croatian*/ | 
408  |  |     {0x701a, "sr_Latn"},    /* It's 0x1a or 0x081a, pick one to make the test program happy. */ | 
409  |  |     {0x1c1a, "sr_Cyrl_BA"}, /* Serbo-Croatian in Bosnia */ | 
410  |  |     {0x0c1a, "sr_Cyrl_CS"}, /* Serbian*/ | 
411  |  |     {0x301a, "sr_Cyrl_ME"}, | 
412  |  |     {0x281a, "sr_Cyrl_RS"}, | 
413  |  |     {0x6c1a, "sr_Cyrl"},    /* It's 0x1a or 0x0c1a, pick one to make the test program happy. */ | 
414  |  |     {0x7c1a, "sr"}          /* In CLDR sr is sr_Cyrl. */ | 
415  |  | };  | 
416  |  |  | 
417  |  | ILCID_POSIX_SUBTABLE(hsb) { | 
418  |  |     {0x2E,   "hsb"}, | 
419  |  |     {0x042E, "hsb_DE"}, | 
420  |  |     {0x082E, "dsb_DE"}, | 
421  |  |     {0x7C2E, "dsb"}, | 
422  |  | };  | 
423  |  |  | 
// Defines locmap_hu and locmap_hy (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x040e, hu, hu_HU)
ILCID_POSIX_ELEMENT_ARRAY(0x042b, hy, hy_AM)
426  |  |  | 
427  |  | ILCID_POSIX_SUBTABLE(ibb) { | 
428  |  |     {0x69, "ibb"}, | 
429  |  |     {0x0469, "ibb_NG"} | 
430  |  | };  | 
431  |  |  | 
// Defines locmap_id, locmap_ig, locmap_ii and locmap_is (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x0421, id, id_ID)
ILCID_POSIX_ELEMENT_ARRAY(0x0470, ig, ig_NG)
ILCID_POSIX_ELEMENT_ARRAY(0x0478, ii, ii_CN)
ILCID_POSIX_ELEMENT_ARRAY(0x040f, is, is_IS)
436  |  |  | 
437  |  | ILCID_POSIX_SUBTABLE(it) { | 
438  |  |     {0x10,   "it"}, | 
439  |  |     {0x0810, "it_CH"}, | 
440  |  |     {0x0410, "it_IT"} | 
441  |  | };  | 
442  |  |  | 
443  |  | ILCID_POSIX_SUBTABLE(iu) { | 
444  |  |     {0x5d,   "iu"}, | 
445  |  |     {0x045d, "iu_Cans_CA"}, | 
446  |  |     {0x785d, "iu_Cans"}, | 
447  |  |     {0x085d, "iu_Latn_CA"}, | 
448  |  |     {0x7c5d, "iu_Latn"} | 
449  |  | };  | 
450  |  |  | 
// Defines locmap_iw, locmap_ja, locmap_ka, locmap_kk, locmap_kl, locmap_km and locmap_kn.
// iw uses LCID 0x040d, the same value the he table is defined with.
ILCID_POSIX_ELEMENT_ARRAY(0x040d, iw, iw_IL)    /*Left in for compatibility*/
ILCID_POSIX_ELEMENT_ARRAY(0x0411, ja, ja_JP)
ILCID_POSIX_ELEMENT_ARRAY(0x0437, ka, ka_GE)
ILCID_POSIX_ELEMENT_ARRAY(0x043f, kk, kk_KZ)
ILCID_POSIX_ELEMENT_ARRAY(0x046f, kl, kl_GL)
ILCID_POSIX_ELEMENT_ARRAY(0x0453, km, km_KH)
ILCID_POSIX_ELEMENT_ARRAY(0x044b, kn, kn_IN)
458  |  |  | 
459  |  | ILCID_POSIX_SUBTABLE(ko) { | 
460  |  |     {0x12,   "ko"}, | 
461  |  |     {0x0812, "ko_KP"}, | 
462  |  |     {0x0412, "ko_KR"} | 
463  |  | };  | 
464  |  |  | 
// Defines locmap_kok and locmap_kr (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x0457, kok, kok_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0471, kr,  kr_NG)
467  |  |  | 
468  |  | ILCID_POSIX_SUBTABLE(ks) {         /* We could add PK and CN too */ | 
469  |  |     {0x60,   "ks"}, | 
470  |  |     {0x0460, "ks_Arab_IN"}, | 
471  |  |     {0x0860, "ks_Deva_IN"} | 
472  |  | };  | 
473  |  |  | 
// Defines locmap_ky: the "ky" parent row followed by the single "ky_KG" row.
ILCID_POSIX_ELEMENT_ARRAY(0x0440, ky, ky_KG)   /* Kyrgyz is spoken in Kyrgyzstan */
475  |  |  | 
476  |  | ILCID_POSIX_SUBTABLE(la) { | 
477  |  |     {0x76,   "la"}, | 
478  |  |     {0x0476, "la_001"}, | 
479  |  |     {0x0476, "la_IT"}       /*Left in for compatibility*/ | 
480  |  | };  | 
481  |  |  | 
// Defines locmap_lb, locmap_lo, locmap_lt, locmap_lv, locmap_mi, locmap_mk and locmap_ml
// (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x046e, lb, lb_LU)
ILCID_POSIX_ELEMENT_ARRAY(0x0454, lo, lo_LA)
ILCID_POSIX_ELEMENT_ARRAY(0x0427, lt, lt_LT)
ILCID_POSIX_ELEMENT_ARRAY(0x0426, lv, lv_LV)
ILCID_POSIX_ELEMENT_ARRAY(0x0481, mi, mi_NZ)
ILCID_POSIX_ELEMENT_ARRAY(0x042f, mk, mk_MK)
ILCID_POSIX_ELEMENT_ARRAY(0x044c, ml, ml_IN)
489  |  |  | 
490  |  | ILCID_POSIX_SUBTABLE(mn) { | 
491  |  |     {0x50,   "mn"}, | 
492  |  |     {0x0450, "mn_MN"}, | 
493  |  |     {0x7c50, "mn_Mong"}, | 
494  |  |     {0x0850, "mn_Mong_CN"}, | 
495  |  |     {0x0850, "mn_CN"}, | 
496  |  |     {0x7850, "mn_Cyrl"}, | 
497  |  |     {0x0c50, "mn_Mong_MN"} | 
498  |  | };  | 
499  |  |  | 
// Defines locmap_mni, locmap_moh and locmap_mr (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x0458, mni,mni_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x047c, moh,moh_CA)
ILCID_POSIX_ELEMENT_ARRAY(0x044e, mr, mr_IN)
503  |  |  | 
504  |  | ILCID_POSIX_SUBTABLE(ms) { | 
505  |  |     {0x3e,   "ms"}, | 
506  |  |     {0x083e, "ms_BN"},   /* Brunei Darussalam*/ | 
507  |  |     {0x043e, "ms_MY"}    /* Malaysia*/ | 
508  |  | };  | 
509  |  |  | 
// Defines locmap_mt and locmap_my (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x043a, mt, mt_MT)
ILCID_POSIX_ELEMENT_ARRAY(0x0455, my, my_MM)
512  |  |  | 
513  |  | ILCID_POSIX_SUBTABLE(ne) { | 
514  |  |     {0x61,   "ne"}, | 
515  |  |     {0x0861, "ne_IN"},   /* India*/ | 
516  |  |     {0x0461, "ne_NP"}    /* Nepal*/ | 
517  |  | };  | 
518  |  |  | 
519  |  | ILCID_POSIX_SUBTABLE(nl) { | 
520  |  |     {0x13,   "nl"}, | 
521  |  |     {0x0813, "nl_BE"}, | 
522  |  |     {0x0413, "nl_NL"} | 
523  |  | };  | 
524  |  |  | 
525  |  | /* The "no" locale split into nb and nn.  By default in ICU, "no" is nb.*/  | 
526  |  | // TODO: Not all of these are needed on Windows, but I don't know how ICU treats preferred ones here.  | 
527  |  | ILCID_POSIX_SUBTABLE(no) { | 
528  |  |     {0x14,   "no"},     /* really nb_NO - actually Windows differentiates between neutral (no region) and specific (with region) */  | 
529  |  |     {0x7c14, "nb"},     /* really nb */ | 
530  |  |     {0x0414, "nb_NO"},  /* really nb_NO. Keep first in the 414 list. */ | 
531  |  |     {0x0414, "no_NO"},  /* really nb_NO */ | 
532  |  |     {0x0814, "nn_NO"},  /* really nn_NO. Keep first in the 814 list.  */ | 
533  |  |     {0x7814, "nn"},     /* It's 0x14 or 0x814, pick one to make the test program happy. */ | 
534  |  |     {0x0814, "no_NO_NY"}/* really nn_NO */ | 
535  |  | };  | 
536  |  |  | 
// Defines locmap_nso and locmap_oc (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x046c, nso,nso_ZA)   /* TODO: Verify the ISO-639 code */
ILCID_POSIX_ELEMENT_ARRAY(0x0482, oc, oc_FR)
539  |  |  | 
540  |  | ILCID_POSIX_SUBTABLE(om) { /* TODO: Verify the country */ | 
541  |  |     {0x72,   "om"}, | 
542  |  |     {0x0472, "om_ET"}, | 
543  |  |     {0x0472, "gaz_ET"} | 
544  |  | };  | 
545  |  |  | 
546  |  | /* Declared as or_IN to get around compiler errors*/  | 
547  |  | ILCID_POSIX_SUBTABLE(or_IN) { | 
548  |  |     {0x48,   "or"}, | 
549  |  |     {0x0448, "or_IN"}, | 
550  |  | };  | 
551  |  |  | 
552  |  | ILCID_POSIX_SUBTABLE(pa) { | 
553  |  |     {0x46,   "pa"}, | 
554  |  |     {0x0446, "pa_IN"}, | 
555  |  |     {0x0846, "pa_Arab_PK"}, | 
556  |  |     {0x0846, "pa_PK"} | 
557  |  | };  | 
558  |  |  | 
559  |  | ILCID_POSIX_SUBTABLE(pap) { | 
560  |  |     {0x79, "pap"}, | 
561  |  |     {0x0479, "pap_029"}, | 
562  |  |     {0x0479, "pap_AN"}     /*Left in for compatibility*/ | 
563  |  | };  | 
564  |  |  | 
// Defines locmap_pl and locmap_ps (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x0415, pl, pl_PL)
ILCID_POSIX_ELEMENT_ARRAY(0x0463, ps, ps_AF)
567  |  |  | 
568  |  | ILCID_POSIX_SUBTABLE(pt) { | 
569  |  |     {0x16,   "pt"}, | 
570  |  |     {0x0416, "pt_BR"}, | 
571  |  |     {0x0816, "pt_PT"} | 
572  |  | };  | 
573  |  |  | 
574  |  | ILCID_POSIX_SUBTABLE(qu) { | 
575  |  |     {0x6b,   "qu"}, | 
576  |  |     {0x046b, "qu_BO"}, | 
577  |  |     {0x086b, "qu_EC"}, | 
578  |  |     {0x0C6b, "qu_PE"}, | 
579  |  |     {0x046b, "quz_BO"}, | 
580  |  |     {0x086b, "quz_EC"}, | 
581  |  |     {0x0C6b, "quz_PE"} | 
582  |  | };  | 
583  |  |  | 
584  |  | ILCID_POSIX_SUBTABLE(quc) { | 
585  |  |     {0x93,   "quc"}, | 
586  |  |     {0x0493, "quc_CO"}, | 
587  |  |     /*  | 
588  |  |         "quc_Latn_GT" is an exceptional case. Language ID of "quc"  | 
589  |  |         is 0x93, but LCID of "quc_Latn_GT" is 0x486, which should be  | 
590  |  |         under the group of "qut". "qut" is a retired ISO 639-3 language  | 
591  |  |         code for West Central Quiche, and merged to "quc".  | 
592  |  |         It looks Windows previously reserved "qut" for K'iche', but,  | 
593  |  |         decided to use "quc" when adding a locale for K'iche' (Guatemala).  | 
594  |  |  | 
595  |  |         This data structure used here assumes language ID bits in  | 
596  |  |         LCID is unique for alphabetic language code. But this is not true  | 
597  |  |         for "quc_Latn_GT". If we don't have the data below, LCID look up  | 
598  |  |         by alphabetic locale ID (POSIX) will fail. The same entry is found  | 
599  |  |         under "qut" below, which is required for reverse look up.  | 
600  |  |     */  | 
601  |  |     {0x0486, "quc_Latn_GT"} | 
602  |  | };  | 
603  |  |  | 
604  |  | ILCID_POSIX_SUBTABLE(qut) { | 
605  |  |     {0x86,   "qut"}, | 
606  |  |     {0x0486, "qut_GT"}, | 
607  |  |     /*  | 
608  |  |         See the note in "quc" above.  | 
609  |  |     */  | 
610  |  |     {0x0486, "quc_Latn_GT"} | 
611  |  | };  | 
612  |  |  | 
// Defines locmap_rm: the "rm" parent row followed by the single "rm_CH" row.
ILCID_POSIX_ELEMENT_ARRAY(0x0417, rm, rm_CH)
614  |  |  | 
615  |  | ILCID_POSIX_SUBTABLE(ro) { | 
616  |  |     {0x18,   "ro"}, | 
617  |  |     {0x0418, "ro_RO"}, | 
618  |  |     {0x0818, "ro_MD"} | 
619  |  | };  | 
620  |  |  | 
621  |  | // TODO: This is almost certainly 'wrong'.  0 in Windows is a synonym for LOCALE_USER_DEFAULT.  | 
622  |  | // More likely this is a similar concept to the Windows 0x7f Invariant locale ""  | 
623  |  | // (Except that it's not invariant in ICU)  | 
624  |  | ILCID_POSIX_SUBTABLE(root) { | 
625  |  |     {0x00,   "root"} | 
626  |  | };  | 
627  |  |  | 
628  |  | ILCID_POSIX_SUBTABLE(ru) { | 
629  |  |     {0x19,   "ru"}, | 
630  |  |     {0x0419, "ru_RU"}, | 
631  |  |     {0x0819, "ru_MD"} | 
632  |  | };  | 
633  |  |  | 
// Defines locmap_rw, locmap_sa and locmap_sah (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x0487, rw, rw_RW)
ILCID_POSIX_ELEMENT_ARRAY(0x044f, sa, sa_IN)
ILCID_POSIX_ELEMENT_ARRAY(0x0485, sah,sah_RU)
637  |  |  | 
638  |  | ILCID_POSIX_SUBTABLE(sd) { | 
639  |  |     {0x59,   "sd"}, | 
640  |  |     {0x0459, "sd_Deva_IN"}, | 
641  |  |     {0x0459, "sd_IN"}, | 
642  |  |     {0x0859, "sd_Arab_PK"}, | 
643  |  |     {0x0859, "sd_PK"}, | 
644  |  |     {0x7c59, "sd_Arab"} | 
645  |  | };  | 
646  |  |  | 
647  |  | ILCID_POSIX_SUBTABLE(se) { | 
648  |  |     {0x3b,   "se"}, | 
649  |  |     {0x0c3b, "se_FI"}, | 
650  |  |     {0x043b, "se_NO"}, | 
651  |  |     {0x083b, "se_SE"}, | 
652  |  |     {0x783b, "sma"}, | 
653  |  |     {0x183b, "sma_NO"}, | 
654  |  |     {0x1c3b, "sma_SE"}, | 
655  |  |     {0x7c3b, "smj"}, | 
656  |  |     {0x703b, "smn"}, | 
657  |  |     {0x743b, "sms"}, | 
658  |  |     {0x103b, "smj_NO"}, | 
659  |  |     {0x143b, "smj_SE"}, | 
660  |  |     {0x243b, "smn_FI"}, | 
661  |  |     {0x203b, "sms_FI"}, | 
662  |  | };  | 
663  |  |  | 
// Defines locmap_si, locmap_sk and locmap_sl (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x045b, si, si_LK)
ILCID_POSIX_ELEMENT_ARRAY(0x041b, sk, sk_SK)
ILCID_POSIX_ELEMENT_ARRAY(0x0424, sl, sl_SI)
667  |  |  | 
668  |  | ILCID_POSIX_SUBTABLE(so) { | 
669  |  |     {0x77,   "so"}, | 
670  |  |     {0x0477, "so_SO"} | 
671  |  | };  | 
672  |  |  | 
// Defines locmap_sq and locmap_st (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x041c, sq, sq_AL)
ILCID_POSIX_ELEMENT_ARRAY(0x0430, st, st_ZA)
675  |  |  | 
676  |  | ILCID_POSIX_SUBTABLE(sv) { | 
677  |  |     {0x1d,   "sv"}, | 
678  |  |     {0x081d, "sv_FI"}, | 
679  |  |     {0x041d, "sv_SE"} | 
680  |  | };  | 
681  |  |  | 
// Defines locmap_sw and locmap_syr (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x0441, sw, sw_KE)
ILCID_POSIX_ELEMENT_ARRAY(0x045A, syr, syr_SY)
684  |  |  | 
685  |  | ILCID_POSIX_SUBTABLE(ta) { | 
686  |  |     {0x49,   "ta"}, | 
687  |  |     {0x0449, "ta_IN"}, | 
688  |  |     {0x0849, "ta_LK"} | 
689  |  | };  | 
690  |  |  | 
// Defines locmap_te: the "te" parent row followed by the single "te_IN" row.
ILCID_POSIX_ELEMENT_ARRAY(0x044a, te, te_IN)
692  |  |  | 
693  |  | /* Cyrillic based by default */  | 
694  |  | ILCID_POSIX_SUBTABLE(tg) { | 
695  |  |     {0x28,   "tg"}, | 
696  |  |     {0x7c28, "tg_Cyrl"}, | 
697  |  |     {0x0428, "tg_Cyrl_TJ"} | 
698  |  | };  | 
699  |  |  | 
// Defines locmap_th: the "th" parent row followed by the single "th_TH" row.
ILCID_POSIX_ELEMENT_ARRAY(0x041e, th, th_TH)
701  |  |  | 
702  |  | ILCID_POSIX_SUBTABLE(ti) { | 
703  |  |     {0x73,   "ti"}, | 
704  |  |     {0x0873, "ti_ER"}, | 
705  |  |     {0x0473, "ti_ET"} | 
706  |  | };  | 
707  |  |  | 
// Defines locmap_tk: the "tk" parent row followed by the single "tk_TM" row.
ILCID_POSIX_ELEMENT_ARRAY(0x0442, tk, tk_TM)
709  |  |  | 
710  |  | ILCID_POSIX_SUBTABLE(tn) { | 
711  |  |     {0x32,   "tn"}, | 
712  |  |     {0x0832, "tn_BW"}, | 
713  |  |     {0x0432, "tn_ZA"} | 
714  |  | };  | 
715  |  |  | 
// Defines locmap_tr, locmap_ts and locmap_tt (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x041f, tr, tr_TR)
ILCID_POSIX_ELEMENT_ARRAY(0x0431, ts, ts_ZA)
ILCID_POSIX_ELEMENT_ARRAY(0x0444, tt, tt_RU)
719  |  |  | 
720  |  | ILCID_POSIX_SUBTABLE(tzm) { | 
721  |  |     {0x5f,   "tzm"}, | 
722  |  |     {0x7c5f, "tzm_Latn"}, | 
723  |  |     {0x085f, "tzm_Latn_DZ"}, | 
724  |  |     {0x105f, "tzm_Tfng_MA"}, | 
725  |  |     {0x045f, "tzm_Arab_MA"}, | 
726  |  |     {0x045f, "tmz"} | 
727  |  | };  | 
728  |  |  | 
729  |  | ILCID_POSIX_SUBTABLE(ug) { | 
730  |  |     {0x80,   "ug"}, | 
731  |  |     {0x0480, "ug_CN"}, | 
732  |  |     {0x0480, "ug_Arab_CN"} | 
733  |  | };  | 
734  |  |  | 
// Defines locmap_uk: the "uk" parent row followed by the single "uk_UA" row.
ILCID_POSIX_ELEMENT_ARRAY(0x0422, uk, uk_UA)
736  |  |  | 
737  |  | ILCID_POSIX_SUBTABLE(ur) { | 
738  |  |     {0x20,   "ur"}, | 
739  |  |     {0x0820, "ur_IN"}, | 
740  |  |     {0x0420, "ur_PK"} | 
741  |  | };  | 
742  |  |  | 
743  |  | ILCID_POSIX_SUBTABLE(uz) { | 
744  |  |     {0x43,   "uz"}, | 
745  |  |     {0x0843, "uz_Cyrl_UZ"},  /* Cyrillic based */ | 
746  |  |     {0x7843, "uz_Cyrl"},  /* Cyrillic based */ | 
747  |  |     {0x0843, "uz_UZ"},  /* Cyrillic based */ | 
748  |  |     {0x0443, "uz_Latn_UZ"}, /* Latin based */ | 
749  |  |     {0x7c43, "uz_Latn"} /* Latin based */ | 
750  |  | };  | 
751  |  |  | 
752  |  | ILCID_POSIX_SUBTABLE(ve) { /* TODO: Verify the country */ | 
753  |  |     {0x33,   "ve"}, | 
754  |  |     {0x0433, "ve_ZA"}, | 
755  |  |     {0x0433, "ven_ZA"} | 
756  |  | };  | 
757  |  |  | 
// Defines locmap_vi, locmap_wo and locmap_xh (parent row + one regional row each).
ILCID_POSIX_ELEMENT_ARRAY(0x042a, vi, vi_VN)
ILCID_POSIX_ELEMENT_ARRAY(0x0488, wo, wo_SN)
ILCID_POSIX_ELEMENT_ARRAY(0x0434, xh, xh_ZA)
761  |  |  | 
762  |  | ILCID_POSIX_SUBTABLE(yi) { | 
763  |  |     {0x003d, "yi"}, | 
764  |  |     {0x043d, "yi_001"} | 
765  |  | };  | 
766  |  |  | 
// Defines locmap_yo: the "yo" parent row followed by the single "yo_NG" row.
ILCID_POSIX_ELEMENT_ARRAY(0x046a, yo, yo_NG)
768  |  |  | 
// Chinese. Windows and ICU tend to use different names for some of these.
// TODO: Windows probably does not need all of these entries, but I don't know how the precedence works.
//
// Entry order matters:
//  - The first entry supplies the primary language ID that uprv_convertToPosix
//    compares against, and is the fallback result of getPosixID().
//  - getPosixID() returns the first entry with a matching LCID, so for
//    duplicated LCIDs the earlier spelling is the one produced for LCID -> POSIX.
ILCID_POSIX_SUBTABLE(zh) {
    {0x0004, "zh_Hans"},
    {0x7804, "zh"},
    {0x0804, "zh_CN"},
    {0x0804, "zh_Hans_CN"},
    {0x0c04, "zh_Hant_HK"},
    {0x0c04, "zh_HK"},
    {0x1404, "zh_Hant_MO"},
    {0x1404, "zh_MO"},
    {0x1004, "zh_Hans_SG"},
    {0x1004, "zh_SG"},
    {0x0404, "zh_Hant_TW"},
    {0x7c04, "zh_Hant"},
    {0x0404, "zh_TW"},
    {0x30404,"zh_Hant_TW"},     /* Bopomofo order */
    {0x30404,"zh_TW"},          /* Bopomofo order */
    {0x20004,"zh@collation=stroke"},
    {0x20404,"zh_Hant@collation=stroke"},
    {0x20404,"zh_Hant_TW@collation=stroke"},
    {0x20404,"zh_TW@collation=stroke"},
    {0x20804,"zh_Hans@collation=stroke"},
    {0x20804,"zh_Hans_CN@collation=stroke"},
    {0x20804,"zh_CN@collation=stroke"}
    // TODO: Alternate collations for other LCIDs are missing, e.g. 0x50804
};
796  |  |  | 
/* Zulu: LCID 0x0435 paired with zu_ZA. NOTE(review): macro expansion is
   defined outside this excerpt. */
ILCID_POSIX_ELEMENT_ARRAY(0x0435, zu, zu_ZA)
798  |  |  | 
/*
 * Master list of per-language sub-tables. This must be static and grouped by LCID.
 *
 * Two lookups depend on the order of this array:
 *  - uprv_convertToPosix() scans it front to back and stops at the first
 *    sub-table whose leading element has the requested primary language ID, so
 *    when two sub-tables share an ID (e.g. he/iw, ff/fuv) the earlier one wins.
 *  - uprv_convertToLCID() binary-searches it with uprv_strcmp() on the leading
 *    element's posixID, then falls back to a linear scan if that misses.
 * NOTE(review): "lb" is listed before "la", so the array is not strictly sorted
 * by name; the binary search may miss such entries and rely on the linear
 * fallback. Confirm whether that is intentional.
 */
constexpr ILcidPosixMap gPosixIDmap[] = {
    ILCID_POSIX_MAP(af),    /*  af  Afrikaans                 0x36 */
    ILCID_POSIX_MAP(am),    /*  am  Amharic                   0x5e */
    ILCID_POSIX_MAP(ar),    /*  ar  Arabic                    0x01 */
    ILCID_POSIX_MAP(arn),   /*  arn Araucanian/Mapudungun     0x7a */
    ILCID_POSIX_MAP(as),    /*  as  Assamese                  0x4d */
    ILCID_POSIX_MAP(az),    /*  az  Azerbaijani               0x2c */
    ILCID_POSIX_MAP(ba),    /*  ba  Bashkir                   0x6d */
    ILCID_POSIX_MAP(be),    /*  be  Belarusian                0x23 */
/*    ILCID_POSIX_MAP(ber),     ber Berber/Tamazight          0x5f */
    ILCID_POSIX_MAP(bg),    /*  bg  Bulgarian                 0x02 */
    ILCID_POSIX_MAP(bin),   /*  bin Edo                       0x66 */
    ILCID_POSIX_MAP(bn),    /*  bn  Bengali; Bangla           0x45 */
    ILCID_POSIX_MAP(bo),    /*  bo  Tibetan                   0x51 */
    ILCID_POSIX_MAP(br),    /*  br  Breton                    0x7e */
    ILCID_POSIX_MAP(ca),    /*  ca  Catalan                   0x03 */
    ILCID_POSIX_MAP(chr),   /*  chr Cherokee                  0x5c */
    ILCID_POSIX_MAP(ckb),   /*  ckb Sorani (Central Kurdish)  0x92 */
    ILCID_POSIX_MAP(co),    /*  co  Corsican                  0x83 */
    ILCID_POSIX_MAP(cs),    /*  cs  Czech                     0x05 */
    ILCID_POSIX_MAP(cy),    /*  cy  Welsh                     0x52 */
    ILCID_POSIX_MAP(da),    /*  da  Danish                    0x06 */
    ILCID_POSIX_MAP(de),    /*  de  German                    0x07 */
    ILCID_POSIX_MAP(dv),    /*  dv  Divehi                    0x65 */
    ILCID_POSIX_MAP(el),    /*  el  Greek                     0x08 */
    ILCID_POSIX_MAP(en),    /*  en  English                   0x09 */
    ILCID_POSIX_MAP(en_US_POSIX), /*    invariant             0x7f */
    ILCID_POSIX_MAP(es),    /*  es  Spanish                   0x0a */
    ILCID_POSIX_MAP(et),    /*  et  Estonian                  0x25 */
    ILCID_POSIX_MAP(eu),    /*  eu  Basque                    0x2d */
    ILCID_POSIX_MAP(fa),    /*  fa  Persian/Farsi             0x29 */
    ILCID_POSIX_MAP(fa_AF), /*  fa  Persian/Dari              0x8c */
    ILCID_POSIX_MAP(ff),    /*  ff  Fula                      0x67 */
    ILCID_POSIX_MAP(fi),    /*  fi  Finnish                   0x0b */
    ILCID_POSIX_MAP(fil),   /*  fil Filipino                  0x64 */
    ILCID_POSIX_MAP(fo),    /*  fo  Faroese                   0x38 */
    ILCID_POSIX_MAP(fr),    /*  fr  French                    0x0c */
    ILCID_POSIX_MAP(fuv),   /*  fuv Fulfulde - Nigeria        0x67 */
    ILCID_POSIX_MAP(fy),    /*  fy  Frisian                   0x62 */
    ILCID_POSIX_MAP(ga),    /*  *   Gaelic (Ireland,Scotland) 0x3c */
    ILCID_POSIX_MAP(gd),    /*  gd  Gaelic (United Kingdom)   0x91 */
    ILCID_POSIX_MAP(gl),    /*  gl  Galician                  0x56 */
    ILCID_POSIX_MAP(gn),    /*  gn  Guarani                   0x74 */
    ILCID_POSIX_MAP(gsw),   /*  gsw Alemanic/Alsatian/Swiss German 0x84 */
    ILCID_POSIX_MAP(gu),    /*  gu  Gujarati                  0x47 */
    ILCID_POSIX_MAP(ha),    /*  ha  Hausa                     0x68 */
    ILCID_POSIX_MAP(haw),   /*  haw Hawaiian                  0x75 */
    ILCID_POSIX_MAP(he),    /*  he  Hebrew (formerly iw)      0x0d */
    ILCID_POSIX_MAP(hi),    /*  hi  Hindi                     0x39 */
    ILCID_POSIX_MAP(hr),    /*  *   Croatian and others       0x1a */
    ILCID_POSIX_MAP(hsb),   /*  hsb Upper Sorbian             0x2e */
    ILCID_POSIX_MAP(hu),    /*  hu  Hungarian                 0x0e */
    ILCID_POSIX_MAP(hy),    /*  hy  Armenian                  0x2b */
    ILCID_POSIX_MAP(ibb),   /*  ibb Ibibio - Nigeria          0x69 */
    ILCID_POSIX_MAP(id),    /*  id  Indonesian (formerly in)  0x21 */
    ILCID_POSIX_MAP(ig),    /*  ig  Igbo                      0x70 */
    ILCID_POSIX_MAP(ii),    /*  ii  Sichuan Yi                0x78 */
    ILCID_POSIX_MAP(is),    /*  is  Icelandic                 0x0f */
    ILCID_POSIX_MAP(it),    /*  it  Italian                   0x10 */
    ILCID_POSIX_MAP(iu),    /*  iu  Inuktitut                 0x5d */
    ILCID_POSIX_MAP(iw),    /*  iw  Hebrew                    0x0d */
    ILCID_POSIX_MAP(ja),    /*  ja  Japanese                  0x11 */
    ILCID_POSIX_MAP(ka),    /*  ka  Georgian                  0x37 */
    ILCID_POSIX_MAP(kk),    /*  kk  Kazakh                    0x3f */
    ILCID_POSIX_MAP(kl),    /*  kl  Kalaallisut               0x6f */
    ILCID_POSIX_MAP(km),    /*  km  Khmer                     0x53 */
    ILCID_POSIX_MAP(kn),    /*  kn  Kannada                   0x4b */
    ILCID_POSIX_MAP(ko),    /*  ko  Korean                    0x12 */
    ILCID_POSIX_MAP(kok),   /*  kok Konkani                   0x57 */
    ILCID_POSIX_MAP(kr),    /*  kr  Kanuri                    0x71 */
    ILCID_POSIX_MAP(ks),    /*  ks  Kashmiri                  0x60 */
    ILCID_POSIX_MAP(ky),    /*  ky  Kyrgyz                    0x40 */
    ILCID_POSIX_MAP(lb),    /*  lb  Luxembourgish             0x6e */
    ILCID_POSIX_MAP(la),    /*  la  Latin                     0x76 */
    ILCID_POSIX_MAP(lo),    /*  lo  Lao                       0x54 */
    ILCID_POSIX_MAP(lt),    /*  lt  Lithuanian                0x27 */
    ILCID_POSIX_MAP(lv),    /*  lv  Latvian, Lettish          0x26 */
    ILCID_POSIX_MAP(mi),    /*  mi  Maori                     0x81 */
    ILCID_POSIX_MAP(mk),    /*  mk  Macedonian                0x2f */
    ILCID_POSIX_MAP(ml),    /*  ml  Malayalam                 0x4c */
    ILCID_POSIX_MAP(mn),    /*  mn  Mongolian                 0x50 */
    ILCID_POSIX_MAP(mni),   /*  mni Manipuri                  0x58 */
    ILCID_POSIX_MAP(moh),   /*  moh Mohawk                    0x7c */
    ILCID_POSIX_MAP(mr),    /*  mr  Marathi                   0x4e */
    ILCID_POSIX_MAP(ms),    /*  ms  Malay                     0x3e */
    ILCID_POSIX_MAP(mt),    /*  mt  Maltese                   0x3a */
    ILCID_POSIX_MAP(my),    /*  my  Burmese                   0x55 */
/*    ILCID_POSIX_MAP(nb),    //  no  Norwegian                 0x14 */
    ILCID_POSIX_MAP(ne),    /*  ne  Nepali                    0x61 */
    ILCID_POSIX_MAP(nl),    /*  nl  Dutch                     0x13 */
/*    ILCID_POSIX_MAP(nn),    //  no  Norwegian                 0x14 */
    ILCID_POSIX_MAP(no),    /*  *   Norwegian                 0x14 */
    ILCID_POSIX_MAP(nso),   /*  nso Sotho, Northern (Sepedi dialect) 0x6c */
    ILCID_POSIX_MAP(oc),    /*  oc  Occitan                   0x82 */
    ILCID_POSIX_MAP(om),    /*  om  Oromo                     0x72 */
    ILCID_POSIX_MAP(or_IN), /*  or  Oriya                     0x48 */
    ILCID_POSIX_MAP(pa),    /*  pa  Punjabi                   0x46 */
    ILCID_POSIX_MAP(pap),   /*  pap Papiamentu                0x79 */
    ILCID_POSIX_MAP(pl),    /*  pl  Polish                    0x15 */
    ILCID_POSIX_MAP(ps),    /*  ps  Pashto                    0x63 */
    ILCID_POSIX_MAP(pt),    /*  pt  Portuguese                0x16 */
    ILCID_POSIX_MAP(qu),    /*  qu  Quechua                   0x6B */
    ILCID_POSIX_MAP(quc),   /*  quc K'iche                    0x93 */
    ILCID_POSIX_MAP(qut),   /*  qut K'iche                    0x86 */
    ILCID_POSIX_MAP(rm),    /*  rm  Raeto-Romance/Romansh     0x17 */
    ILCID_POSIX_MAP(ro),    /*  ro  Romanian                  0x18 */
    ILCID_POSIX_MAP(root),  /*  root                          0x00 */
    ILCID_POSIX_MAP(ru),    /*  ru  Russian                   0x19 */
    ILCID_POSIX_MAP(rw),    /*  rw  Kinyarwanda               0x87 */
    ILCID_POSIX_MAP(sa),    /*  sa  Sanskrit                  0x4f */
    ILCID_POSIX_MAP(sah),   /*  sah Yakut                     0x85 */
    ILCID_POSIX_MAP(sd),    /*  sd  Sindhi                    0x59 */
    ILCID_POSIX_MAP(se),    /*  se  Sami                      0x3b */
/*    ILCID_POSIX_MAP(sh),    //  sh  Serbo-Croatian            0x1a */
    ILCID_POSIX_MAP(si),    /*  si  Sinhalese                 0x5b */
    ILCID_POSIX_MAP(sk),    /*  sk  Slovak                    0x1b */
    ILCID_POSIX_MAP(sl),    /*  sl  Slovenian                 0x24 */
    ILCID_POSIX_MAP(so),    /*  so  Somali                    0x77 */
    ILCID_POSIX_MAP(sq),    /*  sq  Albanian                  0x1c */
/*    ILCID_POSIX_MAP(sr),    //  sr  Serbian                   0x1a */
    ILCID_POSIX_MAP(st),    /*  st  Sutu                      0x30 */
    ILCID_POSIX_MAP(sv),    /*  sv  Swedish                   0x1d */
    ILCID_POSIX_MAP(sw),    /*  sw  Swahili                   0x41 */
    ILCID_POSIX_MAP(syr),   /*  syr Syriac                    0x5A */
    ILCID_POSIX_MAP(ta),    /*  ta  Tamil                     0x49 */
    ILCID_POSIX_MAP(te),    /*  te  Telugu                    0x4a */
    ILCID_POSIX_MAP(tg),    /*  tg  Tajik                     0x28 */
    ILCID_POSIX_MAP(th),    /*  th  Thai                      0x1e */
    ILCID_POSIX_MAP(ti),    /*  ti  Tigrigna                  0x73 */
    ILCID_POSIX_MAP(tk),    /*  tk  Turkmen                   0x42 */
    ILCID_POSIX_MAP(tn),    /*  tn  Tswana                    0x32 */
    ILCID_POSIX_MAP(tr),    /*  tr  Turkish                   0x1f */
    ILCID_POSIX_MAP(ts),    /*  ts  Tsonga                    0x31 */
    ILCID_POSIX_MAP(tt),    /*  tt  Tatar                     0x44 */
    ILCID_POSIX_MAP(tzm),   /*  tzm Tamazight                 0x5f */
    ILCID_POSIX_MAP(ug),    /*  ug  Uighur                    0x80 */
    ILCID_POSIX_MAP(uk),    /*  uk  Ukrainian                 0x22 */
    ILCID_POSIX_MAP(ur),    /*  ur  Urdu                      0x20 */
    ILCID_POSIX_MAP(uz),    /*  uz  Uzbek                     0x43 */
    ILCID_POSIX_MAP(ve),    /*  ve  Venda                     0x33 */
    ILCID_POSIX_MAP(vi),    /*  vi  Vietnamese                0x2a */
    ILCID_POSIX_MAP(wo),    /*  wo  Wolof                     0x88 */
    ILCID_POSIX_MAP(xh),    /*  xh  Xhosa                     0x34 */
    ILCID_POSIX_MAP(yi),    /*  yi  Yiddish                   0x3d */
    ILCID_POSIX_MAP(yo),    /*  yo  Yoruba                    0x6a */
    ILCID_POSIX_MAP(zh),    /*  zh  Chinese                   0x04 */
    ILCID_POSIX_MAP(zu),    /*  zu  Zulu                      0x35 */
};
948  |  |  | 
/* Number of language sub-tables in gPosixIDmap; used as the loop bound and as
   the initial upper bound of the binary search in uprv_convertToLCID(). */
constexpr uint32_t gLocaleCount = UPRV_LENGTHOF(gPosixIDmap);
950  |  |  | 
/**
 * Internal helper for getHostID; not meant to be called from anywhere else.
 * (It is a free function only because the map type has to remain a plain
 * C struct.)
 *
 * Despite the name this is not an ordering comparison: it measures how far
 * the two IDs agree.
 *
 * @param id1 first NUL-terminated ID.
 * @param id2 second NUL-terminated ID.
 * @return the length of the common leading run of characters, stopping at the
 *         first difference or at the end of either string.
 */
int32_t
idCmp(const char* id1, const char* id2)
{
    int32_t matched = 0;
    for (; id1[matched] != 0 && id1[matched] == id2[matched]; ++matched) {
        /* nothing to do: the loop header performs the whole scan */
    }
    return matched;
}
967  |  |  | 
968  |  | /**  | 
969  |  |  * Searches for a Windows LCID  | 
970  |  |  *  | 
971  |  |  * @param posixID the Posix style locale id.  | 
972  |  |  * @param status gets set to U_ILLEGAL_ARGUMENT_ERROR when the Posix ID has  | 
973  |  |  *               no equivalent Windows LCID.  | 
974  |  |  * @return the LCID  | 
975  |  |  */  | 
976  |  | uint32_t  | 
977  |  | getHostID(const ILcidPosixMap *this_0, const char* posixID, UErrorCode& status)  | 
978  | 0  | { | 
979  | 0  |     if (U_FAILURE(status)) { return locmap_root->hostID; } | 
980  | 0  |     int32_t bestIdx = 0;  | 
981  | 0  |     int32_t bestIdxDiff = 0;  | 
982  | 0  |     int32_t posixIDlen = static_cast<int32_t>(uprv_strlen(posixID));  | 
983  | 0  |     uint32_t idx;  | 
984  |  | 
  | 
985  | 0  |     for (idx = 0; idx < this_0->numRegions; idx++ ) { | 
986  | 0  |         int32_t sameChars = idCmp(posixID, this_0->regionMaps[idx].posixID);  | 
987  | 0  |         if (sameChars > bestIdxDiff && this_0->regionMaps[idx].posixID[sameChars] == 0) { | 
988  | 0  |             if (posixIDlen == sameChars) { | 
989  |  |                 /* Exact match */  | 
990  | 0  |                 return this_0->regionMaps[idx].hostID;  | 
991  | 0  |             }  | 
992  | 0  |             bestIdxDiff = sameChars;  | 
993  | 0  |             bestIdx = idx;  | 
994  | 0  |         }  | 
995  | 0  |     }  | 
996  |  |     /* We asked for something unusual, like en_ZZ, and we try to return the number for the same language. */  | 
997  |  |     /* We also have to make sure that sid and si and similar string subsets don't match. */  | 
998  | 0  |     if ((posixID[bestIdxDiff] == '_' || posixID[bestIdxDiff] == '@')  | 
999  | 0  |         && this_0->regionMaps[bestIdx].posixID[bestIdxDiff] == 0)  | 
1000  | 0  |     { | 
1001  | 0  |         status = U_USING_FALLBACK_WARNING;  | 
1002  | 0  |         return this_0->regionMaps[bestIdx].hostID;  | 
1003  | 0  |     }  | 
1004  |  |  | 
1005  |  |     /*no match found */  | 
1006  | 0  |     status = U_ILLEGAL_ARGUMENT_ERROR;  | 
1007  | 0  |     return locmap_root->hostID;  | 
1008  | 0  | }  | 
1009  |  |  | 
1010  |  | const char*  | 
1011  |  | getPosixID(const ILcidPosixMap *this_0, uint32_t hostID)  | 
1012  | 0  | { | 
1013  | 0  |     uint32_t i;  | 
1014  | 0  |     for (i = 0; i < this_0->numRegions; i++)  | 
1015  | 0  |     { | 
1016  | 0  |         if (this_0->regionMaps[i].hostID == hostID)  | 
1017  | 0  |         { | 
1018  | 0  |             return this_0->regionMaps[i].posixID;  | 
1019  | 0  |         }  | 
1020  | 0  |     }  | 
1021  |  |  | 
1022  |  |     /* If you get here, then no matching region was found,  | 
1023  |  |        so return the language id with the wild card region. */  | 
1024  | 0  |     return this_0->regionMaps[0].posixID;  | 
1025  | 0  | }  | 
1026  |  |  | 
1027  |  | /*  | 
1028  |  | //////////////////////////////////////  | 
1029  |  | //  | 
1030  |  | // LCID --> POSIX  | 
1031  |  | //  | 
1032  |  | /////////////////////////////////////  | 
1033  |  | */  | 
1034  |  | #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API  | 
1035  |  | /*  | 
1036  |  |  * Various language tags needs to be changed:  | 
1037  |  |  * quz -> qu  | 
1038  |  |  * prs -> fa  | 
1039  |  |  */  | 
1040  |  | void FIX_LANGUAGE_ID_TAG(char* buffer, int32_t len) { | 
1041  |  |     if (len >= 3) { | 
1042  |  |         if (buffer[0] == 'q' && buffer[1] == 'u' && buffer[2] == 'z') { | 
1043  |  |             buffer[2] = 0;  | 
1044  |  |             uprv_strcat(buffer, buffer+3);  | 
1045  |  |         } else if (buffer[0] == 'p' && buffer[1] == 'r' && buffer[2] == 's') { | 
1046  |  |             buffer[0] = 'f'; buffer[1] = 'a'; buffer[2] = 0;  | 
1047  |  |             uprv_strcat(buffer, buffer+3);  | 
1048  |  |         }  | 
1049  |  |     }  | 
1050  |  | }  | 
1051  |  | #endif  | 
1052  |  |  | 
1053  |  | }  // namespace  | 
1054  |  |  | 
1055  |  | U_CAPI int32_t  | 
1056  |  | uprv_convertToPosix(uint32_t hostid, char *posixID, int32_t posixIDCapacity, UErrorCode* status)  | 
1057  | 0  | { | 
1058  | 0  |     uint16_t langID;  | 
1059  | 0  |     uint32_t localeIndex;  | 
1060  | 0  |     UBool bLookup = true;  | 
1061  | 0  |     const char *pPosixID = nullptr;  | 
1062  |  | 
  | 
1063  |  | #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API  | 
1064  |  |     static_assert(ULOC_FULLNAME_CAPACITY > LOCALE_NAME_MAX_LENGTH, "Windows locale names have smaller length than ICU locale names.");  | 
1065  |  |  | 
1066  |  |     char locName[LOCALE_NAME_MAX_LENGTH] = {}; | 
1067  |  |  | 
1068  |  |     // Note: Windows primary lang ID 0x92 in LCID is used for Central Kurdish and  | 
1069  |  |     // GetLocaleInfo() maps such LCID to "ku". However, CLDR uses "ku" for  | 
1070  |  |     // Northern Kurdish and "ckb" for Central Kurdish. For this reason, we cannot  | 
1071  |  |     // use the Windows API to resolve locale ID for this specific case.  | 
1072  |  |     if ((hostid & 0x3FF) != 0x92) { | 
1073  |  |         int32_t tmpLen = 0;  | 
1074  |  |         char16_t windowsLocaleName[LOCALE_NAME_MAX_LENGTH] = {}; | 
1075  |  |  | 
1076  |  |         // Note: LOCALE_ALLOW_NEUTRAL_NAMES was enabled in Windows7+, prior versions did not handle neutral (no-region) locale names.  | 
1077  |  |         tmpLen = LCIDToLocaleName(hostid, (PWSTR)windowsLocaleName, UPRV_LENGTHOF(windowsLocaleName), LOCALE_ALLOW_NEUTRAL_NAMES);  | 
1078  |  |         if (tmpLen > 1) { | 
1079  |  |             int32_t i = 0;  | 
1080  |  |             // Only need to look up in table if have _, eg for de-de_phoneb type alternate sort.  | 
1081  |  |             bLookup = false;  | 
1082  |  |             for (i = 0; i < UPRV_LENGTHOF(locName); i++)  | 
1083  |  |             { | 
1084  |  |                 locName[i] = (char)(windowsLocaleName[i]);  | 
1085  |  |  | 
1086  |  |                 // Windows locale name may contain sorting variant, such as "es-ES_tradnl".  | 
1087  |  |                 // In such cases, we need special mapping data found in the hardcoded table  | 
1088  |  |                 // in this source file.  | 
1089  |  |                 if (windowsLocaleName[i] == L'_')  | 
1090  |  |                 { | 
1091  |  |                     // Keep the base locale, without variant  | 
1092  |  |                     // TODO: Should these be mapped from _phoneb to @collation=phonebook, etc.?  | 
1093  |  |                     locName[i] = '\0';  | 
1094  |  |                     tmpLen = i;  | 
1095  |  |                     bLookup = true;  | 
1096  |  |                     break;  | 
1097  |  |                 }  | 
1098  |  |                 else if (windowsLocaleName[i] == L'-')  | 
1099  |  |                 { | 
1100  |  |                     // Windows names use -, ICU uses _  | 
1101  |  |                     locName[i] = '_';  | 
1102  |  |                 }  | 
1103  |  |                 else if (windowsLocaleName[i] == L'\0')  | 
1104  |  |                 { | 
1105  |  |                     // No point in doing more work than necessary  | 
1106  |  |                     break;  | 
1107  |  |                 }  | 
1108  |  |             }  | 
1109  |  |             // TODO: Need to understand this better, why isn't it an alias?  | 
1110  |  |             FIX_LANGUAGE_ID_TAG(locName, tmpLen);  | 
1111  |  |             pPosixID = locName;  | 
1112  |  |         }  | 
1113  |  |     }  | 
1114  |  | #endif  | 
1115  |  | 
  | 
1116  | 0  |     if (bLookup) { | 
1117  | 0  |         const char *pCandidate = nullptr;  | 
1118  | 0  |         langID = LANGUAGE_LCID(hostid);  | 
1119  |  | 
  | 
1120  | 0  |         for (localeIndex = 0; localeIndex < gLocaleCount; localeIndex++) { | 
1121  | 0  |             if (langID == gPosixIDmap[localeIndex].regionMaps->hostID) { | 
1122  | 0  |                 pCandidate = getPosixID(&gPosixIDmap[localeIndex], hostid);  | 
1123  | 0  |                 break;  | 
1124  | 0  |             }  | 
1125  | 0  |         }  | 
1126  |  |  | 
1127  |  |         /* On Windows, when locale name has a variant, we still look up the hardcoded table.  | 
1128  |  |            If a match in the hardcoded table is longer than the Windows locale name without  | 
1129  |  |            variant, we use the one as the result */  | 
1130  | 0  |         if (pCandidate && (pPosixID == nullptr || uprv_strlen(pCandidate) > uprv_strlen(pPosixID))) { | 
1131  | 0  |             pPosixID = pCandidate;  | 
1132  | 0  |         }  | 
1133  | 0  |     }  | 
1134  |  | 
  | 
1135  | 0  |     if (pPosixID) { | 
1136  | 0  |         int32_t resLen = static_cast<int32_t>(uprv_strlen(pPosixID));  | 
1137  | 0  |         int32_t copyLen = resLen <= posixIDCapacity ? resLen : posixIDCapacity;  | 
1138  | 0  |         uprv_memcpy(posixID, pPosixID, copyLen);  | 
1139  | 0  |         if (resLen < posixIDCapacity) { | 
1140  | 0  |             posixID[resLen] = 0;  | 
1141  | 0  |             if (*status == U_STRING_NOT_TERMINATED_WARNING) { | 
1142  | 0  |                 *status = U_ZERO_ERROR;  | 
1143  | 0  |             }  | 
1144  | 0  |         } else if (resLen == posixIDCapacity) { | 
1145  | 0  |             *status = U_STRING_NOT_TERMINATED_WARNING;  | 
1146  | 0  |         } else { | 
1147  | 0  |             *status = U_BUFFER_OVERFLOW_ERROR;  | 
1148  | 0  |         }  | 
1149  | 0  |         return resLen;  | 
1150  | 0  |     }  | 
1151  |  |  | 
1152  |  |     /* no match found */  | 
1153  | 0  |     *status = U_ILLEGAL_ARGUMENT_ERROR;  | 
1154  | 0  |     return 0;  | 
1155  | 0  | }  | 
1156  |  |  | 
1157  |  | /*  | 
1158  |  | //////////////////////////////////////  | 
1159  |  | //  | 
1160  |  | // POSIX --> LCID  | 
1161  |  | // This should only be called from uloc_getLCID.  | 
1162  |  | // The locale ID must be in canonical form.  | 
1163  |  | //  | 
1164  |  | /////////////////////////////////////  | 
1165  |  | */  | 
1166  |  | U_CAPI uint32_t  | 
1167  |  | uprv_convertToLCIDPlatform(const char* localeID, UErrorCode* status)  | 
1168  | 0  | { | 
1169  | 0  |     if (U_FAILURE(*status)) { | 
1170  | 0  |         return 0;  | 
1171  | 0  |     }  | 
1172  |  |  | 
1173  |  |     // The purpose of this function is to leverage the Windows platform name->lcid  | 
1174  |  |     // conversion functionality when available.  | 
1175  |  | #if U_PLATFORM_HAS_WIN32_API && UCONFIG_USE_WINDOWS_LCID_MAPPING_API  | 
1176  |  |     int32_t len;  | 
1177  |  |     icu::CharString baseName;  | 
1178  |  |     const char * mylocaleID = localeID;  | 
1179  |  |  | 
1180  |  |     // Check any for keywords.  | 
1181  |  |     if (uprv_strchr(localeID, '@'))  | 
1182  |  |     { | 
1183  |  |         icu::CharString collVal = ulocimp_getKeywordValue(localeID, "collation", *status);  | 
1184  |  |         if (U_SUCCESS(*status) && !collVal.isEmpty())  | 
1185  |  |         { | 
1186  |  |             // If it contains the keyword collation, return 0 so that the LCID lookup table will be used.  | 
1187  |  |             return 0;  | 
1188  |  |         }  | 
1189  |  |         else  | 
1190  |  |         { | 
1191  |  |             // If the locale ID contains keywords other than collation, just use the base name.  | 
1192  |  |             baseName = ulocimp_getBaseName(localeID, *status);  | 
1193  |  |             if (U_SUCCESS(*status) && !baseName.isEmpty())  | 
1194  |  |             { | 
1195  |  |                 mylocaleID = baseName.data();  | 
1196  |  |             }  | 
1197  |  |         }  | 
1198  |  |     }  | 
1199  |  |  | 
1200  |  |     // this will change it from de_DE@collation=phonebook to de-DE-u-co-phonebk form  | 
1201  |  |     icu::CharString asciiBCP47Tag = ulocimp_toLanguageTag(mylocaleID, false, *status);  | 
1202  |  |  | 
1203  |  |     if (U_SUCCESS(*status))  | 
1204  |  |     { | 
1205  |  |         // Need it to be UTF-16, not 8-bit  | 
1206  |  |         wchar_t bcp47Tag[LOCALE_NAME_MAX_LENGTH] = {}; | 
1207  |  |         int32_t i;  | 
1208  |  |         for (i = 0; i < UPRV_LENGTHOF(bcp47Tag); i++)  | 
1209  |  |         { | 
1210  |  |             if (asciiBCP47Tag[i] == '\0')  | 
1211  |  |             { | 
1212  |  |                 break;  | 
1213  |  |             }  | 
1214  |  |             else  | 
1215  |  |             { | 
1216  |  |                 // Copy the character  | 
1217  |  |                 bcp47Tag[i] = static_cast<wchar_t>(asciiBCP47Tag[i]);  | 
1218  |  |             }  | 
1219  |  |         }  | 
1220  |  |  | 
1221  |  |         if (i < (UPRV_LENGTHOF(bcp47Tag) - 1))  | 
1222  |  |         { | 
1223  |  |             // Ensure it's null terminated  | 
1224  |  |             bcp47Tag[i] = L'\0';  | 
1225  |  |             LCID lcid = LocaleNameToLCID(bcp47Tag, LOCALE_ALLOW_NEUTRAL_NAMES);  | 
1226  |  |             if (lcid > 0)  | 
1227  |  |             { | 
1228  |  |                 // Found LCID from windows, return that one, unless its completely ambiguous  | 
1229  |  |                 // LOCALE_USER_DEFAULT and transients are OK because they will round trip  | 
1230  |  |                 // for this process.  | 
1231  |  |                 if (lcid != LOCALE_CUSTOM_UNSPECIFIED)  | 
1232  |  |                 { | 
1233  |  |                     return lcid;  | 
1234  |  |                 }  | 
1235  |  |             }  | 
1236  |  |         }  | 
1237  |  |     }  | 
1238  |  | #else  | 
1239  | 0  |     (void) localeID; // Suppress unused variable warning.  | 
1240  | 0  | #endif  | 
1241  |  |  | 
1242  |  |     // Nothing found, or not implemented.  | 
1243  | 0  |     return 0;  | 
1244  | 0  | }  | 
1245  |  |  | 
1246  |  | U_CAPI uint32_t  | 
1247  |  | uprv_convertToLCID(const char *langID, const char* posixID, UErrorCode* status)  | 
1248  | 0  | { | 
1249  | 0  |     if (U_FAILURE(*status) ||  | 
1250  | 0  |             langID == nullptr ||  | 
1251  | 0  |             posixID == nullptr ||  | 
1252  | 0  |             uprv_strlen(langID) < 2 ||  | 
1253  | 0  |             uprv_strlen(posixID) < 2) { | 
1254  | 0  |         return locmap_root->hostID;  | 
1255  | 0  |     }  | 
1256  |  |  | 
1257  |  |     // This function does the table lookup when native platform name->lcid conversion isn't available,  | 
1258  |  |     // or for locales that don't follow patterns the platform expects.  | 
1259  | 0  |     uint32_t   low    = 0;  | 
1260  | 0  |     uint32_t   high   = gLocaleCount;  | 
1261  | 0  |     uint32_t   mid;  | 
1262  | 0  |     uint32_t   oldmid = 0;  | 
1263  | 0  |     int32_t    compVal;  | 
1264  |  | 
  | 
1265  | 0  |     uint32_t   value         = 0;  | 
1266  | 0  |     uint32_t   fallbackValue = (uint32_t)-1;  | 
1267  | 0  |     UErrorCode myStatus;  | 
1268  | 0  |     uint32_t   idx;  | 
1269  |  |  | 
1270  |  |     /*Binary search for the map entry for normal cases */  | 
1271  |  | 
  | 
1272  | 0  |     while (high > low)  /*binary search*/{ | 
1273  |  | 
  | 
1274  | 0  |         mid = (high+low) >> 1; /*Finds median*/  | 
1275  |  | 
  | 
1276  | 0  |         if (mid == oldmid)   | 
1277  | 0  |             break;  | 
1278  |  |  | 
1279  | 0  |         compVal = uprv_strcmp(langID, gPosixIDmap[mid].regionMaps->posixID);  | 
1280  | 0  |         if (compVal < 0){ | 
1281  | 0  |             high = mid;  | 
1282  | 0  |         }  | 
1283  | 0  |         else if (compVal > 0){ | 
1284  | 0  |             low = mid;  | 
1285  | 0  |         }  | 
1286  | 0  |         else /*we found it*/{ | 
1287  | 0  |             return getHostID(&gPosixIDmap[mid], posixID, *status);  | 
1288  | 0  |         }  | 
1289  | 0  |         oldmid = mid;  | 
1290  | 0  |     }  | 
1291  |  |  | 
1292  |  |     /*  | 
1293  |  |      * Sometimes we can't do a binary search on posixID because some LCIDs  | 
1294  |  |      * go to different locales.  We hit one of those special cases.  | 
1295  |  |      */  | 
1296  | 0  |     for (idx = 0; idx < gLocaleCount; idx++ ) { | 
1297  | 0  |         myStatus = U_ZERO_ERROR;  | 
1298  | 0  |         value = getHostID(&gPosixIDmap[idx], posixID, myStatus);  | 
1299  | 0  |         if (myStatus == U_ZERO_ERROR) { | 
1300  | 0  |             return value;  | 
1301  | 0  |         }  | 
1302  | 0  |         else if (myStatus == U_USING_FALLBACK_WARNING) { | 
1303  | 0  |             fallbackValue = value;  | 
1304  | 0  |         }  | 
1305  | 0  |     }  | 
1306  |  |  | 
1307  | 0  |     if (fallbackValue != (uint32_t)-1) { | 
1308  | 0  |         *status = U_USING_FALLBACK_WARNING;  | 
1309  | 0  |         return fallbackValue;  | 
1310  | 0  |     }  | 
1311  |  |  | 
1312  |  |     /* no match found */  | 
1313  | 0  |     *status = U_ILLEGAL_ARGUMENT_ERROR;  | 
1314  | 0  |     return locmap_root->hostID;   /* return international (root) */  | 
1315  | 0  | }  |