/src/libidn2/unistring/localcharset.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* Determine a canonical name for the current locale's character encoding.  | 
2  |  |  | 
3  |  |    Copyright (C) 2000-2006, 2008-2024 Free Software Foundation, Inc.  | 
4  |  |  | 
5  |  |    This file is free software: you can redistribute it and/or modify  | 
6  |  |    it under the terms of the GNU Lesser General Public License as  | 
7  |  |    published by the Free Software Foundation; either version 2.1 of the  | 
8  |  |    License, or (at your option) any later version.  | 
9  |  |  | 
10  |  |    This file is distributed in the hope that it will be useful,  | 
11  |  |    but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
12  |  |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  | 
13  |  |    GNU Lesser General Public License for more details.  | 
14  |  |  | 
15  |  |    You should have received a copy of the GNU Lesser General Public License  | 
16  |  |    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */  | 
17  |  |  | 
18  |  | /* Written by Bruno Haible <bruno@clisp.org>.  */  | 
19  |  |  | 
20  |  | #include <config.h>  | 
21  |  |  | 
22  |  | /* Specification.  */  | 
23  |  | #include "localcharset.h"  | 
24  |  |  | 
25  |  | #include <stddef.h>  | 
26  |  | #include <stdio.h>  | 
27  |  | #include <string.h>  | 
28  |  | #include <stdlib.h>  | 
29  |  |  | 
30  |  | #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET  | 
31  |  | # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */  | 
32  |  | #endif  | 
33  |  |  | 
34  |  | #if defined _WIN32 && !defined __CYGWIN__  | 
35  |  | # define WINDOWS_NATIVE  | 
36  |  | # include <locale.h>  | 
37  |  | #endif  | 
38  |  |  | 
39  |  | #if defined __EMX__  | 
40  |  | /* Assume EMX program runs on OS/2, even if compiled under DOS.  */  | 
41  |  | # ifndef OS2  | 
42  |  | #  define OS2  | 
43  |  | # endif  | 
44  |  | #endif  | 
45  |  |  | 
46  |  | #if !defined WINDOWS_NATIVE  | 
47  |  | # if HAVE_LANGINFO_CODESET  | 
48  |  | #  include <langinfo.h>  | 
49  |  | # else  | 
50  |  | #  if 0 /* see comment regarding use of setlocale(), below */  | 
51  |  | #   include <locale.h>  | 
52  |  | #  endif  | 
53  |  | # endif  | 
54  |  | # ifdef __CYGWIN__  | 
55  |  | #  define WIN32_LEAN_AND_MEAN  | 
56  |  | #  include <windows.h>  | 
57  |  | # endif  | 
58  |  | #elif defined WINDOWS_NATIVE  | 
59  |  | # define WIN32_LEAN_AND_MEAN  | 
60  |  | # include <windows.h>  | 
61  |  |   /* For the use of setlocale() below, the Gnulib override in setlocale.c is  | 
62  |  |      not needed; see the platform lists in setlocale_null.m4.  */  | 
63  |  | # undef setlocale  | 
64  |  | #endif  | 
65  |  | #if defined OS2  | 
66  |  | # define INCL_DOS  | 
67  |  | # include <os2.h>  | 
68  |  | #endif  | 
69  |  |  | 
70  |  | /* For MB_CUR_MAX_L */  | 
71  |  | #if defined DARWIN7  | 
72  |  | # include <xlocale.h>  | 
73  |  | #endif  | 
74  |  |  | 
75  |  |  | 
76  |  | #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2  | 
77  |  |  | 
78  |  | /* On these platforms, we use a mapping from non-canonical encoding name  | 
79  |  |    to GNU canonical encoding name.  */  | 
80  |  |  | 
81  |  | /* With glibc-2.1 or newer, we don't need any canonicalization,  | 
82  |  |    because glibc has iconv and both glibc and libiconv support all  | 
83  |  |    GNU canonical names directly.  */  | 
84  |  | # if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__)  | 
85  |  |  | 
86  |  | struct table_entry  /* One alias -> canonical-name pair stored in alias_table.  */  | 
87  |  | { | 
88  |  |   const char alias[11+1];      /* Non-canonical encoding name; room for 11 characters plus NUL.  */  | 
89  |  |   const char canonical[11+1];  /* GNU canonical encoding name; room for 11 characters plus NUL.  */  | 
90  |  | };  | 
91  |  |  | 
92  |  | /* Table of platform-dependent mappings from alias to canonical name; each platform section is sorted by alias in ascending byte order.  No section ends with a comma, so at most one section may be active per build.  */  | 
93  |  | static const struct table_entry alias_table[] =  | 
94  |  |   { | 
95  |  | #  if defined __FreeBSD__                                   /* FreeBSD */  | 
96  |  |   /*{ "ARMSCII-8",  "ARMSCII-8" },*/ | 
97  |  |     { "Big5",       "BIG5" }, | 
98  |  |     { "C",          "ASCII" }, | 
99  |  |   /*{ "CP1131",     "CP1131" },*/ | 
100  |  |   /*{ "CP1251",     "CP1251" },*/ | 
101  |  |   /*{ "CP866",      "CP866" },*/ | 
102  |  |   /*{ "GB18030",    "GB18030" },*/ | 
103  |  |   /*{ "GB2312",     "GB2312" },*/ | 
104  |  |   /*{ "GBK",        "GBK" },*/ | 
105  |  |   /*{ "ISCII-DEV",  "?" },*/ | 
106  |  |     { "ISO8859-1",  "ISO-8859-1" }, | 
107  |  |     { "ISO8859-13", "ISO-8859-13" }, | 
108  |  |     { "ISO8859-15", "ISO-8859-15" }, | 
109  |  |     { "ISO8859-2",  "ISO-8859-2" }, | 
110  |  |     { "ISO8859-5",  "ISO-8859-5" }, | 
111  |  |     { "ISO8859-7",  "ISO-8859-7" }, | 
112  |  |     { "ISO8859-9",  "ISO-8859-9" }, | 
113  |  |   /*{ "KOI8-R",     "KOI8-R" },*/ | 
114  |  |   /*{ "KOI8-U",     "KOI8-U" },*/ | 
115  |  |     { "SJIS",       "SHIFT_JIS" }, | 
116  |  |     { "US-ASCII",   "ASCII" }, | 
117  |  |     { "eucCN",      "GB2312" }, | 
118  |  |     { "eucJP",      "EUC-JP" }, | 
119  |  |     { "eucKR",      "EUC-KR" } | 
120  |  | #   define alias_table_defined  | 
121  |  | #  endif  | 
122  |  | #  if defined __NetBSD__                                    /* NetBSD */  | 
123  |  |     { "646",        "ASCII" }, | 
124  |  |   /*{ "ARMSCII-8",  "ARMSCII-8" },*/ | 
125  |  |   /*{ "BIG5",       "BIG5" },*/ | 
126  |  |     { "Big5-HKSCS", "BIG5-HKSCS" }, | 
127  |  |   /*{ "CP1251",     "CP1251" },*/ | 
128  |  |   /*{ "CP866",      "CP866" },*/ | 
129  |  |   /*{ "GB18030",    "GB18030" },*/ | 
130  |  |   /*{ "GB2312",     "GB2312" },*/ | 
131  |  |     { "ISO8859-1",  "ISO-8859-1" }, | 
132  |  |     { "ISO8859-13", "ISO-8859-13" }, | 
133  |  |     { "ISO8859-15", "ISO-8859-15" }, | 
134  |  |     { "ISO8859-2",  "ISO-8859-2" }, | 
135  |  |     { "ISO8859-4",  "ISO-8859-4" }, | 
136  |  |     { "ISO8859-5",  "ISO-8859-5" }, | 
137  |  |     { "ISO8859-7",  "ISO-8859-7" }, | 
138  |  |   /*{ "KOI8-R",     "KOI8-R" },*/ | 
139  |  |   /*{ "KOI8-U",     "KOI8-U" },*/ | 
140  |  |   /*{ "PT154",      "PT154" },*/ | 
141  |  |     { "SJIS",       "SHIFT_JIS" }, | 
142  |  |     { "eucCN",      "GB2312" }, | 
143  |  |     { "eucJP",      "EUC-JP" }, | 
144  |  |     { "eucKR",      "EUC-KR" }, | 
145  |  |     { "eucTW",      "EUC-TW" } | 
146  |  | #   define alias_table_defined  | 
147  |  | #  endif  | 
148  |  | #  if defined __OpenBSD__                                   /* OpenBSD */  | 
149  |  |     { "646",        "ASCII" }, | 
150  |  |     { "ISO8859-1",  "ISO-8859-1" }, | 
151  |  |     { "ISO8859-13", "ISO-8859-13" }, | 
152  |  |     { "ISO8859-15", "ISO-8859-15" }, | 
153  |  |     { "ISO8859-2",  "ISO-8859-2" }, | 
154  |  |     { "ISO8859-4",  "ISO-8859-4" }, | 
155  |  |     { "ISO8859-5",  "ISO-8859-5" }, | 
156  |  |     { "ISO8859-7",  "ISO-8859-7" }, | 
157  |  |     { "US-ASCII",   "ASCII" } | 
158  |  | #   define alias_table_defined  | 
159  |  | #  endif  | 
160  |  | #  if defined __APPLE__ && defined __MACH__                 /* Mac OS X */  | 
161  |  |     /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is  | 
162  |  |        useless:  | 
163  |  |        - It returns the empty string when LANG is set to a locale of the  | 
164  |  |          form ll_CC, although ll_CC/LC_CTYPE is a symlink to a UTF-8  | 
165  |  |          LC_CTYPE file.  | 
166  |  |        - The environment variables LANG, LC_CTYPE, LC_ALL are not set by  | 
167  |  |          the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.  | 
168  |  |        - The documentation says:  | 
169  |  |            "... all code that calls BSD system routines should ensure  | 
170  |  |             that the const *char parameters of these routines are in UTF-8  | 
171  |  |             encoding. All BSD system functions expect their string  | 
172  |  |             parameters to be in UTF-8 encoding and nothing else."  | 
173  |  |          It also says  | 
174  |  |            "An additional caveat is that string parameters for files,  | 
175  |  |             paths, and other file-system entities must be in canonical  | 
176  |  |             UTF-8. In a canonical UTF-8 Unicode string, all decomposable  | 
177  |  |             characters are decomposed ..."  | 
178  |  |          but this is not true: You can pass non-decomposed UTF-8 strings  | 
179  |  |          to file system functions, and it is the OS which will convert  | 
180  |  |          them to decomposed UTF-8 before accessing the file system.  | 
181  |  |        - The Apple Terminal application displays UTF-8 by default.  | 
182  |  |        - However, other applications are free to use different encodings:  | 
183  |  |          - xterm uses ISO-8859-1 by default.  | 
184  |  |          - TextEdit uses MacRoman by default.  | 
185  |  |        We prefer UTF-8 over decomposed UTF-8-MAC because one should  | 
186  |  |        minimize the use of decomposed Unicode. Unfortunately, through the  | 
187  |  |        Darwin file system, decomposed UTF-8 strings are leaked into user  | 
188  |  |        space nevertheless.  | 
189  |  |        Then there are also the locales with encodings other than US-ASCII  | 
190  |  |        and UTF-8. These locales can be occasionally useful to users (e.g.  | 
191  |  |        when grepping through ISO-8859-1 encoded text files), when all their  | 
192  |  |        file names are in US-ASCII.  | 
193  |  |      */  | 
194  |  |     { "ARMSCII-8",  "ARMSCII-8" }, | 
195  |  |     { "Big5",       "BIG5" }, | 
196  |  |     { "Big5HKSCS",  "BIG5-HKSCS" }, | 
197  |  |     { "CP1131",     "CP1131" }, | 
198  |  |     { "CP1251",     "CP1251" }, | 
199  |  |     { "CP866",      "CP866" }, | 
200  |  |     { "CP949",      "CP949" }, | 
201  |  |     { "GB18030",    "GB18030" }, | 
202  |  |     { "GB2312",     "GB2312" }, | 
203  |  |     { "GBK",        "GBK" }, | 
204  |  |   /*{ "ISCII-DEV",  "?" },*/ | 
205  |  |     { "ISO8859-1",  "ISO-8859-1" }, | 
206  |  |     { "ISO8859-13", "ISO-8859-13" }, | 
207  |  |     { "ISO8859-15", "ISO-8859-15" }, | 
208  |  |     { "ISO8859-2",  "ISO-8859-2" }, | 
209  |  |     { "ISO8859-4",  "ISO-8859-4" }, | 
210  |  |     { "ISO8859-5",  "ISO-8859-5" }, | 
211  |  |     { "ISO8859-7",  "ISO-8859-7" }, | 
212  |  |     { "ISO8859-9",  "ISO-8859-9" }, | 
213  |  |     { "KOI8-R",     "KOI8-R" }, | 
214  |  |     { "KOI8-U",     "KOI8-U" }, | 
215  |  |     { "PT154",      "PT154" }, | 
216  |  |     { "SJIS",       "SHIFT_JIS" }, | 
217  |  |     { "eucCN",      "GB2312" }, | 
218  |  |     { "eucJP",      "EUC-JP" }, | 
219  |  |     { "eucKR",      "EUC-KR" } | 
220  |  | #   define alias_table_defined  | 
221  |  | #  endif  | 
222  |  | #  if defined _AIX                                          /* AIX */  | 
223  |  |   /*{ "GBK",        "GBK" },*/ | 
224  |  |     { "IBM-1046",   "CP1046" }, | 
225  |  |     { "IBM-1124",   "CP1124" }, | 
226  |  |     { "IBM-1129",   "CP1129" }, | 
227  |  |     { "IBM-1252",   "CP1252" }, | 
228  |  |     { "IBM-850",    "CP850" }, | 
229  |  |     { "IBM-856",    "CP856" }, | 
230  |  |     { "IBM-921",    "ISO-8859-13" }, | 
231  |  |     { "IBM-922",    "CP922" }, | 
232  |  |     { "IBM-932",    "CP932" }, | 
233  |  |     { "IBM-943",    "CP943" }, | 
234  |  |     { "IBM-eucCN",  "GB2312" }, | 
235  |  |     { "IBM-eucJP",  "EUC-JP" }, | 
236  |  |     { "IBM-eucKR",  "EUC-KR" }, | 
237  |  |     { "IBM-eucTW",  "EUC-TW" }, | 
238  |  |     { "ISO8859-1",  "ISO-8859-1" }, | 
239  |  |     { "ISO8859-15", "ISO-8859-15" }, | 
240  |  |     { "ISO8859-2",  "ISO-8859-2" }, | 
241  |  |     { "ISO8859-5",  "ISO-8859-5" }, | 
242  |  |     { "ISO8859-6",  "ISO-8859-6" }, | 
243  |  |     { "ISO8859-7",  "ISO-8859-7" }, | 
244  |  |     { "ISO8859-8",  "ISO-8859-8" }, | 
245  |  |     { "ISO8859-9",  "ISO-8859-9" }, | 
246  |  |     { "TIS-620",    "TIS-620" }, | 
247  |  |   /*{ "UTF-8",      "UTF-8" },*/ | 
248  |  |     { "big5",       "BIG5" } | 
249  |  | #   define alias_table_defined  | 
250  |  | #  endif  | 
251  |  | #  if defined __hpux                                        /* HP-UX */  | 
252  |  |     { "SJIS",      "SHIFT_JIS" }, | 
253  |  |     { "arabic8",   "HP-ARABIC8" }, | 
254  |  |     { "big5",      "BIG5" }, | 
255  |  |     { "cp1251",    "CP1251" }, | 
256  |  |     { "eucJP",     "EUC-JP" }, | 
257  |  |     { "eucKR",     "EUC-KR" }, | 
258  |  |     { "eucTW",     "EUC-TW" }, | 
259  |  |     { "gb18030",   "GB18030" }, | 
260  |  |     { "greek8",    "HP-GREEK8" }, | 
261  |  |     { "hebrew8",   "HP-HEBREW8" }, | 
262  |  |     { "hkbig5",    "BIG5-HKSCS" }, | 
263  |  |     { "hp15CN",    "GB2312" }, | 
264  |  |     { "iso88591",  "ISO-8859-1" }, | 
265  |  |     { "iso885913", "ISO-8859-13" }, | 
266  |  |     { "iso885915", "ISO-8859-15" }, | 
267  |  |     { "iso88592",  "ISO-8859-2" }, | 
268  |  |     { "iso88594",  "ISO-8859-4" }, | 
269  |  |     { "iso88595",  "ISO-8859-5" }, | 
270  |  |     { "iso88596",  "ISO-8859-6" }, | 
271  |  |     { "iso88597",  "ISO-8859-7" }, | 
272  |  |     { "iso88598",  "ISO-8859-8" }, | 
273  |  |     { "iso88599",  "ISO-8859-9" }, | 
274  |  |     { "kana8",     "HP-KANA8" }, | 
275  |  |     { "koi8r",     "KOI8-R" }, | 
276  |  |     { "roman8",    "HP-ROMAN8" }, | 
277  |  |     { "tis620",    "TIS-620" }, | 
278  |  |     { "turkish8",  "HP-TURKISH8" }, | 
279  |  |     { "utf8",      "UTF-8" } | 
280  |  | #   define alias_table_defined  | 
281  |  | #  endif  | 
282  |  | #  if defined __sgi                                         /* IRIX */  | 
283  |  |     { "ISO8859-1",  "ISO-8859-1" }, | 
284  |  |     { "ISO8859-15", "ISO-8859-15" }, | 
285  |  |     { "ISO8859-2",  "ISO-8859-2" }, | 
286  |  |     { "ISO8859-5",  "ISO-8859-5" }, | 
287  |  |     { "ISO8859-7",  "ISO-8859-7" }, | 
288  |  |     { "ISO8859-9",  "ISO-8859-9" }, | 
289  |  |     { "eucCN",      "GB2312" }, | 
290  |  |     { "eucJP",      "EUC-JP" }, | 
291  |  |     { "eucKR",      "EUC-KR" }, | 
292  |  |     { "eucTW",      "EUC-TW" } | 
293  |  | #   define alias_table_defined  | 
294  |  | #  endif  | 
295  |  | #  if defined __osf__                                       /* OSF/1 */  | 
296  |  |   /*{ "GBK",        "GBK" },*/ | 
297  |  |     { "ISO8859-1",  "ISO-8859-1" }, | 
298  |  |     { "ISO8859-15", "ISO-8859-15" }, | 
299  |  |     { "ISO8859-2",  "ISO-8859-2" }, | 
300  |  |     { "ISO8859-4",  "ISO-8859-4" }, | 
301  |  |     { "ISO8859-5",  "ISO-8859-5" }, | 
302  |  |     { "ISO8859-7",  "ISO-8859-7" }, | 
303  |  |     { "ISO8859-8",  "ISO-8859-8" }, | 
304  |  |     { "ISO8859-9",  "ISO-8859-9" }, | 
305  |  |     { "KSC5601",    "CP949" }, | 
306  |  |     { "SJIS",       "SHIFT_JIS" }, | 
307  |  |     { "TACTIS",     "TIS-620" }, | 
308  |  |   /*{ "UTF-8",      "UTF-8" },*/ | 
309  |  |     { "big5",       "BIG5" }, | 
310  |  |     { "cp850",      "CP850" }, | 
311  |  |     { "dechanyu",   "DEC-HANYU" }, | 
312  |  |     { "dechanzi",   "GB2312" }, | 
313  |  |     { "deckanji",   "DEC-KANJI" }, | 
314  |  |     { "deckorean",  "EUC-KR" }, | 
315  |  |     { "eucJP",      "EUC-JP" }, | 
316  |  |     { "eucKR",      "EUC-KR" }, | 
317  |  |     { "eucTW",      "EUC-TW" }, | 
318  |  |     { "sdeckanji",  "EUC-JP" } | 
319  |  | #   define alias_table_defined  | 
320  |  | #  endif  | 
321  |  | #  if defined __sun                                         /* Solaris */  | 
322  |  |     { "5601",        "EUC-KR" }, | 
323  |  |     { "646",         "ASCII" }, | 
324  |  |   /*{ "BIG5",        "BIG5" },*/ | 
325  |  |     { "Big5-HKSCS",  "BIG5-HKSCS" }, | 
326  |  |     { "GB18030",     "GB18030" }, | 
327  |  |   /*{ "GBK",         "GBK" },*/ | 
328  |  |     { "ISO8859-1",   "ISO-8859-1" }, | 
329  |  |     { "ISO8859-11",  "TIS-620" }, | 
330  |  |     { "ISO8859-13",  "ISO-8859-13" }, | 
331  |  |     { "ISO8859-15",  "ISO-8859-15" }, | 
332  |  |     { "ISO8859-2",   "ISO-8859-2" }, | 
333  |  |     { "ISO8859-3",   "ISO-8859-3" }, | 
334  |  |     { "ISO8859-4",   "ISO-8859-4" }, | 
335  |  |     { "ISO8859-5",   "ISO-8859-5" }, | 
336  |  |     { "ISO8859-6",   "ISO-8859-6" }, | 
337  |  |     { "ISO8859-7",   "ISO-8859-7" }, | 
338  |  |     { "ISO8859-8",   "ISO-8859-8" }, | 
339  |  |     { "ISO8859-9",   "ISO-8859-9" }, | 
340  |  |     { "PCK",         "SHIFT_JIS" }, | 
341  |  |     { "TIS620.2533", "TIS-620" }, | 
342  |  |   /*{ "UTF-8",       "UTF-8" },*/ | 
343  |  |     { "ansi-1251",   "CP1251" }, | 
344  |  |     { "cns11643",    "EUC-TW" }, | 
345  |  |     { "eucJP",       "EUC-JP" }, | 
346  |  |     { "gb2312",      "GB2312" }, | 
347  |  |     { "koi8-r",      "KOI8-R" } | 
348  |  | #   define alias_table_defined  | 
349  |  | #  endif  | 
350  |  | #  if defined __minix                                       /* Minix */  | 
351  |  |     { "646", "ASCII" } | 
352  |  | #   define alias_table_defined  | 
353  |  | #  endif  | 
354  |  | #  if defined WINDOWS_NATIVE || defined __CYGWIN__          /* Windows */  | 
355  |  |     { "CP1361",  "JOHAB" }, | 
356  |  |     { "CP20127", "ASCII" }, | 
357  |  |     { "CP20866", "KOI8-R" }, | 
358  |  |     { "CP20936", "GB2312" }, | 
359  |  |     { "CP21866", "KOI8-RU" }, | 
360  |  |     { "CP28591", "ISO-8859-1" }, | 
361  |  |     { "CP28592", "ISO-8859-2" }, | 
362  |  |     { "CP28593", "ISO-8859-3" }, | 
363  |  |     { "CP28594", "ISO-8859-4" }, | 
364  |  |     { "CP28595", "ISO-8859-5" }, | 
365  |  |     { "CP28596", "ISO-8859-6" }, | 
366  |  |     { "CP28597", "ISO-8859-7" }, | 
367  |  |     { "CP28598", "ISO-8859-8" }, | 
368  |  |     { "CP28599", "ISO-8859-9" }, | 
369  |  |     { "CP28605", "ISO-8859-15" }, | 
370  |  |     { "CP38598", "ISO-8859-8" }, | 
371  |  |     { "CP51932", "EUC-JP" }, | 
372  |  |     { "CP51936", "GB2312" }, | 
373  |  |     { "CP51949", "EUC-KR" }, | 
374  |  |     { "CP51950", "EUC-TW" }, | 
375  |  |     { "CP54936", "GB18030" }, | 
376  |  |     { "CP65001", "UTF-8" }, | 
377  |  |     { "CP936",   "GBK" } | 
378  |  | #   define alias_table_defined  | 
379  |  | #  endif  | 
380  |  | #  if defined OS2                                           /* OS/2 */  | 
381  |  |     /* The list of encodings is taken from "List of OS/2 Codepages"  | 
382  |  |        by Alex Taylor:  | 
383  |  |        <http://altsan.org/os2/toolkits/uls/index.html#codepages>.  | 
384  |  |        See also "__convcp() of kLIBC":  | 
385  |  |        <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>.  */  | 
386  |  |     { "CP1004",        "CP1252" }, | 
387  |  |   /*{ "CP1041",        "CP943" },*/ | 
388  |  |   /*{ "CP1088",        "CP949" },*/ | 
389  |  |     { "CP1089",        "ISO-8859-6" }, | 
390  |  |   /*{ "CP1114",        "CP950" },*/ | 
391  |  |   /*{ "CP1115",        "GB2312" },*/ | 
392  |  |     { "CP1208",        "UTF-8" }, | 
393  |  |   /*{ "CP1380",        "GB2312" },*/ | 
394  |  |     { "CP1381",        "GB2312" }, | 
395  |  |     { "CP1383",        "GB2312" }, | 
396  |  |     { "CP1386",        "GBK" }, | 
397  |  |   /*{ "CP301",         "CP943" },*/ | 
398  |  |     { "CP3372",        "EUC-JP" }, | 
399  |  |     { "CP4946",        "CP850" }, | 
400  |  |   /*{ "CP5048",        "JIS_X0208-1990" },*/ | 
401  |  |   /*{ "CP5049",        "JIS_X0212-1990" },*/ | 
402  |  |   /*{ "CP5067",        "KS_C_5601-1987" },*/ | 
403  |  |     { "CP813",         "ISO-8859-7" }, | 
404  |  |     { "CP819",         "ISO-8859-1" }, | 
405  |  |     { "CP878",         "KOI8-R" }, | 
406  |  |   /*{ "CP897",         "CP943" },*/ | 
407  |  |     { "CP912",         "ISO-8859-2" }, | 
408  |  |     { "CP913",         "ISO-8859-3" }, | 
409  |  |     { "CP914",         "ISO-8859-4" }, | 
410  |  |     { "CP915",         "ISO-8859-5" }, | 
411  |  |     { "CP916",         "ISO-8859-8" }, | 
412  |  |     { "CP920",         "ISO-8859-9" }, | 
413  |  |     { "CP921",         "ISO-8859-13" }, | 
414  |  |     { "CP923",         "ISO-8859-15" }, | 
415  |  |   /*{ "CP941",         "CP943" },*/ | 
416  |  |   /*{ "CP947",         "CP950" },*/ | 
417  |  |   /*{ "CP951",         "CP949" },*/ | 
418  |  |   /*{ "CP952",         "JIS_X0208-1990" },*/ | 
419  |  |   /*{ "CP953",         "JIS_X0212-1990" },*/ | 
420  |  |     { "CP954",         "EUC-JP" }, | 
421  |  |     { "CP964",         "EUC-TW" }, | 
422  |  |     { "CP970",         "EUC-KR" }, | 
423  |  |   /*{ "CP971",         "KS_C_5601-1987" },*/ | 
424  |  |     { "IBM-1004",      "CP1252" }, | 
425  |  |   /*{ "IBM-1006",      "?" },*/ | 
426  |  |   /*{ "IBM-1008",      "?" },*/ | 
427  |  |   /*{ "IBM-1041",      "CP943" },*/ | 
428  |  |   /*{ "IBM-1051",      "?" },*/ | 
429  |  |   /*{ "IBM-1088",      "CP949" },*/ | 
430  |  |     { "IBM-1089",      "ISO-8859-6" }, | 
431  |  |   /*{ "IBM-1098",      "?" },*/ | 
432  |  |   /*{ "IBM-1114",      "CP950" },*/ | 
433  |  |   /*{ "IBM-1115",      "GB2312" },*/ | 
434  |  |   /*{ "IBM-1116",      "?" },*/ | 
435  |  |   /*{ "IBM-1117",      "?" },*/ | 
436  |  |   /*{ "IBM-1118",      "?" },*/ | 
437  |  |   /*{ "IBM-1119",      "?" },*/ | 
438  |  |     { "IBM-1124",      "CP1124" }, | 
439  |  |     { "IBM-1125",      "CP1125" }, | 
440  |  |     { "IBM-1131",      "CP1131" }, | 
441  |  |     { "IBM-1208",      "UTF-8" }, | 
442  |  |     { "IBM-1250",      "CP1250" }, | 
443  |  |     { "IBM-1251",      "CP1251" }, | 
444  |  |     { "IBM-1252",      "CP1252" }, | 
445  |  |     { "IBM-1253",      "CP1253" }, | 
446  |  |     { "IBM-1254",      "CP1254" }, | 
447  |  |     { "IBM-1255",      "CP1255" }, | 
448  |  |     { "IBM-1256",      "CP1256" }, | 
449  |  |     { "IBM-1257",      "CP1257" }, | 
450  |  |   /*{ "IBM-1275",      "?" },*/ | 
451  |  |   /*{ "IBM-1276",      "?" },*/ | 
452  |  |   /*{ "IBM-1277",      "?" },*/ | 
453  |  |   /*{ "IBM-1280",      "?" },*/ | 
454  |  |   /*{ "IBM-1281",      "?" },*/ | 
455  |  |   /*{ "IBM-1282",      "?" },*/ | 
456  |  |   /*{ "IBM-1283",      "?" },*/ | 
457  |  |   /*{ "IBM-1380",      "GB2312" },*/ | 
458  |  |     { "IBM-1381",      "GB2312" }, | 
459  |  |     { "IBM-1383",      "GB2312" }, | 
460  |  |     { "IBM-1386",      "GBK" }, | 
461  |  |   /*{ "IBM-301",       "CP943" },*/ | 
462  |  |     { "IBM-3372",      "EUC-JP" }, | 
463  |  |     { "IBM-367",       "ASCII" }, | 
464  |  |     { "IBM-437",       "CP437" }, | 
465  |  |     { "IBM-4946",      "CP850" }, | 
466  |  |   /*{ "IBM-5048",      "JIS_X0208-1990" },*/ | 
467  |  |   /*{ "IBM-5049",      "JIS_X0212-1990" },*/ | 
468  |  |   /*{ "IBM-5067",      "KS_C_5601-1987" },*/ | 
469  |  |     { "IBM-813",       "ISO-8859-7" }, | 
470  |  |     { "IBM-819",       "ISO-8859-1" }, | 
471  |  |     { "IBM-850",       "CP850" }, | 
472  |  |   /*{ "IBM-851",       "?" },*/ | 
473  |  |     { "IBM-852",       "CP852" }, | 
474  |  |     { "IBM-855",       "CP855" }, | 
475  |  |     { "IBM-856",       "CP856" }, | 
476  |  |     { "IBM-857",       "CP857" }, | 
477  |  |   /*{ "IBM-859",       "?" },*/ | 
478  |  |     { "IBM-860",       "CP860" }, | 
479  |  |     { "IBM-861",       "CP861" }, | 
480  |  |     { "IBM-862",       "CP862" }, | 
481  |  |     { "IBM-863",       "CP863" }, | 
482  |  |     { "IBM-864",       "CP864" }, | 
483  |  |     { "IBM-865",       "CP865" }, | 
484  |  |     { "IBM-866",       "CP866" }, | 
485  |  |   /*{ "IBM-868",       "?" },*/ | 
486  |  |     { "IBM-869",       "CP869" }, | 
487  |  |     { "IBM-874",       "CP874" }, | 
488  |  |     { "IBM-878",       "KOI8-R" }, | 
489  |  |   /*{ "IBM-895",       "?" },*/ | 
490  |  |   /*{ "IBM-897",       "CP943" },*/ | 
491  |  |   /*{ "IBM-907",       "?" },*/ | 
492  |  |   /*{ "IBM-909",       "?" },*/ | 
493  |  |     { "IBM-912",       "ISO-8859-2" }, | 
494  |  |     { "IBM-913",       "ISO-8859-3" }, | 
495  |  |     { "IBM-914",       "ISO-8859-4" }, | 
496  |  |     { "IBM-915",       "ISO-8859-5" }, | 
497  |  |     { "IBM-916",       "ISO-8859-8" }, | 
498  |  |     { "IBM-920",       "ISO-8859-9" }, | 
499  |  |     { "IBM-921",       "ISO-8859-13" }, | 
500  |  |     { "IBM-922",       "CP922" }, | 
501  |  |     { "IBM-923",       "ISO-8859-15" }, | 
502  |  |     { "IBM-932",       "CP932" }, | 
503  |  |   /*{ "IBM-941",       "CP943" },*/ | 
504  |  |   /*{ "IBM-942",       "?" },*/ | 
505  |  |     { "IBM-943",       "CP943" }, | 
506  |  |   /*{ "IBM-947",       "CP950" },*/ | 
507  |  |     { "IBM-949",       "CP949" }, | 
508  |  |     { "IBM-950",       "CP950" }, | 
509  |  |   /*{ "IBM-951",       "CP949" },*/ | 
510  |  |   /*{ "IBM-952",       "JIS_X0208-1990" },*/ | 
511  |  |   /*{ "IBM-953",       "JIS_X0212-1990" },*/ | 
512  |  |     { "IBM-954",       "EUC-JP" }, | 
513  |  |   /*{ "IBM-955",       "?" },*/ | 
514  |  |     { "IBM-964",       "EUC-TW" }, | 
515  |  |     { "IBM-970",       "EUC-KR" }, | 
516  |  |   /*{ "IBM-971",       "KS_C_5601-1987" },*/ | 
517  |  |     { "IBM-eucCN",     "GB2312" }, | 
518  |  |     { "IBM-eucJP",     "EUC-JP" }, | 
519  |  |     { "IBM-eucKR",     "EUC-KR" }, | 
520  |  |     { "IBM-eucTW",     "EUC-TW" }, | 
521  |  |     { "IBM33722",      "EUC-JP" }, | 
522  |  |     { "ISO8859-1",     "ISO-8859-1" }, | 
523  |  |     { "ISO8859-2",     "ISO-8859-2" }, | 
524  |  |     { "ISO8859-3",     "ISO-8859-3" }, | 
525  |  |     { "ISO8859-4",     "ISO-8859-4" }, | 
526  |  |     { "ISO8859-5",     "ISO-8859-5" }, | 
527  |  |     { "ISO8859-6",     "ISO-8859-6" }, | 
528  |  |     { "ISO8859-7",     "ISO-8859-7" }, | 
529  |  |     { "ISO8859-8",     "ISO-8859-8" }, | 
530  |  |     { "ISO8859-9",     "ISO-8859-9" }, | 
531  |  |   /*{ "JISX0201-1976", "JISX0201-1976" },*/ | 
532  |  |   /*{ "JISX0208-1978", "?" },*/ | 
533  |  |   /*{ "JISX0208-1983", "JIS_X0208-1983" },*/ | 
534  |  |   /*{ "JISX0208-1990", "JIS_X0208-1990" },*/ | 
535  |  |   /*{ "JISX0212-1990", "JIS_X0212-1990" },*/ | 
536  |  |   /*{ "KSC5601-1987",  "KS_C_5601-1987" },*/ | 
537  |  |     { "SJIS-1",        "CP943" }, | 
538  |  |     { "SJIS-2",        "CP943" }, | 
539  |  |     { "eucJP",         "EUC-JP" }, | 
540  |  |     { "eucKR",         "EUC-KR" }, | 
541  |  |     { "eucTW-1993",    "EUC-TW" } | 
542  |  | #   define alias_table_defined  | 
543  |  | #  endif  | 
544  |  | #  if defined VMS                                           /* OpenVMS */  | 
545  |  |     /* The list of encodings is taken from the OpenVMS 7.3-1 documentation  | 
546  |  |        "Compaq C Run-Time Library Reference Manual for OpenVMS systems"  | 
547  |  |        section 10.7 "Handling Different Character Sets".  */  | 
548  |  |     { "DECHANYU",  "DEC-HANYU" }, | 
549  |  |     { "DECHANZI",  "GB2312" }, | 
550  |  |     { "DECKANJI",  "DEC-KANJI" }, | 
551  |  |     { "DECKOREAN", "EUC-KR" }, | 
552  |  |     { "ISO8859-1", "ISO-8859-1" }, | 
553  |  |     { "ISO8859-2", "ISO-8859-2" }, | 
554  |  |     { "ISO8859-5", "ISO-8859-5" }, | 
555  |  |     { "ISO8859-7", "ISO-8859-7" }, | 
556  |  |     { "ISO8859-8", "ISO-8859-8" }, | 
557  |  |     { "ISO8859-9", "ISO-8859-9" }, | 
558  |  |     { "SDECKANJI", "EUC-JP" }, | 
559  |  |     { "SJIS",      "SHIFT_JIS" }, | 
560  |  |     { "eucJP",     "EUC-JP" }, | 
561  |  |     { "eucTW",     "EUC-TW" } | 
562  |  | #   define alias_table_defined  | 
563  |  | #  endif  | 
564  |  | #  ifndef alias_table_defined  | 
565  |  |     /* No platform section above was selected.  Just a dummy entry, to avoid a C syntax error.  */  | 
566  |  |     { "", "" } | 
567  |  | #  endif  | 
568  |  |   };  | 
569  |  |  | 
570  |  | # endif  | 
571  |  |  | 
572  |  | #else  | 
573  |  |  | 
574  |  | /* On these platforms, we use a mapping from locale name to GNU canonical  | 
575  |  |    encoding name.  */  | 
576  |  |  | 
577  |  | struct table_entry  /* One locale-name -> canonical-name pair stored in locale_table.  */  | 
578  |  | { | 
579  |  |   const char locale[17+1];     /* Locale name; room for 17 characters plus NUL.  */  | 
580  |  |   const char canonical[11+1];  /* GNU canonical encoding name; room for 11 characters plus NUL.  */  | 
581  |  | };  | 
582  |  |  | 
583  |  | /* Table of platform-dependent mappings, sorted in ascending order.  */  | 
584  |  | static const struct table_entry locale_table[] =  | 
585  |  |   { | 
586  |  | # if defined __FreeBSD__                                    /* FreeBSD 4.2 */  | 
587  |  |     { "cs_CZ.ISO_8859-2",  "ISO-8859-2" }, | 
588  |  |     { "da_DK.DIS_8859-15", "ISO-8859-15" }, | 
589  |  |     { "da_DK.ISO_8859-1",  "ISO-8859-1" }, | 
590  |  |     { "de_AT.DIS_8859-15", "ISO-8859-15" }, | 
591  |  |     { "de_AT.ISO_8859-1",  "ISO-8859-1" }, | 
592  |  |     { "de_CH.DIS_8859-15", "ISO-8859-15" }, | 
593  |  |     { "de_CH.ISO_8859-1",  "ISO-8859-1" }, | 
594  |  |     { "de_DE.DIS_8859-15", "ISO-8859-15" }, | 
595  |  |     { "de_DE.ISO_8859-1",  "ISO-8859-1" }, | 
596  |  |     { "en_AU.DIS_8859-15", "ISO-8859-15" }, | 
597  |  |     { "en_AU.ISO_8859-1",  "ISO-8859-1" }, | 
598  |  |     { "en_CA.DIS_8859-15", "ISO-8859-15" }, | 
599  |  |     { "en_CA.ISO_8859-1",  "ISO-8859-1" }, | 
600  |  |     { "en_GB.DIS_8859-15", "ISO-8859-15" }, | 
601  |  |     { "en_GB.ISO_8859-1",  "ISO-8859-1" }, | 
602  |  |     { "en_US.DIS_8859-15", "ISO-8859-15" }, | 
603  |  |     { "en_US.ISO_8859-1",  "ISO-8859-1" }, | 
604  |  |     { "es_ES.DIS_8859-15", "ISO-8859-15" }, | 
605  |  |     { "es_ES.ISO_8859-1",  "ISO-8859-1" }, | 
606  |  |     { "fi_FI.DIS_8859-15", "ISO-8859-15" }, | 
607  |  |     { "fi_FI.ISO_8859-1",  "ISO-8859-1" }, | 
608  |  |     { "fr_BE.DIS_8859-15", "ISO-8859-15" }, | 
609  |  |     { "fr_BE.ISO_8859-1",  "ISO-8859-1" }, | 
610  |  |     { "fr_CA.DIS_8859-15", "ISO-8859-15" }, | 
611  |  |     { "fr_CA.ISO_8859-1",  "ISO-8859-1" }, | 
612  |  |     { "fr_CH.DIS_8859-15", "ISO-8859-15" }, | 
613  |  |     { "fr_CH.ISO_8859-1",  "ISO-8859-1" }, | 
614  |  |     { "fr_FR.DIS_8859-15", "ISO-8859-15" }, | 
615  |  |     { "fr_FR.ISO_8859-1",  "ISO-8859-1" }, | 
616  |  |     { "hr_HR.ISO_8859-2",  "ISO-8859-2" }, | 
617  |  |     { "hu_HU.ISO_8859-2",  "ISO-8859-2" }, | 
618  |  |     { "is_IS.DIS_8859-15", "ISO-8859-15" }, | 
619  |  |     { "is_IS.ISO_8859-1",  "ISO-8859-1" }, | 
620  |  |     { "it_CH.DIS_8859-15", "ISO-8859-15" }, | 
621  |  |     { "it_CH.ISO_8859-1",  "ISO-8859-1" }, | 
622  |  |     { "it_IT.DIS_8859-15", "ISO-8859-15" }, | 
623  |  |     { "it_IT.ISO_8859-1",  "ISO-8859-1" }, | 
624  |  |     { "ja_JP.EUC",         "EUC-JP" }, | 
625  |  |     { "ja_JP.SJIS",        "SHIFT_JIS" }, | 
626  |  |     { "ja_JP.Shift_JIS",   "SHIFT_JIS" }, | 
627  |  |     { "ko_KR.EUC",         "EUC-KR" }, | 
628  |  |     { "la_LN.ASCII",       "ASCII" }, | 
629  |  |     { "la_LN.DIS_8859-15", "ISO-8859-15" }, | 
630  |  |     { "la_LN.ISO_8859-1",  "ISO-8859-1" }, | 
631  |  |     { "la_LN.ISO_8859-2",  "ISO-8859-2" }, | 
632  |  |     { "la_LN.ISO_8859-4",  "ISO-8859-4" }, | 
633  |  |     { "lt_LN.ASCII",       "ASCII" }, | 
634  |  |     { "lt_LN.DIS_8859-15", "ISO-8859-15" }, | 
635  |  |     { "lt_LN.ISO_8859-1",  "ISO-8859-1" }, | 
636  |  |     { "lt_LN.ISO_8859-2",  "ISO-8859-2" }, | 
637  |  |     { "lt_LT.ISO_8859-4",  "ISO-8859-4" }, | 
638  |  |     { "nl_BE.DIS_8859-15", "ISO-8859-15" }, | 
639  |  |     { "nl_BE.ISO_8859-1",  "ISO-8859-1" }, | 
640  |  |     { "nl_NL.DIS_8859-15", "ISO-8859-15" }, | 
641  |  |     { "nl_NL.ISO_8859-1",  "ISO-8859-1" }, | 
642  |  |     { "no_NO.DIS_8859-15", "ISO-8859-15" }, | 
643  |  |     { "no_NO.ISO_8859-1",  "ISO-8859-1" }, | 
644  |  |     { "pl_PL.ISO_8859-2",  "ISO-8859-2" }, | 
645  |  |     { "pt_PT.DIS_8859-15", "ISO-8859-15" }, | 
646  |  |     { "pt_PT.ISO_8859-1",  "ISO-8859-1" }, | 
647  |  |     { "ru_RU.CP866",       "CP866" }, | 
648  |  |     { "ru_RU.ISO_8859-5",  "ISO-8859-5" }, | 
649  |  |     { "ru_RU.KOI8-R",      "KOI8-R" }, | 
650  |  |     { "ru_SU.CP866",       "CP866" }, | 
651  |  |     { "ru_SU.ISO_8859-5",  "ISO-8859-5" }, | 
652  |  |     { "ru_SU.KOI8-R",      "KOI8-R" }, | 
653  |  |     { "sl_SI.ISO_8859-2",  "ISO-8859-2" }, | 
654  |  |     { "sv_SE.DIS_8859-15", "ISO-8859-15" }, | 
655  |  |     { "sv_SE.ISO_8859-1",  "ISO-8859-1" }, | 
656  |  |     { "uk_UA.KOI8-U",      "KOI8-U" }, | 
657  |  |     { "zh_CN.EUC",         "GB2312" }, | 
658  |  |     { "zh_TW.BIG5",        "BIG5" }, | 
659  |  |     { "zh_TW.Big5",        "BIG5" } | 
660  |  | #  define locale_table_defined  | 
661  |  | # endif  | 
662  |  | # if defined __DJGPP__                                      /* DOS / DJGPP 2.03 */  | 
663  |  |     /* The encodings given here may not all be correct.  | 
664  |  |        If you find that the encoding given for your language and  | 
665  |  |        country is not the one your DOS machine actually uses, just  | 
666  |  |        correct it in this file, and send a mail to  | 
667  |  |        Juan Manuel Guerrero <juan.guerrero@gmx.de>  | 
668  |  |        and <bug-gnulib@gnu.org>.  */  | 
669  |  |     { "C",     "ASCII" }, | 
670  |  |     { "ar",    "CP864" }, | 
671  |  |     { "ar_AE", "CP864" }, | 
672  |  |     { "ar_DZ", "CP864" }, | 
673  |  |     { "ar_EG", "CP864" }, | 
674  |  |     { "ar_IQ", "CP864" }, | 
675  |  |     { "ar_IR", "CP864" }, | 
676  |  |     { "ar_JO", "CP864" }, | 
677  |  |     { "ar_KW", "CP864" }, | 
678  |  |     { "ar_MA", "CP864" }, | 
679  |  |     { "ar_OM", "CP864" }, | 
680  |  |     { "ar_QA", "CP864" }, | 
681  |  |     { "ar_SA", "CP864" }, | 
682  |  |     { "ar_SY", "CP864" }, | 
683  |  |     { "be",    "CP866" }, | 
684  |  |     { "be_BE", "CP866" }, | 
685  |  |     { "bg",    "CP866" }, /* not CP855 ?? */ | 
686  |  |     { "bg_BG", "CP866" }, /* not CP855 ?? */ | 
687  |  |     { "ca",    "CP850" }, | 
688  |  |     { "ca_ES", "CP850" }, | 
689  |  |     { "cs",    "CP852" }, | 
690  |  |     { "cs_CZ", "CP852" }, | 
691  |  |     { "da",    "CP865" }, /* not CP850 ?? */ | 
692  |  |     { "da_DK", "CP865" }, /* not CP850 ?? */ | 
693  |  |     { "de",    "CP850" }, | 
694  |  |     { "de_AT", "CP850" }, | 
695  |  |     { "de_CH", "CP850" }, | 
696  |  |     { "de_DE", "CP850" }, | 
697  |  |     { "el",    "CP869" }, | 
698  |  |     { "el_GR", "CP869" }, | 
699  |  |     { "en",    "CP850" }, | 
700  |  |     { "en_AU", "CP850" }, /* not CP437 ?? */ | 
701  |  |     { "en_CA", "CP850" }, | 
702  |  |     { "en_GB", "CP850" }, | 
703  |  |     { "en_NZ", "CP437" }, | 
704  |  |     { "en_US", "CP437" }, | 
705  |  |     { "en_ZA", "CP850" }, /* not CP437 ?? */ | 
706  |  |     { "eo",    "CP850" }, | 
707  |  |     { "eo_EO", "CP850" }, | 
708  |  |     { "es",    "CP850" }, | 
709  |  |     { "es_AR", "CP850" }, | 
710  |  |     { "es_BO", "CP850" }, | 
711  |  |     { "es_CL", "CP850" }, | 
712  |  |     { "es_CO", "CP850" }, | 
713  |  |     { "es_CR", "CP850" }, | 
714  |  |     { "es_CU", "CP850" }, | 
715  |  |     { "es_DO", "CP850" }, | 
716  |  |     { "es_EC", "CP850" }, | 
717  |  |     { "es_ES", "CP850" }, | 
718  |  |     { "es_GT", "CP850" }, | 
719  |  |     { "es_HN", "CP850" }, | 
720  |  |     { "es_MX", "CP850" }, | 
721  |  |     { "es_NI", "CP850" }, | 
722  |  |     { "es_PA", "CP850" }, | 
723  |  |     { "es_PE", "CP850" }, | 
724  |  |     { "es_PY", "CP850" }, | 
725  |  |     { "es_SV", "CP850" }, | 
726  |  |     { "es_UY", "CP850" }, | 
727  |  |     { "es_VE", "CP850" }, | 
728  |  |     { "et",    "CP850" }, | 
729  |  |     { "et_EE", "CP850" }, | 
730  |  |     { "eu",    "CP850" }, | 
731  |  |     { "eu_ES", "CP850" }, | 
732  |  |     { "fi",    "CP850" }, | 
733  |  |     { "fi_FI", "CP850" }, | 
734  |  |     { "fr",    "CP850" }, | 
735  |  |     { "fr_BE", "CP850" }, | 
736  |  |     { "fr_CA", "CP850" }, | 
737  |  |     { "fr_CH", "CP850" }, | 
738  |  |     { "fr_FR", "CP850" }, | 
739  |  |     { "ga",    "CP850" }, | 
740  |  |     { "ga_IE", "CP850" }, | 
741  |  |     { "gd",    "CP850" }, | 
742  |  |     { "gd_GB", "CP850" }, | 
743  |  |     { "gl",    "CP850" }, | 
744  |  |     { "gl_ES", "CP850" }, | 
745  |  |     { "he",    "CP862" }, | 
746  |  |     { "he_IL", "CP862" }, | 
747  |  |     { "hr",    "CP852" }, | 
748  |  |     { "hr_HR", "CP852" }, | 
749  |  |     { "hu",    "CP852" }, | 
750  |  |     { "hu_HU", "CP852" }, | 
751  |  |     { "id",    "CP850" }, /* not CP437 ?? */ | 
752  |  |     { "id_ID", "CP850" }, /* not CP437 ?? */ | 
753  |  |     { "is",    "CP861" }, /* not CP850 ?? */ | 
754  |  |     { "is_IS", "CP861" }, /* not CP850 ?? */ | 
755  |  |     { "it",    "CP850" }, | 
756  |  |     { "it_CH", "CP850" }, | 
757  |  |     { "it_IT", "CP850" }, | 
758  |  |     { "ja",    "CP932" }, | 
759  |  |     { "ja_JP", "CP932" }, | 
760  |  |     { "kr",    "CP949" }, /* not CP934 ?? */ | 
761  |  |     { "kr_KR", "CP949" }, /* not CP934 ?? */ | 
762  |  |     { "lt",    "CP775" }, | 
763  |  |     { "lt_LT", "CP775" }, | 
764  |  |     { "lv",    "CP775" }, | 
765  |  |     { "lv_LV", "CP775" }, | 
766  |  |     { "mk",    "CP866" }, /* not CP855 ?? */ | 
767  |  |     { "mk_MK", "CP866" }, /* not CP855 ?? */ | 
768  |  |     { "mt",    "CP850" }, | 
769  |  |     { "mt_MT", "CP850" }, | 
770  |  |     { "nb",    "CP865" }, /* not CP850 ?? */ | 
771  |  |     { "nb_NO", "CP865" }, /* not CP850 ?? */ | 
772  |  |     { "nl",    "CP850" }, | 
773  |  |     { "nl_BE", "CP850" }, | 
774  |  |     { "nl_NL", "CP850" }, | 
775  |  |     { "nn",    "CP865" }, /* not CP850 ?? */ | 
776  |  |     { "nn_NO", "CP865" }, /* not CP850 ?? */ | 
777  |  |     { "no",    "CP865" }, /* not CP850 ?? */ | 
778  |  |     { "no_NO", "CP865" }, /* not CP850 ?? */ | 
779  |  |     { "pl",    "CP852" }, | 
780  |  |     { "pl_PL", "CP852" }, | 
781  |  |     { "pt",    "CP850" }, | 
782  |  |     { "pt_BR", "CP850" }, | 
783  |  |     { "pt_PT", "CP850" }, | 
784  |  |     { "ro",    "CP852" }, | 
785  |  |     { "ro_RO", "CP852" }, | 
786  |  |     { "ru",    "CP866" }, | 
787  |  |     { "ru_RU", "CP866" }, | 
788  |  |     { "sk",    "CP852" }, | 
789  |  |     { "sk_SK", "CP852" }, | 
790  |  |     { "sl",    "CP852" }, | 
791  |  |     { "sl_SI", "CP852" }, | 
792  |  |     { "sq",    "CP852" }, | 
793  |  |     { "sq_AL", "CP852" }, | 
794  |  |     { "sr",    "CP852" }, /* CP852 or CP866 or CP855 ?? */ | 
795  |  |     { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */ | 
796  |  |     { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */ | 
797  |  |     { "sv",    "CP850" }, | 
798  |  |     { "sv_SE", "CP850" }, | 
799  |  |     { "th",    "CP874" }, | 
800  |  |     { "th_TH", "CP874" }, | 
801  |  |     { "tr",    "CP857" }, | 
802  |  |     { "tr_TR", "CP857" }, | 
803  |  |     { "uk",    "CP1125" }, | 
804  |  |     { "uk_UA", "CP1125" }, | 
805  |  |     { "zh_CN", "GBK" }, | 
806  |  |     { "zh_TW", "CP950" } /* not CP938 ?? */ | 
807  |  | #  define locale_table_defined  | 
808  |  | # endif  | 
809  |  | # ifndef locale_table_defined  | 
810  |  |     /* Just a dummy entry, to avoid a C syntax error.  */  | 
811  |  |     { "", "" } | 
812  |  | # endif  | 
813  |  |   };  | 
814  |  |  | 
815  |  | #endif  | 
816  |  |  | 
817  |  |  | 
/* Determine the current locale's character encoding, and canonicalize it
   through the platform's alias table or locale table, when one is compiled
   in.

   Return value: a pointer to a NUL-terminated string that the caller must
   not free or modify.  Depending on the platform branch taken it points to
   a string literal, to a table entry, to a function-local static buffer, to
   the string returned by nl_langinfo (CODESET), or (on Cygwin and OS/2,
   when the locale environment variable has a ".CODESET" suffix without an
   "@modifier") directly into the environment string.  The result therefore
   becomes invalid when setlocale() is used to change the global locale, or
   when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG
   is changed; threads in multithreaded programs should not do this.

   If the canonical name cannot be determined, the result is a non-canonical
   name.  Apart from the early returns on Cygwin and OS/2 mentioned above,
   an empty result is replaced by "ASCII" (or "UTF-8" on Mac OS X, BeOS and
   Haiku), so that callers do not see the empty string.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;

  /* This function must be multithread-safe.  To achieve this without using
     thread-local storage, we use a simple strcpy or memcpy to fill this static
     buffer.  Filling it through, for example, strcpy + strcat would not be
     guaranteed to leave the buffer's contents intact if another thread is
     currently accessing it.  If necessary, the contents are first assembled
     in a stack-allocated buffer.  */

#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  Return the suffix of the locale name from the
     environment variables (if present) or the codepage as a number.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      static char resultbuf[2 + 10 + 1];

      /* Usual precedence: LC_ALL, then LC_CTYPE, then LANG.  */
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                /* No trailer: the suffix of the environment string is
                   returned as is, without alias resolution.  */
                return dot;
              /* modifier was found at or after dot, so the difference is
                 non-negative and the conversion to size_t is exact.  */
              if ((size_t) (modifier - dot) < sizeof (resultbuf))
                {
                  /* This way of filling resultbuf is multithread-safe.  */
                  memcpy (resultbuf, dot, modifier - dot);
                  resultbuf [modifier - dot] = '\0';
                  return resultbuf;
                }
            }
        }

      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().  This encoding is used by Cygwin, unless the user
         has set the environment variable CYGWIN=codepage:oem (which very few
         people do).
         Output directed to console windows needs to be converted (to
         GetOEMCP() if the console is using a raster font, or to
         GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
         this conversion transparently (see winsup/cygwin/fhandler_console.cc),
         converting to GetConsoleOutputCP().  This leads to correct results,
         except when SetConsoleOutputCP has been called and a raster font is
         in use.  */
      {
        char buf[2 + 10 + 1];

        /* Assemble on the stack first, then publish with a single strcpy.  */
        sprintf (buf, "CP%u", GetACP ());
        strcpy (resultbuf, buf);
        codeset = resultbuf;
      }
    }
#  endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

# elif defined WINDOWS_NATIVE

  char buf[2 + 10 + 1];
  static char resultbuf[2 + 10 + 1];

  /* The Windows API has a function returning the locale's codepage as
     a number, but the value doesn't change according to what the
     'setlocale' call specified.  So we use it as a last resort, in
     case the string returned by 'setlocale' doesn't specify the
     codepage.  */
  char *current_locale = setlocale (LC_CTYPE, NULL);
  /* Guard against a NULL result, so that strrchr is never handed a null
     pointer; in that case fall through to GetACP() below.  */
  char *pdot = (current_locale != NULL ? strrchr (current_locale, '.') : NULL);

  /* Only use the suffix if "CP" + suffix + NUL fits into buf.  */
  if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
    sprintf (buf, "CP%s", pdot + 1);
  else
    {
      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().
         When the output goes to a console window, it needs to be provided in
         GetOEMCP() encoding if the console is using a raster font, or in
         GetConsoleOutputCP() encoding if it is using a TrueType font.
         But in GUI programs and for output sent to files and pipes, GetACP()
         encoding is the best bet.  */
      sprintf (buf, "CP%u", GetACP ());
    }
  /* For a locale name such as "French_France.65001", in Windows 10,
     setlocale now returns "French_France.utf8" instead.  */
  if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
    codeset = "UTF-8";
  else
    {
      strcpy (resultbuf, buf);
      codeset = resultbuf;
    }

# elif defined OS2

  const char *locale;
  static char resultbuf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  codeset = NULL;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            /* No trailer: the suffix of the environment string is returned
               as is, without alias resolution.  */
            return dot;
          /* modifier was found at or after dot, so the difference is
             non-negative and the conversion to size_t is exact.  */
          if ((size_t) (modifier - dot) < sizeof (resultbuf))
            {
              /* This way of filling resultbuf is multithread-safe.  */
              memcpy (resultbuf, dot, modifier - dot);
              resultbuf [modifier - dot] = '\0';
              return resultbuf;
            }
        }

      /* For the POSIX locale, don't use the system's codepage.  */
      if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
        codeset = "";
    }

  if (codeset == NULL)
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          char buf[2 + 10 + 1];

          /* cp[0] is a ULONG; convert it explicitly so that the argument
             type matches the %u conversion and the output is bounded by
             the 10 digits that buf reserves.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          strcpy (resultbuf, buf);
          codeset = resultbuf;
        }
    }

# else

#  error "Add code for other platforms here."

# endif

  /* Resolve alias.  */
  {
# ifdef alias_table_defined
    /* On some platforms, UTF-8 locales are the most frequently used ones.
       Speed up the common case and slow down the less common cases by
       testing for this case first.  */
#  if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
    if (strcmp (codeset, "UTF-8") == 0)
      goto done_table_lookup;
    else
#  endif
      {
        const struct table_entry * const table = alias_table;
        size_t const table_size =
          sizeof (alias_table) / sizeof (struct table_entry);
        /* The table is sorted.  Perform a binary search.  */
        size_t hi = table_size;
        size_t lo = 0;
        while (lo < hi)
          {
            /* Invariant:
               for i < lo, strcmp (table[i].alias, codeset) < 0,
               for i >= hi, strcmp (table[i].alias, codeset) > 0.  */
            size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
            int cmp = strcmp (table[mid].alias, codeset);
            if (cmp < 0)
              lo = mid + 1;
            else if (cmp > 0)
              hi = mid;
            else
              {
                /* Found an i with
                     strcmp (table[i].alias, codeset) == 0.  */
                codeset = table[mid].canonical;
                goto done_table_lookup;
              }
          }
      }
    /* The 'if (0)' arm exists only to host the label: a successful lookup
       jumps into it and thereby skips the 'else' arm below, while a failed
       lookup falls through into the 'else' arm.  */
    if (0)
      done_table_lookup: {}
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        if (codeset[0] == '\0')
          codeset = "ASCII";
# endif
      }
  }

#else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like DJGPP) have only the C locale.  Therefore we don't use setlocale
     here; it would return "C" when it doesn't support the locale name the
     user has set.  */
# if 0
  locale = setlocale (LC_CTYPE, NULL);
# endif
  if (locale == NULL || locale[0] == '\0')
    {
      /* Usual precedence: LC_ALL, then LC_CTYPE, then LANG.  */
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
    }
  /* None of the variables may be set; normalize to the empty string so that
     the strcmp calls in the table lookup below never see a null pointer.  */
  if (locale == NULL)
    locale = "";

  /* Map locale name to canonical encoding name.  */
  {
# ifdef locale_table_defined
    const struct table_entry * const table = locale_table;
    size_t const table_size =
      sizeof (locale_table) / sizeof (struct table_entry);
    /* The table is sorted.  Perform a binary search.  */
    size_t hi = table_size;
    size_t lo = 0;
    while (lo < hi)
      {
        /* Invariant:
           for i < lo, strcmp (table[i].locale, locale) < 0,
           for i >= hi, strcmp (table[i].locale, locale) > 0.  */
        size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
        int cmp = strcmp (table[mid].locale, locale);
        if (cmp < 0)
          lo = mid + 1;
        else if (cmp > 0)
          hi = mid;
        else
          {
            /* Found an i with
                 strcmp (table[i].locale, locale) == 0.  */
            codeset = table[mid].canonical;
            goto done_table_lookup;
          }
      }
    /* The 'if (0)' arm exists only to host the label: a successful lookup
       jumps into it and thereby skips the 'else' arm below, while a failed
       lookup falls through into the 'else' arm.  */
    if (0)
      done_table_lookup: ;
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* The canonical name cannot be determined.  */
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        codeset = "ASCII";
# endif
      }
  }

#endif

#ifdef DARWIN7
  /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
     (the default codeset) does not work when MB_CUR_MAX is 1.  */
  if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    codeset = "ASCII";
#endif

  return codeset;
}