Coverage Report

Created: 2025-03-06 06:58

/src/wget/lib/localcharset.c
Line
Count
Source (jump to first uncovered line)
1
/* Determine a canonical name for the current locale's character encoding.
2
3
   Copyright (C) 2000-2006, 2008-2025 Free Software Foundation, Inc.
4
5
   This file is free software: you can redistribute it and/or modify
6
   it under the terms of the GNU Lesser General Public License as
7
   published by the Free Software Foundation; either version 2.1 of the
8
   License, or (at your option) any later version.
9
10
   This file is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU Lesser General Public License for more details.
14
15
   You should have received a copy of the GNU Lesser General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17
18
/* Written by Bruno Haible <bruno@clisp.org>.  */
19
20
#include <config.h>
21
22
/* Specification.  */
23
#include "localcharset.h"
24
25
#include <stddef.h>
26
#include <stdio.h>
27
#include <string.h>
28
#include <stdlib.h>
29
30
#if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
31
# define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
32
#endif
33
34
#if defined _WIN32 && !defined __CYGWIN__
35
# define WINDOWS_NATIVE
36
# include <locale.h>
37
#endif
38
39
#if defined __EMX__
40
/* Assume EMX program runs on OS/2, even if compiled under DOS.  */
41
# ifndef OS2
42
#  define OS2
43
# endif
44
#endif
45
46
#if !defined WINDOWS_NATIVE
47
# if HAVE_LANGINFO_CODESET
48
#  include <langinfo.h>
49
# else
50
#  if 0 /* see comment regarding use of setlocale(), below */
51
#   include <locale.h>
52
#  endif
53
# endif
54
# ifdef __CYGWIN__
55
#  define WIN32_LEAN_AND_MEAN
56
#  include <windows.h>
57
# endif
58
#elif defined WINDOWS_NATIVE
59
# define WIN32_LEAN_AND_MEAN
60
# include <windows.h>
61
  /* For the use of setlocale() below, the Gnulib override in setlocale.c is
62
     not needed; see the platform lists in setlocale_null.m4.  */
63
# undef setlocale
64
#endif
65
#if defined OS2
66
# define INCL_DOS
67
# include <os2.h>
68
#endif
69
70
/* For MB_CUR_MAX_L */
71
#if defined DARWIN7
72
# include <xlocale.h>
73
#endif
74
75
76
#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
77
78
/* On these platforms, we use a mapping from non-canonical encoding name
79
   to GNU canonical encoding name.  */
80
81
/* With glibc-2.1 or newer, we don't need any canonicalization,
82
   because glibc has iconv and both glibc and libiconv support all
83
   GNU canonical names directly.  */
84
# if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__)
85
86
/* One row of the alias table: pairs a non-canonical (platform-specific)
   encoding name with the GNU canonical encoding name it maps to.
   Both members are fixed-size character arrays, so a name must be at most
   11 characters long in order to remain NUL-terminated.  The members are
   initialized positionally by the table below, so their order matters.  */
struct table_entry
87
{
88
  const char alias[11+1];      /* non-canonical encoding name */
89
  const char canonical[11+1];  /* GNU canonical encoding name */
90
};
91
92
/* Table of platform-dependent mappings, sorted in ascending order.
   Each platform section below contributes its rows only when the matching
   platform macro is defined, and then defines alias_table_defined.  If no
   section is active, a single dummy row keeps the initializer non-empty.
   Within a section the rows are ordered by alias in plain byte order, so
   uppercase names precede lowercase ones (e.g. "US-ASCII" before "eucCN").
   Commented-out rows are kept for reference; many of them are identity
   mappings or have an unknown ("?") canonical name.
   NOTE(review): the last row of every section has no trailing comma, so the
   sections appear to assume that at most one of them is active in a given
   build -- confirm before adding a new platform section.  */
93
static const struct table_entry alias_table[] =
94
  {
95
#  if defined __FreeBSD__                                   /* FreeBSD */
96
  /*{ "ARMSCII-8",  "ARMSCII-8" },*/
97
    { "Big5",       "BIG5" },
98
    { "C",          "ASCII" },
99
  /*{ "CP1131",     "CP1131" },*/
100
  /*{ "CP1251",     "CP1251" },*/
101
  /*{ "CP866",      "CP866" },*/
102
  /*{ "GB18030",    "GB18030" },*/
103
  /*{ "GB2312",     "GB2312" },*/
104
  /*{ "GBK",        "GBK" },*/
105
  /*{ "ISCII-DEV",  "?" },*/
106
    { "ISO8859-1",  "ISO-8859-1" },
107
    { "ISO8859-13", "ISO-8859-13" },
108
    { "ISO8859-15", "ISO-8859-15" },
109
    { "ISO8859-2",  "ISO-8859-2" },
110
    { "ISO8859-5",  "ISO-8859-5" },
111
    { "ISO8859-7",  "ISO-8859-7" },
112
    { "ISO8859-9",  "ISO-8859-9" },
113
  /*{ "KOI8-R",     "KOI8-R" },*/
114
  /*{ "KOI8-U",     "KOI8-U" },*/
115
    { "SJIS",       "SHIFT_JIS" },
116
    { "US-ASCII",   "ASCII" },
117
    { "eucCN",      "GB2312" },
118
    { "eucJP",      "EUC-JP" },
119
    { "eucKR",      "EUC-KR" }
120
#   define alias_table_defined
121
#  endif
122
#  if defined __NetBSD__                                    /* NetBSD */
123
    { "646",        "ASCII" },
124
  /*{ "ARMSCII-8",  "ARMSCII-8" },*/
125
  /*{ "BIG5",       "BIG5" },*/
126
    { "Big5-HKSCS", "BIG5-HKSCS" },
127
  /*{ "CP1251",     "CP1251" },*/
128
  /*{ "CP866",      "CP866" },*/
129
  /*{ "GB18030",    "GB18030" },*/
130
  /*{ "GB2312",     "GB2312" },*/
131
    { "ISO8859-1",  "ISO-8859-1" },
132
    { "ISO8859-13", "ISO-8859-13" },
133
    { "ISO8859-15", "ISO-8859-15" },
134
    { "ISO8859-2",  "ISO-8859-2" },
135
    { "ISO8859-4",  "ISO-8859-4" },
136
    { "ISO8859-5",  "ISO-8859-5" },
137
    { "ISO8859-7",  "ISO-8859-7" },
138
  /*{ "KOI8-R",     "KOI8-R" },*/
139
  /*{ "KOI8-U",     "KOI8-U" },*/
140
  /*{ "PT154",      "PT154" },*/
141
    { "SJIS",       "SHIFT_JIS" },
142
    { "eucCN",      "GB2312" },
143
    { "eucJP",      "EUC-JP" },
144
    { "eucKR",      "EUC-KR" },
145
    { "eucTW",      "EUC-TW" }
146
#   define alias_table_defined
147
#  endif
148
#  if defined __OpenBSD__                                   /* OpenBSD */
149
    { "646",        "ASCII" },
150
    { "ISO8859-1",  "ISO-8859-1" },
151
    { "ISO8859-13", "ISO-8859-13" },
152
    { "ISO8859-15", "ISO-8859-15" },
153
    { "ISO8859-2",  "ISO-8859-2" },
154
    { "ISO8859-4",  "ISO-8859-4" },
155
    { "ISO8859-5",  "ISO-8859-5" },
156
    { "ISO8859-7",  "ISO-8859-7" },
157
    { "US-ASCII",   "ASCII" }
158
#   define alias_table_defined
159
#  endif
160
#  if defined __APPLE__ && defined __MACH__                 /* Mac OS X */
161
    /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
162
       useless:
163
       - It returns the empty string when LANG is set to a locale of the
164
         form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
165
         LC_CTYPE file.
166
       - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
167
         the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
168
       - The documentation says:
169
           "... all code that calls BSD system routines should ensure
170
            that the const *char parameters of these routines are in UTF-8
171
            encoding. All BSD system functions expect their string
172
            parameters to be in UTF-8 encoding and nothing else."
173
         It also says
174
           "An additional caveat is that string parameters for files,
175
            paths, and other file-system entities must be in canonical
176
            UTF-8. In a canonical UTF-8 Unicode string, all decomposable
177
            characters are decomposed ..."
178
         but this is not true: You can pass non-decomposed UTF-8 strings
179
         to file system functions, and it is the OS which will convert
180
         them to decomposed UTF-8 before accessing the file system.
181
       - The Apple Terminal application displays UTF-8 by default.
182
       - However, other applications are free to use different encodings:
183
         - xterm uses ISO-8859-1 by default.
184
         - TextEdit uses MacRoman by default.
185
       We prefer UTF-8 over decomposed UTF-8-MAC because one should
186
       minimize the use of decomposed Unicode. Unfortunately, through the
187
       Darwin file system, decomposed UTF-8 strings are leaked into user
188
       space nevertheless.
189
       Then there are also the locales with encodings other than US-ASCII
190
       and UTF-8. These locales can be occasionally useful to users (e.g.
191
       when grepping through ISO-8859-1 encoded text files), when all their
192
       file names are in US-ASCII.
193
     */
194
    { "ARMSCII-8",  "ARMSCII-8" },
195
    { "Big5",       "BIG5" },
196
    { "Big5HKSCS",  "BIG5-HKSCS" },
197
    { "CP1131",     "CP1131" },
198
    { "CP1251",     "CP1251" },
199
    { "CP866",      "CP866" },
200
    { "CP949",      "CP949" },
201
    { "GB18030",    "GB18030" },
202
    { "GB2312",     "GB2312" },
203
    { "GBK",        "GBK" },
204
  /*{ "ISCII-DEV",  "?" },*/
205
    { "ISO8859-1",  "ISO-8859-1" },
206
    { "ISO8859-13", "ISO-8859-13" },
207
    { "ISO8859-15", "ISO-8859-15" },
208
    { "ISO8859-2",  "ISO-8859-2" },
209
    { "ISO8859-4",  "ISO-8859-4" },
210
    { "ISO8859-5",  "ISO-8859-5" },
211
    { "ISO8859-7",  "ISO-8859-7" },
212
    { "ISO8859-9",  "ISO-8859-9" },
213
    { "KOI8-R",     "KOI8-R" },
214
    { "KOI8-U",     "KOI8-U" },
215
    { "PT154",      "PT154" },
216
    { "SJIS",       "SHIFT_JIS" },
217
    { "eucCN",      "GB2312" },
218
    { "eucJP",      "EUC-JP" },
219
    { "eucKR",      "EUC-KR" }
220
#   define alias_table_defined
221
#  endif
222
#  if defined _AIX                                          /* AIX */
223
  /*{ "GBK",        "GBK" },*/
224
    { "IBM-1046",   "CP1046" },
225
    { "IBM-1124",   "CP1124" },
226
    { "IBM-1129",   "CP1129" },
227
    { "IBM-1252",   "CP1252" },
228
    { "IBM-850",    "CP850" },
229
    { "IBM-856",    "CP856" },
230
    { "IBM-921",    "ISO-8859-13" },
231
    { "IBM-922",    "CP922" },
232
    { "IBM-932",    "CP932" },
233
    { "IBM-943",    "CP943" },
234
    { "IBM-eucCN",  "GB2312" },
235
    { "IBM-eucJP",  "EUC-JP" },
236
    { "IBM-eucKR",  "EUC-KR" },
237
    { "IBM-eucTW",  "EUC-TW" },
238
    { "ISO8859-1",  "ISO-8859-1" },
239
    { "ISO8859-15", "ISO-8859-15" },
240
    { "ISO8859-2",  "ISO-8859-2" },
241
    { "ISO8859-5",  "ISO-8859-5" },
242
    { "ISO8859-6",  "ISO-8859-6" },
243
    { "ISO8859-7",  "ISO-8859-7" },
244
    { "ISO8859-8",  "ISO-8859-8" },
245
    { "ISO8859-9",  "ISO-8859-9" },
246
    { "TIS-620",    "TIS-620" },
247
  /*{ "UTF-8",      "UTF-8" },*/
248
    { "big5",       "BIG5" }
249
#   define alias_table_defined
250
#  endif
251
#  if defined __hpux                                        /* HP-UX */
252
    { "SJIS",      "SHIFT_JIS" },
253
    { "arabic8",   "HP-ARABIC8" },
254
    { "big5",      "BIG5" },
255
    { "cp1251",    "CP1251" },
256
    { "eucJP",     "EUC-JP" },
257
    { "eucKR",     "EUC-KR" },
258
    { "eucTW",     "EUC-TW" },
259
    { "gb18030",   "GB18030" },
260
    { "greek8",    "HP-GREEK8" },
261
    { "hebrew8",   "HP-HEBREW8" },
262
    { "hkbig5",    "BIG5-HKSCS" },
263
    { "hp15CN",    "GB2312" },
264
    { "iso88591",  "ISO-8859-1" },
265
    { "iso885913", "ISO-8859-13" },
266
    { "iso885915", "ISO-8859-15" },
267
    { "iso88592",  "ISO-8859-2" },
268
    { "iso88594",  "ISO-8859-4" },
269
    { "iso88595",  "ISO-8859-5" },
270
    { "iso88596",  "ISO-8859-6" },
271
    { "iso88597",  "ISO-8859-7" },
272
    { "iso88598",  "ISO-8859-8" },
273
    { "iso88599",  "ISO-8859-9" },
274
    { "kana8",     "HP-KANA8" },
275
    { "koi8r",     "KOI8-R" },
276
    { "roman8",    "HP-ROMAN8" },
277
    { "tis620",    "TIS-620" },
278
    { "turkish8",  "HP-TURKISH8" },
279
    { "utf8",      "UTF-8" }
280
#   define alias_table_defined
281
#  endif
282
#  if defined __sgi                                         /* IRIX */
283
    { "ISO8859-1",  "ISO-8859-1" },
284
    { "ISO8859-15", "ISO-8859-15" },
285
    { "ISO8859-2",  "ISO-8859-2" },
286
    { "ISO8859-5",  "ISO-8859-5" },
287
    { "ISO8859-7",  "ISO-8859-7" },
288
    { "ISO8859-9",  "ISO-8859-9" },
289
    { "eucCN",      "GB2312" },
290
    { "eucJP",      "EUC-JP" },
291
    { "eucKR",      "EUC-KR" },
292
    { "eucTW",      "EUC-TW" }
293
#   define alias_table_defined
294
#  endif
295
#  if defined __osf__                                       /* OSF/1 */
296
  /*{ "GBK",        "GBK" },*/
297
    { "ISO8859-1",  "ISO-8859-1" },
298
    { "ISO8859-15", "ISO-8859-15" },
299
    { "ISO8859-2",  "ISO-8859-2" },
300
    { "ISO8859-4",  "ISO-8859-4" },
301
    { "ISO8859-5",  "ISO-8859-5" },
302
    { "ISO8859-7",  "ISO-8859-7" },
303
    { "ISO8859-8",  "ISO-8859-8" },
304
    { "ISO8859-9",  "ISO-8859-9" },
305
    { "KSC5601",    "CP949" },
306
    { "SJIS",       "SHIFT_JIS" },
307
    { "TACTIS",     "TIS-620" },
308
  /*{ "UTF-8",      "UTF-8" },*/
309
    { "big5",       "BIG5" },
310
    { "cp850",      "CP850" },
311
    { "dechanyu",   "DEC-HANYU" },
312
    { "dechanzi",   "GB2312" },
313
    { "deckanji",   "DEC-KANJI" },
314
    { "deckorean",  "EUC-KR" },
315
    { "eucJP",      "EUC-JP" },
316
    { "eucKR",      "EUC-KR" },
317
    { "eucTW",      "EUC-TW" },
318
    { "sdeckanji",  "EUC-JP" }
319
#   define alias_table_defined
320
#  endif
321
#  if defined __sun                                         /* Solaris */
322
    { "5601",        "EUC-KR" },
323
    { "646",         "ASCII" },
324
  /*{ "BIG5",        "BIG5" },*/
325
    { "Big5-HKSCS",  "BIG5-HKSCS" },
326
    { "GB18030",     "GB18030" },
327
  /*{ "GBK",         "GBK" },*/
328
    { "ISO8859-1",   "ISO-8859-1" },
329
    { "ISO8859-11",  "TIS-620" },
330
    { "ISO8859-13",  "ISO-8859-13" },
331
    { "ISO8859-15",  "ISO-8859-15" },
332
    { "ISO8859-2",   "ISO-8859-2" },
333
    { "ISO8859-3",   "ISO-8859-3" },
334
    { "ISO8859-4",   "ISO-8859-4" },
335
    { "ISO8859-5",   "ISO-8859-5" },
336
    { "ISO8859-6",   "ISO-8859-6" },
337
    { "ISO8859-7",   "ISO-8859-7" },
338
    { "ISO8859-8",   "ISO-8859-8" },
339
    { "ISO8859-9",   "ISO-8859-9" },
340
    { "PCK",         "SHIFT_JIS" },
341
    { "TIS620.2533", "TIS-620" },
342
  /*{ "UTF-8",       "UTF-8" },*/
343
    { "ansi-1251",   "CP1251" },
344
    { "cns11643",    "EUC-TW" },
345
    { "eucJP",       "EUC-JP" },
346
    { "gb2312",      "GB2312" },
347
    { "koi8-r",      "KOI8-R" }
348
#   define alias_table_defined
349
#  endif
350
#  if defined __minix                                       /* Minix */
351
    { "646", "ASCII" }
352
#   define alias_table_defined
353
#  endif
354
#  if defined WINDOWS_NATIVE || defined __CYGWIN__          /* Windows */
355
    { "CP1361",  "JOHAB" },
356
    { "CP20127", "ASCII" },
357
    { "CP20866", "KOI8-R" },
358
    { "CP20936", "GB2312" },
359
    { "CP21866", "KOI8-RU" },
360
    { "CP28591", "ISO-8859-1" },
361
    { "CP28592", "ISO-8859-2" },
362
    { "CP28593", "ISO-8859-3" },
363
    { "CP28594", "ISO-8859-4" },
364
    { "CP28595", "ISO-8859-5" },
365
    { "CP28596", "ISO-8859-6" },
366
    { "CP28597", "ISO-8859-7" },
367
    { "CP28598", "ISO-8859-8" },
368
    { "CP28599", "ISO-8859-9" },
369
    { "CP28605", "ISO-8859-15" },
370
    { "CP38598", "ISO-8859-8" },
371
    { "CP51932", "EUC-JP" },
372
    { "CP51936", "GB2312" },
373
    { "CP51949", "EUC-KR" },
374
    { "CP51950", "EUC-TW" },
375
    { "CP54936", "GB18030" },
376
    { "CP65001", "UTF-8" },
377
    { "CP936",   "GBK" }
378
#   define alias_table_defined
379
#  endif
380
#  if defined OS2                                           /* OS/2 */
381
    /* The list of encodings is taken from "List of OS/2 Codepages"
382
       by Alex Taylor:
383
       <https://altsan.org/os2/toolkits/uls/index.html#codepages>.
384
       See also "__convcp() of kLIBC":
385
       <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>.  */
386
    { "CP1004",        "CP1252" },
387
  /*{ "CP1041",        "CP943" },*/
388
  /*{ "CP1088",        "CP949" },*/
389
    { "CP1089",        "ISO-8859-6" },
390
  /*{ "CP1114",        "CP950" },*/
391
  /*{ "CP1115",        "GB2312" },*/
392
    { "CP1208",        "UTF-8" },
393
  /*{ "CP1380",        "GB2312" },*/
394
    { "CP1381",        "GB2312" },
395
    { "CP1383",        "GB2312" },
396
    { "CP1386",        "GBK" },
397
  /*{ "CP301",         "CP943" },*/
398
    { "CP3372",        "EUC-JP" },
399
    { "CP4946",        "CP850" },
400
  /*{ "CP5048",        "JIS_X0208-1990" },*/
401
  /*{ "CP5049",        "JIS_X0212-1990" },*/
402
  /*{ "CP5067",        "KS_C_5601-1987" },*/
403
    { "CP813",         "ISO-8859-7" },
404
    { "CP819",         "ISO-8859-1" },
405
    { "CP878",         "KOI8-R" },
406
  /*{ "CP897",         "CP943" },*/
407
    { "CP912",         "ISO-8859-2" },
408
    { "CP913",         "ISO-8859-3" },
409
    { "CP914",         "ISO-8859-4" },
410
    { "CP915",         "ISO-8859-5" },
411
    { "CP916",         "ISO-8859-8" },
412
    { "CP920",         "ISO-8859-9" },
413
    { "CP921",         "ISO-8859-13" },
414
    { "CP923",         "ISO-8859-15" },
415
  /*{ "CP941",         "CP943" },*/
416
  /*{ "CP947",         "CP950" },*/
417
  /*{ "CP951",         "CP949" },*/
418
  /*{ "CP952",         "JIS_X0208-1990" },*/
419
  /*{ "CP953",         "JIS_X0212-1990" },*/
420
    { "CP954",         "EUC-JP" },
421
    { "CP964",         "EUC-TW" },
422
    { "CP970",         "EUC-KR" },
423
  /*{ "CP971",         "KS_C_5601-1987" },*/
424
    { "IBM-1004",      "CP1252" },
425
  /*{ "IBM-1006",      "?" },*/
426
  /*{ "IBM-1008",      "?" },*/
427
  /*{ "IBM-1041",      "CP943" },*/
428
  /*{ "IBM-1051",      "?" },*/
429
  /*{ "IBM-1088",      "CP949" },*/
430
    { "IBM-1089",      "ISO-8859-6" },
431
  /*{ "IBM-1098",      "?" },*/
432
  /*{ "IBM-1114",      "CP950" },*/
433
  /*{ "IBM-1115",      "GB2312" },*/
434
  /*{ "IBM-1116",      "?" },*/
435
  /*{ "IBM-1117",      "?" },*/
436
  /*{ "IBM-1118",      "?" },*/
437
  /*{ "IBM-1119",      "?" },*/
438
    { "IBM-1124",      "CP1124" },
439
    { "IBM-1125",      "CP1125" },
440
    { "IBM-1131",      "CP1131" },
441
    { "IBM-1208",      "UTF-8" },
442
    { "IBM-1250",      "CP1250" },
443
    { "IBM-1251",      "CP1251" },
444
    { "IBM-1252",      "CP1252" },
445
    { "IBM-1253",      "CP1253" },
446
    { "IBM-1254",      "CP1254" },
447
    { "IBM-1255",      "CP1255" },
448
    { "IBM-1256",      "CP1256" },
449
    { "IBM-1257",      "CP1257" },
450
  /*{ "IBM-1275",      "?" },*/
451
  /*{ "IBM-1276",      "?" },*/
452
  /*{ "IBM-1277",      "?" },*/
453
  /*{ "IBM-1280",      "?" },*/
454
  /*{ "IBM-1281",      "?" },*/
455
  /*{ "IBM-1282",      "?" },*/
456
  /*{ "IBM-1283",      "?" },*/
457
  /*{ "IBM-1380",      "GB2312" },*/
458
    { "IBM-1381",      "GB2312" },
459
    { "IBM-1383",      "GB2312" },
460
    { "IBM-1386",      "GBK" },
461
  /*{ "IBM-301",       "CP943" },*/
462
    { "IBM-3372",      "EUC-JP" },
463
    { "IBM-367",       "ASCII" },
464
    { "IBM-437",       "CP437" },
465
    { "IBM-4946",      "CP850" },
466
  /*{ "IBM-5048",      "JIS_X0208-1990" },*/
467
  /*{ "IBM-5049",      "JIS_X0212-1990" },*/
468
  /*{ "IBM-5067",      "KS_C_5601-1987" },*/
469
    { "IBM-813",       "ISO-8859-7" },
470
    { "IBM-819",       "ISO-8859-1" },
471
    { "IBM-850",       "CP850" },
472
  /*{ "IBM-851",       "?" },*/
473
    { "IBM-852",       "CP852" },
474
    { "IBM-855",       "CP855" },
475
    { "IBM-856",       "CP856" },
476
    { "IBM-857",       "CP857" },
477
  /*{ "IBM-859",       "?" },*/
478
    { "IBM-860",       "CP860" },
479
    { "IBM-861",       "CP861" },
480
    { "IBM-862",       "CP862" },
481
    { "IBM-863",       "CP863" },
482
    { "IBM-864",       "CP864" },
483
    { "IBM-865",       "CP865" },
484
    { "IBM-866",       "CP866" },
485
  /*{ "IBM-868",       "?" },*/
486
    { "IBM-869",       "CP869" },
487
    { "IBM-874",       "CP874" },
488
    { "IBM-878",       "KOI8-R" },
489
  /*{ "IBM-895",       "?" },*/
490
  /*{ "IBM-897",       "CP943" },*/
491
  /*{ "IBM-907",       "?" },*/
492
  /*{ "IBM-909",       "?" },*/
493
    { "IBM-912",       "ISO-8859-2" },
494
    { "IBM-913",       "ISO-8859-3" },
495
    { "IBM-914",       "ISO-8859-4" },
496
    { "IBM-915",       "ISO-8859-5" },
497
    { "IBM-916",       "ISO-8859-8" },
498
    { "IBM-920",       "ISO-8859-9" },
499
    { "IBM-921",       "ISO-8859-13" },
500
    { "IBM-922",       "CP922" },
501
    { "IBM-923",       "ISO-8859-15" },
502
    { "IBM-932",       "CP932" },
503
  /*{ "IBM-941",       "CP943" },*/
504
  /*{ "IBM-942",       "?" },*/
505
    { "IBM-943",       "CP943" },
506
  /*{ "IBM-947",       "CP950" },*/
507
    { "IBM-949",       "CP949" },
508
    { "IBM-950",       "CP950" },
509
  /*{ "IBM-951",       "CP949" },*/
510
  /*{ "IBM-952",       "JIS_X0208-1990" },*/
511
  /*{ "IBM-953",       "JIS_X0212-1990" },*/
512
    { "IBM-954",       "EUC-JP" },
513
  /*{ "IBM-955",       "?" },*/
514
    { "IBM-964",       "EUC-TW" },
515
    { "IBM-970",       "EUC-KR" },
516
  /*{ "IBM-971",       "KS_C_5601-1987" },*/
517
    { "IBM-eucCN",     "GB2312" },
518
    { "IBM-eucJP",     "EUC-JP" },
519
    { "IBM-eucKR",     "EUC-KR" },
520
    { "IBM-eucTW",     "EUC-TW" },
521
    { "IBM33722",      "EUC-JP" },
522
    { "ISO8859-1",     "ISO-8859-1" },
523
    { "ISO8859-2",     "ISO-8859-2" },
524
    { "ISO8859-3",     "ISO-8859-3" },
525
    { "ISO8859-4",     "ISO-8859-4" },
526
    { "ISO8859-5",     "ISO-8859-5" },
527
    { "ISO8859-6",     "ISO-8859-6" },
528
    { "ISO8859-7",     "ISO-8859-7" },
529
    { "ISO8859-8",     "ISO-8859-8" },
530
    { "ISO8859-9",     "ISO-8859-9" },
531
  /*{ "JISX0201-1976", "JISX0201-1976" },*/
532
  /*{ "JISX0208-1978", "?" },*/
533
  /*{ "JISX0208-1983", "JIS_X0208-1983" },*/
534
  /*{ "JISX0208-1990", "JIS_X0208-1990" },*/
535
  /*{ "JISX0212-1990", "JIS_X0212-1990" },*/
536
  /*{ "KSC5601-1987",  "KS_C_5601-1987" },*/
537
    { "SJIS-1",        "CP943" },
538
    { "SJIS-2",        "CP943" },
539
    { "eucJP",         "EUC-JP" },
540
    { "eucKR",         "EUC-KR" },
541
    { "eucTW-1993",    "EUC-TW" }
542
#   define alias_table_defined
543
#  endif
544
#  if defined VMS                                           /* OpenVMS */
545
    /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
546
       "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
547
       section 10.7 "Handling Different Character Sets".  */
548
    { "DECHANYU",  "DEC-HANYU" },
549
    { "DECHANZI",  "GB2312" },
550
    { "DECKANJI",  "DEC-KANJI" },
551
    { "DECKOREAN", "EUC-KR" },
552
    { "ISO8859-1", "ISO-8859-1" },
553
    { "ISO8859-2", "ISO-8859-2" },
554
    { "ISO8859-5", "ISO-8859-5" },
555
    { "ISO8859-7", "ISO-8859-7" },
556
    { "ISO8859-8", "ISO-8859-8" },
557
    { "ISO8859-9", "ISO-8859-9" },
558
    { "SDECKANJI", "EUC-JP" },
559
    { "SJIS",      "SHIFT_JIS" },
560
    { "eucJP",     "EUC-JP" },
561
    { "eucTW",     "EUC-TW" }
562
#   define alias_table_defined
563
#  endif
564
#  ifndef alias_table_defined
565
    /* Just a dummy entry, to avoid a C syntax error.  */
566
    { "", "" }
567
#  endif
568
  };
569
570
# endif
571
572
#else
573
574
/* On these platforms, we use a mapping from locale name to GNU canonical
575
   encoding name.  */
576
577
/* One row of the locale table: pairs a locale name with the GNU canonical
   name of the encoding used by that locale.
   Both members are fixed-size character arrays: a locale name must be at
   most 17 characters long and a canonical name at most 11 characters long
   in order to remain NUL-terminated.  The members are initialized
   positionally by the table below, so their order matters.  */
struct table_entry
578
{
579
  const char locale[17+1];     /* locale name */
580
  const char canonical[11+1];  /* GNU canonical encoding name */
581
};
582
583
/* Table of platform-dependent mappings, sorted in ascending order.
   Maps a locale name to the GNU canonical name of the encoding used by that
   locale.  Only the FreeBSD and DJGPP sections below provide rows; each of
   them defines locale_table_defined.  When neither section is active, a
   single dummy row keeps the initializer non-empty.
   Rows are ordered by locale name in plain byte order, so uppercase names
   precede lowercase ones (e.g. "C" before "ar", "zh_TW.BIG5" before
   "zh_TW.Big5").
   NOTE(review): the last row of each section has no trailing comma, so the
   two sections appear to assume that they are never active together --
   confirm before adding a new platform section.  */
584
static const struct table_entry locale_table[] =
585
  {
586
# if defined __FreeBSD__                                    /* FreeBSD 4.2 */
587
    { "cs_CZ.ISO_8859-2",  "ISO-8859-2" },
588
    { "da_DK.DIS_8859-15", "ISO-8859-15" },
589
    { "da_DK.ISO_8859-1",  "ISO-8859-1" },
590
    { "de_AT.DIS_8859-15", "ISO-8859-15" },
591
    { "de_AT.ISO_8859-1",  "ISO-8859-1" },
592
    { "de_CH.DIS_8859-15", "ISO-8859-15" },
593
    { "de_CH.ISO_8859-1",  "ISO-8859-1" },
594
    { "de_DE.DIS_8859-15", "ISO-8859-15" },
595
    { "de_DE.ISO_8859-1",  "ISO-8859-1" },
596
    { "en_AU.DIS_8859-15", "ISO-8859-15" },
597
    { "en_AU.ISO_8859-1",  "ISO-8859-1" },
598
    { "en_CA.DIS_8859-15", "ISO-8859-15" },
599
    { "en_CA.ISO_8859-1",  "ISO-8859-1" },
600
    { "en_GB.DIS_8859-15", "ISO-8859-15" },
601
    { "en_GB.ISO_8859-1",  "ISO-8859-1" },
602
    { "en_US.DIS_8859-15", "ISO-8859-15" },
603
    { "en_US.ISO_8859-1",  "ISO-8859-1" },
604
    { "es_ES.DIS_8859-15", "ISO-8859-15" },
605
    { "es_ES.ISO_8859-1",  "ISO-8859-1" },
606
    { "fi_FI.DIS_8859-15", "ISO-8859-15" },
607
    { "fi_FI.ISO_8859-1",  "ISO-8859-1" },
608
    { "fr_BE.DIS_8859-15", "ISO-8859-15" },
609
    { "fr_BE.ISO_8859-1",  "ISO-8859-1" },
610
    { "fr_CA.DIS_8859-15", "ISO-8859-15" },
611
    { "fr_CA.ISO_8859-1",  "ISO-8859-1" },
612
    { "fr_CH.DIS_8859-15", "ISO-8859-15" },
613
    { "fr_CH.ISO_8859-1",  "ISO-8859-1" },
614
    { "fr_FR.DIS_8859-15", "ISO-8859-15" },
615
    { "fr_FR.ISO_8859-1",  "ISO-8859-1" },
616
    { "hr_HR.ISO_8859-2",  "ISO-8859-2" },
617
    { "hu_HU.ISO_8859-2",  "ISO-8859-2" },
618
    { "is_IS.DIS_8859-15", "ISO-8859-15" },
619
    { "is_IS.ISO_8859-1",  "ISO-8859-1" },
620
    { "it_CH.DIS_8859-15", "ISO-8859-15" },
621
    { "it_CH.ISO_8859-1",  "ISO-8859-1" },
622
    { "it_IT.DIS_8859-15", "ISO-8859-15" },
623
    { "it_IT.ISO_8859-1",  "ISO-8859-1" },
624
    { "ja_JP.EUC",         "EUC-JP" },
625
    { "ja_JP.SJIS",        "SHIFT_JIS" },
626
    { "ja_JP.Shift_JIS",   "SHIFT_JIS" },
627
    { "ko_KR.EUC",         "EUC-KR" },
628
    { "la_LN.ASCII",       "ASCII" },
629
    { "la_LN.DIS_8859-15", "ISO-8859-15" },
630
    { "la_LN.ISO_8859-1",  "ISO-8859-1" },
631
    { "la_LN.ISO_8859-2",  "ISO-8859-2" },
632
    { "la_LN.ISO_8859-4",  "ISO-8859-4" },
633
    { "lt_LN.ASCII",       "ASCII" },
634
    { "lt_LN.DIS_8859-15", "ISO-8859-15" },
635
    { "lt_LN.ISO_8859-1",  "ISO-8859-1" },
636
    { "lt_LN.ISO_8859-2",  "ISO-8859-2" },
637
    { "lt_LT.ISO_8859-4",  "ISO-8859-4" },
638
    { "nl_BE.DIS_8859-15", "ISO-8859-15" },
639
    { "nl_BE.ISO_8859-1",  "ISO-8859-1" },
640
    { "nl_NL.DIS_8859-15", "ISO-8859-15" },
641
    { "nl_NL.ISO_8859-1",  "ISO-8859-1" },
642
    { "no_NO.DIS_8859-15", "ISO-8859-15" },
643
    { "no_NO.ISO_8859-1",  "ISO-8859-1" },
644
    { "pl_PL.ISO_8859-2",  "ISO-8859-2" },
645
    { "pt_PT.DIS_8859-15", "ISO-8859-15" },
646
    { "pt_PT.ISO_8859-1",  "ISO-8859-1" },
647
    { "ru_RU.CP866",       "CP866" },
648
    { "ru_RU.ISO_8859-5",  "ISO-8859-5" },
649
    { "ru_RU.KOI8-R",      "KOI8-R" },
650
    { "ru_SU.CP866",       "CP866" },
651
    { "ru_SU.ISO_8859-5",  "ISO-8859-5" },
652
    { "ru_SU.KOI8-R",      "KOI8-R" },
653
    { "sl_SI.ISO_8859-2",  "ISO-8859-2" },
654
    { "sv_SE.DIS_8859-15", "ISO-8859-15" },
655
    { "sv_SE.ISO_8859-1",  "ISO-8859-1" },
656
    { "uk_UA.KOI8-U",      "KOI8-U" },
657
    { "zh_CN.EUC",         "GB2312" },
658
    { "zh_TW.BIG5",        "BIG5" },
659
    { "zh_TW.Big5",        "BIG5" }
660
#  define locale_table_defined
661
# endif
662
# if defined __DJGPP__                                      /* DOS / DJGPP 2.03 */
663
    /* The encodings given here may not all be correct.
664
       If you find that the encoding given for your language and
665
       country is not the one your DOS machine actually uses, just
666
       correct it in this file, and send a mail to
667
       Juan Manuel Guerrero <juan.guerrero@gmx.de>
668
       and <bug-gnulib@gnu.org>.  */
669
    { "C",     "ASCII" },
670
    { "ar",    "CP864" },
671
    { "ar_AE", "CP864" },
672
    { "ar_DZ", "CP864" },
673
    { "ar_EG", "CP864" },
674
    { "ar_IQ", "CP864" },
675
    { "ar_IR", "CP864" },
676
    { "ar_JO", "CP864" },
677
    { "ar_KW", "CP864" },
678
    { "ar_MA", "CP864" },
679
    { "ar_OM", "CP864" },
680
    { "ar_QA", "CP864" },
681
    { "ar_SA", "CP864" },
682
    { "ar_SY", "CP864" },
683
    { "be",    "CP866" },
684
    { "be_BE", "CP866" },
685
    { "bg",    "CP866" }, /* not CP855 ?? */
686
    { "bg_BG", "CP866" }, /* not CP855 ?? */
687
    { "ca",    "CP850" },
688
    { "ca_ES", "CP850" },
689
    { "cs",    "CP852" },
690
    { "cs_CZ", "CP852" },
691
    { "da",    "CP865" }, /* not CP850 ?? */
692
    { "da_DK", "CP865" }, /* not CP850 ?? */
693
    { "de",    "CP850" },
694
    { "de_AT", "CP850" },
695
    { "de_CH", "CP850" },
696
    { "de_DE", "CP850" },
697
    { "el",    "CP869" },
698
    { "el_GR", "CP869" },
699
    { "en",    "CP850" },
700
    { "en_AU", "CP850" }, /* not CP437 ?? */
701
    { "en_CA", "CP850" },
702
    { "en_GB", "CP850" },
703
    { "en_NZ", "CP437" },
704
    { "en_US", "CP437" },
705
    { "en_ZA", "CP850" }, /* not CP437 ?? */
706
    { "eo",    "CP850" },
707
    { "eo_EO", "CP850" },
708
    { "es",    "CP850" },
709
    { "es_AR", "CP850" },
710
    { "es_BO", "CP850" },
711
    { "es_CL", "CP850" },
712
    { "es_CO", "CP850" },
713
    { "es_CR", "CP850" },
714
    { "es_CU", "CP850" },
715
    { "es_DO", "CP850" },
716
    { "es_EC", "CP850" },
717
    { "es_ES", "CP850" },
718
    { "es_GT", "CP850" },
719
    { "es_HN", "CP850" },
720
    { "es_MX", "CP850" },
721
    { "es_NI", "CP850" },
722
    { "es_PA", "CP850" },
723
    { "es_PE", "CP850" },
724
    { "es_PY", "CP850" },
725
    { "es_SV", "CP850" },
726
    { "es_UY", "CP850" },
727
    { "es_VE", "CP850" },
728
    { "et",    "CP850" },
729
    { "et_EE", "CP850" },
730
    { "eu",    "CP850" },
731
    { "eu_ES", "CP850" },
732
    { "fi",    "CP850" },
733
    { "fi_FI", "CP850" },
734
    { "fr",    "CP850" },
735
    { "fr_BE", "CP850" },
736
    { "fr_CA", "CP850" },
737
    { "fr_CH", "CP850" },
738
    { "fr_FR", "CP850" },
739
    { "ga",    "CP850" },
740
    { "ga_IE", "CP850" },
741
    { "gd",    "CP850" },
742
    { "gd_GB", "CP850" },
743
    { "gl",    "CP850" },
744
    { "gl_ES", "CP850" },
745
    { "he",    "CP862" },
746
    { "he_IL", "CP862" },
747
    { "hr",    "CP852" },
748
    { "hr_HR", "CP852" },
749
    { "hu",    "CP852" },
750
    { "hu_HU", "CP852" },
751
    { "id",    "CP850" }, /* not CP437 ?? */
752
    { "id_ID", "CP850" }, /* not CP437 ?? */
753
    { "is",    "CP861" }, /* not CP850 ?? */
754
    { "is_IS", "CP861" }, /* not CP850 ?? */
755
    { "it",    "CP850" },
756
    { "it_CH", "CP850" },
757
    { "it_IT", "CP850" },
758
    { "ja",    "CP932" },
759
    { "ja_JP", "CP932" },
760
    { "kr",    "CP949" }, /* not CP934 ?? */
761
    { "kr_KR", "CP949" }, /* not CP934 ?? */
762
    { "lt",    "CP775" },
763
    { "lt_LT", "CP775" },
764
    { "lv",    "CP775" },
765
    { "lv_LV", "CP775" },
766
    { "mk",    "CP866" }, /* not CP855 ?? */
767
    { "mk_MK", "CP866" }, /* not CP855 ?? */
768
    { "mt",    "CP850" },
769
    { "mt_MT", "CP850" },
770
    { "nb",    "CP865" }, /* not CP850 ?? */
771
    { "nb_NO", "CP865" }, /* not CP850 ?? */
772
    { "nl",    "CP850" },
773
    { "nl_BE", "CP850" },
774
    { "nl_NL", "CP850" },
775
    { "nn",    "CP865" }, /* not CP850 ?? */
776
    { "nn_NO", "CP865" }, /* not CP850 ?? */
777
    { "no",    "CP865" }, /* not CP850 ?? */
778
    { "no_NO", "CP865" }, /* not CP850 ?? */
779
    { "pl",    "CP852" },
780
    { "pl_PL", "CP852" },
781
    { "pt",    "CP850" },
782
    { "pt_BR", "CP850" },
783
    { "pt_PT", "CP850" },
784
    { "ro",    "CP852" },
785
    { "ro_RO", "CP852" },
786
    { "ru",    "CP866" },
787
    { "ru_RU", "CP866" },
788
    { "sk",    "CP852" },
789
    { "sk_SK", "CP852" },
790
    { "sl",    "CP852" },
791
    { "sl_SI", "CP852" },
792
    { "sq",    "CP852" },
793
    { "sq_AL", "CP852" },
794
    { "sr",    "CP852" }, /* CP852 or CP866 or CP855 ?? */
795
    { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */
796
    { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */
797
    { "sv",    "CP850" },
798
    { "sv_SE", "CP850" },
799
    { "th",    "CP874" },
800
    { "th_TH", "CP874" },
801
    { "tr",    "CP857" },
802
    { "tr_TR", "CP857" },
803
    { "uk",    "CP1125" },
804
    { "uk_UA", "CP1125" },
805
    { "zh_CN", "GBK" },
806
    { "zh_TW", "CP950" } /* not CP938 ?? */
807
#  define locale_table_defined
808
# endif
809
# ifndef locale_table_defined
810
    /* Just a dummy entry, to avoid a C syntax error.  */
811
    { "", "" }
812
# endif
813
  };
814
815
#endif
816
817
818
/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed below.
   The result must not be freed; it is statically allocated.  The result
   becomes invalid when setlocale() is used to change the global locale, or
   when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG
   is changed; threads in multithreaded programs should not do this.
   If the canonical name cannot be determined, the result is a non-canonical
   name.  The result is never NULL and never the empty string.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;

  /* This function must be multithread-safe.  To achieve this without using
     thread-local storage, we use a simple strcpy or memcpy to fill this static
     buffer.  Filling it through, for example, strcpy + strcat would not be
     guaranteed to leave the buffer's contents intact if another thread is
     currently accessing it.  If necessary, the contents are first assembled in
     a stack-allocated buffer.  */

#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  Return the suffix of the locale name from the
     environment variables (if present) or the codepage as a number.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      static char resultbuf[2 + 10 + 1];

      /* Usual precedence: LC_ALL, then LC_CTYPE, then LANG.  */
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                return dot;
              if (modifier - dot < sizeof (resultbuf))
                {
                  /* This way of filling resultbuf is multithread-safe.  */
                  memcpy (resultbuf, dot, modifier - dot);
                  resultbuf [modifier - dot] = '\0';
                  return resultbuf;
                }
              /* Otherwise the encoding part does not fit into resultbuf;
                 fall through to the codepage number.  */
            }
        }

      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().  This encoding is used by Cygwin, unless the user
         has set the environment variable CYGWIN=codepage:oem (which very few
         people do).
         Output directed to console windows needs to be converted (to
         GetOEMCP() if the console is using a raster font, or to
         GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
         this conversion transparently (see winsup/cygwin/fhandler_console.cc),
         converting to GetConsoleOutputCP().  This leads to correct results,
         except when SetConsoleOutputCP has been called and a raster font is
         in use.  */
      {
        char buf[2 + 10 + 1];

        /* Assemble on the stack first, then publish with a single strcpy.  */
        sprintf (buf, "CP%u", GetACP ());
        strcpy (resultbuf, buf);
        codeset = resultbuf;
      }
    }
#  endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

# elif defined WINDOWS_NATIVE

  char buf[2 + 10 + 1];
  static char resultbuf[2 + 10 + 1];

  /* The Windows API has a function returning the locale's codepage as
     a number, but the value doesn't change according to what the
     'setlocale' call specified.  So we use it as a last resort, in
     case the string returned by 'setlocale' doesn't specify the
     codepage.  */
  char *current_locale = setlocale (LC_CTYPE, NULL);
  /* Be defensive: never hand a null pointer to strrchr.  */
  char *pdot = (current_locale != NULL ? strrchr (current_locale, '.') : NULL);

  if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
    sprintf (buf, "CP%s", pdot + 1);
  else
    {
      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().
         When the output goes to a console window, it needs to be provided in
         GetOEMCP() encoding if the console is using a raster font, or in
         GetConsoleOutputCP() encoding if it is using a TrueType font.
         But in GUI programs and for output sent to files and pipes, GetACP()
         encoding is the best bet.  */
      sprintf (buf, "CP%u", GetACP ());
    }
  /* For a locale name such as "French_France.65001", in Windows 10,
     setlocale now returns "French_France.utf8" instead, or in the UTF-8
     environment (with modern system settings) "fr_FR.UTF-8".  */
  if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0
      || strcmp (buf + 2, "UTF-8") == 0)
    codeset = "UTF-8";
  else
    {
      /* This way of filling resultbuf is multithread-safe.  */
      strcpy (resultbuf, buf);
      codeset = resultbuf;
    }

# elif defined OS2

  const char *locale;
  static char resultbuf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  codeset = NULL;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          if (modifier - dot < sizeof (resultbuf))
            {
              /* This way of filling resultbuf is multithread-safe.  */
              memcpy (resultbuf, dot, modifier - dot);
              resultbuf [modifier - dot] = '\0';
              return resultbuf;
            }
        }

      /* For the POSIX locale, don't use the system's codepage.  */
      if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
        codeset = "";
    }

  if (codeset == NULL)
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          char buf[2 + 10 + 1];

          /* The cast makes the argument type match the %u conversion and
             keeps the output within the 10 digits that buf is sized for.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          strcpy (resultbuf, buf);
          codeset = resultbuf;
        }
    }

# else

#  error "Add code for other platforms here."

# endif

  /* Resolve alias.  */
  {
# ifdef alias_table_defined
    /* On some platforms, UTF-8 locales are the most frequently used ones.
       Speed up the common case and slow down the less common cases by
       testing for this case first.  */
#  if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
    if (strcmp (codeset, "UTF-8") == 0)
      goto done_table_lookup;
    else
#  endif
      {
        const struct table_entry * const table = alias_table;
        size_t const table_size =
          sizeof (alias_table) / sizeof (struct table_entry);
        /* The table is sorted.  Perform a binary search.  */
        size_t hi = table_size;
        size_t lo = 0;
        while (lo < hi)
          {
            /* Invariant:
               for i < lo, strcmp (table[i].alias, codeset) < 0,
               for i >= hi, strcmp (table[i].alias, codeset) > 0.  */
            size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
            int cmp = strcmp (table[mid].alias, codeset);
            if (cmp < 0)
              lo = mid + 1;
            else if (cmp > 0)
              hi = mid;
            else
              {
                /* Found an i with
                     strcmp (table[i].alias, codeset) == 0.  */
                codeset = table[mid].canonical;
                goto done_table_lookup;
              }
          }
      }
    /* The 'if (0) label: {} else' construct lets a successful lookup jump
       past the "not found" block below, while a failed lookup falls into
       it.  */
    if (0)
      done_table_lookup: {}
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        if (codeset[0] == '\0')
          codeset = "ASCII";
# endif
      }
  }

#else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like DJGPP) have only the C locale.  Therefore we don't use setlocale
     here; it would return "C" when it doesn't support the locale name the
     user has set.  */
# if 0
  locale = setlocale (LC_CTYPE, NULL);
# endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
          /* Whichever variable was consulted last, make sure that locale
             is a valid string for the strcmp calls below.  */
          if (locale == NULL)
            locale = "";
        }
    }

  /* Map locale name to canonical encoding name.  */
  {
# ifdef locale_table_defined
    const struct table_entry * const table = locale_table;
    size_t const table_size =
      sizeof (locale_table) / sizeof (struct table_entry);
    /* The table is sorted.  Perform a binary search.  */
    size_t hi = table_size;
    size_t lo = 0;
    while (lo < hi)
      {
        /* Invariant:
           for i < lo, strcmp (table[i].locale, locale) < 0,
           for i >= hi, strcmp (table[i].locale, locale) > 0.  */
        size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
        int cmp = strcmp (table[mid].locale, locale);
        if (cmp < 0)
          lo = mid + 1;
        else if (cmp > 0)
          hi = mid;
        else
          {
            /* Found an i with
                 strcmp (table[i].locale, locale) == 0.  */
            codeset = table[mid].canonical;
            goto done_table_lookup;
          }
      }
    /* A successful lookup jumps to the label and skips the "not found"
       block; a failed lookup falls into it.  */
    if (0)
      done_table_lookup: ;
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* The canonical name cannot be determined.  */
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        codeset = "ASCII";
# endif
      }
  }

#endif

#ifdef DARWIN7
  /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
     (the default codeset) does not work when MB_CUR_MAX is 1.  */
  if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    codeset = "ASCII";
#endif

  return codeset;
}