Coverage Report

Created: 2026-01-06 07:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn2/gl/localcharset.c
Line
Count
Source
1
/* Determine a canonical name for the current locale's character encoding.
2
3
   Copyright (C) 2000-2006, 2008-2025 Free Software Foundation, Inc.
4
5
   This file is free software: you can redistribute it and/or modify
6
   it under the terms of the GNU Lesser General Public License as
7
   published by the Free Software Foundation; either version 2.1 of the
8
   License, or (at your option) any later version.
9
10
   This file is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU Lesser General Public License for more details.
14
15
   You should have received a copy of the GNU Lesser General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17
18
/* Written by Bruno Haible <bruno@clisp.org>.  */
19
20
#include <config.h>
21
22
/* Specification.  */
23
#include "localcharset.h"
24
25
#include <stddef.h>
26
#include <stdio.h>
27
#include <string.h>
28
#include <stdlib.h>
29
30
#if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
31
# define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
32
#endif
33
34
#if defined _WIN32 && !defined __CYGWIN__
35
# define WINDOWS_NATIVE
36
# include <locale.h>
37
#endif
38
39
#if defined __EMX__
40
/* Assume EMX program runs on OS/2, even if compiled under DOS.  */
41
# ifndef OS2
42
#  define OS2
43
# endif
44
#endif
45
46
#if !defined WINDOWS_NATIVE
47
# if HAVE_LANGINFO_CODESET
48
#  include <langinfo.h>
49
# else
50
#  if 0 /* see comment regarding use of setlocale(), below */
51
#   include <locale.h>
52
#  endif
53
# endif
54
# ifdef __CYGWIN__
55
#  define WIN32_LEAN_AND_MEAN
56
#  include <windows.h>
57
# endif
58
#elif defined WINDOWS_NATIVE
59
# define WIN32_LEAN_AND_MEAN
60
# include <windows.h>
61
  /* For the use of setlocale() below, the Gnulib override in setlocale.c is
62
     not needed; see the platform lists in setlocale_null.m4.  */
63
# undef setlocale
64
#endif
65
#if defined OS2
66
# define INCL_DOS
67
# include <os2.h>
68
#endif
69
70
/* For MB_CUR_MAX_L */
71
#if defined DARWIN7
72
# include <xlocale.h>
73
#endif
74
75
76
#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
77
78
/* On these platforms, we use a mapping from non-canonical encoding name
79
   to GNU canonical encoding name.  */
80
81
/* With glibc-2.1 or newer, we don't need any canonicalization,
82
   because glibc has iconv and both glibc and libiconv support all
83
   GNU canonical names directly.  */
84
# if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__)
85
86
/* One row of the alias table: pairs a non-canonical encoding name with the
   GNU canonical encoding name it stands for.  Both fields are fixed-size
   character arrays with room for 11 characters plus the terminating NUL.  */
struct table_entry
87
{
88
  const char alias[11+1];      /* non-canonical (platform-specific) name */
89
  const char canonical[11+1];  /* GNU canonical encoding name */
90
};
91
92
/* Table of platform-dependent mappings from non-canonical encoding name
   (the 'alias' field) to GNU canonical encoding name.
   The preprocessor selects the section for the current platform.  Each
   section ends without a trailing comma and defines alias_table_defined;
   when no section applies, a single dummy entry is emitted so that the
   initializer is still valid C.
   Within each section the entries are sorted in ascending byte order of
   the alias (digits before uppercase before lowercase letters).
   NOTE(review): the lookup code is not visible here; presumably it relies
   on this ordering, so keep new entries sorted.  */
93
static const struct table_entry alias_table[] =
94
  {
95
#  if defined __FreeBSD__                                   /* FreeBSD */
96
  /*{ "ARMSCII-8",  "ARMSCII-8" },*/
97
    { "Big5",       "BIG5" },
98
    { "C",          "ASCII" },
99
  /*{ "CP1131",     "CP1131" },*/
100
  /*{ "CP1251",     "CP1251" },*/
101
  /*{ "CP866",      "CP866" },*/
102
  /*{ "GB18030",    "GB18030" },*/
103
  /*{ "GB2312",     "GB2312" },*/
104
  /*{ "GBK",        "GBK" },*/
105
  /*{ "ISCII-DEV",  "?" },*/
106
    { "ISO8859-1",  "ISO-8859-1" },
107
    { "ISO8859-13", "ISO-8859-13" },
108
    { "ISO8859-15", "ISO-8859-15" },
109
    { "ISO8859-2",  "ISO-8859-2" },
110
    { "ISO8859-5",  "ISO-8859-5" },
111
    { "ISO8859-7",  "ISO-8859-7" },
112
    { "ISO8859-9",  "ISO-8859-9" },
113
  /*{ "KOI8-R",     "KOI8-R" },*/
114
  /*{ "KOI8-U",     "KOI8-U" },*/
115
    { "SJIS",       "SHIFT_JIS" },
116
    { "US-ASCII",   "ASCII" },
117
    { "eucCN",      "GB2312" },
118
    { "eucJP",      "EUC-JP" },
119
    { "eucKR",      "EUC-KR" }
120
#   define alias_table_defined
121
#  endif
122
#  if defined __NetBSD__                                    /* NetBSD */
123
    { "646",        "ASCII" },
124
  /*{ "ARMSCII-8",  "ARMSCII-8" },*/
125
  /*{ "BIG5",       "BIG5" },*/
126
    { "Big5-HKSCS", "BIG5-HKSCS" },
127
  /*{ "CP1251",     "CP1251" },*/
128
  /*{ "CP866",      "CP866" },*/
129
  /*{ "GB18030",    "GB18030" },*/
130
  /*{ "GB2312",     "GB2312" },*/
131
    { "ISO8859-1",  "ISO-8859-1" },
132
    { "ISO8859-13", "ISO-8859-13" },
133
    { "ISO8859-15", "ISO-8859-15" },
134
    { "ISO8859-2",  "ISO-8859-2" },
135
    { "ISO8859-4",  "ISO-8859-4" },
136
    { "ISO8859-5",  "ISO-8859-5" },
137
    { "ISO8859-7",  "ISO-8859-7" },
138
  /*{ "KOI8-R",     "KOI8-R" },*/
139
  /*{ "KOI8-U",     "KOI8-U" },*/
140
  /*{ "PT154",      "PT154" },*/
141
    { "SJIS",       "SHIFT_JIS" },
142
    { "eucCN",      "GB2312" },
143
    { "eucJP",      "EUC-JP" },
144
    { "eucKR",      "EUC-KR" },
145
    { "eucTW",      "EUC-TW" }
146
#   define alias_table_defined
147
#  endif
148
#  if defined __OpenBSD__                                   /* OpenBSD */
149
    { "646",        "ASCII" },
150
    { "ISO8859-1",  "ISO-8859-1" },
151
    { "ISO8859-13", "ISO-8859-13" },
152
    { "ISO8859-15", "ISO-8859-15" },
153
    { "ISO8859-2",  "ISO-8859-2" },
154
    { "ISO8859-4",  "ISO-8859-4" },
155
    { "ISO8859-5",  "ISO-8859-5" },
156
    { "ISO8859-7",  "ISO-8859-7" },
157
    { "US-ASCII",   "ASCII" }
158
#   define alias_table_defined
159
#  endif
160
#  if defined __APPLE__ && defined __MACH__                 /* Mac OS X */
161
    /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
162
       useless:
163
       - It returns the empty string when LANG is set to a locale of the
164
         form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
165
         LC_CTYPE file.
166
       - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
167
         the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
168
       - The documentation says:
169
           "... all code that calls BSD system routines should ensure
170
            that the const *char parameters of these routines are in UTF-8
171
            encoding. All BSD system functions expect their string
172
            parameters to be in UTF-8 encoding and nothing else."
173
         It also says
174
           "An additional caveat is that string parameters for files,
175
            paths, and other file-system entities must be in canonical
176
            UTF-8. In a canonical UTF-8 Unicode string, all decomposable
177
            characters are decomposed ..."
178
         but this is not true: You can pass non-decomposed UTF-8 strings
179
         to file system functions, and it is the OS which will convert
180
         them to decomposed UTF-8 before accessing the file system.
181
       - The Apple Terminal application displays UTF-8 by default.
182
       - However, other applications are free to use different encodings:
183
         - xterm uses ISO-8859-1 by default.
184
         - TextEdit uses MacRoman by default.
185
       We prefer UTF-8 over decomposed UTF-8-MAC because one should
186
       minimize the use of decomposed Unicode. Unfortunately, through the
187
       Darwin file system, decomposed UTF-8 strings are leaked into user
188
       space nevertheless.
189
       Then there are also the locales with encodings other than US-ASCII
190
       and UTF-8. These locales can be occasionally useful to users (e.g.
191
       when grepping through ISO-8859-1 encoded text files), when all their
192
       file names are in US-ASCII.
193
     */
194
    { "ARMSCII-8",  "ARMSCII-8" },
195
    { "Big5",       "BIG5" },
196
    { "Big5HKSCS",  "BIG5-HKSCS" },
197
    { "CP1131",     "CP1131" },
198
    { "CP1251",     "CP1251" },
199
    { "CP866",      "CP866" },
200
    { "CP949",      "CP949" },
201
    { "GB18030",    "GB18030" },
202
    { "GB2312",     "GB2312" },
203
    { "GBK",        "GBK" },
204
  /*{ "ISCII-DEV",  "?" },*/
205
    { "ISO8859-1",  "ISO-8859-1" },
206
    { "ISO8859-13", "ISO-8859-13" },
207
    { "ISO8859-15", "ISO-8859-15" },
208
    { "ISO8859-2",  "ISO-8859-2" },
209
    { "ISO8859-4",  "ISO-8859-4" },
210
    { "ISO8859-5",  "ISO-8859-5" },
211
    { "ISO8859-7",  "ISO-8859-7" },
212
    { "ISO8859-9",  "ISO-8859-9" },
213
    { "KOI8-R",     "KOI8-R" },
214
    { "KOI8-U",     "KOI8-U" },
215
    { "PT154",      "PT154" },
216
    { "SJIS",       "SHIFT_JIS" },
217
    { "eucCN",      "GB2312" },
218
    { "eucJP",      "EUC-JP" },
219
    { "eucKR",      "EUC-KR" }
220
#   define alias_table_defined
221
#  endif
222
#  if defined _AIX                                          /* AIX */
223
  /*{ "GBK",        "GBK" },*/
224
    { "IBM-1046",   "CP1046" },
225
    { "IBM-1124",   "CP1124" },
226
    { "IBM-1129",   "CP1129" },
227
    { "IBM-1252",   "CP1252" },
228
    { "IBM-850",    "CP850" },
229
    { "IBM-856",    "CP856" },
230
    { "IBM-921",    "ISO-8859-13" },
231
    { "IBM-922",    "CP922" },
232
    { "IBM-932",    "CP932" },
233
    { "IBM-943",    "CP943" },
234
    { "IBM-eucCN",  "GB2312" },
235
    { "IBM-eucJP",  "EUC-JP" },
236
    { "IBM-eucKR",  "EUC-KR" },
237
    { "IBM-eucTW",  "EUC-TW" },
238
    { "ISO8859-1",  "ISO-8859-1" },
239
    { "ISO8859-15", "ISO-8859-15" },
240
    { "ISO8859-2",  "ISO-8859-2" },
241
    { "ISO8859-5",  "ISO-8859-5" },
242
    { "ISO8859-6",  "ISO-8859-6" },
243
    { "ISO8859-7",  "ISO-8859-7" },
244
    { "ISO8859-8",  "ISO-8859-8" },
245
    { "ISO8859-9",  "ISO-8859-9" },
246
    { "TIS-620",    "TIS-620" },
247
  /*{ "UTF-8",      "UTF-8" },*/
248
    { "big5",       "BIG5" }
249
#   define alias_table_defined
250
#  endif
251
#  if defined __hpux                                        /* HP-UX */
252
    { "SJIS",      "SHIFT_JIS" },
253
    { "arabic8",   "HP-ARABIC8" },
254
    { "big5",      "BIG5" },
255
    { "cp1251",    "CP1251" },
256
    { "eucJP",     "EUC-JP" },
257
    { "eucKR",     "EUC-KR" },
258
    { "eucTW",     "EUC-TW" },
259
    { "gb18030",   "GB18030" },
260
    { "greek8",    "HP-GREEK8" },
261
    { "hebrew8",   "HP-HEBREW8" },
262
    { "hkbig5",    "BIG5-HKSCS" },
263
    { "hp15CN",    "GB2312" },
264
    { "iso88591",  "ISO-8859-1" },
265
    { "iso885913", "ISO-8859-13" },
266
    { "iso885915", "ISO-8859-15" },
267
    { "iso88592",  "ISO-8859-2" },
268
    { "iso88594",  "ISO-8859-4" },
269
    { "iso88595",  "ISO-8859-5" },
270
    { "iso88596",  "ISO-8859-6" },
271
    { "iso88597",  "ISO-8859-7" },
272
    { "iso88598",  "ISO-8859-8" },
273
    { "iso88599",  "ISO-8859-9" },
274
    { "kana8",     "HP-KANA8" },
275
    { "koi8r",     "KOI8-R" },
276
    { "roman8",    "HP-ROMAN8" },
277
    { "tis620",    "TIS-620" },
278
    { "turkish8",  "HP-TURKISH8" },
279
    { "utf8",      "UTF-8" }
280
#   define alias_table_defined
281
#  endif
282
#  if defined __sun                                         /* Solaris */
283
    { "5601",        "EUC-KR" },
284
    { "646",         "ASCII" },
285
  /*{ "BIG5",        "BIG5" },*/
286
    { "Big5-HKSCS",  "BIG5-HKSCS" },
287
    { "GB18030",     "GB18030" },
288
  /*{ "GBK",         "GBK" },*/
289
    { "ISO8859-1",   "ISO-8859-1" },
290
    { "ISO8859-11",  "TIS-620" },
291
    { "ISO8859-13",  "ISO-8859-13" },
292
    { "ISO8859-15",  "ISO-8859-15" },
293
    { "ISO8859-2",   "ISO-8859-2" },
294
    { "ISO8859-3",   "ISO-8859-3" },
295
    { "ISO8859-4",   "ISO-8859-4" },
296
    { "ISO8859-5",   "ISO-8859-5" },
297
    { "ISO8859-6",   "ISO-8859-6" },
298
    { "ISO8859-7",   "ISO-8859-7" },
299
    { "ISO8859-8",   "ISO-8859-8" },
300
    { "ISO8859-9",   "ISO-8859-9" },
301
    { "PCK",         "SHIFT_JIS" },
302
    { "TIS620.2533", "TIS-620" },
303
  /*{ "UTF-8",       "UTF-8" },*/
304
    { "ansi-1251",   "CP1251" },
305
    { "cns11643",    "EUC-TW" },
306
    { "eucJP",       "EUC-JP" },
307
    { "gb2312",      "GB2312" },
308
    { "koi8-r",      "KOI8-R" }
309
#   define alias_table_defined
310
#  endif
311
#  if defined __minix                                       /* Minix */
312
    { "646", "ASCII" }
313
#   define alias_table_defined
314
#  endif
315
#  if defined WINDOWS_NATIVE || defined __CYGWIN__          /* Windows */
316
    { "CP1361",  "JOHAB" },
317
    { "CP20127", "ASCII" },
318
    { "CP20866", "KOI8-R" },
319
    { "CP20936", "GB2312" },
320
    { "CP21866", "KOI8-RU" },
321
    { "CP28591", "ISO-8859-1" },
322
    { "CP28592", "ISO-8859-2" },
323
    { "CP28593", "ISO-8859-3" },
324
    { "CP28594", "ISO-8859-4" },
325
    { "CP28595", "ISO-8859-5" },
326
    { "CP28596", "ISO-8859-6" },
327
    { "CP28597", "ISO-8859-7" },
328
    { "CP28598", "ISO-8859-8" },
329
    { "CP28599", "ISO-8859-9" },
330
    { "CP28605", "ISO-8859-15" },
331
    { "CP38598", "ISO-8859-8" },
332
    { "CP51932", "EUC-JP" },
333
    { "CP51936", "GB2312" },
334
    { "CP51949", "EUC-KR" },
335
    { "CP51950", "EUC-TW" },
336
    { "CP54936", "GB18030" },
337
    { "CP65001", "UTF-8" },
338
    { "CP936",   "GBK" }
339
#   define alias_table_defined
340
#  endif
341
#  if defined OS2                                           /* OS/2 */
342
    /* The list of encodings is taken from "List of OS/2 Codepages"
343
       by Alex Taylor:
344
       <https://altsan.org/os2/toolkits/uls/index.html#codepages>.
345
       See also "__convcp() of kLIBC":
346
       <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>.  */
347
    { "CP1004",        "CP1252" },
348
  /*{ "CP1041",        "CP943" },*/
349
  /*{ "CP1088",        "CP949" },*/
350
    { "CP1089",        "ISO-8859-6" },
351
  /*{ "CP1114",        "CP950" },*/
352
  /*{ "CP1115",        "GB2312" },*/
353
    { "CP1208",        "UTF-8" },
354
  /*{ "CP1380",        "GB2312" },*/
355
    { "CP1381",        "GB2312" },
356
    { "CP1383",        "GB2312" },
357
    { "CP1386",        "GBK" },
358
  /*{ "CP301",         "CP943" },*/
359
    { "CP3372",        "EUC-JP" },
360
    { "CP4946",        "CP850" },
361
  /*{ "CP5048",        "JIS_X0208-1990" },*/
362
  /*{ "CP5049",        "JIS_X0212-1990" },*/
363
  /*{ "CP5067",        "KS_C_5601-1987" },*/
364
    { "CP813",         "ISO-8859-7" },
365
    { "CP819",         "ISO-8859-1" },
366
    { "CP878",         "KOI8-R" },
367
  /*{ "CP897",         "CP943" },*/
368
    { "CP912",         "ISO-8859-2" },
369
    { "CP913",         "ISO-8859-3" },
370
    { "CP914",         "ISO-8859-4" },
371
    { "CP915",         "ISO-8859-5" },
372
    { "CP916",         "ISO-8859-8" },
373
    { "CP920",         "ISO-8859-9" },
374
    { "CP921",         "ISO-8859-13" },
375
    { "CP923",         "ISO-8859-15" },
376
  /*{ "CP941",         "CP943" },*/
377
  /*{ "CP947",         "CP950" },*/
378
  /*{ "CP951",         "CP949" },*/
379
  /*{ "CP952",         "JIS_X0208-1990" },*/
380
  /*{ "CP953",         "JIS_X0212-1990" },*/
381
    { "CP954",         "EUC-JP" },
382
    { "CP964",         "EUC-TW" },
383
    { "CP970",         "EUC-KR" },
384
  /*{ "CP971",         "KS_C_5601-1987" },*/
385
    { "IBM-1004",      "CP1252" },
386
  /*{ "IBM-1006",      "?" },*/
387
  /*{ "IBM-1008",      "?" },*/
388
  /*{ "IBM-1041",      "CP943" },*/
389
  /*{ "IBM-1051",      "?" },*/
390
  /*{ "IBM-1088",      "CP949" },*/
391
    { "IBM-1089",      "ISO-8859-6" },
392
  /*{ "IBM-1098",      "?" },*/
393
  /*{ "IBM-1114",      "CP950" },*/
394
  /*{ "IBM-1115",      "GB2312" },*/
395
  /*{ "IBM-1116",      "?" },*/
396
  /*{ "IBM-1117",      "?" },*/
397
  /*{ "IBM-1118",      "?" },*/
398
  /*{ "IBM-1119",      "?" },*/
399
    { "IBM-1124",      "CP1124" },
400
    { "IBM-1125",      "CP1125" },
401
    { "IBM-1131",      "CP1131" },
402
    { "IBM-1208",      "UTF-8" },
403
    { "IBM-1250",      "CP1250" },
404
    { "IBM-1251",      "CP1251" },
405
    { "IBM-1252",      "CP1252" },
406
    { "IBM-1253",      "CP1253" },
407
    { "IBM-1254",      "CP1254" },
408
    { "IBM-1255",      "CP1255" },
409
    { "IBM-1256",      "CP1256" },
410
    { "IBM-1257",      "CP1257" },
411
  /*{ "IBM-1275",      "?" },*/
412
  /*{ "IBM-1276",      "?" },*/
413
  /*{ "IBM-1277",      "?" },*/
414
  /*{ "IBM-1280",      "?" },*/
415
  /*{ "IBM-1281",      "?" },*/
416
  /*{ "IBM-1282",      "?" },*/
417
  /*{ "IBM-1283",      "?" },*/
418
  /*{ "IBM-1380",      "GB2312" },*/
419
    { "IBM-1381",      "GB2312" },
420
    { "IBM-1383",      "GB2312" },
421
    { "IBM-1386",      "GBK" },
422
  /*{ "IBM-301",       "CP943" },*/
423
    { "IBM-3372",      "EUC-JP" },
424
    { "IBM-367",       "ASCII" },
425
    { "IBM-437",       "CP437" },
426
    { "IBM-4946",      "CP850" },
427
  /*{ "IBM-5048",      "JIS_X0208-1990" },*/
428
  /*{ "IBM-5049",      "JIS_X0212-1990" },*/
429
  /*{ "IBM-5067",      "KS_C_5601-1987" },*/
430
    { "IBM-813",       "ISO-8859-7" },
431
    { "IBM-819",       "ISO-8859-1" },
432
    { "IBM-850",       "CP850" },
433
  /*{ "IBM-851",       "?" },*/
434
    { "IBM-852",       "CP852" },
435
    { "IBM-855",       "CP855" },
436
    { "IBM-856",       "CP856" },
437
    { "IBM-857",       "CP857" },
438
  /*{ "IBM-859",       "?" },*/
439
    { "IBM-860",       "CP860" },
440
    { "IBM-861",       "CP861" },
441
    { "IBM-862",       "CP862" },
442
    { "IBM-863",       "CP863" },
443
    { "IBM-864",       "CP864" },
444
    { "IBM-865",       "CP865" },
445
    { "IBM-866",       "CP866" },
446
  /*{ "IBM-868",       "?" },*/
447
    { "IBM-869",       "CP869" },
448
    { "IBM-874",       "CP874" },
449
    { "IBM-878",       "KOI8-R" },
450
  /*{ "IBM-895",       "?" },*/
451
  /*{ "IBM-897",       "CP943" },*/
452
  /*{ "IBM-907",       "?" },*/
453
  /*{ "IBM-909",       "?" },*/
454
    { "IBM-912",       "ISO-8859-2" },
455
    { "IBM-913",       "ISO-8859-3" },
456
    { "IBM-914",       "ISO-8859-4" },
457
    { "IBM-915",       "ISO-8859-5" },
458
    { "IBM-916",       "ISO-8859-8" },
459
    { "IBM-920",       "ISO-8859-9" },
460
    { "IBM-921",       "ISO-8859-13" },
461
    { "IBM-922",       "CP922" },
462
    { "IBM-923",       "ISO-8859-15" },
463
    { "IBM-932",       "CP932" },
464
  /*{ "IBM-941",       "CP943" },*/
465
  /*{ "IBM-942",       "?" },*/
466
    { "IBM-943",       "CP943" },
467
  /*{ "IBM-947",       "CP950" },*/
468
    { "IBM-949",       "CP949" },
469
    { "IBM-950",       "CP950" },
470
  /*{ "IBM-951",       "CP949" },*/
471
  /*{ "IBM-952",       "JIS_X0208-1990" },*/
472
  /*{ "IBM-953",       "JIS_X0212-1990" },*/
473
    { "IBM-954",       "EUC-JP" },
474
  /*{ "IBM-955",       "?" },*/
475
    { "IBM-964",       "EUC-TW" },
476
    { "IBM-970",       "EUC-KR" },
477
  /*{ "IBM-971",       "KS_C_5601-1987" },*/
478
    { "IBM-eucCN",     "GB2312" },
479
    { "IBM-eucJP",     "EUC-JP" },
480
    { "IBM-eucKR",     "EUC-KR" },
481
    { "IBM-eucTW",     "EUC-TW" },
482
    { "IBM33722",      "EUC-JP" },
483
    { "ISO8859-1",     "ISO-8859-1" },
484
    { "ISO8859-2",     "ISO-8859-2" },
485
    { "ISO8859-3",     "ISO-8859-3" },
486
    { "ISO8859-4",     "ISO-8859-4" },
487
    { "ISO8859-5",     "ISO-8859-5" },
488
    { "ISO8859-6",     "ISO-8859-6" },
489
    { "ISO8859-7",     "ISO-8859-7" },
490
    { "ISO8859-8",     "ISO-8859-8" },
491
    { "ISO8859-9",     "ISO-8859-9" },
492
  /*{ "JISX0201-1976", "JISX0201-1976" },*/
493
  /*{ "JISX0208-1978", "?" },*/
494
  /*{ "JISX0208-1983", "JIS_X0208-1983" },*/
495
  /*{ "JISX0208-1990", "JIS_X0208-1990" },*/
496
  /*{ "JISX0212-1990", "JIS_X0212-1990" },*/
497
  /*{ "KSC5601-1987",  "KS_C_5601-1987" },*/
498
    { "SJIS-1",        "CP943" },
499
    { "SJIS-2",        "CP943" },
500
    { "eucJP",         "EUC-JP" },
501
    { "eucKR",         "EUC-KR" },
502
    { "eucTW-1993",    "EUC-TW" }
503
#   define alias_table_defined
504
#  endif
505
#  if defined VMS                                           /* OpenVMS */
506
    /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
507
       "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
508
       section 10.7 "Handling Different Character Sets".  */
509
    { "DECHANYU",  "DEC-HANYU" },
510
    { "DECHANZI",  "GB2312" },
511
    { "DECKANJI",  "DEC-KANJI" },
512
    { "DECKOREAN", "EUC-KR" },
513
    { "ISO8859-1", "ISO-8859-1" },
514
    { "ISO8859-2", "ISO-8859-2" },
515
    { "ISO8859-5", "ISO-8859-5" },
516
    { "ISO8859-7", "ISO-8859-7" },
517
    { "ISO8859-8", "ISO-8859-8" },
518
    { "ISO8859-9", "ISO-8859-9" },
519
    { "SDECKANJI", "EUC-JP" },
520
    { "SJIS",      "SHIFT_JIS" },
521
    { "eucJP",     "EUC-JP" },
522
    { "eucTW",     "EUC-TW" }
523
#   define alias_table_defined
524
#  endif
525
#  ifndef alias_table_defined
526
    /* Just a dummy entry, to avoid a C syntax error.  */
527
    { "", "" }
528
#  endif
529
  };
530
531
# endif
532
533
#else
534
535
/* On these platforms, we use a mapping from locale name to GNU canonical
536
   encoding name.  */
537
538
/* One row of the locale table: pairs a locale name with the GNU canonical
   name of the encoding used by that locale.  The locale field has room for
   17 characters plus the terminating NUL, the canonical field for 11
   characters plus the terminating NUL.  */
struct table_entry
539
{
540
  const char locale[17+1];     /* locale name */
541
  const char canonical[11+1];  /* GNU canonical encoding name */
542
};
543
544
/* Table of platform-dependent mappings from locale name (the 'locale'
   field) to GNU canonical encoding name.
   The preprocessor selects the section for the current platform.  Each
   section ends without a trailing comma and defines locale_table_defined;
   when no section applies, a single dummy entry is emitted so that the
   initializer is still valid C.
   Within each section the entries are sorted in ascending byte order of
   the locale name (uppercase before lowercase letters).
   NOTE(review): the lookup code is not visible here; presumably it relies
   on this ordering, so keep new entries sorted.  */
545
static const struct table_entry locale_table[] =
546
  {
547
# if defined __FreeBSD__                                    /* FreeBSD 4.2 */
548
    { "cs_CZ.ISO_8859-2",  "ISO-8859-2" },
549
    { "da_DK.DIS_8859-15", "ISO-8859-15" },
550
    { "da_DK.ISO_8859-1",  "ISO-8859-1" },
551
    { "de_AT.DIS_8859-15", "ISO-8859-15" },
552
    { "de_AT.ISO_8859-1",  "ISO-8859-1" },
553
    { "de_CH.DIS_8859-15", "ISO-8859-15" },
554
    { "de_CH.ISO_8859-1",  "ISO-8859-1" },
555
    { "de_DE.DIS_8859-15", "ISO-8859-15" },
556
    { "de_DE.ISO_8859-1",  "ISO-8859-1" },
557
    { "en_AU.DIS_8859-15", "ISO-8859-15" },
558
    { "en_AU.ISO_8859-1",  "ISO-8859-1" },
559
    { "en_CA.DIS_8859-15", "ISO-8859-15" },
560
    { "en_CA.ISO_8859-1",  "ISO-8859-1" },
561
    { "en_GB.DIS_8859-15", "ISO-8859-15" },
562
    { "en_GB.ISO_8859-1",  "ISO-8859-1" },
563
    { "en_US.DIS_8859-15", "ISO-8859-15" },
564
    { "en_US.ISO_8859-1",  "ISO-8859-1" },
565
    { "es_ES.DIS_8859-15", "ISO-8859-15" },
566
    { "es_ES.ISO_8859-1",  "ISO-8859-1" },
567
    { "fi_FI.DIS_8859-15", "ISO-8859-15" },
568
    { "fi_FI.ISO_8859-1",  "ISO-8859-1" },
569
    { "fr_BE.DIS_8859-15", "ISO-8859-15" },
570
    { "fr_BE.ISO_8859-1",  "ISO-8859-1" },
571
    { "fr_CA.DIS_8859-15", "ISO-8859-15" },
572
    { "fr_CA.ISO_8859-1",  "ISO-8859-1" },
573
    { "fr_CH.DIS_8859-15", "ISO-8859-15" },
574
    { "fr_CH.ISO_8859-1",  "ISO-8859-1" },
575
    { "fr_FR.DIS_8859-15", "ISO-8859-15" },
576
    { "fr_FR.ISO_8859-1",  "ISO-8859-1" },
577
    { "hr_HR.ISO_8859-2",  "ISO-8859-2" },
578
    { "hu_HU.ISO_8859-2",  "ISO-8859-2" },
579
    { "is_IS.DIS_8859-15", "ISO-8859-15" },
580
    { "is_IS.ISO_8859-1",  "ISO-8859-1" },
581
    { "it_CH.DIS_8859-15", "ISO-8859-15" },
582
    { "it_CH.ISO_8859-1",  "ISO-8859-1" },
583
    { "it_IT.DIS_8859-15", "ISO-8859-15" },
584
    { "it_IT.ISO_8859-1",  "ISO-8859-1" },
585
    { "ja_JP.EUC",         "EUC-JP" },
586
    { "ja_JP.SJIS",        "SHIFT_JIS" },
587
    { "ja_JP.Shift_JIS",   "SHIFT_JIS" },
588
    { "ko_KR.EUC",         "EUC-KR" },
589
    { "la_LN.ASCII",       "ASCII" },
590
    { "la_LN.DIS_8859-15", "ISO-8859-15" },
591
    { "la_LN.ISO_8859-1",  "ISO-8859-1" },
592
    { "la_LN.ISO_8859-2",  "ISO-8859-2" },
593
    { "la_LN.ISO_8859-4",  "ISO-8859-4" },
594
    { "lt_LN.ASCII",       "ASCII" },
595
    { "lt_LN.DIS_8859-15", "ISO-8859-15" },
596
    { "lt_LN.ISO_8859-1",  "ISO-8859-1" },
597
    { "lt_LN.ISO_8859-2",  "ISO-8859-2" },
598
    { "lt_LT.ISO_8859-4",  "ISO-8859-4" },
599
    { "nl_BE.DIS_8859-15", "ISO-8859-15" },
600
    { "nl_BE.ISO_8859-1",  "ISO-8859-1" },
601
    { "nl_NL.DIS_8859-15", "ISO-8859-15" },
602
    { "nl_NL.ISO_8859-1",  "ISO-8859-1" },
603
    { "no_NO.DIS_8859-15", "ISO-8859-15" },
604
    { "no_NO.ISO_8859-1",  "ISO-8859-1" },
605
    { "pl_PL.ISO_8859-2",  "ISO-8859-2" },
606
    { "pt_PT.DIS_8859-15", "ISO-8859-15" },
607
    { "pt_PT.ISO_8859-1",  "ISO-8859-1" },
608
    { "ru_RU.CP866",       "CP866" },
609
    { "ru_RU.ISO_8859-5",  "ISO-8859-5" },
610
    { "ru_RU.KOI8-R",      "KOI8-R" },
611
    { "ru_SU.CP866",       "CP866" },
612
    { "ru_SU.ISO_8859-5",  "ISO-8859-5" },
613
    { "ru_SU.KOI8-R",      "KOI8-R" },
614
    { "sl_SI.ISO_8859-2",  "ISO-8859-2" },
615
    { "sv_SE.DIS_8859-15", "ISO-8859-15" },
616
    { "sv_SE.ISO_8859-1",  "ISO-8859-1" },
617
    { "uk_UA.KOI8-U",      "KOI8-U" },
618
    { "zh_CN.EUC",         "GB2312" },
619
    { "zh_TW.BIG5",        "BIG5" },
620
    { "zh_TW.Big5",        "BIG5" }
621
#  define locale_table_defined
622
# endif
623
# if defined __DJGPP__                                      /* DOS / DJGPP 2.03 */
624
    /* The encodings given here may not all be correct.
625
       If you find that the encoding given for your language and
626
       country is not the one your DOS machine actually uses, just
627
       correct it in this file, and send a mail to
628
       Juan Manuel Guerrero <juan.guerrero@gmx.de>
629
       and <bug-gnulib@gnu.org>.  */
630
    { "C",     "ASCII" },
631
    { "ar",    "CP864" },
632
    { "ar_AE", "CP864" },
633
    { "ar_DZ", "CP864" },
634
    { "ar_EG", "CP864" },
635
    { "ar_IQ", "CP864" },
636
    { "ar_IR", "CP864" },
637
    { "ar_JO", "CP864" },
638
    { "ar_KW", "CP864" },
639
    { "ar_MA", "CP864" },
640
    { "ar_OM", "CP864" },
641
    { "ar_QA", "CP864" },
642
    { "ar_SA", "CP864" },
643
    { "ar_SY", "CP864" },
644
    { "be",    "CP866" },
645
    { "be_BE", "CP866" },
646
    { "bg",    "CP866" }, /* not CP855 ?? */
647
    { "bg_BG", "CP866" }, /* not CP855 ?? */
648
    { "ca",    "CP850" },
649
    { "ca_ES", "CP850" },
650
    { "cs",    "CP852" },
651
    { "cs_CZ", "CP852" },
652
    { "da",    "CP865" }, /* not CP850 ?? */
653
    { "da_DK", "CP865" }, /* not CP850 ?? */
654
    { "de",    "CP850" },
655
    { "de_AT", "CP850" },
656
    { "de_CH", "CP850" },
657
    { "de_DE", "CP850" },
658
    { "el",    "CP869" },
659
    { "el_GR", "CP869" },
660
    { "en",    "CP850" },
661
    { "en_AU", "CP850" }, /* not CP437 ?? */
662
    { "en_CA", "CP850" },
663
    { "en_GB", "CP850" },
664
    { "en_NZ", "CP437" },
665
    { "en_US", "CP437" },
666
    { "en_ZA", "CP850" }, /* not CP437 ?? */
667
    { "eo",    "CP850" },
668
    { "eo_EO", "CP850" },
669
    { "es",    "CP850" },
670
    { "es_AR", "CP850" },
671
    { "es_BO", "CP850" },
672
    { "es_CL", "CP850" },
673
    { "es_CO", "CP850" },
674
    { "es_CR", "CP850" },
675
    { "es_CU", "CP850" },
676
    { "es_DO", "CP850" },
677
    { "es_EC", "CP850" },
678
    { "es_ES", "CP850" },
679
    { "es_GT", "CP850" },
680
    { "es_HN", "CP850" },
681
    { "es_MX", "CP850" },
682
    { "es_NI", "CP850" },
683
    { "es_PA", "CP850" },
684
    { "es_PE", "CP850" },
685
    { "es_PY", "CP850" },
686
    { "es_SV", "CP850" },
687
    { "es_UY", "CP850" },
688
    { "es_VE", "CP850" },
689
    { "et",    "CP850" },
690
    { "et_EE", "CP850" },
691
    { "eu",    "CP850" },
692
    { "eu_ES", "CP850" },
693
    { "fi",    "CP850" },
694
    { "fi_FI", "CP850" },
695
    { "fr",    "CP850" },
696
    { "fr_BE", "CP850" },
697
    { "fr_CA", "CP850" },
698
    { "fr_CH", "CP850" },
699
    { "fr_FR", "CP850" },
700
    { "ga",    "CP850" },
701
    { "ga_IE", "CP850" },
702
    { "gd",    "CP850" },
703
    { "gd_GB", "CP850" },
704
    { "gl",    "CP850" },
705
    { "gl_ES", "CP850" },
706
    { "he",    "CP862" },
707
    { "he_IL", "CP862" },
708
    { "hr",    "CP852" },
709
    { "hr_HR", "CP852" },
710
    { "hu",    "CP852" },
711
    { "hu_HU", "CP852" },
712
    { "id",    "CP850" }, /* not CP437 ?? */
713
    { "id_ID", "CP850" }, /* not CP437 ?? */
714
    { "is",    "CP861" }, /* not CP850 ?? */
715
    { "is_IS", "CP861" }, /* not CP850 ?? */
716
    { "it",    "CP850" },
717
    { "it_CH", "CP850" },
718
    { "it_IT", "CP850" },
719
    { "ja",    "CP932" },
720
    { "ja_JP", "CP932" },
721
    { "kr",    "CP949" }, /* not CP934 ?? */
722
    { "kr_KR", "CP949" }, /* not CP934 ?? */
723
    { "lt",    "CP775" },
724
    { "lt_LT", "CP775" },
725
    { "lv",    "CP775" },
726
    { "lv_LV", "CP775" },
727
    { "mk",    "CP866" }, /* not CP855 ?? */
728
    { "mk_MK", "CP866" }, /* not CP855 ?? */
729
    { "mt",    "CP850" },
730
    { "mt_MT", "CP850" },
731
    { "nb",    "CP865" }, /* not CP850 ?? */
732
    { "nb_NO", "CP865" }, /* not CP850 ?? */
733
    { "nl",    "CP850" },
734
    { "nl_BE", "CP850" },
735
    { "nl_NL", "CP850" },
736
    { "nn",    "CP865" }, /* not CP850 ?? */
737
    { "nn_NO", "CP865" }, /* not CP850 ?? */
738
    { "no",    "CP865" }, /* not CP850 ?? */
739
    { "no_NO", "CP865" }, /* not CP850 ?? */
740
    { "pl",    "CP852" },
741
    { "pl_PL", "CP852" },
742
    { "pt",    "CP850" },
743
    { "pt_BR", "CP850" },
744
    { "pt_PT", "CP850" },
745
    { "ro",    "CP852" },
746
    { "ro_RO", "CP852" },
747
    { "ru",    "CP866" },
748
    { "ru_RU", "CP866" },
749
    { "sk",    "CP852" },
750
    { "sk_SK", "CP852" },
751
    { "sl",    "CP852" },
752
    { "sl_SI", "CP852" },
753
    { "sq",    "CP852" },
754
    { "sq_AL", "CP852" },
755
    { "sr",    "CP852" }, /* CP852 or CP866 or CP855 ?? */
756
    { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */
757
    { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */
758
    { "sv",    "CP850" },
759
    { "sv_SE", "CP850" },
760
    { "th",    "CP874" },
761
    { "th_TH", "CP874" },
762
    { "tr",    "CP857" },
763
    { "tr_TR", "CP857" },
764
    { "uk",    "CP1125" },
765
    { "uk_UA", "CP1125" },
766
    { "zh_CN", "GBK" },
767
    { "zh_TW", "CP950" } /* not CP938 ?? */
768
#  define locale_table_defined
769
# endif
770
# ifndef locale_table_defined
771
    /* Just a dummy entry, to avoid a C syntax error.  */
772
    { "", "" }
773
# endif
774
  };
775
776
#endif
777
778
779
/* Determine the current locale's character encoding, and canonicalize it
   into one of the canonical names listed below.
   The result must not be freed; it is statically allocated.  The result
   becomes invalid when setlocale() is used to change the global locale, or
   when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG
   is changed; threads in multithreaded programs should not do this.
   If the canonical name cannot be determined, the result is a non-canonical
   name.

   Overview of the implementation:
     - Where nl_langinfo (CODESET) is available, on native Windows, and on
       OS/2, a raw codeset name is obtained from the system (or, on Cygwin
       and OS/2, from the part after the '.' in LC_ALL / LC_CTYPE / LANG)
       and is then looked up in alias_table, if that table is defined.
     - Otherwise the locale name is taken from LC_ALL / LC_CTYPE / LANG and
       looked up in locale_table, if that table is defined.
   Note that the Cygwin and OS/2 code paths may return the encoding suffix
   of the environment variable directly, without any table lookup.  */

#ifdef STATIC
STATIC
#endif
const char *
locale_charset (void)
{
  const char *codeset;

  /* This function must be multithread-safe.  To achieve this without using
     thread-local storage, we use a simple strcpy or memcpy to fill this static
     buffer.  Filling it through, for example, strcpy + strcat would not be
     guaranteed to leave the buffer's contents intact if another thread is
     currently accessing it.  If necessary, the contents are first assembled
     in a stack-allocated buffer.  */

#if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  Return the suffix of the locale name from the
     environment variables (if present) or the codepage as a number.  */
  if (codeset != NULL && streq (codeset, "US-ASCII"))
    {
      const char *locale;
      static char resultbuf[2 + 10 + 1];

      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                return dot;
              /* modifier >= dot here, so the difference is non-negative and
                 the conversion to size_t is value-preserving.  */
              if ((size_t) (modifier - dot) < sizeof (resultbuf))
                {
                  /* This way of filling resultbuf is multithread-safe.  */
                  memcpy (resultbuf, dot, modifier - dot);
                  resultbuf [modifier - dot] = '\0';
                  return resultbuf;
                }
            }
        }

      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().  This encoding is used by Cygwin, unless the user
         has set the environment variable CYGWIN=codepage:oem (which very few
         people do).
         Output directed to console windows needs to be converted (to
         GetOEMCP() if the console is using a raster font, or to
         GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
         this conversion transparently (see winsup/cygwin/fhandler_console.cc),
         converting to GetConsoleOutputCP().  This leads to correct results,
         except when SetConsoleOutputCP has been called and a raster font is
         in use.  */
      {
        char buf[2 + 10 + 1];

        /* Assemble on the stack first, then publish with a single strcpy.  */
        sprintf (buf, "CP%u", GetACP ());
        strcpy (resultbuf, buf);
        codeset = resultbuf;
      }
    }
#  endif

  if (codeset == NULL)
    /* The canonical name cannot be determined.  */
    codeset = "";

# elif defined WINDOWS_NATIVE

  char buf[2 + 10 + 1];
  static char resultbuf[2 + 10 + 1];

  /* The Windows API has a function returning the locale's codepage as
     a number, but the value doesn't change according to what the
     'setlocale' call specified.  So we use it as a last resort, in
     case the string returned by 'setlocale' doesn't specify the
     codepage.  */
  char *current_locale = setlocale (LC_CTYPE, NULL);
  char *pdot = strrchr (current_locale, '.');

  /* Only use the suffix if "CP" + suffix + NUL fits into buf.  */
  if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
    sprintf (buf, "CP%s", pdot + 1);
  else
    {
      /* The Windows API has a function returning the locale's codepage as a
         number: GetACP().
         When the output goes to a console window, it needs to be provided in
         GetOEMCP() encoding if the console is using a raster font, or in
         GetConsoleOutputCP() encoding if it is using a TrueType font.
         But in GUI programs and for output sent to files and pipes, GetACP()
         encoding is the best bet.  */
      sprintf (buf, "CP%u", GetACP ());
    }
  /* For a locale name such as "French_France.65001", in Windows 10,
     setlocale now returns "French_France.utf8" instead, or in the UTF-8
     environment (with modern system settings) "fr_FR.UTF-8".  */
  if (streq (buf + 2, "65001") || streq (buf + 2, "utf8") || streq (buf + 2, "UTF-8"))
    codeset = "UTF-8";
  else
    {
      strcpy (resultbuf, buf);
      codeset = resultbuf;
    }

# elif defined OS2

  const char *locale;
  static char resultbuf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  codeset = NULL;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* modifier >= dot here, so the difference is non-negative and
             the conversion to size_t is value-preserving.  */
          if ((size_t) (modifier - dot) < sizeof (resultbuf))
            {
              /* This way of filling resultbuf is multithread-safe.  */
              memcpy (resultbuf, dot, modifier - dot);
              resultbuf [modifier - dot] = '\0';
              return resultbuf;
            }
        }

      /* For the POSIX locale, don't use the system's codepage.  */
      if (streq (locale, "C") || streq (locale, "POSIX"))
        codeset = "";
    }

  if (codeset == NULL)
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          char buf[2 + 10 + 1];

          /* The cast makes the argument type match the %u conversion
             regardless of how ULONG is defined, and keeps the output within
             the 10 digits that buf is sized for.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          strcpy (resultbuf, buf);
          codeset = resultbuf;
        }
    }

# else

#  error "Add code for other platforms here."

# endif

  /* Resolve alias.  */
  {
# ifdef alias_table_defined
    /* On some platforms, UTF-8 locales are the most frequently used ones.
       Speed up the common case and slow down the less common cases by
       testing for this case first.  */
#  if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
    if (streq (codeset, "UTF-8"))
      goto done_table_lookup;
    else
#  endif
      {
        const struct table_entry * const table = alias_table;
        size_t const table_size =
          sizeof (alias_table) / sizeof (struct table_entry);
        /* The table is sorted.  Perform a binary search.  */
        size_t hi = table_size;
        size_t lo = 0;
        while (lo < hi)
          {
            /* Invariant:
               for i < lo, strcmp (table[i].alias, codeset) < 0,
               for i >= hi, strcmp (table[i].alias, codeset) > 0.  */
            size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
            int cmp = strcmp (table[mid].alias, codeset);
            if (cmp < 0)
              lo = mid + 1;
            else if (cmp > 0)
              hi = mid;
            else
              {
                /* Found an i with
                     strcmp (table[i].alias, codeset) == 0.  */
                codeset = table[mid].canonical;
                goto done_table_lookup;
              }
          }
      }
    /* The 'if (0)' exists only to host the jump target: falling out of the
       search reaches the 'else' block below, whereas a successful lookup
       jumps to the label and thereby skips that block.  */
    if (0)
      done_table_lookup: {}
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        if (codeset[0] == '\0')
          codeset = "ASCII";
# endif
      }
  }

#else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like DJGPP) have only the C locale.  Therefore we don't use setlocale
     here; it would return "C" when it doesn't support the locale name the
     user has set.  */
# if 0
  locale = setlocale (LC_CTYPE, NULL);
# endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
          /* Executed whether or not LANG was consulted: if none of the
             variables is set, fall back to the empty locale name so that
             the strcmp calls below never see a null pointer.  */
          if (locale == NULL)
            locale = "";
        }
    }

  /* Map locale name to canonical encoding name.  */
  {
# ifdef locale_table_defined
    const struct table_entry * const table = locale_table;
    size_t const table_size =
      sizeof (locale_table) / sizeof (struct table_entry);
    /* The table is sorted.  Perform a binary search.  */
    size_t hi = table_size;
    size_t lo = 0;
    while (lo < hi)
      {
        /* Invariant:
           for i < lo, strcmp (table[i].locale, locale) < 0,
           for i >= hi, strcmp (table[i].locale, locale) > 0.  */
        size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
        int cmp = strcmp (table[mid].locale, locale);
        if (cmp < 0)
          lo = mid + 1;
        else if (cmp > 0)
          hi = mid;
        else
          {
            /* Found an i with
                 strcmp (table[i].locale, locale) == 0.  */
            codeset = table[mid].canonical;
            goto done_table_lookup;
          }
      }
    /* The 'if (0)' exists only to host the jump target: falling out of the
       search reaches the 'else' block below, whereas a successful lookup
       jumps to the label and thereby skips that block.  */
    if (0)
      done_table_lookup: ;
    else
# endif
      {
        /* Did not find it in the table.  */
        /* On Mac OS X, all modern locales use the UTF-8 encoding.
           BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
# if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
        codeset = "UTF-8";
# else
        /* The canonical name cannot be determined.  */
        /* Don't return an empty string.  GNU libc and GNU libiconv interpret
           the empty string as denoting "the locale's character encoding",
           thus GNU libiconv would call this function a second time.  */
        codeset = "ASCII";
# endif
      }
  }

#endif

#ifdef DARWIN7
  /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
     (the default codeset) does not work when MB_CUR_MAX is 1.  */
  if (streq (codeset, "UTF-8") && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
    codeset = "ASCII";
#endif

  return codeset;
}