/src/tinysparql/subprojects/glib-2.80.3/glib/libcharset/localcharset.c

Source (jump to first uncovered line)
/* Determine a canonical name for the current locale's character encoding.

   Copyright (C) 2000-2006 Free Software Foundation, Inc.

   This program is free software; you can redistribute it and/or modify it
   under the terms of the GNU Library General Public License as published
   by the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
   USA.  */

/* Written by Bruno Haible <bruno@clisp.org>.  */

#include "config.h"

/* Specification.  */
#include "localcharset.h"

#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#if defined _WIN32 || defined __WIN32__
# define WIN32_NATIVE
#endif

#if defined __EMX__
/* Assume EMX program runs on OS/2, even if compiled under DOS.  */
# define OS2
#endif

#if !defined WIN32_NATIVE
# if HAVE_LANGINFO_CODESET
#  include <langinfo.h>
# else
#  if 0 /* see comment below */
#   include <locale.h>
#  endif
# endif
# ifdef __CYGWIN__
#  define WIN32_LEAN_AND_MEAN
#  include <windows.h>
# endif
#elif defined WIN32_NATIVE
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#endif
#if defined OS2
# define INCL_DOS
# include <os2.h>
#endif

/* Path relocation hook.  When the build does not enable relocatable
   installs, relocate() expands to its argument unchanged, so the
   configured directory is used as-is.  */
#if ENABLE_RELOCATABLE
# include "relocatable.h"
#else
# define relocate(pathname) (pathname)
#endif

/* Get GLIB_CHARSETALIAS_DIR: the directory in which the charset.alias
   file is looked up.  Falls back to LIBDIR when the build system did not
   provide a dedicated value.  */
#ifndef GLIB_CHARSETALIAS_DIR
# define GLIB_CHARSETALIAS_DIR LIBDIR
#endif

/* ISSLASH(C) is true when character C terminates a directory name.  On
   the platforms listed below both '/' and '\\' are accepted.  */
#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
  /* Win32, Cygwin, OS/2, DOS */
# define ISSLASH(C) ((C) == '/' || (C) == '\\')
#endif

/* Character inserted between the directory and the file name when the
   directory does not already end in a separator.  */
#ifndef DIRECTORY_SEPARATOR
# define DIRECTORY_SEPARATOR '/'
#endif

/* Everywhere else only DIRECTORY_SEPARATOR counts as a slash.  */
#ifndef ISSLASH
# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
#endif

/* Read characters through getc_unlocked when it is declared.
   NOTE(review): presumably acceptable because the only stream read with
   getc in this file is opened and closed locally inside
   _g_locale_get_charset_aliases -- confirm if other readers are added.  */
#if HAVE_DECL_GETC_UNLOCKED
# undef getc
# define getc getc_unlocked
#endif

/* The following static variable is declared 'volatile' to avoid a
   possible multithread problem in the function
   _g_locale_get_charset_aliases.  If we are running in a threaded
   environment, and if two threads initialize 'charset_aliases'
   simultaneously, both will produce the same value, and everything will
   be ok if the two assignments to 'charset_aliases' are atomic.  But I
   don't know what will happen if the two assignments mix.

   NOTE(review): 'volatile' by itself neither makes the store atomic nor
   orders it with respect to other threads; the scheme relies on pointer
   stores being indivisible on the target.  When two threads do race on a
   platform that parses charset.alias, each one allocates its own table
   and the pointer stored last wins; the other table is never freed.  */
/* On compilers that do not claim ISO C conformance the qualifier is
   removed altogether.  */
#if __STDC__ != 1
# define volatile /* empty */
#endif
/* Pointer to the contents of the charset.alias file, if it has already been
   read, else NULL.  Its format is:
   ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0'  */
static const char * volatile charset_aliases;

/* Return a pointer to the contents of the charset.alias file.  */
const char *
_g_locale_get_charset_aliases (void)
{
  /* Read the cached pointer exactly once; it is NULL until the first call
     has finished building the table.  */
  const char *table = charset_aliases;

  if (table != NULL)
    return table;

#if !(defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)
  {
    /* Build "<dir>[/]charset.alias" and try to open it.  */
    const char *alias_dir = relocate (GLIB_CHARSETALIAS_DIR);
    const char *alias_base = "charset.alias";
    size_t dir_len = strlen (alias_dir);
    size_t base_len = strlen (alias_base);
    /* 1 when a separator has to be inserted after the directory.  */
    size_t sep_len =
      (dir_len > 0 && !ISSLASH (alias_dir[dir_len - 1])) ? 1 : 0;
    char *path;
    FILE *stream = NULL;

    path = malloc (dir_len + sep_len + base_len + 1);
    if (path != NULL)
      {
        memcpy (path, alias_dir, dir_len);
        if (sep_len != 0)
          path[dir_len] = DIRECTORY_SEPARATOR;
        /* base_len + 1 also copies the terminating NUL.  */
        memcpy (path + dir_len + sep_len, alias_base, base_len + 1);
        stream = fopen (path, "r");
      }

    if (stream == NULL)
      {
        /* Out of memory or file not found: behave as if it were empty.  */
        table = "";
      }
    else
      {
        /* Accumulated "ALIAS\0CANONICAL\0" pairs and their total size.  */
        char *pairs = NULL;
        size_t pairs_len = 0;

        for (;;)
          {
            char alias[50+1];
            char canonical[50+1];
            size_t alias_len;
            size_t canonical_len;
            size_t entry_len;
            char *grown;
            int ch = getc (stream);

            if (ch == EOF)
              break;
            if (ch == '\n' || ch == ' ' || ch == '\t')
              continue;
            if (ch == '#')
              {
                /* Comment: discard everything up to the end of the line.  */
                do
                  ch = getc (stream);
                while (ch != EOF && ch != '\n');
                if (ch == EOF)
                  break;
                continue;
              }

            /* First character of an entry; give it back to the scanner.  */
            ungetc (ch, stream);
            if (fscanf (stream, "%50s %50s", alias, canonical) < 2)
              break;

            alias_len = strlen (alias);
            canonical_len = strlen (canonical);
            entry_len = alias_len + 1 + canonical_len + 1;

            /* Always keep one spare byte for the final list terminator.  */
            if (pairs_len == 0)
              grown = malloc (entry_len + 1);
            else
              grown = realloc (pairs, pairs_len + entry_len + 1);

            if (grown == NULL)
              {
                /* Out of memory: drop what was collected so far.  */
                free (pairs);
                pairs_len = 0;
                break;
              }
            pairs = grown;

            /* Append the new pair, each half with its own NUL.  */
            memcpy (pairs + pairs_len, alias, alias_len + 1);
            memcpy (pairs + pairs_len + alias_len + 1,
                    canonical, canonical_len + 1);
            pairs_len += entry_len;
          }

        fclose (stream);

        if (pairs_len == 0)
          table = "";
        else
          {
            /* The empty string after the last pair ends the list.  */
            pairs[pairs_len] = '\0';
            table = pairs;
          }
      }

    free (path);
  }
#else

# if defined VMS
  /* No separate charset.alias_vms file is shipped; the aliases are
     compiled in instead.  The list of encodings is taken from the
     OpenVMS 7.3-1 documentation "Compaq C Run-Time Library Reference
     Manual for OpenVMS systems" section 10.7 "Handling Different
     Character Sets".  */
  table = "ISO8859-1" "\0" "ISO-8859-1" "\0"
          "ISO8859-2" "\0" "ISO-8859-2" "\0"
          "ISO8859-5" "\0" "ISO-8859-5" "\0"
          "ISO8859-7" "\0" "ISO-8859-7" "\0"
          "ISO8859-8" "\0" "ISO-8859-8" "\0"
          "ISO8859-9" "\0" "ISO-8859-9" "\0"
          /* Japanese */
          "eucJP" "\0" "EUC-JP" "\0"
          "SJIS" "\0" "SHIFT_JIS" "\0"
          "DECKANJI" "\0" "DEC-KANJI" "\0"
          "SDECKANJI" "\0" "EUC-JP" "\0"
          /* Chinese */
          "eucTW" "\0" "EUC-TW" "\0"
          "DECHANYU" "\0" "DEC-HANYU" "\0"
          "DECHANZI" "\0" "GB2312" "\0"
          /* Korean */
          "DECKOREAN" "\0" "EUC-KR" "\0";
# endif

# if defined WIN32_NATIVE || defined __CYGWIN__
  /* The aliases are compiled in so that no extra file has to be installed
     next to the DLL and the DLL's directory need not be looked up at
     run time.  */
  table = "CP936" "\0" "GBK" "\0"
          "CP1361" "\0" "JOHAB" "\0"
          "CP20127" "\0" "ASCII" "\0"
          "CP20866" "\0" "KOI8-R" "\0"
          "CP20936" "\0" "GB2312" "\0"
          "CP21866" "\0" "KOI8-RU" "\0"
          "CP28591" "\0" "ISO-8859-1" "\0"
          "CP28592" "\0" "ISO-8859-2" "\0"
          "CP28593" "\0" "ISO-8859-3" "\0"
          "CP28594" "\0" "ISO-8859-4" "\0"
          "CP28595" "\0" "ISO-8859-5" "\0"
          "CP28596" "\0" "ISO-8859-6" "\0"
          "CP28597" "\0" "ISO-8859-7" "\0"
          "CP28598" "\0" "ISO-8859-8" "\0"
          "CP28599" "\0" "ISO-8859-9" "\0"
          "CP28605" "\0" "ISO-8859-15" "\0"
          "CP38598" "\0" "ISO-8859-8" "\0"
          "CP51932" "\0" "EUC-JP" "\0"
          "CP51936" "\0" "GB2312" "\0"
          "CP51949" "\0" "EUC-KR" "\0"
          "CP51950" "\0" "EUC-TW" "\0"
          "CP54936" "\0" "GB18030" "\0"
          "CP65001" "\0" "UTF-8" "\0";
# endif
#endif

  /* Publish the table so that later calls take the fast path.  */
  charset_aliases = table;
  return table;
}

/* Determine the name of the current locale's character encoding as the
   platform reports it.  No alias resolution is done here: the name is
   returned as found and may therefore be non-canonical; pass it to
   _g_locale_charset_unalias to map it through the charset.alias table.

   Depending on the platform branch compiled in, the result comes from
   nl_langinfo (CODESET), from the LC_ALL / LC_CTYPE / LANG environment
   variables (first one that is set and non-empty), or from the system
   code page formatted as "CP<number>".

   The result must not be freed.  It points either into a static buffer
   of this function, into the environment, or into storage owned by
   nl_langinfo.  It can be NULL when the encoding is taken from the
   environment and none of the variables is set, and "" on OS/2 when the
   code page query fails.  */

const char *
_g_locale_charset_raw (void)
{
  const char *codeset;

#if !(defined WIN32_NATIVE || defined OS2)

# if HAVE_LANGINFO_CODESET

  /* Most systems support nl_langinfo (CODESET) nowadays.  */
  codeset = nl_langinfo (CODESET);

#  ifdef __CYGWIN__
  /* Cygwin 2006 does not have locales.  nl_langinfo (CODESET) always
     returns "US-ASCII".  As long as this is not fixed, return the suffix
     of the locale name from the environment variables (if present) or
     the codepage as a number.  */
  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
    {
      const char *locale;
      /* "CP" + up to 10 decimal digits + NUL.  */
      static char buf[2 + 10 + 1];

      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
      if (locale != NULL && locale[0] != '\0')
        {
          /* If the locale name contains an encoding after the dot, return
             it.  */
          const char *dot = strchr (locale, '.');

          if (dot != NULL)
            {
              const char *modifier;

              dot++;
              /* Look for the possible @... trailer and remove it, if any.  */
              modifier = strchr (dot, '@');
              if (modifier == NULL)
                return dot;
              /* modifier was found at or after dot, so the difference is
                 never negative; compare it as an unsigned size.  A name
                 that does not fit in buf falls through to the code page.  */
              if ((size_t) (modifier - dot) < sizeof (buf))
                {
                  memcpy (buf, dot, modifier - dot);
                  buf [modifier - dot] = '\0';
                  return buf;
                }
            }
        }

      /* Woe32 has a function returning the locale's codepage as a number.  */
      sprintf (buf, "CP%u", GetACP ());
      codeset = buf;
    }
#  endif

# else

  /* On old systems which lack it, use setlocale or getenv.  */
  const char *locale = NULL;

  /* But most old systems don't have a complete set of locales.  Some
     (like SunOS 4 or DJGPP) have only the C locale.  Therefore we don't
     use setlocale here; it would return "C" when it doesn't support the
     locale name the user has set.  */
#  if 0
  locale = setlocale (LC_CTYPE, NULL);
#  endif
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_ALL");
      if (locale == NULL || locale[0] == '\0')
        {
          locale = getenv ("LC_CTYPE");
          if (locale == NULL || locale[0] == '\0')
            locale = getenv ("LANG");
        }
    }

  /* On some old systems, one used to set locale = "iso8859_1". On others,
     you set it to "language_COUNTRY.charset". In any case, we resolve it
     through the charset.alias file.  */
  codeset = locale;

# endif

#elif defined WIN32_NATIVE

  /* "CP" + up to 10 decimal digits + NUL.  */
  static char buf[2 + 10 + 1];

  /* Woe32 has a function returning the locale's codepage as a number.  */
  sprintf (buf, "CP%u", GetACP ());
  codeset = buf;

#elif defined OS2

  const char *locale;
  /* "CP" + up to 10 decimal digits + NUL.  */
  static char buf[2 + 10 + 1];
  ULONG cp[3];
  ULONG cplen;

  /* Allow user to override the codeset, as set in the operating system,
     with standard language environment variables.  */
  locale = getenv ("LC_ALL");
  if (locale == NULL || locale[0] == '\0')
    {
      locale = getenv ("LC_CTYPE");
      if (locale == NULL || locale[0] == '\0')
        locale = getenv ("LANG");
    }
  if (locale != NULL && locale[0] != '\0')
    {
      /* If the locale name contains an encoding after the dot, return it.  */
      const char *dot = strchr (locale, '.');

      if (dot != NULL)
        {
          const char *modifier;

          dot++;
          /* Look for the possible @... trailer and remove it, if any.  */
          modifier = strchr (dot, '@');
          if (modifier == NULL)
            return dot;
          /* modifier was found at or after dot, so the difference is
             never negative; compare it as an unsigned size.  */
          if ((size_t) (modifier - dot) < sizeof (buf))
            {
              memcpy (buf, dot, modifier - dot);
              buf [modifier - dot] = '\0';
              return buf;
            }
        }

      /* Resolve through the charset.alias file.  */
      codeset = locale;
    }
  else
    {
      /* OS/2 has a function returning the locale's codepage as a number.  */
      if (DosQueryCp (sizeof (cp), cp, &cplen))
        codeset = "";
      else
        {
          /* cp[0] is a ULONG; convert it explicitly so that the argument
             type matches the %u conversion.  */
          sprintf (buf, "CP%u", (unsigned int) cp[0]);
          codeset = buf;
        }
    }

#endif

  return codeset;
}

/* Map CODESET through the alias table returned by
   _g_locale_get_charset_aliases and return the canonical name paired with
   the first matching alias.  An alias consisting of the single character
   '*' matches every name.  A NULL CODESET is treated like the empty
   string.  When nothing matches, CODESET itself is returned.  The result
   is never the empty string: "ASCII" is returned in its place.  */
const char *
_g_locale_charset_unalias (const char *codeset)
{
  const char *entry;

  /* The canonical name cannot be determined.  */
  if (codeset == NULL)
    codeset = "";

  /* The table is a sequence of ALIAS '\0' CANONICAL '\0' pairs closed by
     an empty string; walk it one pair at a time.  */
  entry = _g_locale_get_charset_aliases ();
  while (*entry != '\0')
    {
      const char *canonical = entry + strlen (entry) + 1;

      if ((entry[0] == '*' && entry[1] == '\0')
          || strcmp (codeset, entry) == 0)
        {
          codeset = canonical;
          break;
        }

      /* Skip the canonical half to reach the next alias.  */
      entry = canonical + strlen (canonical) + 1;
    }

  /* Never hand back an empty string: GNU libc and GNU libiconv take it to
     mean "the locale's character encoding", so GNU libiconv would call
     this function a second time.  */
  if (codeset[0] == '\0')
    codeset = "ASCII";

  return codeset;
}

Coverage Report

Created: 2025-07-18 06:10

Line	Count	Source (jump to first uncovered line)
1		/* Determine a canonical name for the current locale's character encoding.
2
3		Copyright (C) 2000-2006 Free Software Foundation, Inc.
4
5		This program is free software; you can redistribute it and/or modify it
6		under the terms of the GNU Library General Public License as published
7		by the Free Software Foundation; either version 2, or (at your option)
8		any later version.
9
10		This program is distributed in the hope that it will be useful,
11		but WITHOUT ANY WARRANTY; without even the implied warranty of
12		MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13		Library General Public License for more details.
14
15		You should have received a copy of the GNU Library General Public
16		License along with this program; if not, write to the Free Software
17		Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18		USA. */
19
20		/* Written by Bruno Haible <bruno@clisp.org>. */
21
22		#include "config.h"
23
24		/* Specification. */
25		#include "localcharset.h"
26
27		#include <stddef.h>
28		#include <stdio.h>
29		#include <string.h>
30		#include <stdlib.h>
31
32		#if defined _WIN32 \|\| defined __WIN32__
33		# define WIN32_NATIVE
34		#endif
35
36		#if defined __EMX__
37		/* Assume EMX program runs on OS/2, even if compiled under DOS. */
38		# define OS2
39		#endif
40
41		#if !defined WIN32_NATIVE
42		# if HAVE_LANGINFO_CODESET
43		# include <langinfo.h>
44		# else
45		# if 0 /* see comment below */
46		# include <locale.h>
47		# endif
48		# endif
49		# ifdef __CYGWIN__
50		# define WIN32_LEAN_AND_MEAN
51		# include <windows.h>
52		# endif
53		#elif defined WIN32_NATIVE
54		# define WIN32_LEAN_AND_MEAN
55		# include <windows.h>
56		#endif
57		#if defined OS2
58		# define INCL_DOS
59		# include <os2.h>
60		#endif
61
62		#if ENABLE_RELOCATABLE
63		# include "relocatable.h"
64		#else
65	4	# define relocate(pathname) (pathname)
66		#endif
67
68		/* Get GLIB_CHARSETALIAS_DIR. */
69		#ifndef GLIB_CHARSETALIAS_DIR
70		# define GLIB_CHARSETALIAS_DIR LIBDIR
71		#endif
72
73		#if defined _WIN32 \|\| defined __WIN32__ \|\| defined __CYGWIN__ \|\| defined __EMX__ \|\| defined __DJGPP__
74		/* Win32, Cygwin, OS/2, DOS */
75		# define ISSLASH(C) ((C) == '/' \|\| (C) == '\\')
76		#endif
77
78		#ifndef DIRECTORY_SEPARATOR
79	8	# define DIRECTORY_SEPARATOR '/'
80		#endif
81
82		#ifndef ISSLASH
83	4	# define ISSLASH(C) ((C) == DIRECTORY_SEPARATOR)
84		#endif
85
86		#if HAVE_DECL_GETC_UNLOCKED
87		# undef getc
88		# define getc getc_unlocked
89		#endif
90
91		/* The following static variable is declared 'volatile' to avoid a
92		possible multithread problem in the function get_charset_aliases. If we
93		are running in a threaded environment, and if two threads initialize
94		'charset_aliases' simultaneously, both will produce the same value,
95		and everything will be ok if the two assignments to 'charset_aliases'
96		are atomic. But I don't know what will happen if the two assignments mix. */
97		#if __STDC__ != 1
98		# define volatile /* empty */
99		#endif
100		/* Pointer to the contents of the charset.alias file, if it has already been
101		read, else NULL. Its format is:
102		ALIAS_1 '\0' CANONICAL_1 '\0' ... ALIAS_n '\0' CANONICAL_n '\0' '\0' */
103		static const char * volatile charset_aliases;
104
105		/* Return a pointer to the contents of the charset.alias file. */
106		const char *
107		_g_locale_get_charset_aliases (void)
108	5	{
109	5	const char *cp;
110
111	5	cp = charset_aliases;
112	5	if (cp == NULL)
113	4	{
114	4	#if !(defined VMS \|\| defined WIN32_NATIVE \|\| defined __CYGWIN__)
115	4	FILE *fp;
116	4	const char *dir;
117	4	const char *base = "charset.alias";
118	4	char *file_name;
119
120	4	dir = relocate (GLIB_CHARSETALIAS_DIR);
121
122		/* Concatenate dir and base into freshly allocated file_name. */
123	4	{
124	4	size_t dir_len = strlen (dir);
125	4	size_t base_len = strlen (base);
126	4	int add_slash = (dir_len > 0 && !ISSLASH (dir[dir_len - 1]));
127	4	file_name = (char *) malloc (dir_len + add_slash + base_len + 1);
128	4	if (file_name != NULL)
129	4	{
130	4	memcpy (file_name, dir, dir_len);
131	4	if (add_slash)
132	4	file_name[dir_len] = DIRECTORY_SEPARATOR;
133	4	memcpy (file_name + dir_len + add_slash, base, base_len + 1);
134	4	}
135	4	}
136
137	4	if (file_name == NULL \|\| (fp = fopen (file_name, "r")) == NULL)
138		/* Out of memory or file not found, treat it as empty. */
139	4	cp = "";
140	0	else
141	0	{
142		/* Parse the file's contents. */
143	0	char *res_ptr = NULL;
144	0	size_t res_size = 0;
145
146	0	for (;;)
147	0	{
148	0	int c;
149	0	char buf1[50+1];
150	0	char buf2[50+1];
151	0	size_t l1, l2;
152	0	char *old_res_ptr;
153
154	0	c = getc (fp);
155	0	if (c == EOF)
156	0	break;
157	0	if (c == '\n' \|\| c == ' ' \|\| c == '\t')
158	0	continue;
159	0	if (c == '#')
160	0	{
161		/* Skip comment, to end of line. */
162	0	do
163	0	c = getc (fp);
164	0	while (!(c == EOF \|\| c == '\n'));
165	0	if (c == EOF)
166	0	break;
167	0	continue;
168	0	}
169	0	ungetc (c, fp);
170	0	if (fscanf (fp, "%50s %50s", buf1, buf2) < 2)
171	0	break;
172	0	l1 = strlen (buf1);
173	0	l2 = strlen (buf2);
174	0	old_res_ptr = res_ptr;
175	0	if (res_size == 0)
176	0	{
177	0	res_size = l1 + 1 + l2 + 1;
178	0	res_ptr = (char *) malloc (res_size + 1);
179	0	}
180	0	else
181	0	{
182	0	res_size += l1 + 1 + l2 + 1;
183	0	res_ptr = (char *) realloc (res_ptr, res_size + 1);
184	0	}
185	0	if (res_ptr == NULL)
186	0	{
187		/* Out of memory. */
188	0	res_size = 0;
189	0	if (old_res_ptr != NULL)
190	0	free (old_res_ptr);
191	0	break;
192	0	}
193	0	strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
194	0	strcpy (res_ptr + res_size - (l2 + 1), buf2);
195	0	}
196	0	fclose (fp);
197	0	if (res_size == 0)
198	0	cp = "";
199	0	else
200	0	{
201	0	*(res_ptr + res_size) = '\0';
202	0	cp = res_ptr;
203	0	}
204	0	}
205
206	4	if (file_name != NULL)
207	4	free (file_name);
208
209		#else
210
211		# if defined VMS
212		/* To avoid the troubles of an extra file charset.alias_vms in the
213		sources of many GNU packages, simply inline the aliases here. */
214		/* The list of encodings is taken from the OpenVMS 7.3-1 documentation
215		"Compaq C Run-Time Library Reference Manual for OpenVMS systems"
216		section 10.7 "Handling Different Character Sets". */
217		cp = "ISO8859-1" "\0" "ISO-8859-1" "\0"
218		"ISO8859-2" "\0" "ISO-8859-2" "\0"
219		"ISO8859-5" "\0" "ISO-8859-5" "\0"
220		"ISO8859-7" "\0" "ISO-8859-7" "\0"
221		"ISO8859-8" "\0" "ISO-8859-8" "\0"
222		"ISO8859-9" "\0" "ISO-8859-9" "\0"
223		/* Japanese */
224		"eucJP" "\0" "EUC-JP" "\0"
225		"SJIS" "\0" "SHIFT_JIS" "\0"
226		"DECKANJI" "\0" "DEC-KANJI" "\0"
227		"SDECKANJI" "\0" "EUC-JP" "\0"
228		/* Chinese */
229		"eucTW" "\0" "EUC-TW" "\0"
230		"DECHANYU" "\0" "DEC-HANYU" "\0"
231		"DECHANZI" "\0" "GB2312" "\0"
232		/* Korean */
233		"DECKOREAN" "\0" "EUC-KR" "\0";
234		# endif
235
236		# if defined WIN32_NATIVE \|\| defined __CYGWIN__
237		/* To avoid the troubles of installing a separate file in the same
238		directory as the DLL and of retrieving the DLL's directory at
239		runtime, simply inline the aliases here. */
240
241		cp = "CP936" "\0" "GBK" "\0"
242		"CP1361" "\0" "JOHAB" "\0"
243		"CP20127" "\0" "ASCII" "\0"
244		"CP20866" "\0" "KOI8-R" "\0"
245		"CP20936" "\0" "GB2312" "\0"
246		"CP21866" "\0" "KOI8-RU" "\0"
247		"CP28591" "\0" "ISO-8859-1" "\0"
248		"CP28592" "\0" "ISO-8859-2" "\0"
249		"CP28593" "\0" "ISO-8859-3" "\0"
250		"CP28594" "\0" "ISO-8859-4" "\0"
251		"CP28595" "\0" "ISO-8859-5" "\0"
252		"CP28596" "\0" "ISO-8859-6" "\0"
253		"CP28597" "\0" "ISO-8859-7" "\0"
254		"CP28598" "\0" "ISO-8859-8" "\0"
255		"CP28599" "\0" "ISO-8859-9" "\0"
256		"CP28605" "\0" "ISO-8859-15" "\0"
257		"CP38598" "\0" "ISO-8859-8" "\0"
258		"CP51932" "\0" "EUC-JP" "\0"
259		"CP51936" "\0" "GB2312" "\0"
260		"CP51949" "\0" "EUC-KR" "\0"
261		"CP51950" "\0" "EUC-TW" "\0"
262		"CP54936" "\0" "GB18030" "\0"
263		"CP65001" "\0" "UTF-8" "\0";
264		# endif
265		#endif
266
267	4	charset_aliases = cp;
268	4	}
269
270	5	return cp;
271	5	}
272
273		/* Determine the current locale's character encoding, and canonicalize it
274		into one of the canonical names listed in config.charset.
275		The result must not be freed; it is statically allocated.
276		If the canonical name cannot be determined, the result is a non-canonical
277		name. */
278
279		const char *
280		_g_locale_charset_raw (void)
281	1.14k	{
282	1.14k	const char *codeset;
283
284	1.14k	#if !(defined WIN32_NATIVE \|\| defined OS2)
285
286	1.14k	# if HAVE_LANGINFO_CODESET
287
288		/* Most systems support nl_langinfo (CODESET) nowadays. */
289	1.14k	codeset = nl_langinfo (CODESET);
290
291		# ifdef __CYGWIN__
292		/* Cygwin 2006 does not have locales. nl_langinfo (CODESET) always
293		returns "US-ASCII". As long as this is not fixed, return the suffix
294		of the locale name from the environment variables (if present) or
295		the codepage as a number. */
296		if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
297		{
298		const char *locale;
299		static char buf[2 + 10 + 1];
300
301		locale = getenv ("LC_ALL");
302		if (locale == NULL \|\| locale[0] == '\0')
303		{
304		locale = getenv ("LC_CTYPE");
305		if (locale == NULL \|\| locale[0] == '\0')
306		locale = getenv ("LANG");
307		}
308		if (locale != NULL && locale[0] != '\0')
309		{
310		/* If the locale name contains an encoding after the dot, return
311		it. */
312		const char *dot = strchr (locale, '.');
313
314		if (dot != NULL)
315		{
316		const char *modifier;
317
318		dot++;
319		/* Look for the possible @... trailer and remove it, if any. */
320		modifier = strchr (dot, '@');
321		if (modifier == NULL)
322		return dot;
323		if (modifier - dot < sizeof (buf))
324		{
325		memcpy (buf, dot, modifier - dot);
326		buf [modifier - dot] = '\0';
327		return buf;
328		}
329		}
330		}
331
332		/* Woe32 has a function returning the locale's codepage as a number. */
333		sprintf (buf, "CP%u", GetACP ());
334		codeset = buf;
335		}
336		# endif
337
338		# else
339
340		/* On old systems which lack it, use setlocale or getenv. */
341		const char *locale = NULL;
342
343		/* But most old systems don't have a complete set of locales. Some
344		(like SunOS 4 or DJGPP) have only the C locale. Therefore we don't
345		use setlocale here; it would return "C" when it doesn't support the
346		locale name the user has set. */
347		# if 0
348		locale = setlocale (LC_CTYPE, NULL);
349		# endif
350		if (locale == NULL \|\| locale[0] == '\0')
351		{
352		locale = getenv ("LC_ALL");
353		if (locale == NULL \|\| locale[0] == '\0')
354		{
355		locale = getenv ("LC_CTYPE");
356		if (locale == NULL \|\| locale[0] == '\0')
357		locale = getenv ("LANG");
358		}
359		}
360
361		/* On some old systems, one used to set locale = "iso8859_1". On others,
362		you set it to "language_COUNTRY.charset". In any case, we resolve it
363		through the charset.alias file. */
364		codeset = locale;
365
366		# endif
367
368		#elif defined WIN32_NATIVE
369
370		static char buf[2 + 10 + 1];
371
372		/* Woe32 has a function returning the locale's codepage as a number. */
373		sprintf (buf, "CP%u", GetACP ());
374		codeset = buf;
375
376		#elif defined OS2
377
378		const char *locale;
379		static char buf[2 + 10 + 1];
380		ULONG cp[3];
381		ULONG cplen;
382
383		/* Allow user to override the codeset, as set in the operating system,
384		with standard language environment variables. */
385		locale = getenv ("LC_ALL");
386		if (locale == NULL \|\| locale[0] == '\0')
387		{
388		locale = getenv ("LC_CTYPE");
389		if (locale == NULL \|\| locale[0] == '\0')
390		locale = getenv ("LANG");
391		}
392		if (locale != NULL && locale[0] != '\0')
393		{
394		/* If the locale name contains an encoding after the dot, return it. */
395		const char *dot = strchr (locale, '.');
396
397		if (dot != NULL)
398		{
399		const char *modifier;
400
401		dot++;
402		/* Look for the possible @... trailer and remove it, if any. */
403		modifier = strchr (dot, '@');
404		if (modifier == NULL)
405		return dot;
406		if (modifier - dot < sizeof (buf))
407		{
408		memcpy (buf, dot, modifier - dot);
409		buf [modifier - dot] = '\0';
410		return buf;
411		}
412		}
413
414		/* Resolve through the charset.alias file. */
415		codeset = locale;
416		}
417		else
418		{
419		/* OS/2 has a function returning the locale's codepage as a number. */
420		if (DosQueryCp (sizeof (cp), cp, &cplen))
421		codeset = "";
422		else
423		{
424		sprintf (buf, "CP%u", cp[0]);
425		codeset = buf;
426		}
427		}
428
429		#endif
430
431	1.14k	return codeset;
432	1.14k	}
433
434		const char *
435		_g_locale_charset_unalias (const char *codeset)
436	5	{
437	5	const char *aliases;
438
439	5	if (codeset == NULL)
440		/* The canonical name cannot be determined. */
441	0	codeset = "";
442
443		/* Resolve alias. */
444	5	for (aliases = _g_locale_get_charset_aliases ();
445	5	*aliases != '\0';
446	5	aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
447	0	if (strcmp (codeset, aliases) == 0
448	0	\|\| (aliases[0] == '*' && aliases[1] == '\0'))
449	0	{
450	0	codeset = aliases + strlen (aliases) + 1;
451	0	break;
452	0	}
453
454		/* Don't return an empty string. GNU libc and GNU libiconv interpret
455		the empty string as denoting "the locale's character encoding",
456		thus GNU libiconv would call this function a second time. */
457	5	if (codeset[0] == '\0')
458	0	codeset = "ASCII";
459
460	5	return codeset;
461	5	}