/src/postgres/src/port/chklocale.c

Source (jump to first uncovered line)
/*-------------------------------------------------------------------------
 *
 * chklocale.c
 *    Functions for handling locale-related info
 *
 *
 * Copyright (c) 1996-2025, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *    src/port/chklocale.c
 *
 *-------------------------------------------------------------------------
 */

#ifndef FRONTEND
#include "postgres.h"
#else
#include "postgres_fe.h"
#endif

#ifndef WIN32
#include <langinfo.h>
#endif

#include "mb/pg_wchar.h"


/*
 * This table needs to recognize all the CODESET spellings for supported
 * backend encodings, as well as frontend-only encodings where possible
 * (the latter case is currently only needed for initdb to recognize
 * error situations).  On Windows, we rely on entries for codepage
 * numbers (CPnnn).
 *
 * Note that we search the table with pg_strcasecmp(), so variant
 * capitalizations don't need their own entries.
 */
/*
 * One row of the codeset-name lookup table: pairs a codeset spelling with
 * the PostgreSQL encoding ID it stands for.
 */
struct encoding_match
{
  enum pg_enc pg_enc_code;      /* PostgreSQL encoding ID for this name */
  const char *system_enc_name;  /* codeset spelling to match; NULL only in
                                 * the table's end marker */
};

/*
 * Lookup table from codeset names to PostgreSQL encoding IDs.
 *
 * Several spellings may map to the same encoding.  The lookup loops in this
 * file scan the entries in order, compare with pg_strcasecmp(), and stop at
 * the final entry, whose system_enc_name is NULL.
 */
static const struct encoding_match encoding_match_list[] = {
  {PG_EUC_JP, "EUC-JP"},
  {PG_EUC_JP, "eucJP"},
  {PG_EUC_JP, "IBM-eucJP"},
  {PG_EUC_JP, "sdeckanji"},
  {PG_EUC_JP, "CP20932"},

  {PG_EUC_CN, "EUC-CN"},
  {PG_EUC_CN, "eucCN"},
  {PG_EUC_CN, "IBM-eucCN"},
  {PG_EUC_CN, "GB2312"},
  {PG_EUC_CN, "dechanzi"},
  {PG_EUC_CN, "CP20936"},

  {PG_EUC_KR, "EUC-KR"},
  {PG_EUC_KR, "eucKR"},
  {PG_EUC_KR, "IBM-eucKR"},
  {PG_EUC_KR, "deckorean"},
  {PG_EUC_KR, "5601"},
  {PG_EUC_KR, "CP51949"},

  {PG_EUC_TW, "EUC-TW"},
  {PG_EUC_TW, "eucTW"},
  {PG_EUC_TW, "IBM-eucTW"},
  {PG_EUC_TW, "cns11643"},
  /* No codepage for EUC-TW ? */

  {PG_UTF8, "UTF-8"},
  {PG_UTF8, "utf8"},
  {PG_UTF8, "CP65001"},

  /*
   * Note that the PG_LATINn number equals the ISO 8859 part number only up
   * to LATIN4: below, LATIN5 through LATIN10 pair with parts 9, 10, 13, 14,
   * 15 and 16, while parts 5 through 8 use the PG_ISO_8859_n codes further
   * down.
   */
  {PG_LATIN1, "ISO-8859-1"},
  {PG_LATIN1, "ISO8859-1"},
  {PG_LATIN1, "iso88591"},
  {PG_LATIN1, "CP28591"},

  {PG_LATIN2, "ISO-8859-2"},
  {PG_LATIN2, "ISO8859-2"},
  {PG_LATIN2, "iso88592"},
  {PG_LATIN2, "CP28592"},

  {PG_LATIN3, "ISO-8859-3"},
  {PG_LATIN3, "ISO8859-3"},
  {PG_LATIN3, "iso88593"},
  {PG_LATIN3, "CP28593"},

  {PG_LATIN4, "ISO-8859-4"},
  {PG_LATIN4, "ISO8859-4"},
  {PG_LATIN4, "iso88594"},
  {PG_LATIN4, "CP28594"},

  {PG_LATIN5, "ISO-8859-9"},
  {PG_LATIN5, "ISO8859-9"},
  {PG_LATIN5, "iso88599"},
  {PG_LATIN5, "CP28599"},

  {PG_LATIN6, "ISO-8859-10"},
  {PG_LATIN6, "ISO8859-10"},
  {PG_LATIN6, "iso885910"},

  {PG_LATIN7, "ISO-8859-13"},
  {PG_LATIN7, "ISO8859-13"},
  {PG_LATIN7, "iso885913"},

  {PG_LATIN8, "ISO-8859-14"},
  {PG_LATIN8, "ISO8859-14"},
  {PG_LATIN8, "iso885914"},

  {PG_LATIN9, "ISO-8859-15"},
  {PG_LATIN9, "ISO8859-15"},
  {PG_LATIN9, "iso885915"},
  {PG_LATIN9, "CP28605"},

  {PG_LATIN10, "ISO-8859-16"},
  {PG_LATIN10, "ISO8859-16"},
  {PG_LATIN10, "iso885916"},

  {PG_KOI8R, "KOI8-R"},
  {PG_KOI8R, "CP20866"},

  {PG_KOI8U, "KOI8-U"},
  {PG_KOI8U, "CP21866"},

  {PG_WIN866, "CP866"},
  {PG_WIN874, "CP874"},
  {PG_WIN1250, "CP1250"},
  {PG_WIN1251, "CP1251"},
  {PG_WIN1251, "ansi-1251"},
  {PG_WIN1252, "CP1252"},
  {PG_WIN1253, "CP1253"},
  {PG_WIN1254, "CP1254"},
  {PG_WIN1255, "CP1255"},
  {PG_WIN1256, "CP1256"},
  {PG_WIN1257, "CP1257"},
  {PG_WIN1258, "CP1258"},

  {PG_ISO_8859_5, "ISO-8859-5"},
  {PG_ISO_8859_5, "ISO8859-5"},
  {PG_ISO_8859_5, "iso88595"},
  {PG_ISO_8859_5, "CP28595"},

  {PG_ISO_8859_6, "ISO-8859-6"},
  {PG_ISO_8859_6, "ISO8859-6"},
  {PG_ISO_8859_6, "iso88596"},
  {PG_ISO_8859_6, "CP28596"},

  {PG_ISO_8859_7, "ISO-8859-7"},
  {PG_ISO_8859_7, "ISO8859-7"},
  {PG_ISO_8859_7, "iso88597"},
  {PG_ISO_8859_7, "CP28597"},

  {PG_ISO_8859_8, "ISO-8859-8"},
  {PG_ISO_8859_8, "ISO8859-8"},
  {PG_ISO_8859_8, "iso88598"},
  {PG_ISO_8859_8, "CP28598"},

  {PG_SJIS, "SJIS"},
  {PG_SJIS, "PCK"},
  {PG_SJIS, "CP932"},
  {PG_SJIS, "SHIFT_JIS"},

  {PG_BIG5, "BIG5"},
  {PG_BIG5, "BIG5HKSCS"},
  {PG_BIG5, "Big5-HKSCS"},
  {PG_BIG5, "CP950"},

  {PG_GBK, "GBK"},
  {PG_GBK, "CP936"},

  {PG_UHC, "UHC"},
  {PG_UHC, "CP949"},

  {PG_JOHAB, "JOHAB"},
  {PG_JOHAB, "CP1361"},

  {PG_GB18030, "GB18030"},
  {PG_GB18030, "CP54936"},

  {PG_SHIFT_JIS_2004, "SJIS_2004"},

  {PG_SQL_ASCII, "US-ASCII"},

  /* Sentinel: the NULL name stops the lookup loops; its code is not used */
  {PG_SQL_ASCII, NULL}    /* end marker */
};

#ifdef WIN32
/*
 * On Windows, use CP<code page number> instead of CODESET.
 *
 * The locale name is first converted to wide characters and handed to
 * GetLocaleInfoEx(), which parses short locale names like "de-DE", "fr-FR",
 * etc.  If the name cannot be converted (for instance because it does not
 * fit in LOCALE_NAME_MAX_LENGTH characters) or GetLocaleInfoEx() does not
 * accept it, we fall back to the pre-VS-2010 manual parsing that uses
 * <Language>_<Country>.<CodePage> as a base.
 *
 * Returns a malloc()'d string for the caller to free, or NULL if no codeset
 * could be derived from the name or memory could not be allocated.
 */
static char *
win32_get_codeset(const char *ctype)
{
  char     *r = NULL;
  const char *codepage;
  uint32    cp;
  int     wlen;
  WCHAR   wctype[LOCALE_NAME_MAX_LENGTH];

  /*
   * Convert the name to wide characters.  A zero result means the conversion
   * failed; the buffer contents then cannot be relied on (it may be
   * truncated without a terminator), so we must not pass it on.
   */
  memset(wctype, 0, sizeof(wctype));
  wlen = MultiByteToWideChar(CP_ACP, 0, ctype, -1, wctype,
                 LOCALE_NAME_MAX_LENGTH);

  if (wlen > 0 &&
    GetLocaleInfoEx(wctype,
            LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
            (LPWSTR) &cp, sizeof(cp) / sizeof(WCHAR)) > 0)
  {
    const size_t rsize = 16;  /* excess: "CP" + 10 digits + NUL is 13 */

    r = malloc(rsize);
    if (r != NULL)
    {
      /*
       * If the return value is CP_ACP that means no ANSI code page is
       * available, so only Unicode can be used for the locale.
       */
      if (cp == CP_ACP)
        strcpy(r, "utf8");
      else
        snprintf(r, rsize, "CP%u", cp);
    }
  }
  else
  {
    /*
     * Locale format on Win32 is <Language>_<Country>.<CodePage>.  For
     * example, English_United States.1252.  If we see digits after the
     * last dot, assume it's a codepage number.  Otherwise, we might be
     * dealing with a Unix-style locale string; Windows' setlocale() will
     * take those even though GetLocaleInfoEx() won't, so we end up here.
     * In that case, just return what's after the last dot and hope we can
     * find it in our table.
     */
    codepage = strrchr(ctype, '.');
    if (codepage != NULL)
    {
      size_t    ln;
      size_t    rsize;

      codepage++;
      ln = strlen(codepage);
      rsize = ln + 3;   /* room for "CP" prefix and terminating NUL */
      r = malloc(rsize);
      if (r != NULL)
      {
        if (strspn(codepage, "0123456789") == ln)
          snprintf(r, rsize, "CP%s", codepage);
        else
          strcpy(r, codepage);
      }
    }
  }

  return r;
}

#ifndef FRONTEND
/*
 * Map a Windows code page number to a PostgreSQL encoding ID.
 *
 * The number is spelled as "CP<number>" and looked up in
 * encoding_match_list using a case-insensitive comparison.  The encoding ID
 * of the first matching entry is returned.  If nothing matches, a WARNING
 * is reported and the result is -1.
 */
int
pg_codepage_to_encoding(UINT cp)
{
  const struct encoding_match *entry;
  char    cpname[16];

  sprintf(cpname, "CP%u", cp);

  /* Walk the table up to its NULL-named end marker */
  for (entry = encoding_match_list; entry->system_enc_name != NULL; entry++)
  {
    if (pg_strcasecmp(cpname, entry->system_enc_name) == 0)
      return entry->pg_enc_code;
  }

  ereport(WARNING,
      (errmsg("could not determine encoding for codeset \"%s\"", cpname)));

  return -1;
}
#endif
#endif              /* WIN32 */

/*
 * Given a setting for LC_CTYPE, return the Postgres ID of the associated
 * encoding, if we can determine it.  Return -1 if we can't determine it
 * (including the case where the current locale name cannot be obtained,
 * the locale name is not accepted, or its codeset is not in our table).
 *
 * Pass in NULL to get the encoding for the current locale setting.
 * Pass "" to get the encoding selected by the server's environment.
 *
 * If the result is PG_SQL_ASCII, callers should treat it as being compatible
 * with any desired encoding.
 *
 * When write_message is true, an unrecognized codeset is reported: on stderr
 * in frontend builds, as a WARNING otherwise.
 *
 * If running in the backend and write_message is false, this function must
 * cope with the possibility that elog() and palloc() are not yet usable.
 */
int
pg_get_encoding_from_locale(const char *ctype, bool write_message)
{
  char     *sys;
  int     i;

#ifndef WIN32
  locale_t  loc;
#endif

  /* Get the CODESET property, and also LC_CTYPE if not passed in */
  if (!ctype)
  {
    ctype = setlocale(LC_CTYPE, NULL);
    if (!ctype)
      return -1;     /* setlocale() broken? */
  }

  /* If locale is C or POSIX, we can allow all encodings */
  if (pg_strcasecmp(ctype, "C") == 0 ||
    pg_strcasecmp(ctype, "POSIX") == 0)
    return PG_SQL_ASCII;

#ifndef WIN32
  loc = newlocale(LC_CTYPE_MASK, ctype, (locale_t) 0);
  if (loc == (locale_t) 0)
    return -1;       /* bogus ctype passed in? */

  /*
   * Take a private copy of the codeset name before releasing the locale
   * object it was obtained from.
   */
  sys = nl_langinfo_l(CODESET, loc);
  if (sys)
    sys = strdup(sys);

  freelocale(loc);
#else
  sys = win32_get_codeset(ctype);
#endif

  /*
   * No codeset string: out of memory (unlikely), or on Windows possibly a
   * locale name from which no codeset could be derived.
   */
  if (!sys)
    return -1;

  /* Check the table */
  for (i = 0; encoding_match_list[i].system_enc_name; i++)
  {
    if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
    {
      free(sys);
      return encoding_match_list[i].pg_enc_code;
    }
  }

  /* Special-case kluges for particular platforms go here */

#ifdef __darwin__

  /*
   * Current macOS has many locales that report an empty string for CODESET,
   * but they all seem to actually use UTF-8.
   */
  if (strlen(sys) == 0)
  {
    free(sys);
    return PG_UTF8;
  }
#endif

  /*
   * We print a warning if we got a CODESET string but couldn't recognize
   * it.  This means we need another entry in the table.
   */
  if (write_message)
  {
#ifdef FRONTEND
    fprintf(stderr, _("could not determine encoding for locale \"%s\": codeset is \"%s\""),
        ctype, sys);
    /* keep newline separate so there's only one translatable string */
    fputc('\n', stderr);
#else
    ereport(WARNING,
        (errmsg("could not determine encoding for locale \"%s\": codeset is \"%s\"",
            ctype, sys)));
#endif
  }

  free(sys);
  return -1;
}

Coverage Report

Created: 2025-08-12 06:43

Line	Count	Source (jump to first uncovered line)
1		/*-------------------------------------------------------------------------
2		*
3		* chklocale.c
4		* Functions for handling locale-related info
5		*
6		*
7		* Copyright (c) 1996-2025, PostgreSQL Global Development Group
8		*
9		*
10		* IDENTIFICATION
11		* src/port/chklocale.c
12		*
13		*-------------------------------------------------------------------------
14		*/
15
16		#ifndef FRONTEND
17		#include "postgres.h"
18		#else
19		#include "postgres_fe.h"
20		#endif
21
22		#ifndef WIN32
23		#include <langinfo.h>
24		#endif
25
26		#include "mb/pg_wchar.h"
27
28
29		/*
30		* This table needs to recognize all the CODESET spellings for supported
31		* backend encodings, as well as frontend-only encodings where possible
32		* (the latter case is currently only needed for initdb to recognize
33		* error situations). On Windows, we rely on entries for codepage
34		* numbers (CPnnn).
35		*
36		* Note that we search the table with pg_strcasecmp(), so variant
37		* capitalizations don't need their own entries.
38		*/
39		struct encoding_match
40		{
41		enum pg_enc pg_enc_code;
42		const char *system_enc_name;
43		};
44
45		static const struct encoding_match encoding_match_list[] = {
46		{PG_EUC_JP, "EUC-JP"},
47		{PG_EUC_JP, "eucJP"},
48		{PG_EUC_JP, "IBM-eucJP"},
49		{PG_EUC_JP, "sdeckanji"},
50		{PG_EUC_JP, "CP20932"},
51
52		{PG_EUC_CN, "EUC-CN"},
53		{PG_EUC_CN, "eucCN"},
54		{PG_EUC_CN, "IBM-eucCN"},
55		{PG_EUC_CN, "GB2312"},
56		{PG_EUC_CN, "dechanzi"},
57		{PG_EUC_CN, "CP20936"},
58
59		{PG_EUC_KR, "EUC-KR"},
60		{PG_EUC_KR, "eucKR"},
61		{PG_EUC_KR, "IBM-eucKR"},
62		{PG_EUC_KR, "deckorean"},
63		{PG_EUC_KR, "5601"},
64		{PG_EUC_KR, "CP51949"},
65
66		{PG_EUC_TW, "EUC-TW"},
67		{PG_EUC_TW, "eucTW"},
68		{PG_EUC_TW, "IBM-eucTW"},
69		{PG_EUC_TW, "cns11643"},
70		/* No codepage for EUC-TW ? */
71
72		{PG_UTF8, "UTF-8"},
73		{PG_UTF8, "utf8"},
74		{PG_UTF8, "CP65001"},
75
76		{PG_LATIN1, "ISO-8859-1"},
77		{PG_LATIN1, "ISO8859-1"},
78		{PG_LATIN1, "iso88591"},
79		{PG_LATIN1, "CP28591"},
80
81		{PG_LATIN2, "ISO-8859-2"},
82		{PG_LATIN2, "ISO8859-2"},
83		{PG_LATIN2, "iso88592"},
84		{PG_LATIN2, "CP28592"},
85
86		{PG_LATIN3, "ISO-8859-3"},
87		{PG_LATIN3, "ISO8859-3"},
88		{PG_LATIN3, "iso88593"},
89		{PG_LATIN3, "CP28593"},
90
91		{PG_LATIN4, "ISO-8859-4"},
92		{PG_LATIN4, "ISO8859-4"},
93		{PG_LATIN4, "iso88594"},
94		{PG_LATIN4, "CP28594"},
95
96		{PG_LATIN5, "ISO-8859-9"},
97		{PG_LATIN5, "ISO8859-9"},
98		{PG_LATIN5, "iso88599"},
99		{PG_LATIN5, "CP28599"},
100
101		{PG_LATIN6, "ISO-8859-10"},
102		{PG_LATIN6, "ISO8859-10"},
103		{PG_LATIN6, "iso885910"},
104
105		{PG_LATIN7, "ISO-8859-13"},
106		{PG_LATIN7, "ISO8859-13"},
107		{PG_LATIN7, "iso885913"},
108
109		{PG_LATIN8, "ISO-8859-14"},
110		{PG_LATIN8, "ISO8859-14"},
111		{PG_LATIN8, "iso885914"},
112
113		{PG_LATIN9, "ISO-8859-15"},
114		{PG_LATIN9, "ISO8859-15"},
115		{PG_LATIN9, "iso885915"},
116		{PG_LATIN9, "CP28605"},
117
118		{PG_LATIN10, "ISO-8859-16"},
119		{PG_LATIN10, "ISO8859-16"},
120		{PG_LATIN10, "iso885916"},
121
122		{PG_KOI8R, "KOI8-R"},
123		{PG_KOI8R, "CP20866"},
124
125		{PG_KOI8U, "KOI8-U"},
126		{PG_KOI8U, "CP21866"},
127
128		{PG_WIN866, "CP866"},
129		{PG_WIN874, "CP874"},
130		{PG_WIN1250, "CP1250"},
131		{PG_WIN1251, "CP1251"},
132		{PG_WIN1251, "ansi-1251"},
133		{PG_WIN1252, "CP1252"},
134		{PG_WIN1253, "CP1253"},
135		{PG_WIN1254, "CP1254"},
136		{PG_WIN1255, "CP1255"},
137		{PG_WIN1256, "CP1256"},
138		{PG_WIN1257, "CP1257"},
139		{PG_WIN1258, "CP1258"},
140
141		{PG_ISO_8859_5, "ISO-8859-5"},
142		{PG_ISO_8859_5, "ISO8859-5"},
143		{PG_ISO_8859_5, "iso88595"},
144		{PG_ISO_8859_5, "CP28595"},
145
146		{PG_ISO_8859_6, "ISO-8859-6"},
147		{PG_ISO_8859_6, "ISO8859-6"},
148		{PG_ISO_8859_6, "iso88596"},
149		{PG_ISO_8859_6, "CP28596"},
150
151		{PG_ISO_8859_7, "ISO-8859-7"},
152		{PG_ISO_8859_7, "ISO8859-7"},
153		{PG_ISO_8859_7, "iso88597"},
154		{PG_ISO_8859_7, "CP28597"},
155
156		{PG_ISO_8859_8, "ISO-8859-8"},
157		{PG_ISO_8859_8, "ISO8859-8"},
158		{PG_ISO_8859_8, "iso88598"},
159		{PG_ISO_8859_8, "CP28598"},
160
161		{PG_SJIS, "SJIS"},
162		{PG_SJIS, "PCK"},
163		{PG_SJIS, "CP932"},
164		{PG_SJIS, "SHIFT_JIS"},
165
166		{PG_BIG5, "BIG5"},
167		{PG_BIG5, "BIG5HKSCS"},
168		{PG_BIG5, "Big5-HKSCS"},
169		{PG_BIG5, "CP950"},
170
171		{PG_GBK, "GBK"},
172		{PG_GBK, "CP936"},
173
174		{PG_UHC, "UHC"},
175		{PG_UHC, "CP949"},
176
177		{PG_JOHAB, "JOHAB"},
178		{PG_JOHAB, "CP1361"},
179
180		{PG_GB18030, "GB18030"},
181		{PG_GB18030, "CP54936"},
182
183		{PG_SHIFT_JIS_2004, "SJIS_2004"},
184
185		{PG_SQL_ASCII, "US-ASCII"},
186
187		{PG_SQL_ASCII, NULL} /* end marker */
188		};
189
190		#ifdef WIN32
191		/*
192		* On Windows, use CP<code page number> instead of CODESET.
193		*
194		* This routine uses GetLocaleInfoEx() to parse short locale names like
195		* "de-DE", "fr-FR", etc. If those cannot be parsed correctly process falls
196		* back to the pre-VS-2010 manual parsing done with using
197		* <Language>_<Country>.<CodePage> as a base.
198		*
199		* Returns a malloc()'d string for the caller to free.
200		*/
201		static char *
202		win32_get_codeset(const char *ctype)
203		{
204		char *r = NULL;
205		char *codepage;
206		uint32 cp;
207		WCHAR wctype[LOCALE_NAME_MAX_LENGTH];
208
209		memset(wctype, 0, sizeof(wctype));
210		MultiByteToWideChar(CP_ACP, 0, ctype, -1, wctype, LOCALE_NAME_MAX_LENGTH);
211
212		if (GetLocaleInfoEx(wctype,
213		LOCALE_IDEFAULTANSICODEPAGE \| LOCALE_RETURN_NUMBER,
214		(LPWSTR) &cp, sizeof(cp) / sizeof(WCHAR)) > 0)
215		{
216		r = malloc(16); /* excess */
217		if (r != NULL)
218		{
219		/*
220		* If the return value is CP_ACP that means no ANSI code page is
221		* available, so only Unicode can be used for the locale.
222		*/
223		if (cp == CP_ACP)
224		strcpy(r, "utf8");
225		else
226		sprintf(r, "CP%u", cp);
227		}
228		}
229		else
230		{
231		/*
232		* Locale format on Win32 is <Language>_<Country>.<CodePage>. For
233		* example, English_United States.1252. If we see digits after the
234		* last dot, assume it's a codepage number. Otherwise, we might be
235		* dealing with a Unix-style locale string; Windows' setlocale() will
236		* take those even though GetLocaleInfoEx() won't, so we end up here.
237		* In that case, just return what's after the last dot and hope we can
238		* find it in our table.
239		*/
240		codepage = strrchr(ctype, '.');
241		if (codepage != NULL)
242		{
243		size_t ln;
244
245		codepage++;
246		ln = strlen(codepage);
247		r = malloc(ln + 3);
248		if (r != NULL)
249		{
250		if (strspn(codepage, "0123456789") == ln)
251		sprintf(r, "CP%s", codepage);
252		else
253		strcpy(r, codepage);
254		}
255		}
256		}
257
258		return r;
259		}
260
261		#ifndef FRONTEND
262		/*
263		* Given a Windows code page identifier, find the corresponding PostgreSQL
264		* encoding. Issue a warning and return -1 if none found.
265		*/
266		int
267		pg_codepage_to_encoding(UINT cp)
268		{
269		char sys[16];
270		int i;
271
272		sprintf(sys, "CP%u", cp);
273
274		/* Check the table */
275		for (i = 0; encoding_match_list[i].system_enc_name; i++)
276		if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
277		return encoding_match_list[i].pg_enc_code;
278
279		ereport(WARNING,
280		(errmsg("could not determine encoding for codeset \"%s\"", sys)));
281
282		return -1;
283		}
284		#endif
285		#endif /* WIN32 */
286
287		/*
288		* Given a setting for LC_CTYPE, return the Postgres ID of the associated
289		* encoding, if we can determine it. Return -1 if we can't determine it.
290		*
291		* Pass in NULL to get the encoding for the current locale setting.
292		* Pass "" to get the encoding selected by the server's environment.
293		*
294		* If the result is PG_SQL_ASCII, callers should treat it as being compatible
295		* with any desired encoding.
296		*
297		* If running in the backend and write_message is false, this function must
298		* cope with the possibility that elog() and palloc() are not yet usable.
299		*/
300		int
301		pg_get_encoding_from_locale(const char *ctype, bool write_message)
302	0	{
303	0	char *sys;
304	0	int i;
305
306	0	#ifndef WIN32
307	0	locale_t loc;
308	0	#endif
309
310		/* Get the CODESET property, and also LC_CTYPE if not passed in */
311	0	if (!ctype)
312	0	ctype = setlocale(LC_CTYPE, NULL);
313
314
315		/* If locale is C or POSIX, we can allow all encodings */
316	0	if (pg_strcasecmp(ctype, "C") == 0 \|\|
317	0	pg_strcasecmp(ctype, "POSIX") == 0)
318	0	return PG_SQL_ASCII;
319
320
321	0	#ifndef WIN32
322	0	loc = newlocale(LC_CTYPE_MASK, ctype, (locale_t) 0);
323	0	if (loc == (locale_t) 0)
324	0	return -1; /* bogus ctype passed in? */
325
326	0	sys = nl_langinfo_l(CODESET, loc);
327	0	if (sys)
328	0	sys = strdup(sys);
329
330	0	freelocale(loc);
331		#else
332		sys = win32_get_codeset(ctype);
333		#endif
334
335	0	if (!sys)
336	0	return -1; /* out of memory; unlikely */
337
338		/* Check the table */
339	0	for (i = 0; encoding_match_list[i].system_enc_name; i++)
340	0	{
341	0	if (pg_strcasecmp(sys, encoding_match_list[i].system_enc_name) == 0)
342	0	{
343	0	free(sys);
344	0	return encoding_match_list[i].pg_enc_code;
345	0	}
346	0	}
347
348		/* Special-case kluges for particular platforms go here */
349
350		#ifdef __darwin__
351
352		/*
353		* Current macOS has many locales that report an empty string for CODESET,
354		* but they all seem to actually use UTF-8.
355		*/
356		if (strlen(sys) == 0)
357		{
358		free(sys);
359		return PG_UTF8;
360		}
361		#endif
362
363		/*
364		* We print a warning if we got a CODESET string but couldn't recognize
365		* it. This means we need another entry in the table.
366		*/
367	0	if (write_message)
368	0	{
369		#ifdef FRONTEND
370		fprintf(stderr, _("could not determine encoding for locale \"%s\": codeset is \"%s\""),
371		ctype, sys);
372		/* keep newline separate so there's only one translatable string */
373		fputc('\n', stderr);
374		#else
375	0	ereport(WARNING,
376	0	(errmsg("could not determine encoding for locale \"%s\": codeset is \"%s\"",
377	0	ctype, sys)));
378	0	#endif
379	0	}
380
381	0	free(sys);
382	0	return -1;
383	0	}