Coverage Report

Created: 2025-11-16 06:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/glib/glib/gcharset.c
Line
Count
Source
1
/* gcharset.c - Charset information
2
 *
3
 * Copyright (C) 2011 Red Hat, Inc.
4
 *
5
 * SPDX-License-Identifier: LGPL-2.1-or-later
6
 *
7
 * This library is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * This library is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
19
 */
20
21
#include "config.h"
22
23
#include "gcharset.h"
24
#include "gcharsetprivate.h"
25
26
#include "garray.h"
27
#include "genviron.h"
28
#include "ghash.h"
29
#include "glib-private.h"
30
#include "gmessages.h"
31
#include "gstrfuncs.h"
32
#include "gthread.h"
33
#include "gthreadprivate.h"
34
#ifdef G_OS_WIN32
35
#include "gwin32.h"
36
#endif
37
38
#include "libcharset/libcharset.h"
39
40
#include <string.h>
41
#include <stdio.h>
42
43
#if (HAVE_LANGINFO_TIME_CODESET || HAVE_LANGINFO_CODESET)
44
#include <langinfo.h>
45
#endif
46
47
#include <locale.h>
48
#ifdef G_OS_WIN32
49
#define WIN32_LEAN_AND_MEAN
50
#include <windows.h>
51
#endif
52
53
/* Lock taken in this file around the lazy construction of the alias table
 * in get_alias_hash() and around the calls to _g_locale_charset_unalias()
 * and _g_locale_charset_raw(). */
G_LOCK_DEFINE_STATIC (aliases);
54
55
static GHashTable *
56
get_alias_hash (void)
57
0
{
58
0
  static GHashTable *alias_hash = NULL;
59
0
  const char *aliases;
60
61
0
  G_LOCK (aliases);
62
63
0
  if (!alias_hash)
64
0
    {
65
0
      alias_hash = g_hash_table_new (g_str_hash, g_str_equal);
66
67
0
      aliases = _g_locale_get_charset_aliases ();
68
0
      while (*aliases != '\0')
69
0
        {
70
0
          const char *canonical;
71
0
          const char *alias;
72
0
          const char **alias_array;
73
0
          int count = 0;
74
75
0
          alias = aliases;
76
0
          aliases += strlen (aliases) + 1;
77
0
          canonical = aliases;
78
0
          aliases += strlen (aliases) + 1;
79
80
0
          alias_array = g_hash_table_lookup (alias_hash, canonical);
81
0
          if (alias_array)
82
0
            {
83
0
              while (alias_array[count])
84
0
                count++;
85
0
            }
86
87
0
          alias_array = g_renew (const char *, alias_array, count + 2);
88
0
          alias_array[count] = alias;
89
0
          alias_array[count + 1] = NULL;
90
91
0
          g_hash_table_insert (alias_hash, (char *)canonical, alias_array);
92
0
        }
93
0
    }
94
95
0
  G_UNLOCK (aliases);
96
97
0
  return alias_hash;
98
0
}
99
100
/* As an abuse of the alias table, the following routine gets
 * the charsets that are aliases for the canonical name.
 *
 * The return value is whatever the alias table stores under
 * @canonical_name: a NULL-terminated array of alias strings.
 */
const char **
_g_charset_get_aliases (const char *canonical_name)
{
  return g_hash_table_lookup (get_alias_hash (), canonical_name);
}
110
111
static gboolean
112
g_utf8_get_charset_internal (const char  *raw_data,
113
                             const char **a)
114
4
{
115
  /* Allow CHARSET to override the charset of any locale category. Users should
116
   * probably never be setting this — instead, just add the charset after a `.`
117
   * in `LANGUAGE`/`LC_ALL`/`LC_*`/`LANG`. I can’t find any reference (in
118
   * `git log`, code comments, or man pages) to this environment variable being
119
   * standardised or documented or even used anywhere outside GLib. Perhaps it
120
   * should eventually be removed. */
121
4
  const char *charset = g_getenv ("CHARSET");
122
123
4
  if (charset && *charset)
124
0
    {
125
0
      *a = charset;
126
127
0
      if (charset && strstr (charset, "UTF-8"))
128
0
        return TRUE;
129
0
      else
130
0
        return FALSE;
131
0
    }
132
133
  /* The libcharset code tries to be thread-safe without
134
   * a lock, but has a memory leak and a missing memory
135
   * barrier, so we lock for it
136
   */
137
4
  G_LOCK (aliases);
138
4
  charset = _g_locale_charset_unalias (raw_data);
139
4
  G_UNLOCK (aliases);
140
141
4
  if (charset && *charset)
142
4
    {
143
4
      *a = charset;
144
145
4
      if (charset && strstr (charset, "UTF-8"))
146
0
        return TRUE;
147
4
      else
148
4
        return FALSE;
149
4
    }
150
151
  /* Assume this for compatibility at present.  */
152
0
  *a = "US-ASCII";
153
154
0
  return FALSE;
155
4
}
156
157
/* Cache entry kept in a GPrivate slot by the charset getters in this file. */
typedef struct _GCharsetCache GCharsetCache;
158
159
struct _GCharsetCache {
160
  gboolean is_utf8; /* result of g_utf8_get_charset_internal() for @raw */
161
  gchar *raw; /* copy of the raw charset string the entry was built from; compared with strcmp() to detect changes */
162
  gchar *charset; /* copy of the resolved charset name handed out to callers */
163
};
164
165
static void
166
charset_cache_free (gpointer data)
167
0
{
168
0
  GCharsetCache *cache = data;
169
0
  g_free (cache->raw);
170
0
  g_free (cache->charset);
171
0
  g_free (cache);
172
0
}
173
174
/**
175
 * g_get_charset:
176
 * @charset: (out) (optional) (transfer none): return location for character set
177
 *   name, or %NULL.
178
 *
179
 * Obtains the character set for the [current locale](running.html#locale);
180
 * you might use this character set as an argument to g_convert(), to convert
181
 * from the current locale's encoding to some other encoding. (Frequently
182
 * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.)
183
 *
184
 * On Windows the character set returned by this function is the
185
 * so-called system default ANSI code-page. That is the character set
186
 * used by the "narrow" versions of C library and Win32 functions that
187
 * handle file names. It might be different from the character set
188
 * used by the C library's current locale.
189
 *
190
 * On Linux, the character set is found by consulting nl_langinfo() if
191
 * available. If not, the environment variables `LC_ALL`, `LC_CTYPE`, `LANG`
192
 * and `CHARSET` are queried in order. nl_langinfo() returns the C locale if
193
 * no locale has been loaded by setlocale().
194
 *
195
 * The return value is %TRUE if the locale's encoding is UTF-8, in that
196
 * case you can perhaps avoid calling g_convert().
197
 *
198
 * The string returned in @charset is not allocated, and should not be
199
 * freed.
200
 *
201
 * Returns: %TRUE if the returned charset is UTF-8
202
 */
203
gboolean
204
g_get_charset (const char **charset)
205
53.5k
{
206
53.5k
  static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
207
53.5k
  GCharsetCache *cache = g_private_get (&cache_private);
208
53.5k
  const gchar *raw;
209
210
53.5k
  if (!cache)
211
4
    cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));
212
213
53.5k
  G_LOCK (aliases);
214
53.5k
  raw = _g_locale_charset_raw ();
215
53.5k
  G_UNLOCK (aliases);
216
217
53.5k
  if (cache->raw == NULL || strcmp (cache->raw, raw) != 0)
218
4
    {
219
4
      const gchar *new_charset;
220
221
4
      g_free (cache->raw);
222
4
      g_free (cache->charset);
223
4
      cache->raw = g_strdup (raw);
224
4
      cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
225
4
      cache->charset = g_strdup (new_charset);
226
4
    }
227
228
53.5k
  if (charset)
229
53.5k
    *charset = cache->charset;
230
231
53.5k
  return cache->is_utf8;
232
53.5k
}
233
234
/*
235
 * Do the same as g_get_charset() but it temporarily set locale (LC_ALL to
236
 * LC_TIME) to correctly check for charset about time conversion relatives.
237
 *
238
 * Returns: %TRUE if the returned charset is UTF-8
239
 */
240
gboolean
241
_g_get_time_charset (const char **charset)
242
0
{
243
0
  static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
244
0
  GCharsetCache *cache = g_private_get (&cache_private);
245
0
  const gchar *raw;
246
247
0
  if (!cache)
248
0
    cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));
249
250
0
#ifdef HAVE_LANGINFO_TIME_CODESET
251
0
  raw = nl_langinfo (_NL_TIME_CODESET);
252
#else
253
  G_LOCK (aliases);
254
  raw = _g_locale_charset_raw ();
255
  G_UNLOCK (aliases);
256
#endif
257
258
0
  if (cache->raw == NULL || strcmp (cache->raw, raw) != 0)
259
0
    {
260
0
      const gchar *new_charset;
261
262
0
      g_free (cache->raw);
263
0
      g_free (cache->charset);
264
0
      cache->raw = g_strdup (raw);
265
0
      cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
266
0
      cache->charset = g_strdup (new_charset);
267
0
    }
268
269
0
  if (charset)
270
0
    *charset = cache->charset;
271
272
0
  return cache->is_utf8;
273
0
}
274
/*
275
 * Do the same as g_get_charset() but it temporarily set locale (LC_ALL to
276
 * LC_CTYPE) to correctly check for charset about CTYPE conversion relatives.
277
 *
278
 * Returns: %TRUE if the returned charset is UTF-8
279
 */
280
gboolean
281
_g_get_ctype_charset (const char **charset)
282
0
{
283
0
  static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
284
0
  GCharsetCache *cache = g_private_get (&cache_private);
285
0
  const gchar *raw;
286
287
0
  if (!cache)
288
0
    cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));
289
290
0
#ifdef HAVE_LANGINFO_CODESET
291
0
  raw = nl_langinfo (CODESET);
292
#else
293
  G_LOCK (aliases);
294
  raw = _g_locale_charset_raw ();
295
  G_UNLOCK (aliases);
296
#endif
297
298
0
  if (cache->raw == NULL || strcmp (cache->raw, raw) != 0)
299
0
    {
300
0
      const gchar *new_charset;
301
302
0
      g_free (cache->raw);
303
0
      g_free (cache->charset);
304
0
      cache->raw = g_strdup (raw);
305
0
      cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
306
0
      cache->charset = g_strdup (new_charset);
307
0
    }
308
309
0
  if (charset)
310
0
    *charset = cache->charset;
311
312
0
  return cache->is_utf8;
313
0
}
314
315
/**
316
 * g_get_codeset:
317
 *
318
 * Gets the character set for the current locale.
319
 *
320
 * Returns: a newly allocated string containing the name
321
 *     of the character set. This string must be freed with g_free().
322
 */
323
gchar *
324
g_get_codeset (void)
325
0
{
326
0
  const gchar *charset;
327
328
0
  g_get_charset (&charset);
329
330
0
  return g_strdup (charset);
331
0
}
332
333
/**
334
 * g_get_console_charset:
335
 * @charset: (out) (optional) (transfer none): return location for character set
336
 *   name, or %NULL.
337
 *
338
 * Obtains the character set used by the console attached to the process,
339
 * which is suitable for printing output to the terminal.
340
 *
341
 * Usually this matches the result returned by g_get_charset(), but in
342
 * environments where the locale's character set does not match the encoding
343
 * of the console this function tries to guess a more suitable value instead.
344
 *
345
 * On Windows the character set returned by this function is the
346
 * output code page used by the console associated with the calling process.
347
 * If the codepage can't be determined (for example because there is no
348
 * console attached) UTF-8 is assumed.
349
 *
350
 * The return value is %TRUE if the locale's encoding is UTF-8, in that
351
 * case you can perhaps avoid calling g_convert().
352
 *
353
 * The string returned in @charset is not allocated, and should not be
354
 * freed.
355
 *
356
 * Returns: %TRUE if the returned charset is UTF-8
357
 *
358
 * Since: 2.62
359
 */
360
gboolean
361
g_get_console_charset (const char **charset)
362
4
{
363
#ifdef G_OS_WIN32
364
  static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free);
365
  GCharsetCache *cache = g_private_get (&cache_private);
366
  const gchar *locale;
367
  unsigned int cp;
368
  char buf[2 + 20 + 1]; /* "CP" + G_MAXUINT64 (to be safe) in decimal form (20 bytes) + "\0" */
369
  const gchar *raw = NULL;
370
371
  if (!cache)
372
    cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache));
373
374
  /* first try to query $LANG (works for Cygwin/MSYS/MSYS2 and others using mintty) */
375
  locale = g_getenv ("LANG");
376
  if (locale != NULL && locale[0] != '\0')
377
    {
378
      /* If the locale name contains an encoding after the dot, return it.  */
379
      const char *dot = strchr (locale, '.');
380
381
      if (dot != NULL)
382
        {
383
          const char *modifier;
384
385
          dot++;
386
          /* Look for the possible @... trailer and remove it, if any.  */
387
          modifier = strchr (dot, '@');
388
          if (modifier == NULL)
389
            raw = dot;
390
          else if ((gsize) (modifier - dot) < sizeof (buf))
391
            {
392
              memcpy (buf, dot, modifier - dot);
393
              buf[modifier - dot] = '\0';
394
              raw = buf;
395
            }
396
        }
397
    }
398
  /* next try querying console codepage using native win32 API */
399
  if (raw == NULL)
400
    {
401
      cp = GetConsoleOutputCP ();
402
      if (cp)
403
        {
404
          sprintf (buf, "CP%u", cp);
405
          raw = buf;
406
        }
407
      else if (GetLastError () != ERROR_INVALID_HANDLE)
408
        {
409
          gchar *emsg = g_win32_error_message (GetLastError ());
410
          g_warning ("Failed to determine console output code page: %s. "
411
                     "Falling back to UTF-8", emsg);
412
          g_free (emsg);
413
        }
414
    }
415
  /* fall-back to UTF-8 if the rest failed (it's a universal default) */
416
  if (raw == NULL)
417
    raw = "UTF-8";
418
419
  if (cache->raw == NULL || strcmp (cache->raw, raw) != 0)
420
    {
421
      const gchar *new_charset;
422
423
      g_free (cache->raw);
424
      g_free (cache->charset);
425
      cache->raw = g_strdup (raw);
426
      cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset);
427
      cache->charset = g_strdup (new_charset);
428
    }
429
430
  if (charset)
431
    *charset = cache->charset;
432
433
  return cache->is_utf8;
434
#else
435
  /* assume the locale settings match the console encoding on non-Windows OSs */
436
4
  return g_get_charset (charset);
437
4
#endif
438
4
}
439
440
#ifndef G_OS_WIN32
441
442
/* read an alias file for the locales */
443
static void
444
read_aliases (const gchar *file,
445
              GHashTable  *alias_table)
446
0
{
447
0
  FILE *fp;
448
0
  char buf[256];
449
450
0
  fp = fopen (file, "re");
451
0
  if (!fp)
452
0
    return;
453
0
  while (fgets (buf, 256, fp))
454
0
    {
455
0
      char *p, *q;
456
457
0
      g_strstrip (buf);
458
459
      /* Line is a comment */
460
0
      if ((buf[0] == '#') || (buf[0] == '\0'))
461
0
        continue;
462
463
      /* Reads first column */
464
0
      for (p = buf, q = NULL; *p; p++) {
465
0
        if ((*p == '\t') || (*p == ' ') || (*p == ':')) {
466
0
          *p = '\0';
467
0
          q = p+1;
468
0
          while ((*q == '\t') || (*q == ' ')) {
469
0
            q++;
470
0
          }
471
0
          break;
472
0
        }
473
0
      }
474
      /* The line only had one column */
475
0
      if (!q || *q == '\0')
476
0
        continue;
477
478
      /* Read second column */
479
0
      for (p = q; *p; p++) {
480
0
        if ((*p == '\t') || (*p == ' ')) {
481
0
          *p = '\0';
482
0
          break;
483
0
        }
484
0
      }
485
486
      /* Add to alias table if necessary */
487
0
      if (!g_hash_table_lookup (alias_table, buf)) {
488
0
        g_hash_table_insert (alias_table, g_strdup (buf), g_strdup (q));
489
0
      }
490
0
    }
491
0
  fclose (fp);
492
0
}
493
494
#endif
495
496
static char *
497
unalias_lang (char *lang)
498
0
{
499
0
#ifndef G_OS_WIN32
500
0
  static GHashTable *alias_table = NULL;
501
0
  char *p;
502
0
  int i;
503
504
0
  if (g_once_init_enter_pointer (&alias_table))
505
0
    {
506
0
      GHashTable *table = g_hash_table_new (g_str_hash, g_str_equal);
507
0
      read_aliases ("/usr/share/locale/locale.alias", table);
508
0
      g_once_init_leave_pointer (&alias_table, table);
509
0
    }
510
511
0
  i = 0;
512
0
  while ((p = g_hash_table_lookup (alias_table, lang)) && (strcmp (p, lang) != 0))
513
0
    {
514
0
      lang = p;
515
0
      if (i++ == 30)
516
0
        {
517
0
          static gboolean said_before = FALSE;
518
0
          if (!said_before)
519
0
            g_warning ("Too many alias levels for a locale, "
520
0
                       "may indicate a loop");
521
0
          said_before = TRUE;
522
0
          return lang;
523
0
        }
524
0
    }
525
0
#endif
526
0
  return lang;
527
0
}
528
529
/* Mask for components of locale spec. The ordering here is from
 * least significant to most significant.
 *
 * explode_locale() returns an OR of these bits for the components it found,
 * and append_locale_variants() counts down through the bit combinations.
 */
532
enum
533
{
534
  COMPONENT_CODESET =   1 << 0, /* ".codeset" part */
535
  COMPONENT_TERRITORY = 1 << 1, /* "_territory" part */
536
  COMPONENT_MODIFIER =  1 << 2 /* "@modifier" part */
537
};
538
539
/* Break an X/Open style locale specification into components
540
 * e.g. `en_GB` or `uz_UZ.utf8@cyrillic`
541
 */
542
static guint
543
explode_locale (const gchar *locale,
544
                gchar      **language,
545
                gchar      **territory,
546
                gchar      **codeset,
547
                gchar      **modifier)
548
0
{
549
0
  const gchar *uscore_pos;
550
0
  const gchar *at_pos;
551
0
  const gchar *dot_pos;
552
553
0
  guint mask = 0;
554
555
0
  uscore_pos = strchr (locale, '_');
556
0
  dot_pos = strchr (uscore_pos ? uscore_pos : locale, '.');
557
0
  at_pos = strchr (dot_pos ? dot_pos : (uscore_pos ? uscore_pos : locale), '@');
558
559
0
  if (at_pos)
560
0
    {
561
0
      mask |= COMPONENT_MODIFIER;
562
0
      *modifier = g_strdup (at_pos);
563
0
    }
564
0
  else
565
0
    at_pos = locale + strlen (locale);
566
567
0
  if (dot_pos && dot_pos < at_pos)
568
0
    {
569
0
      mask |= COMPONENT_CODESET;
570
0
      *codeset = g_strndup (dot_pos, at_pos - dot_pos);
571
0
    }
572
0
  else
573
0
    dot_pos = at_pos;
574
575
0
  if (uscore_pos && uscore_pos < dot_pos)
576
0
    {
577
0
      mask |= COMPONENT_TERRITORY;
578
0
      *territory = g_strndup (uscore_pos, dot_pos - uscore_pos);
579
0
    }
580
0
  else
581
0
    uscore_pos = dot_pos;
582
583
0
  g_assert (uscore_pos >= locale);
584
0
  *language = g_strndup (locale, uscore_pos - locale);
585
586
0
  return mask;
587
0
}
588
589
/*
590
 * Compute all interesting variants for a given locale name -
591
 * by stripping off different components of the value.
592
 *
593
 * For simplicity, we assume that the locale is in
594
 * X/Open format: language[_territory][.codeset][@modifier]
595
 *
596
 * TODO: Extend this to handle the CEN format (see the GNUlibc docs)
597
 *       as well. We could just copy the code from glibc wholesale
598
 *       but it is big, ugly, and complicated, so I'm reluctant
599
 *       to do so when this should handle 99% of the time...
600
 */
601
static void
602
append_locale_variants (GPtrArray *array,
603
                        const gchar *locale)
604
0
{
605
0
  gchar *language = NULL;
606
0
  gchar *territory = NULL;
607
0
  gchar *codeset = NULL;
608
0
  gchar *modifier = NULL;
609
610
0
  guint mask;
611
0
  guint i, j;
612
613
0
  g_return_if_fail (locale != NULL);
614
615
0
  mask = explode_locale (locale, &language, &territory, &codeset, &modifier);
616
617
  /* Iterate through all possible combinations, from least attractive
618
   * to most attractive.
619
   */
620
0
  for (j = 0; j <= mask; ++j)
621
0
    {
622
0
      i = mask - j;
623
624
0
      if ((i & ~mask) == 0)
625
0
        {
626
0
          gchar *val = g_strconcat (language,
627
0
                                    (i & COMPONENT_TERRITORY) ? territory : "",
628
0
                                    (i & COMPONENT_CODESET) ? codeset : "",
629
0
                                    (i & COMPONENT_MODIFIER) ? modifier : "",
630
0
                                    NULL);
631
0
          g_ptr_array_add (array, val);
632
0
        }
633
0
    }
634
635
0
  g_free (language);
636
0
  if (mask & COMPONENT_CODESET)
637
0
    g_free (codeset);
638
0
  if (mask & COMPONENT_TERRITORY)
639
0
    g_free (territory);
640
0
  if (mask & COMPONENT_MODIFIER)
641
0
    g_free (modifier);
642
0
}
643
644
/**
645
 * g_get_locale_variants:
646
 * @locale: a locale identifier
647
 *
648
 * Returns a list of derived variants of @locale, which can be used to
649
 * e.g. construct locale-dependent filenames or search paths. The returned
650
 * list is sorted from most desirable to least desirable.
651
 * This function handles territory, charset and extra locale modifiers. See
652
 * [`setlocale(3)`](man:setlocale) for information about locales and their format.
653
 *
654
 * @locale itself is guaranteed to be returned in the output.
655
 *
656
 * For example, if @locale is `fr_BE`, then the returned list
657
 * is `fr_BE`, `fr`. If @locale is `en_GB.UTF-8@euro`, then the returned list
658
 * is `en_GB.UTF-8@euro`, `en_GB.UTF-8`, `en_GB@euro`, `en_GB`, `en.UTF-8@euro`,
659
 * `en.UTF-8`, `en@euro`, `en`.
660
 *
661
 * If you need the list of variants for the current locale,
662
 * use g_get_language_names().
663
 *
664
 * Returns: (transfer full) (array zero-terminated=1) (element-type utf8): a newly
665
 *   allocated array of newly allocated strings with the locale variants. Free with
666
 *   g_strfreev().
667
 *
668
 * Since: 2.28
669
 */
670
gchar **
671
g_get_locale_variants (const gchar *locale)
672
0
{
673
0
  GPtrArray *array;
674
675
0
  g_return_val_if_fail (locale != NULL, NULL);
676
677
0
  array = g_ptr_array_sized_new (8);
678
0
  append_locale_variants (array, locale);
679
0
  g_ptr_array_add (array, NULL);
680
681
0
  return (gchar **) g_ptr_array_free (array, FALSE);
682
0
}
683
684
/* The following is (partly) taken from the gettext package.
685
   Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc.  */
686
687
static const gchar *
688
guess_category_value (const gchar *category_name)
689
0
{
690
0
  const gchar *retval;
691
692
  /* The highest priority value is the 'LANGUAGE' environment
693
     variable.  This is a GNU extension.  */
694
0
  retval = g_getenv ("LANGUAGE");
695
0
  if ((retval != NULL) && (retval[0] != '\0'))
696
0
    return retval;
697
698
  /* 'LANGUAGE' is not set.  So we have to proceed with the POSIX
699
     methods of looking to 'LC_ALL', 'LC_xxx', and 'LANG'.  On some
700
     systems this can be done by the 'setlocale' function itself.  */
701
702
  /* Setting of LC_ALL overwrites all other.  */
703
0
  retval = g_getenv ("LC_ALL");
704
0
  if ((retval != NULL) && (retval[0] != '\0'))
705
0
    return retval;
706
707
  /* Next comes the name of the desired category.  */
708
0
  retval = g_getenv (category_name);
709
0
  if ((retval != NULL) && (retval[0] != '\0'))
710
0
    return retval;
711
712
  /* Last possibility is the LANG environment variable.  */
713
0
  retval = g_getenv ("LANG");
714
0
  if ((retval != NULL) && (retval[0] != '\0'))
715
0
    return retval;
716
717
#ifdef G_PLATFORM_WIN32
718
  /* g_win32_getlocale() first checks for LC_ALL, LC_MESSAGES and
719
   * LANG, which we already did above. Oh well. The main point of
720
   * calling g_win32_getlocale() is to get the thread's locale as used
721
   * by Windows and the Microsoft C runtime (in the "English_United
722
   * States" format) translated into the Unixish format.
723
   */
724
  {
725
    char *locale = g_win32_getlocale ();
726
    retval = g_intern_string (locale);
727
    g_free (locale);
728
    return retval;
729
  }
730
#endif
731
732
0
  return NULL;
733
0
}
734
735
/* Cached language list for one locale category; stored as a value in the
 * hash table used by g_get_language_names_with_category(). */
typedef struct _GLanguageNamesCache GLanguageNamesCache;
736
737
struct _GLanguageNamesCache {
738
  gchar *languages; /* copy of the colon-separated value the list was built from; compared with strcmp() to detect changes */
739
  gchar **language_names; /* NULL-terminated list of locale names handed out to callers */
740
};
741
742
static void
743
language_names_cache_free (gpointer data)
744
0
{
745
0
  GLanguageNamesCache *cache = data;
746
0
  g_free (cache->languages);
747
0
  g_strfreev (cache->language_names);
748
0
  g_free (cache);
749
0
}
750
751
/**
752
 * g_get_language_names:
753
 *
754
 * Computes a list of applicable locale names, which can be used to
755
 * e.g. construct locale-dependent filenames or search paths. The returned
756
 * list is sorted from most desirable to least desirable and always contains
757
 * the default locale "C".
758
 *
759
 * For example, if LANGUAGE=de:en_US, then the returned list is
760
 * "de", "en_US", "en", "C".
761
 *
762
 * This function consults the environment variables `LANGUAGE`, `LC_ALL`,
763
 * `LC_MESSAGES` and `LANG` to find the list of locales specified by the
764
 * user.
765
 *
766
 * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by GLib
767
 *    that must not be modified or freed.
768
 *
769
 * Since: 2.6
770
 */
771
const gchar * const *
772
g_get_language_names (void)
773
0
{
774
0
  return g_get_language_names_with_category ("LC_MESSAGES");
775
0
}
776
777
/**
778
 * g_get_language_names_with_category:
779
 * @category_name: a locale category name
780
 *
781
 * Computes a list of applicable locale names with a locale category name,
782
 * which can be used to construct the fallback locale-dependent filenames
783
 * or search paths. The returned list is sorted from most desirable to
784
 * least desirable and always contains the default locale "C".
785
 *
786
 * This function consults the environment variables `LANGUAGE`, `LC_ALL`,
787
 * @category_name, and `LANG` to find the list of locales specified by the
788
 * user.
789
 *
790
 * g_get_language_names() returns g_get_language_names_with_category("LC_MESSAGES").
791
 *
792
 * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by
793
 *    the thread g_get_language_names_with_category was called from.
794
 *    It must not be modified or freed. It must be copied if planned to be used in another thread.
795
 *
796
 * Since: 2.58
797
 */
798
const gchar * const *
799
g_get_language_names_with_category (const gchar *category_name)
800
0
{
801
0
  static GPrivate cache_private = G_PRIVATE_INIT ((void (*)(gpointer)) g_hash_table_unref);
802
0
  GHashTable *cache = g_private_get (&cache_private);
803
0
  const gchar *languages;
804
0
  GLanguageNamesCache *name_cache;
805
806
0
  g_return_val_if_fail (category_name != NULL, NULL);
807
808
0
  if (!cache)
809
0
    {
810
0
      cache = g_hash_table_new_full (g_str_hash, g_str_equal,
811
0
                                     g_free, language_names_cache_free);
812
0
      g_private_set (&cache_private, cache);
813
0
      g_ignore_leak (cache);
814
0
    }
815
816
0
  languages = guess_category_value (category_name);
817
0
  if (!languages)
818
0
    languages = "C";
819
820
0
  name_cache = (GLanguageNamesCache *) g_hash_table_lookup (cache, category_name);
821
0
  if (!(name_cache && name_cache->languages &&
822
0
        strcmp (name_cache->languages, languages) == 0))
823
0
    {
824
0
      GPtrArray *array;
825
0
      gchar **alist, **a;
826
827
0
      g_hash_table_remove (cache, category_name);
828
829
0
      array = g_ptr_array_sized_new (8);
830
831
0
      alist = g_strsplit (languages, ":", 0);
832
0
      for (a = alist; *a; a++)
833
0
        append_locale_variants (array, unalias_lang (*a));
834
0
      g_strfreev (alist);
835
0
      g_ptr_array_add (array, g_strdup ("C"));
836
0
      g_ptr_array_add (array, NULL);
837
838
0
      name_cache = g_new0 (GLanguageNamesCache, 1);
839
0
      name_cache->languages = g_strdup (languages);
840
0
      name_cache->language_names = (gchar **) g_ptr_array_free (array, FALSE);
841
0
      g_hash_table_insert (cache, g_strdup (category_name), name_cache);
842
0
      g_ignore_leak (name_cache);
843
0
    }
844
845
0
  return (const gchar * const *) name_cache->language_names;
846
0
}