/src/glib/glib/gcharset.c
Line | Count | Source |
1 | | /* gcharset.c - Charset information |
2 | | * |
3 | | * Copyright (C) 2011 Red Hat, Inc. |
4 | | * |
5 | | * SPDX-License-Identifier: LGPL-2.1-or-later |
6 | | * |
7 | | * This library is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * This library is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "config.h" |
22 | | |
23 | | #include "gcharset.h" |
24 | | #include "gcharsetprivate.h" |
25 | | |
26 | | #include "garray.h" |
27 | | #include "genviron.h" |
28 | | #include "ghash.h" |
29 | | #include "glib-private.h" |
30 | | #include "gmessages.h" |
31 | | #include "gstrfuncs.h" |
32 | | #include "gthread.h" |
33 | | #include "gthreadprivate.h" |
34 | | #ifdef G_OS_WIN32 |
35 | | #include "gwin32.h" |
36 | | #endif |
37 | | |
38 | | #include "libcharset/libcharset.h" |
39 | | |
40 | | #include <string.h> |
41 | | #include <stdio.h> |
42 | | |
43 | | #if (HAVE_LANGINFO_TIME_CODESET || HAVE_LANGINFO_CODESET) |
44 | | #include <langinfo.h> |
45 | | #endif |
46 | | |
47 | | #include <locale.h> |
48 | | #ifdef G_OS_WIN32 |
49 | | #define WIN32_LEAN_AND_MEAN |
50 | | #include <windows.h> |
51 | | #endif |
52 | | |
53 | | G_LOCK_DEFINE_STATIC (aliases); |
54 | | |
55 | | static GHashTable * |
56 | | get_alias_hash (void) |
57 | 0 | { |
58 | 0 | static GHashTable *alias_hash = NULL; |
59 | 0 | const char *aliases; |
60 | |
|
61 | 0 | G_LOCK (aliases); |
62 | |
|
63 | 0 | if (!alias_hash) |
64 | 0 | { |
65 | 0 | alias_hash = g_hash_table_new (g_str_hash, g_str_equal); |
66 | |
|
67 | 0 | aliases = _g_locale_get_charset_aliases (); |
68 | 0 | while (*aliases != '\0') |
69 | 0 | { |
70 | 0 | const char *canonical; |
71 | 0 | const char *alias; |
72 | 0 | const char **alias_array; |
73 | 0 | int count = 0; |
74 | |
|
75 | 0 | alias = aliases; |
76 | 0 | aliases += strlen (aliases) + 1; |
77 | 0 | canonical = aliases; |
78 | 0 | aliases += strlen (aliases) + 1; |
79 | |
|
80 | 0 | alias_array = g_hash_table_lookup (alias_hash, canonical); |
81 | 0 | if (alias_array) |
82 | 0 | { |
83 | 0 | while (alias_array[count]) |
84 | 0 | count++; |
85 | 0 | } |
86 | |
|
87 | 0 | alias_array = g_renew (const char *, alias_array, count + 2); |
88 | 0 | alias_array[count] = alias; |
89 | 0 | alias_array[count + 1] = NULL; |
90 | |
|
91 | 0 | g_hash_table_insert (alias_hash, (char *)canonical, alias_array); |
92 | 0 | } |
93 | 0 | } |
94 | |
|
95 | 0 | G_UNLOCK (aliases); |
96 | |
|
97 | 0 | return alias_hash; |
98 | 0 | } |
99 | | |
/* As an abuse of the alias table, the following routine gets the charsets
 * that are aliases for the canonical name. It returns whatever the alias
 * table stores for @canonical_name.
 */
const char **
_g_charset_get_aliases (const char *canonical_name)
{
  return g_hash_table_lookup (get_alias_hash (), canonical_name);
}
110 | | |
111 | | static gboolean |
112 | | g_utf8_get_charset_internal (const char *raw_data, |
113 | | const char **a) |
114 | 4 | { |
115 | | /* Allow CHARSET to override the charset of any locale category. Users should |
116 | | * probably never be setting this — instead, just add the charset after a `.` |
117 | | * in `LANGUAGE`/`LC_ALL`/`LC_*`/`LANG`. I can’t find any reference (in |
118 | | * `git log`, code comments, or man pages) to this environment variable being |
119 | | * standardised or documented or even used anywhere outside GLib. Perhaps it |
120 | | * should eventually be removed. */ |
121 | 4 | const char *charset = g_getenv ("CHARSET"); |
122 | | |
123 | 4 | if (charset && *charset) |
124 | 0 | { |
125 | 0 | *a = charset; |
126 | |
|
127 | 0 | if (charset && strstr (charset, "UTF-8")) |
128 | 0 | return TRUE; |
129 | 0 | else |
130 | 0 | return FALSE; |
131 | 0 | } |
132 | | |
133 | | /* The libcharset code tries to be thread-safe without |
134 | | * a lock, but has a memory leak and a missing memory |
135 | | * barrier, so we lock for it |
136 | | */ |
137 | 4 | G_LOCK (aliases); |
138 | 4 | charset = _g_locale_charset_unalias (raw_data); |
139 | 4 | G_UNLOCK (aliases); |
140 | | |
141 | 4 | if (charset && *charset) |
142 | 4 | { |
143 | 4 | *a = charset; |
144 | | |
145 | 4 | if (charset && strstr (charset, "UTF-8")) |
146 | 0 | return TRUE; |
147 | 4 | else |
148 | 4 | return FALSE; |
149 | 4 | } |
150 | | |
151 | | /* Assume this for compatibility at present. */ |
152 | 0 | *a = "US-ASCII"; |
153 | |
|
154 | 0 | return FALSE; |
155 | 4 | } |
156 | | |
157 | | typedef struct _GCharsetCache GCharsetCache; |
158 | | |
159 | | struct _GCharsetCache { |
160 | | gboolean is_utf8; |
161 | | gchar *raw; |
162 | | gchar *charset; |
163 | | }; |
164 | | |
165 | | static void |
166 | | charset_cache_free (gpointer data) |
167 | 0 | { |
168 | 0 | GCharsetCache *cache = data; |
169 | 0 | g_free (cache->raw); |
170 | 0 | g_free (cache->charset); |
171 | 0 | g_free (cache); |
172 | 0 | } |
173 | | |
174 | | /** |
175 | | * g_get_charset: |
176 | | * @charset: (out) (optional) (transfer none): return location for character set |
177 | | * name, or %NULL. |
178 | | * |
179 | | * Obtains the character set for the [current locale](running.html#locale); |
180 | | * you might use this character set as an argument to g_convert(), to convert |
181 | | * from the current locale's encoding to some other encoding. (Frequently |
182 | | * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.) |
183 | | * |
184 | | * On Windows the character set returned by this function is the |
185 | | * so-called system default ANSI code-page. That is the character set |
186 | | * used by the "narrow" versions of C library and Win32 functions that |
187 | | * handle file names. It might be different from the character set |
188 | | * used by the C library's current locale. |
189 | | * |
190 | | * On Linux, the character set is found by consulting nl_langinfo() if |
191 | | * available. If not, the environment variables `LC_ALL`, `LC_CTYPE`, `LANG` |
192 | | * and `CHARSET` are queried in order. nl_langinfo() returns the C locale if |
193 | | * no locale has been loaded by setlocale(). |
194 | | * |
195 | | * The return value is %TRUE if the locale's encoding is UTF-8, in that |
196 | | * case you can perhaps avoid calling g_convert(). |
197 | | * |
198 | | * The string returned in @charset is not allocated, and should not be |
199 | | * freed. |
200 | | * |
201 | | * Returns: %TRUE if the returned charset is UTF-8 |
202 | | */ |
203 | | gboolean |
204 | | g_get_charset (const char **charset) |
205 | 53.5k | { |
206 | 53.5k | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
207 | 53.5k | GCharsetCache *cache = g_private_get (&cache_private); |
208 | 53.5k | const gchar *raw; |
209 | | |
210 | 53.5k | if (!cache) |
211 | 4 | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
212 | | |
213 | 53.5k | G_LOCK (aliases); |
214 | 53.5k | raw = _g_locale_charset_raw (); |
215 | 53.5k | G_UNLOCK (aliases); |
216 | | |
217 | 53.5k | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
218 | 4 | { |
219 | 4 | const gchar *new_charset; |
220 | | |
221 | 4 | g_free (cache->raw); |
222 | 4 | g_free (cache->charset); |
223 | 4 | cache->raw = g_strdup (raw); |
224 | 4 | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
225 | 4 | cache->charset = g_strdup (new_charset); |
226 | 4 | } |
227 | | |
228 | 53.5k | if (charset) |
229 | 53.5k | *charset = cache->charset; |
230 | | |
231 | 53.5k | return cache->is_utf8; |
232 | 53.5k | } |
233 | | |
234 | | /* |
235 | | * Do the same as g_get_charset() but it temporarily set locale (LC_ALL to |
236 | | * LC_TIME) to correctly check for charset about time conversion relatives. |
237 | | * |
238 | | * Returns: %TRUE if the returned charset is UTF-8 |
239 | | */ |
240 | | gboolean |
241 | | _g_get_time_charset (const char **charset) |
242 | 0 | { |
243 | 0 | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
244 | 0 | GCharsetCache *cache = g_private_get (&cache_private); |
245 | 0 | const gchar *raw; |
246 | |
|
247 | 0 | if (!cache) |
248 | 0 | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
249 | |
|
250 | 0 | #ifdef HAVE_LANGINFO_TIME_CODESET |
251 | 0 | raw = nl_langinfo (_NL_TIME_CODESET); |
252 | | #else |
253 | | G_LOCK (aliases); |
254 | | raw = _g_locale_charset_raw (); |
255 | | G_UNLOCK (aliases); |
256 | | #endif |
257 | |
|
258 | 0 | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
259 | 0 | { |
260 | 0 | const gchar *new_charset; |
261 | |
|
262 | 0 | g_free (cache->raw); |
263 | 0 | g_free (cache->charset); |
264 | 0 | cache->raw = g_strdup (raw); |
265 | 0 | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
266 | 0 | cache->charset = g_strdup (new_charset); |
267 | 0 | } |
268 | |
|
269 | 0 | if (charset) |
270 | 0 | *charset = cache->charset; |
271 | |
|
272 | 0 | return cache->is_utf8; |
273 | 0 | } |
274 | | /* |
275 | | * Do the same as g_get_charset() but it temporarily set locale (LC_ALL to |
276 | | * LC_CTYPE) to correctly check for charset about CTYPE conversion relatives. |
277 | | * |
278 | | * Returns: %TRUE if the returned charset is UTF-8 |
279 | | */ |
280 | | gboolean |
281 | | _g_get_ctype_charset (const char **charset) |
282 | 0 | { |
283 | 0 | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
284 | 0 | GCharsetCache *cache = g_private_get (&cache_private); |
285 | 0 | const gchar *raw; |
286 | |
|
287 | 0 | if (!cache) |
288 | 0 | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
289 | |
|
290 | 0 | #ifdef HAVE_LANGINFO_CODESET |
291 | 0 | raw = nl_langinfo (CODESET); |
292 | | #else |
293 | | G_LOCK (aliases); |
294 | | raw = _g_locale_charset_raw (); |
295 | | G_UNLOCK (aliases); |
296 | | #endif |
297 | |
|
298 | 0 | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
299 | 0 | { |
300 | 0 | const gchar *new_charset; |
301 | |
|
302 | 0 | g_free (cache->raw); |
303 | 0 | g_free (cache->charset); |
304 | 0 | cache->raw = g_strdup (raw); |
305 | 0 | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
306 | 0 | cache->charset = g_strdup (new_charset); |
307 | 0 | } |
308 | |
|
309 | 0 | if (charset) |
310 | 0 | *charset = cache->charset; |
311 | |
|
312 | 0 | return cache->is_utf8; |
313 | 0 | } |
314 | | |
315 | | /** |
316 | | * g_get_codeset: |
317 | | * |
318 | | * Gets the character set for the current locale. |
319 | | * |
320 | | * Returns: a newly allocated string containing the name |
321 | | * of the character set. This string must be freed with g_free(). |
322 | | */ |
323 | | gchar * |
324 | | g_get_codeset (void) |
325 | 0 | { |
326 | 0 | const gchar *charset; |
327 | |
|
328 | 0 | g_get_charset (&charset); |
329 | |
|
330 | 0 | return g_strdup (charset); |
331 | 0 | } |
332 | | |
333 | | /** |
334 | | * g_get_console_charset: |
335 | | * @charset: (out) (optional) (transfer none): return location for character set |
336 | | * name, or %NULL. |
337 | | * |
338 | | * Obtains the character set used by the console attached to the process, |
339 | | * which is suitable for printing output to the terminal. |
340 | | * |
341 | | * Usually this matches the result returned by g_get_charset(), but in |
342 | | * environments where the locale's character set does not match the encoding |
343 | | * of the console this function tries to guess a more suitable value instead. |
344 | | * |
345 | | * On Windows the character set returned by this function is the |
346 | | * output code page used by the console associated with the calling process. |
347 | | * If the codepage can't be determined (for example because there is no |
348 | | * console attached) UTF-8 is assumed. |
349 | | * |
350 | | * The return value is %TRUE if the locale's encoding is UTF-8, in that |
351 | | * case you can perhaps avoid calling g_convert(). |
352 | | * |
353 | | * The string returned in @charset is not allocated, and should not be |
354 | | * freed. |
355 | | * |
356 | | * Returns: %TRUE if the returned charset is UTF-8 |
357 | | * |
358 | | * Since: 2.62 |
359 | | */ |
360 | | gboolean |
361 | | g_get_console_charset (const char **charset) |
362 | 4 | { |
363 | | #ifdef G_OS_WIN32 |
364 | | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
365 | | GCharsetCache *cache = g_private_get (&cache_private); |
366 | | const gchar *locale; |
367 | | unsigned int cp; |
368 | | char buf[2 + 20 + 1]; /* "CP" + G_MAXUINT64 (to be safe) in decimal form (20 bytes) + "\0" */ |
369 | | const gchar *raw = NULL; |
370 | | |
371 | | if (!cache) |
372 | | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
373 | | |
374 | | /* first try to query $LANG (works for Cygwin/MSYS/MSYS2 and others using mintty) */ |
375 | | locale = g_getenv ("LANG"); |
376 | | if (locale != NULL && locale[0] != '\0') |
377 | | { |
378 | | /* If the locale name contains an encoding after the dot, return it. */ |
379 | | const char *dot = strchr (locale, '.'); |
380 | | |
381 | | if (dot != NULL) |
382 | | { |
383 | | const char *modifier; |
384 | | |
385 | | dot++; |
386 | | /* Look for the possible @... trailer and remove it, if any. */ |
387 | | modifier = strchr (dot, '@'); |
388 | | if (modifier == NULL) |
389 | | raw = dot; |
390 | | else if ((gsize) (modifier - dot) < sizeof (buf)) |
391 | | { |
392 | | memcpy (buf, dot, modifier - dot); |
393 | | buf[modifier - dot] = '\0'; |
394 | | raw = buf; |
395 | | } |
396 | | } |
397 | | } |
398 | | /* next try querying console codepage using native win32 API */ |
399 | | if (raw == NULL) |
400 | | { |
401 | | cp = GetConsoleOutputCP (); |
402 | | if (cp) |
403 | | { |
404 | | sprintf (buf, "CP%u", cp); |
405 | | raw = buf; |
406 | | } |
407 | | else if (GetLastError () != ERROR_INVALID_HANDLE) |
408 | | { |
409 | | gchar *emsg = g_win32_error_message (GetLastError ()); |
410 | | g_warning ("Failed to determine console output code page: %s. " |
411 | | "Falling back to UTF-8", emsg); |
412 | | g_free (emsg); |
413 | | } |
414 | | } |
415 | | /* fall-back to UTF-8 if the rest failed (it's a universal default) */ |
416 | | if (raw == NULL) |
417 | | raw = "UTF-8"; |
418 | | |
419 | | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
420 | | { |
421 | | const gchar *new_charset; |
422 | | |
423 | | g_free (cache->raw); |
424 | | g_free (cache->charset); |
425 | | cache->raw = g_strdup (raw); |
426 | | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
427 | | cache->charset = g_strdup (new_charset); |
428 | | } |
429 | | |
430 | | if (charset) |
431 | | *charset = cache->charset; |
432 | | |
433 | | return cache->is_utf8; |
434 | | #else |
435 | | /* assume the locale settings match the console encoding on non-Windows OSs */ |
436 | 4 | return g_get_charset (charset); |
437 | 4 | #endif |
438 | 4 | } |
439 | | |
440 | | #ifndef G_OS_WIN32 |
441 | | |
442 | | /* read an alias file for the locales */ |
443 | | static void |
444 | | read_aliases (const gchar *file, |
445 | | GHashTable *alias_table) |
446 | 0 | { |
447 | 0 | FILE *fp; |
448 | 0 | char buf[256]; |
449 | |
|
450 | 0 | fp = fopen (file, "re"); |
451 | 0 | if (!fp) |
452 | 0 | return; |
453 | 0 | while (fgets (buf, 256, fp)) |
454 | 0 | { |
455 | 0 | char *p, *q; |
456 | |
|
457 | 0 | g_strstrip (buf); |
458 | | |
459 | | /* Line is a comment */ |
460 | 0 | if ((buf[0] == '#') || (buf[0] == '\0')) |
461 | 0 | continue; |
462 | | |
463 | | /* Reads first column */ |
464 | 0 | for (p = buf, q = NULL; *p; p++) { |
465 | 0 | if ((*p == '\t') || (*p == ' ') || (*p == ':')) { |
466 | 0 | *p = '\0'; |
467 | 0 | q = p+1; |
468 | 0 | while ((*q == '\t') || (*q == ' ')) { |
469 | 0 | q++; |
470 | 0 | } |
471 | 0 | break; |
472 | 0 | } |
473 | 0 | } |
474 | | /* The line only had one column */ |
475 | 0 | if (!q || *q == '\0') |
476 | 0 | continue; |
477 | | |
478 | | /* Read second column */ |
479 | 0 | for (p = q; *p; p++) { |
480 | 0 | if ((*p == '\t') || (*p == ' ')) { |
481 | 0 | *p = '\0'; |
482 | 0 | break; |
483 | 0 | } |
484 | 0 | } |
485 | | |
486 | | /* Add to alias table if necessary */ |
487 | 0 | if (!g_hash_table_lookup (alias_table, buf)) { |
488 | 0 | g_hash_table_insert (alias_table, g_strdup (buf), g_strdup (q)); |
489 | 0 | } |
490 | 0 | } |
491 | 0 | fclose (fp); |
492 | 0 | } |
493 | | |
494 | | #endif |
495 | | |
496 | | static char * |
497 | | unalias_lang (char *lang) |
498 | 0 | { |
499 | 0 | #ifndef G_OS_WIN32 |
500 | 0 | static GHashTable *alias_table = NULL; |
501 | 0 | char *p; |
502 | 0 | int i; |
503 | |
|
504 | 0 | if (g_once_init_enter_pointer (&alias_table)) |
505 | 0 | { |
506 | 0 | GHashTable *table = g_hash_table_new (g_str_hash, g_str_equal); |
507 | 0 | read_aliases ("/usr/share/locale/locale.alias", table); |
508 | 0 | g_once_init_leave_pointer (&alias_table, table); |
509 | 0 | } |
510 | |
|
511 | 0 | i = 0; |
512 | 0 | while ((p = g_hash_table_lookup (alias_table, lang)) && (strcmp (p, lang) != 0)) |
513 | 0 | { |
514 | 0 | lang = p; |
515 | 0 | if (i++ == 30) |
516 | 0 | { |
517 | 0 | static gboolean said_before = FALSE; |
518 | 0 | if (!said_before) |
519 | 0 | g_warning ("Too many alias levels for a locale, " |
520 | 0 | "may indicate a loop"); |
521 | 0 | said_before = TRUE; |
522 | 0 | return lang; |
523 | 0 | } |
524 | 0 | } |
525 | 0 | #endif |
526 | 0 | return lang; |
527 | 0 | } |
528 | | |
/* Bit flags naming the optional components of a locale specification.
 * They are listed from least significant to most significant, so a larger
 * combined mask value means a more significant set of components.
 */
enum
{
  COMPONENT_CODESET   = 1 << 0,  /* the ".codeset" part */
  COMPONENT_TERRITORY = 1 << 1,  /* the "_territory" part */
  COMPONENT_MODIFIER  = 1 << 2   /* the "@modifier" part */
};
538 | | |
539 | | /* Break an X/Open style locale specification into components |
540 | | * e.g. `en_GB` or `uz_UZ.utf8@cyrillic` |
541 | | */ |
542 | | static guint |
543 | | explode_locale (const gchar *locale, |
544 | | gchar **language, |
545 | | gchar **territory, |
546 | | gchar **codeset, |
547 | | gchar **modifier) |
548 | 0 | { |
549 | 0 | const gchar *uscore_pos; |
550 | 0 | const gchar *at_pos; |
551 | 0 | const gchar *dot_pos; |
552 | |
|
553 | 0 | guint mask = 0; |
554 | |
|
555 | 0 | uscore_pos = strchr (locale, '_'); |
556 | 0 | dot_pos = strchr (uscore_pos ? uscore_pos : locale, '.'); |
557 | 0 | at_pos = strchr (dot_pos ? dot_pos : (uscore_pos ? uscore_pos : locale), '@'); |
558 | |
|
559 | 0 | if (at_pos) |
560 | 0 | { |
561 | 0 | mask |= COMPONENT_MODIFIER; |
562 | 0 | *modifier = g_strdup (at_pos); |
563 | 0 | } |
564 | 0 | else |
565 | 0 | at_pos = locale + strlen (locale); |
566 | |
|
567 | 0 | if (dot_pos && dot_pos < at_pos) |
568 | 0 | { |
569 | 0 | mask |= COMPONENT_CODESET; |
570 | 0 | *codeset = g_strndup (dot_pos, at_pos - dot_pos); |
571 | 0 | } |
572 | 0 | else |
573 | 0 | dot_pos = at_pos; |
574 | |
|
575 | 0 | if (uscore_pos && uscore_pos < dot_pos) |
576 | 0 | { |
577 | 0 | mask |= COMPONENT_TERRITORY; |
578 | 0 | *territory = g_strndup (uscore_pos, dot_pos - uscore_pos); |
579 | 0 | } |
580 | 0 | else |
581 | 0 | uscore_pos = dot_pos; |
582 | |
|
583 | 0 | g_assert (uscore_pos >= locale); |
584 | 0 | *language = g_strndup (locale, uscore_pos - locale); |
585 | |
|
586 | 0 | return mask; |
587 | 0 | } |
588 | | |
589 | | /* |
590 | | * Compute all interesting variants for a given locale name - |
591 | | * by stripping off different components of the value. |
592 | | * |
593 | | * For simplicity, we assume that the locale is in |
594 | | * X/Open format: language[_territory][.codeset][@modifier] |
595 | | * |
596 | | * TODO: Extend this to handle the CEN format (see the GNUlibc docs) |
597 | | * as well. We could just copy the code from glibc wholesale |
598 | | * but it is big, ugly, and complicated, so I'm reluctant |
599 | | * to do so when this should handle 99% of the time... |
600 | | */ |
601 | | static void |
602 | | append_locale_variants (GPtrArray *array, |
603 | | const gchar *locale) |
604 | 0 | { |
605 | 0 | gchar *language = NULL; |
606 | 0 | gchar *territory = NULL; |
607 | 0 | gchar *codeset = NULL; |
608 | 0 | gchar *modifier = NULL; |
609 | |
|
610 | 0 | guint mask; |
611 | 0 | guint i, j; |
612 | |
|
613 | 0 | g_return_if_fail (locale != NULL); |
614 | | |
615 | 0 | mask = explode_locale (locale, &language, &territory, &codeset, &modifier); |
616 | | |
617 | | /* Iterate through all possible combinations, from least attractive |
618 | | * to most attractive. |
619 | | */ |
620 | 0 | for (j = 0; j <= mask; ++j) |
621 | 0 | { |
622 | 0 | i = mask - j; |
623 | |
|
624 | 0 | if ((i & ~mask) == 0) |
625 | 0 | { |
626 | 0 | gchar *val = g_strconcat (language, |
627 | 0 | (i & COMPONENT_TERRITORY) ? territory : "", |
628 | 0 | (i & COMPONENT_CODESET) ? codeset : "", |
629 | 0 | (i & COMPONENT_MODIFIER) ? modifier : "", |
630 | 0 | NULL); |
631 | 0 | g_ptr_array_add (array, val); |
632 | 0 | } |
633 | 0 | } |
634 | |
|
635 | 0 | g_free (language); |
636 | 0 | if (mask & COMPONENT_CODESET) |
637 | 0 | g_free (codeset); |
638 | 0 | if (mask & COMPONENT_TERRITORY) |
639 | 0 | g_free (territory); |
640 | 0 | if (mask & COMPONENT_MODIFIER) |
641 | 0 | g_free (modifier); |
642 | 0 | } |
643 | | |
644 | | /** |
645 | | * g_get_locale_variants: |
646 | | * @locale: a locale identifier |
647 | | * |
648 | | * Returns a list of derived variants of @locale, which can be used to |
649 | | * e.g. construct locale-dependent filenames or search paths. The returned |
650 | | * list is sorted from most desirable to least desirable. |
651 | | * This function handles territory, charset and extra locale modifiers. See |
652 | | * [`setlocale(3)`](man:setlocale) for information about locales and their format. |
653 | | * |
654 | | * @locale itself is guaranteed to be returned in the output. |
655 | | * |
656 | | * For example, if @locale is `fr_BE`, then the returned list |
657 | | * is `fr_BE`, `fr`. If @locale is `en_GB.UTF-8@euro`, then the returned list |
658 | | * is `en_GB.UTF-8@euro`, `en_GB.UTF-8`, `en_GB@euro`, `en_GB`, `en.UTF-8@euro`, |
659 | | * `en.UTF-8`, `en@euro`, `en`. |
660 | | * |
661 | | * If you need the list of variants for the current locale, |
662 | | * use g_get_language_names(). |
663 | | * |
664 | | * Returns: (transfer full) (array zero-terminated=1) (element-type utf8): a newly |
665 | | * allocated array of newly allocated strings with the locale variants. Free with |
666 | | * g_strfreev(). |
667 | | * |
668 | | * Since: 2.28 |
669 | | */ |
670 | | gchar ** |
671 | | g_get_locale_variants (const gchar *locale) |
672 | 0 | { |
673 | 0 | GPtrArray *array; |
674 | |
|
675 | 0 | g_return_val_if_fail (locale != NULL, NULL); |
676 | | |
677 | 0 | array = g_ptr_array_sized_new (8); |
678 | 0 | append_locale_variants (array, locale); |
679 | 0 | g_ptr_array_add (array, NULL); |
680 | |
|
681 | 0 | return (gchar **) g_ptr_array_free (array, FALSE); |
682 | 0 | } |
683 | | |
684 | | /* The following is (partly) taken from the gettext package. |
685 | | Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. */ |
686 | | |
687 | | static const gchar * |
688 | | guess_category_value (const gchar *category_name) |
689 | 0 | { |
690 | 0 | const gchar *retval; |
691 | | |
692 | | /* The highest priority value is the 'LANGUAGE' environment |
693 | | variable. This is a GNU extension. */ |
694 | 0 | retval = g_getenv ("LANGUAGE"); |
695 | 0 | if ((retval != NULL) && (retval[0] != '\0')) |
696 | 0 | return retval; |
697 | | |
698 | | /* 'LANGUAGE' is not set. So we have to proceed with the POSIX |
699 | | methods of looking to 'LC_ALL', 'LC_xxx', and 'LANG'. On some |
700 | | systems this can be done by the 'setlocale' function itself. */ |
701 | | |
702 | | /* Setting of LC_ALL overwrites all other. */ |
703 | 0 | retval = g_getenv ("LC_ALL"); |
704 | 0 | if ((retval != NULL) && (retval[0] != '\0')) |
705 | 0 | return retval; |
706 | | |
707 | | /* Next comes the name of the desired category. */ |
708 | 0 | retval = g_getenv (category_name); |
709 | 0 | if ((retval != NULL) && (retval[0] != '\0')) |
710 | 0 | return retval; |
711 | | |
712 | | /* Last possibility is the LANG environment variable. */ |
713 | 0 | retval = g_getenv ("LANG"); |
714 | 0 | if ((retval != NULL) && (retval[0] != '\0')) |
715 | 0 | return retval; |
716 | | |
717 | | #ifdef G_PLATFORM_WIN32 |
718 | | /* g_win32_getlocale() first checks for LC_ALL, LC_MESSAGES and |
719 | | * LANG, which we already did above. Oh well. The main point of |
720 | | * calling g_win32_getlocale() is to get the thread's locale as used |
721 | | * by Windows and the Microsoft C runtime (in the "English_United |
722 | | * States" format) translated into the Unixish format. |
723 | | */ |
724 | | { |
725 | | char *locale = g_win32_getlocale (); |
726 | | retval = g_intern_string (locale); |
727 | | g_free (locale); |
728 | | return retval; |
729 | | } |
730 | | #endif |
731 | | |
732 | 0 | return NULL; |
733 | 0 | } |
734 | | |
735 | | typedef struct _GLanguageNamesCache GLanguageNamesCache; |
736 | | |
737 | | struct _GLanguageNamesCache { |
738 | | gchar *languages; |
739 | | gchar **language_names; |
740 | | }; |
741 | | |
742 | | static void |
743 | | language_names_cache_free (gpointer data) |
744 | 0 | { |
745 | 0 | GLanguageNamesCache *cache = data; |
746 | 0 | g_free (cache->languages); |
747 | 0 | g_strfreev (cache->language_names); |
748 | 0 | g_free (cache); |
749 | 0 | } |
750 | | |
751 | | /** |
752 | | * g_get_language_names: |
753 | | * |
754 | | * Computes a list of applicable locale names, which can be used to |
755 | | * e.g. construct locale-dependent filenames or search paths. The returned |
756 | | * list is sorted from most desirable to least desirable and always contains |
757 | | * the default locale "C". |
758 | | * |
759 | | * For example, if LANGUAGE=de:en_US, then the returned list is |
760 | | * "de", "en_US", "en", "C". |
761 | | * |
762 | | * This function consults the environment variables `LANGUAGE`, `LC_ALL`, |
763 | | * `LC_MESSAGES` and `LANG` to find the list of locales specified by the |
764 | | * user. |
765 | | * |
766 | | * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by GLib |
767 | | * that must not be modified or freed. |
768 | | * |
769 | | * Since: 2.6 |
770 | | */ |
771 | | const gchar * const * |
772 | | g_get_language_names (void) |
773 | 0 | { |
774 | 0 | return g_get_language_names_with_category ("LC_MESSAGES"); |
775 | 0 | } |
776 | | |
777 | | /** |
778 | | * g_get_language_names_with_category: |
779 | | * @category_name: a locale category name |
780 | | * |
781 | | * Computes a list of applicable locale names with a locale category name, |
782 | | * which can be used to construct the fallback locale-dependent filenames |
783 | | * or search paths. The returned list is sorted from most desirable to |
784 | | * least desirable and always contains the default locale "C". |
785 | | * |
786 | | * This function consults the environment variables `LANGUAGE`, `LC_ALL`, |
787 | | * @category_name, and `LANG` to find the list of locales specified by the |
788 | | * user. |
789 | | * |
790 | | * g_get_language_names() returns g_get_language_names_with_category("LC_MESSAGES"). |
791 | | * |
792 | | * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by |
793 | | * the thread g_get_language_names_with_category was called from. |
794 | | * It must not be modified or freed. It must be copied if planned to be used in another thread. |
795 | | * |
796 | | * Since: 2.58 |
797 | | */ |
798 | | const gchar * const * |
799 | | g_get_language_names_with_category (const gchar *category_name) |
800 | 0 | { |
801 | 0 | static GPrivate cache_private = G_PRIVATE_INIT ((void (*)(gpointer)) g_hash_table_unref); |
802 | 0 | GHashTable *cache = g_private_get (&cache_private); |
803 | 0 | const gchar *languages; |
804 | 0 | GLanguageNamesCache *name_cache; |
805 | |
|
806 | 0 | g_return_val_if_fail (category_name != NULL, NULL); |
807 | | |
808 | 0 | if (!cache) |
809 | 0 | { |
810 | 0 | cache = g_hash_table_new_full (g_str_hash, g_str_equal, |
811 | 0 | g_free, language_names_cache_free); |
812 | 0 | g_private_set (&cache_private, cache); |
813 | 0 | g_ignore_leak (cache); |
814 | 0 | } |
815 | |
|
816 | 0 | languages = guess_category_value (category_name); |
817 | 0 | if (!languages) |
818 | 0 | languages = "C"; |
819 | |
|
820 | 0 | name_cache = (GLanguageNamesCache *) g_hash_table_lookup (cache, category_name); |
821 | 0 | if (!(name_cache && name_cache->languages && |
822 | 0 | strcmp (name_cache->languages, languages) == 0)) |
823 | 0 | { |
824 | 0 | GPtrArray *array; |
825 | 0 | gchar **alist, **a; |
826 | |
|
827 | 0 | g_hash_table_remove (cache, category_name); |
828 | |
|
829 | 0 | array = g_ptr_array_sized_new (8); |
830 | |
|
831 | 0 | alist = g_strsplit (languages, ":", 0); |
832 | 0 | for (a = alist; *a; a++) |
833 | 0 | append_locale_variants (array, unalias_lang (*a)); |
834 | 0 | g_strfreev (alist); |
835 | 0 | g_ptr_array_add (array, g_strdup ("C")); |
836 | 0 | g_ptr_array_add (array, NULL); |
837 | |
|
838 | 0 | name_cache = g_new0 (GLanguageNamesCache, 1); |
839 | 0 | name_cache->languages = g_strdup (languages); |
840 | 0 | name_cache->language_names = (gchar **) g_ptr_array_free (array, FALSE); |
841 | 0 | g_hash_table_insert (cache, g_strdup (category_name), name_cache); |
842 | 0 | g_ignore_leak (name_cache); |
843 | 0 | } |
844 | |
|
845 | 0 | return (const gchar * const *) name_cache->language_names; |
846 | 0 | } |