/src/pango/subprojects/glib/glib/gcharset.c
Line | Count | Source |
1 | | /* gcharset.c - Charset information |
2 | | * |
3 | | * Copyright (C) 2011 Red Hat, Inc. |
4 | | * |
5 | | * SPDX-License-Identifier: LGPL-2.1-or-later |
6 | | * |
7 | | * This library is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * This library is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "config.h" |
22 | | |
23 | | #include "gcharset.h" |
24 | | #include "gcharsetprivate.h" |
25 | | |
26 | | #include "garray.h" |
27 | | #include "genviron.h" |
28 | | #include "ghash.h" |
29 | | #include "glib-private.h" |
30 | | #include "gmessages.h" |
31 | | #include "gstdio.h" |
32 | | #include "gstrfuncs.h" |
33 | | #include "gthread.h" |
34 | | #include "gthreadprivate.h" |
35 | | #ifdef G_OS_WIN32 |
36 | | #include "gwin32.h" |
37 | | #endif |
38 | | |
39 | | #include "libcharset/libcharset.h" |
40 | | |
41 | | #include <string.h> |
42 | | #include <stdio.h> |
43 | | |
44 | | #if (HAVE_LANGINFO_TIME_CODESET || HAVE_LANGINFO_CODESET) |
45 | | #include <langinfo.h> |
46 | | #endif |
47 | | |
48 | | #include <locale.h> |
49 | | #ifdef G_OS_WIN32 |
50 | | #define WIN32_LEAN_AND_MEAN |
51 | | #include <windows.h> |
52 | | #endif |
53 | | |
54 | | G_LOCK_DEFINE_STATIC (aliases); |
55 | | |
56 | | static GHashTable * |
57 | | get_alias_hash (void) |
58 | 0 | { |
59 | 0 | static GHashTable *alias_hash = NULL; |
60 | 0 | const char *aliases; |
61 | |
|
62 | 0 | G_LOCK (aliases); |
63 | |
|
64 | 0 | if (!alias_hash) |
65 | 0 | { |
66 | 0 | alias_hash = g_hash_table_new (g_str_hash, g_str_equal); |
67 | |
|
68 | 0 | aliases = _g_locale_get_charset_aliases (); |
69 | 0 | while (*aliases != '\0') |
70 | 0 | { |
71 | 0 | const char *canonical; |
72 | 0 | const char *alias; |
73 | 0 | const char **alias_array; |
74 | 0 | size_t count = 0; |
75 | |
|
76 | 0 | alias = aliases; |
77 | 0 | aliases += strlen (aliases) + 1; |
78 | 0 | canonical = aliases; |
79 | 0 | aliases += strlen (aliases) + 1; |
80 | |
|
81 | 0 | alias_array = g_hash_table_lookup (alias_hash, canonical); |
82 | 0 | if (alias_array) |
83 | 0 | { |
84 | 0 | while (alias_array[count]) |
85 | 0 | count++; |
86 | 0 | } |
87 | |
|
88 | 0 | alias_array = g_renew (const char *, alias_array, count + 2); |
89 | 0 | alias_array[count] = alias; |
90 | 0 | alias_array[count + 1] = NULL; |
91 | |
|
92 | 0 | g_hash_table_insert (alias_hash, (char *)canonical, alias_array); |
93 | 0 | } |
94 | 0 | } |
95 | |
|
96 | 0 | G_UNLOCK (aliases); |
97 | |
|
98 | 0 | return alias_hash; |
99 | 0 | } |
100 | | |
101 | | /* As an abuse of the alias table, the following routines gets |
102 | | * the charsets that are aliases for the canonical name. |
103 | | */ |
104 | | const char ** |
105 | | _g_charset_get_aliases (const char *canonical_name) |
106 | 0 | { |
107 | 0 | GHashTable *alias_hash = get_alias_hash (); |
108 | |
|
109 | 0 | return g_hash_table_lookup (alias_hash, canonical_name); |
110 | 0 | } |
111 | | |
112 | | static gboolean |
113 | | g_utf8_get_charset_internal (const char *raw_data, |
114 | | const char **a) |
115 | 0 | { |
116 | | /* Allow CHARSET to override the charset of any locale category. Users should |
117 | | * probably never be setting this — instead, just add the charset after a `.` |
118 | | * in `LANGUAGE`/`LC_ALL`/`LC_*`/`LANG`. I can’t find any reference (in |
119 | | * `git log`, code comments, or man pages) to this environment variable being |
120 | | * standardised or documented or even used anywhere outside GLib. Perhaps it |
121 | | * should eventually be removed. */ |
122 | 0 | const char *charset = g_getenv ("CHARSET"); |
123 | |
|
124 | 0 | if (charset && *charset) |
125 | 0 | { |
126 | 0 | *a = charset; |
127 | |
|
128 | 0 | if (charset && strstr (charset, "UTF-8")) |
129 | 0 | return TRUE; |
130 | 0 | else |
131 | 0 | return FALSE; |
132 | 0 | } |
133 | | |
134 | | /* The libcharset code tries to be thread-safe without |
135 | | * a lock, but has a memory leak and a missing memory |
136 | | * barrier, so we lock for it |
137 | | */ |
138 | 0 | G_LOCK (aliases); |
139 | 0 | charset = _g_locale_charset_unalias (raw_data); |
140 | 0 | G_UNLOCK (aliases); |
141 | |
|
142 | 0 | if (charset && *charset) |
143 | 0 | { |
144 | 0 | *a = charset; |
145 | |
|
146 | 0 | if (charset && strstr (charset, "UTF-8")) |
147 | 0 | return TRUE; |
148 | 0 | else |
149 | 0 | return FALSE; |
150 | 0 | } |
151 | | |
152 | | /* Assume this for compatibility at present. */ |
153 | 0 | *a = "US-ASCII"; |
154 | |
|
155 | 0 | return FALSE; |
156 | 0 | } |
157 | | |
158 | | typedef struct _GCharsetCache GCharsetCache; |
159 | | |
160 | | struct _GCharsetCache { |
161 | | gboolean is_utf8; |
162 | | gchar *raw; |
163 | | gchar *charset; |
164 | | }; |
165 | | |
166 | | static void |
167 | | charset_cache_free (gpointer data) |
168 | 0 | { |
169 | 0 | GCharsetCache *cache = data; |
170 | 0 | g_free (cache->raw); |
171 | 0 | g_free (cache->charset); |
172 | 0 | g_free (cache); |
173 | 0 | } |
174 | | |
175 | | /** |
176 | | * g_get_charset: |
177 | | * @charset: (out) (optional) (transfer none): return location for character set |
178 | | * name, or %NULL. |
179 | | * |
180 | | * Obtains the character set for the [current locale](running.html#locale); |
181 | | * you might use this character set as an argument to g_convert(), to convert |
182 | | * from the current locale's encoding to some other encoding. (Frequently |
183 | | * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.) |
184 | | * |
185 | | * On Windows the character set returned by this function is the |
186 | | * so-called system default ANSI code-page. That is the character set |
187 | | * used by the "narrow" versions of C library and Win32 functions that |
188 | | * handle file names. It might be different from the character set |
189 | | * used by the C library's current locale. |
190 | | * |
191 | | * On Linux, the character set is found by consulting nl_langinfo() if |
192 | | * available. If not, the environment variables `LC_ALL`, `LC_CTYPE`, `LANG` |
193 | | * and `CHARSET` are queried in order. nl_langinfo() returns the C locale if |
194 | | * no locale has been loaded by setlocale(). |
195 | | * |
196 | | * The return value is %TRUE if the locale's encoding is UTF-8, in that |
197 | | * case you can perhaps avoid calling g_convert(). |
198 | | * |
199 | | * The string returned in @charset is not allocated, and should not be |
200 | | * freed. |
201 | | * |
202 | | * Returns: %TRUE if the returned charset is UTF-8 |
203 | | */ |
204 | | gboolean |
205 | | g_get_charset (const char **charset) |
206 | 0 | { |
207 | 0 | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
208 | 0 | GCharsetCache *cache = g_private_get (&cache_private); |
209 | 0 | const gchar *raw; |
210 | |
|
211 | 0 | if (!cache) |
212 | 0 | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
213 | |
|
214 | 0 | G_LOCK (aliases); |
215 | 0 | raw = _g_locale_charset_raw (); |
216 | 0 | G_UNLOCK (aliases); |
217 | |
|
218 | 0 | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
219 | 0 | { |
220 | 0 | const gchar *new_charset; |
221 | |
|
222 | 0 | g_free (cache->raw); |
223 | 0 | g_free (cache->charset); |
224 | 0 | cache->raw = g_strdup (raw); |
225 | 0 | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
226 | 0 | cache->charset = g_strdup (new_charset); |
227 | 0 | } |
228 | |
|
229 | 0 | if (charset) |
230 | 0 | *charset = cache->charset; |
231 | |
|
232 | 0 | return cache->is_utf8; |
233 | 0 | } |
234 | | |
235 | | /* |
236 | | * Do the same as g_get_charset() but it temporarily set locale (LC_ALL to |
237 | | * LC_TIME) to correctly check for charset about time conversion relatives. |
238 | | * |
239 | | * Returns: %TRUE if the returned charset is UTF-8 |
240 | | */ |
241 | | gboolean |
242 | | _g_get_time_charset (const char **charset) |
243 | 0 | { |
244 | 0 | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
245 | 0 | GCharsetCache *cache = g_private_get (&cache_private); |
246 | 0 | const gchar *raw; |
247 | |
|
248 | 0 | if (!cache) |
249 | 0 | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
250 | |
|
251 | 0 | #ifdef HAVE_LANGINFO_TIME_CODESET |
252 | 0 | raw = nl_langinfo (_NL_TIME_CODESET); |
253 | | #else |
254 | | G_LOCK (aliases); |
255 | | raw = _g_locale_charset_raw (); |
256 | | G_UNLOCK (aliases); |
257 | | #endif |
258 | |
|
259 | 0 | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
260 | 0 | { |
261 | 0 | const gchar *new_charset; |
262 | |
|
263 | 0 | g_free (cache->raw); |
264 | 0 | g_free (cache->charset); |
265 | 0 | cache->raw = g_strdup (raw); |
266 | 0 | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
267 | 0 | cache->charset = g_strdup (new_charset); |
268 | 0 | } |
269 | |
|
270 | 0 | if (charset) |
271 | 0 | *charset = cache->charset; |
272 | |
|
273 | 0 | return cache->is_utf8; |
274 | 0 | } |
275 | | /* |
276 | | * Do the same as g_get_charset() but it temporarily set locale (LC_ALL to |
277 | | * LC_CTYPE) to correctly check for charset about CTYPE conversion relatives. |
278 | | * |
279 | | * Returns: %TRUE if the returned charset is UTF-8 |
280 | | */ |
281 | | gboolean |
282 | | _g_get_ctype_charset (const char **charset) |
283 | 0 | { |
284 | 0 | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
285 | 0 | GCharsetCache *cache = g_private_get (&cache_private); |
286 | 0 | const gchar *raw; |
287 | |
|
288 | 0 | if (!cache) |
289 | 0 | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
290 | |
|
291 | 0 | #ifdef HAVE_LANGINFO_CODESET |
292 | 0 | raw = nl_langinfo (CODESET); |
293 | | #else |
294 | | G_LOCK (aliases); |
295 | | raw = _g_locale_charset_raw (); |
296 | | G_UNLOCK (aliases); |
297 | | #endif |
298 | |
|
299 | 0 | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
300 | 0 | { |
301 | 0 | const gchar *new_charset; |
302 | |
|
303 | 0 | g_free (cache->raw); |
304 | 0 | g_free (cache->charset); |
305 | 0 | cache->raw = g_strdup (raw); |
306 | 0 | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
307 | 0 | cache->charset = g_strdup (new_charset); |
308 | 0 | } |
309 | |
|
310 | 0 | if (charset) |
311 | 0 | *charset = cache->charset; |
312 | |
|
313 | 0 | return cache->is_utf8; |
314 | 0 | } |
315 | | |
316 | | /** |
317 | | * g_get_codeset: |
318 | | * |
319 | | * Gets the character set for the current locale. |
320 | | * |
321 | | * Returns: a newly allocated string containing the name |
322 | | * of the character set. This string must be freed with g_free(). |
323 | | */ |
324 | | gchar * |
325 | | g_get_codeset (void) |
326 | 0 | { |
327 | 0 | const gchar *charset; |
328 | |
|
329 | 0 | g_get_charset (&charset); |
330 | |
|
331 | 0 | return g_strdup (charset); |
332 | 0 | } |
333 | | |
334 | | /** |
335 | | * g_get_console_charset: |
336 | | * @charset: (out) (optional) (transfer none): return location for character set |
337 | | * name, or %NULL. |
338 | | * |
339 | | * Obtains the character set used by the console attached to the process, |
340 | | * which is suitable for printing output to the terminal. |
341 | | * |
342 | | * Usually this matches the result returned by g_get_charset(), but in |
343 | | * environments where the locale's character set does not match the encoding |
344 | | * of the console this function tries to guess a more suitable value instead. |
345 | | * |
346 | | * On Windows the character set returned by this function is the |
347 | | * output code page used by the console associated with the calling process. |
348 | | * If the codepage can't be determined (for example because there is no |
349 | | * console attached) UTF-8 is assumed. |
350 | | * |
351 | | * The return value is %TRUE if the locale's encoding is UTF-8, in that |
352 | | * case you can perhaps avoid calling g_convert(). |
353 | | * |
354 | | * The string returned in @charset is not allocated, and should not be |
355 | | * freed. |
356 | | * |
357 | | * Returns: %TRUE if the returned charset is UTF-8 |
358 | | * |
359 | | * Since: 2.62 |
360 | | */ |
361 | | gboolean |
362 | | g_get_console_charset (const char **charset) |
363 | 0 | { |
364 | | #ifdef G_OS_WIN32 |
365 | | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
366 | | GCharsetCache *cache = g_private_get (&cache_private); |
367 | | const gchar *locale; |
368 | | unsigned int cp; |
369 | | char buf[2 + 20 + 1]; /* "CP" + G_MAXUINT64 (to be safe) in decimal form (20 bytes) + "\0" */ |
370 | | const gchar *raw = NULL; |
371 | | |
372 | | if (!cache) |
373 | | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
374 | | |
375 | | /* first try to query $LANG (works for Cygwin/MSYS/MSYS2 and others using mintty) */ |
376 | | locale = g_getenv ("LANG"); |
377 | | if (locale != NULL && locale[0] != '\0') |
378 | | { |
379 | | /* If the locale name contains an encoding after the dot, return it. */ |
380 | | const char *dot = strchr (locale, '.'); |
381 | | |
382 | | if (dot != NULL) |
383 | | { |
384 | | const char *modifier; |
385 | | |
386 | | dot++; |
387 | | /* Look for the possible @... trailer and remove it, if any. */ |
388 | | modifier = strchr (dot, '@'); |
389 | | if (modifier == NULL) |
390 | | raw = dot; |
391 | | else if ((gsize) (modifier - dot) < sizeof (buf)) |
392 | | { |
393 | | memcpy (buf, dot, modifier - dot); |
394 | | buf[modifier - dot] = '\0'; |
395 | | raw = buf; |
396 | | } |
397 | | } |
398 | | } |
399 | | /* next try querying console codepage using native win32 API */ |
400 | | if (raw == NULL) |
401 | | { |
402 | | cp = GetConsoleOutputCP (); |
403 | | if (cp) |
404 | | { |
405 | | sprintf (buf, "CP%u", cp); |
406 | | raw = buf; |
407 | | } |
408 | | else if (GetLastError () != ERROR_INVALID_HANDLE) |
409 | | { |
410 | | gchar *emsg = g_win32_error_message (GetLastError ()); |
411 | | g_warning ("Failed to determine console output code page: %s. " |
412 | | "Falling back to UTF-8", emsg); |
413 | | g_free (emsg); |
414 | | } |
415 | | } |
416 | | /* fall-back to UTF-8 if the rest failed (it's a universal default) */ |
417 | | if (raw == NULL) |
418 | | raw = "UTF-8"; |
419 | | |
420 | | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
421 | | { |
422 | | const gchar *new_charset; |
423 | | |
424 | | g_free (cache->raw); |
425 | | g_free (cache->charset); |
426 | | cache->raw = g_strdup (raw); |
427 | | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
428 | | cache->charset = g_strdup (new_charset); |
429 | | } |
430 | | |
431 | | if (charset) |
432 | | *charset = cache->charset; |
433 | | |
434 | | return cache->is_utf8; |
435 | | #else |
436 | | /* assume the locale settings match the console encoding on non-Windows OSs */ |
437 | 0 | return g_get_charset (charset); |
438 | 0 | #endif |
439 | 0 | } |
440 | | |
441 | | #ifndef G_OS_WIN32 |
442 | | |
443 | | /* read an alias file for the locales */ |
444 | | static void |
445 | | read_aliases (const gchar *file, |
446 | | GHashTable *alias_table) |
447 | 0 | { |
448 | 0 | FILE *fp; |
449 | 0 | char buf[256]; |
450 | |
|
451 | 0 | fp = g_fopen (file, "re"); |
452 | 0 | if (!fp) |
453 | 0 | return; |
454 | 0 | while (fgets (buf, 256, fp)) |
455 | 0 | { |
456 | 0 | char *p, *q; |
457 | |
|
458 | 0 | g_strstrip (buf); |
459 | | |
460 | | /* Line is a comment */ |
461 | 0 | if ((buf[0] == '#') || (buf[0] == '\0')) |
462 | 0 | continue; |
463 | | |
464 | | /* Reads first column */ |
465 | 0 | for (p = buf, q = NULL; *p; p++) { |
466 | 0 | if ((*p == '\t') || (*p == ' ') || (*p == ':')) { |
467 | 0 | *p = '\0'; |
468 | 0 | q = p+1; |
469 | 0 | while ((*q == '\t') || (*q == ' ')) { |
470 | 0 | q++; |
471 | 0 | } |
472 | 0 | break; |
473 | 0 | } |
474 | 0 | } |
475 | | /* The line only had one column */ |
476 | 0 | if (!q || *q == '\0') |
477 | 0 | continue; |
478 | | |
479 | | /* Read second column */ |
480 | 0 | for (p = q; *p; p++) { |
481 | 0 | if ((*p == '\t') || (*p == ' ')) { |
482 | 0 | *p = '\0'; |
483 | 0 | break; |
484 | 0 | } |
485 | 0 | } |
486 | | |
487 | | /* Add to alias table if necessary */ |
488 | 0 | if (!g_hash_table_lookup (alias_table, buf)) { |
489 | 0 | g_hash_table_insert (alias_table, g_strdup (buf), g_strdup (q)); |
490 | 0 | } |
491 | 0 | } |
492 | 0 | fclose (fp); |
493 | 0 | } |
494 | | |
495 | | #endif |
496 | | |
497 | | static char * |
498 | | unalias_lang (char *lang) |
499 | 0 | { |
500 | 0 | #ifndef G_OS_WIN32 |
501 | 0 | static GHashTable *alias_table = NULL; |
502 | 0 | char *p; |
503 | 0 | int i; |
504 | |
|
505 | 0 | if (g_once_init_enter_pointer (&alias_table)) |
506 | 0 | { |
507 | 0 | GHashTable *table = g_hash_table_new (g_str_hash, g_str_equal); |
508 | 0 | read_aliases ("/usr/share/locale/locale.alias", table); |
509 | 0 | g_once_init_leave_pointer (&alias_table, table); |
510 | 0 | } |
511 | |
|
512 | 0 | i = 0; |
513 | 0 | while ((p = g_hash_table_lookup (alias_table, lang)) && (strcmp (p, lang) != 0)) |
514 | 0 | { |
515 | 0 | lang = p; |
516 | 0 | if (i++ == 30) |
517 | 0 | { |
518 | 0 | static gboolean said_before = FALSE; |
519 | 0 | if (!said_before) |
520 | 0 | g_warning ("Too many alias levels for a locale, " |
521 | 0 | "may indicate a loop"); |
522 | 0 | said_before = TRUE; |
523 | 0 | return lang; |
524 | 0 | } |
525 | 0 | } |
526 | 0 | #endif |
527 | 0 | return lang; |
528 | 0 | } |
529 | | |
/* Bit flags for the optional components of a locale spec, listed from
 * least significant to most significant.
 */
enum
{
  COMPONENT_CODESET   = 0x1,
  COMPONENT_TERRITORY = 0x2,
  COMPONENT_MODIFIER  = 0x4
};
539 | | |
540 | | /* Break an X/Open style locale specification into components |
541 | | * e.g. `en_GB` or `uz_UZ.utf8@cyrillic` |
542 | | */ |
543 | | static guint |
544 | | explode_locale (const gchar *locale, |
545 | | gchar **language, |
546 | | gchar **territory, |
547 | | gchar **codeset, |
548 | | gchar **modifier) |
549 | 0 | { |
550 | 0 | const gchar *uscore_pos; |
551 | 0 | const gchar *at_pos; |
552 | 0 | const gchar *dot_pos; |
553 | |
|
554 | 0 | guint mask = 0; |
555 | |
|
556 | 0 | uscore_pos = strchr (locale, '_'); |
557 | 0 | dot_pos = strchr (uscore_pos ? uscore_pos : locale, '.'); |
558 | 0 | at_pos = strchr (dot_pos ? dot_pos : (uscore_pos ? uscore_pos : locale), '@'); |
559 | |
|
560 | 0 | if (at_pos) |
561 | 0 | { |
562 | 0 | mask |= COMPONENT_MODIFIER; |
563 | 0 | *modifier = g_strdup (at_pos); |
564 | 0 | } |
565 | 0 | else |
566 | 0 | at_pos = locale + strlen (locale); |
567 | |
|
568 | 0 | if (dot_pos && dot_pos < at_pos) |
569 | 0 | { |
570 | 0 | mask |= COMPONENT_CODESET; |
571 | 0 | *codeset = g_strndup (dot_pos, at_pos - dot_pos); |
572 | 0 | } |
573 | 0 | else |
574 | 0 | dot_pos = at_pos; |
575 | |
|
576 | 0 | if (uscore_pos && uscore_pos < dot_pos) |
577 | 0 | { |
578 | 0 | mask |= COMPONENT_TERRITORY; |
579 | 0 | *territory = g_strndup (uscore_pos, dot_pos - uscore_pos); |
580 | 0 | } |
581 | 0 | else |
582 | 0 | uscore_pos = dot_pos; |
583 | |
|
584 | 0 | g_assert (uscore_pos >= locale); |
585 | 0 | *language = g_strndup (locale, uscore_pos - locale); |
586 | |
|
587 | 0 | return mask; |
588 | 0 | } |
589 | | |
590 | | /* |
591 | | * Compute all interesting variants for a given locale name - |
592 | | * by stripping off different components of the value. |
593 | | * |
594 | | * For simplicity, we assume that the locale is in |
595 | | * X/Open format: language[_territory][.codeset][@modifier] |
596 | | * |
597 | | * TODO: Extend this to handle the CEN format (see the GNUlibc docs) |
598 | | * as well. We could just copy the code from glibc wholesale |
599 | | * but it is big, ugly, and complicated, so I'm reluctant |
600 | | * to do so when this should handle 99% of the time... |
601 | | */ |
602 | | static void |
603 | | append_locale_variants (GPtrArray *array, |
604 | | const gchar *locale) |
605 | 0 | { |
606 | 0 | gchar *language = NULL; |
607 | 0 | gchar *territory = NULL; |
608 | 0 | gchar *codeset = NULL; |
609 | 0 | gchar *modifier = NULL; |
610 | |
|
611 | 0 | guint mask; |
612 | 0 | guint i, j; |
613 | |
|
614 | 0 | g_return_if_fail (locale != NULL); |
615 | | |
616 | 0 | mask = explode_locale (locale, &language, &territory, &codeset, &modifier); |
617 | | |
618 | | /* Iterate through all possible combinations, from least attractive |
619 | | * to most attractive. |
620 | | */ |
621 | 0 | for (j = 0; j <= mask; ++j) |
622 | 0 | { |
623 | 0 | i = mask - j; |
624 | |
|
625 | 0 | if ((i & ~mask) == 0) |
626 | 0 | { |
627 | 0 | gchar *val = g_strconcat (language, |
628 | 0 | (i & COMPONENT_TERRITORY) ? territory : "", |
629 | 0 | (i & COMPONENT_CODESET) ? codeset : "", |
630 | 0 | (i & COMPONENT_MODIFIER) ? modifier : "", |
631 | 0 | NULL); |
632 | 0 | g_ptr_array_add (array, val); |
633 | 0 | } |
634 | 0 | } |
635 | |
|
636 | 0 | g_free (language); |
637 | 0 | if (mask & COMPONENT_CODESET) |
638 | 0 | g_free (codeset); |
639 | 0 | if (mask & COMPONENT_TERRITORY) |
640 | 0 | g_free (territory); |
641 | 0 | if (mask & COMPONENT_MODIFIER) |
642 | 0 | g_free (modifier); |
643 | 0 | } |
644 | | |
645 | | /** |
646 | | * g_get_locale_variants: |
647 | | * @locale: a locale identifier |
648 | | * |
649 | | * Returns a list of derived variants of @locale, which can be used to |
650 | | * e.g. construct locale-dependent filenames or search paths. The returned |
651 | | * list is sorted from most desirable to least desirable. |
652 | | * This function handles territory, charset and extra locale modifiers. See |
653 | | * [`setlocale(3)`](man:setlocale) for information about locales and their format. |
654 | | * |
655 | | * @locale itself is guaranteed to be returned in the output. |
656 | | * |
657 | | * For example, if @locale is `fr_BE`, then the returned list |
658 | | * is `fr_BE`, `fr`. If @locale is `en_GB.UTF-8@euro`, then the returned list |
659 | | * is `en_GB.UTF-8@euro`, `en_GB.UTF-8`, `en_GB@euro`, `en_GB`, `en.UTF-8@euro`, |
660 | | * `en.UTF-8`, `en@euro`, `en`. |
661 | | * |
662 | | * If you need the list of variants for the current locale, |
663 | | * use g_get_language_names(). |
664 | | * |
665 | | * Returns: (transfer full) (array zero-terminated=1) (element-type utf8): a newly |
666 | | * allocated array of newly allocated strings with the locale variants. Free with |
667 | | * g_strfreev(). |
668 | | * |
669 | | * Since: 2.28 |
670 | | */ |
671 | | gchar ** |
672 | | g_get_locale_variants (const gchar *locale) |
673 | 0 | { |
674 | 0 | GPtrArray *array; |
675 | |
|
676 | 0 | g_return_val_if_fail (locale != NULL, NULL); |
677 | | |
678 | 0 | array = g_ptr_array_sized_new (8); |
679 | 0 | append_locale_variants (array, locale); |
680 | 0 | g_ptr_array_add (array, NULL); |
681 | |
|
682 | 0 | return (gchar **) g_ptr_array_free (array, FALSE); |
683 | 0 | } |
684 | | |
685 | | /* The following is (partly) taken from the gettext package. |
686 | | Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. */ |
687 | | |
688 | | static const gchar * |
689 | | guess_category_value (const gchar *category_name) |
690 | 0 | { |
691 | 0 | const gchar *retval; |
692 | | |
693 | | /* The highest priority value is the 'LANGUAGE' environment |
694 | | variable. This is a GNU extension. */ |
695 | 0 | retval = g_getenv ("LANGUAGE"); |
696 | 0 | if ((retval != NULL) && (retval[0] != '\0')) |
697 | 0 | return retval; |
698 | | |
699 | | /* 'LANGUAGE' is not set. So we have to proceed with the POSIX |
700 | | methods of looking to 'LC_ALL', 'LC_xxx', and 'LANG'. On some |
701 | | systems this can be done by the 'setlocale' function itself. */ |
702 | | |
703 | | /* Setting of LC_ALL overwrites all other. */ |
704 | 0 | retval = g_getenv ("LC_ALL"); |
705 | 0 | if ((retval != NULL) && (retval[0] != '\0')) |
706 | 0 | return retval; |
707 | | |
708 | | /* Next comes the name of the desired category. */ |
709 | 0 | retval = g_getenv (category_name); |
710 | 0 | if ((retval != NULL) && (retval[0] != '\0')) |
711 | 0 | return retval; |
712 | | |
713 | | /* Last possibility is the LANG environment variable. */ |
714 | 0 | retval = g_getenv ("LANG"); |
715 | 0 | if ((retval != NULL) && (retval[0] != '\0')) |
716 | 0 | return retval; |
717 | | |
718 | | #ifdef G_PLATFORM_WIN32 |
719 | | /* g_win32_getlocale() first checks for LC_ALL, LC_MESSAGES and |
720 | | * LANG, which we already did above. Oh well. The main point of |
721 | | * calling g_win32_getlocale() is to get the thread's locale as used |
722 | | * by Windows and the Microsoft C runtime (in the "English_United |
723 | | * States" format) translated into the Unixish format. |
724 | | */ |
725 | | { |
726 | | char *locale = g_win32_getlocale (); |
727 | | retval = g_intern_string (locale); |
728 | | g_free (locale); |
729 | | return retval; |
730 | | } |
731 | | #endif |
732 | | |
733 | 0 | return NULL; |
734 | 0 | } |
735 | | |
736 | | typedef struct _GLanguageNamesCache GLanguageNamesCache; |
737 | | |
738 | | struct _GLanguageNamesCache { |
739 | | gchar *languages; |
740 | | gchar **language_names; |
741 | | }; |
742 | | |
743 | | static void |
744 | | language_names_cache_free (gpointer data) |
745 | 0 | { |
746 | 0 | GLanguageNamesCache *cache = data; |
747 | 0 | g_free (cache->languages); |
748 | 0 | g_strfreev (cache->language_names); |
749 | 0 | g_free (cache); |
750 | 0 | } |
751 | | |
752 | | /** |
753 | | * g_get_language_names: |
754 | | * |
755 | | * Computes a list of applicable locale names, which can be used to |
756 | | * e.g. construct locale-dependent filenames or search paths. The returned |
757 | | * list is sorted from most desirable to least desirable and always contains |
758 | | * the default locale "C". |
759 | | * |
760 | | * For example, if LANGUAGE=de:en_US, then the returned list is |
761 | | * "de", "en_US", "en", "C". |
762 | | * |
763 | | * This function consults the environment variables `LANGUAGE`, `LC_ALL`, |
764 | | * `LC_MESSAGES` and `LANG` to find the list of locales specified by the |
765 | | * user. |
766 | | * |
767 | | * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by GLib |
768 | | * that must not be modified or freed. |
769 | | * |
770 | | * Since: 2.6 |
771 | | */ |
772 | | const gchar * const * |
773 | | g_get_language_names (void) |
774 | 0 | { |
775 | 0 | return g_get_language_names_with_category ("LC_MESSAGES"); |
776 | 0 | } |
777 | | |
778 | | /** |
779 | | * g_get_language_names_with_category: |
780 | | * @category_name: a locale category name |
781 | | * |
782 | | * Computes a list of applicable locale names with a locale category name, |
783 | | * which can be used to construct the fallback locale-dependent filenames |
784 | | * or search paths. The returned list is sorted from most desirable to |
785 | | * least desirable and always contains the default locale "C". |
786 | | * |
787 | | * This function consults the environment variables `LANGUAGE`, `LC_ALL`, |
788 | | * @category_name, and `LANG` to find the list of locales specified by the |
789 | | * user. |
790 | | * |
791 | | * g_get_language_names() returns g_get_language_names_with_category("LC_MESSAGES"). |
792 | | * |
793 | | * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by |
794 | | * the thread g_get_language_names_with_category was called from. |
795 | | * It must not be modified or freed. It must be copied if planned to be used in another thread. |
796 | | * |
797 | | * Since: 2.58 |
798 | | */ |
799 | | const gchar * const * |
800 | | g_get_language_names_with_category (const gchar *category_name) |
801 | 0 | { |
802 | 0 | static GPrivate cache_private = G_PRIVATE_INIT ((void (*)(gpointer)) g_hash_table_unref); |
803 | 0 | GHashTable *cache = g_private_get (&cache_private); |
804 | 0 | const gchar *languages; |
805 | 0 | GLanguageNamesCache *name_cache; |
806 | |
|
807 | 0 | g_return_val_if_fail (category_name != NULL, NULL); |
808 | | |
809 | 0 | if (!cache) |
810 | 0 | { |
811 | 0 | cache = g_hash_table_new_full (g_str_hash, g_str_equal, |
812 | 0 | g_free, language_names_cache_free); |
813 | 0 | g_private_set (&cache_private, cache); |
814 | 0 | g_ignore_leak (cache); |
815 | 0 | } |
816 | |
|
817 | 0 | languages = guess_category_value (category_name); |
818 | 0 | if (!languages) |
819 | 0 | languages = "C"; |
820 | |
|
821 | 0 | name_cache = (GLanguageNamesCache *) g_hash_table_lookup (cache, category_name); |
822 | 0 | if (!(name_cache && name_cache->languages && |
823 | 0 | strcmp (name_cache->languages, languages) == 0)) |
824 | 0 | { |
825 | 0 | GPtrArray *array; |
826 | 0 | gchar **alist, **a; |
827 | |
|
828 | 0 | g_hash_table_remove (cache, category_name); |
829 | |
|
830 | 0 | array = g_ptr_array_sized_new (8); |
831 | |
|
832 | 0 | alist = g_strsplit (languages, ":", 0); |
833 | 0 | for (a = alist; *a; a++) |
834 | 0 | append_locale_variants (array, unalias_lang (*a)); |
835 | 0 | g_strfreev (alist); |
836 | 0 | g_ptr_array_add (array, g_strdup ("C")); |
837 | 0 | g_ptr_array_add (array, NULL); |
838 | |
|
839 | 0 | name_cache = g_new0 (GLanguageNamesCache, 1); |
840 | 0 | name_cache->languages = g_strdup (languages); |
841 | 0 | name_cache->language_names = (gchar **) g_ptr_array_free (array, FALSE); |
842 | 0 | g_hash_table_insert (cache, g_strdup (category_name), name_cache); |
843 | 0 | g_ignore_leak (name_cache); |
844 | 0 | } |
845 | |
|
846 | 0 | return (const gchar * const *) name_cache->language_names; |
847 | 0 | } |