/src/irssi/subprojects/glib-2.74.3/glib/gcharset.c
Line | Count | Source |
1 | | /* gcharset.c - Charset information |
2 | | * |
3 | | * Copyright (C) 2011 Red Hat, Inc. |
4 | | * |
5 | | * SPDX-License-Identifier: LGPL-2.1-or-later |
6 | | * |
7 | | * This library is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * This library is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "config.h" |
22 | | |
23 | | #include "gcharset.h" |
24 | | #include "gcharsetprivate.h" |
25 | | |
26 | | #include "garray.h" |
27 | | #include "genviron.h" |
28 | | #include "ghash.h" |
29 | | #include "gmessages.h" |
30 | | #include "gstrfuncs.h" |
31 | | #include "gthread.h" |
32 | | #include "gthreadprivate.h" |
33 | | #ifdef G_OS_WIN32 |
34 | | #include "gwin32.h" |
35 | | #endif |
36 | | |
37 | | #include "libcharset/libcharset.h" |
38 | | |
39 | | #include <string.h> |
40 | | #include <stdio.h> |
41 | | |
42 | | #if (HAVE_LANGINFO_TIME_CODESET || HAVE_LANGINFO_CODESET) |
43 | | #include <langinfo.h> |
44 | | #endif |
45 | | |
46 | | #include <locale.h> |
47 | | #ifdef G_OS_WIN32 |
48 | | #define WIN32_LEAN_AND_MEAN |
49 | | #include <windows.h> |
50 | | #endif |
51 | | |
52 | | G_LOCK_DEFINE_STATIC (aliases); |
53 | | |
54 | | static GHashTable * |
55 | | get_alias_hash (void) |
56 | 0 | { |
57 | 0 | static GHashTable *alias_hash = NULL; |
58 | 0 | const char *aliases; |
59 | |
|
60 | 0 | G_LOCK (aliases); |
61 | |
|
62 | 0 | if (!alias_hash) |
63 | 0 | { |
64 | 0 | alias_hash = g_hash_table_new (g_str_hash, g_str_equal); |
65 | |
|
66 | 0 | aliases = _g_locale_get_charset_aliases (); |
67 | 0 | while (*aliases != '\0') |
68 | 0 | { |
69 | 0 | const char *canonical; |
70 | 0 | const char *alias; |
71 | 0 | const char **alias_array; |
72 | 0 | int count = 0; |
73 | |
|
74 | 0 | alias = aliases; |
75 | 0 | aliases += strlen (aliases) + 1; |
76 | 0 | canonical = aliases; |
77 | 0 | aliases += strlen (aliases) + 1; |
78 | |
|
79 | 0 | alias_array = g_hash_table_lookup (alias_hash, canonical); |
80 | 0 | if (alias_array) |
81 | 0 | { |
82 | 0 | while (alias_array[count]) |
83 | 0 | count++; |
84 | 0 | } |
85 | |
|
86 | 0 | alias_array = g_renew (const char *, alias_array, count + 2); |
87 | 0 | alias_array[count] = alias; |
88 | 0 | alias_array[count + 1] = NULL; |
89 | |
|
90 | 0 | g_hash_table_insert (alias_hash, (char *)canonical, alias_array); |
91 | 0 | } |
92 | 0 | } |
93 | |
|
94 | 0 | G_UNLOCK (aliases); |
95 | |
|
96 | 0 | return alias_hash; |
97 | 0 | } |
98 | | |
/* As an abuse of the alias table, the following routine gets
 * the charsets that are aliases for the canonical name.
 *
 * The result is whatever the alias table holds for @canonical_name:
 * the NULL-terminated array built by get_alias_hash(), owned by the table.
 */
const char **
_g_charset_get_aliases (const char *canonical_name)
{
  return g_hash_table_lookup (get_alias_hash (), canonical_name);
}
109 | | |
110 | | static gboolean |
111 | | g_utf8_get_charset_internal (const char *raw_data, |
112 | | const char **a) |
113 | 8 | { |
114 | | /* Allow CHARSET to override the charset of any locale category. Users should |
115 | | * probably never be setting this — instead, just add the charset after a `.` |
116 | | * in `LANGUAGE`/`LC_ALL`/`LC_*`/`LANG`. I can’t find any reference (in |
117 | | * `git log`, code comments, or man pages) to this environment variable being |
118 | | * standardised or documented or even used anywhere outside GLib. Perhaps it |
119 | | * should eventually be removed. */ |
120 | 8 | const char *charset = g_getenv ("CHARSET"); |
121 | | |
122 | 8 | if (charset && *charset) |
123 | 0 | { |
124 | 0 | *a = charset; |
125 | |
|
126 | 0 | if (charset && strstr (charset, "UTF-8")) |
127 | 0 | return TRUE; |
128 | 0 | else |
129 | 0 | return FALSE; |
130 | 0 | } |
131 | | |
132 | | /* The libcharset code tries to be thread-safe without |
133 | | * a lock, but has a memory leak and a missing memory |
134 | | * barrier, so we lock for it |
135 | | */ |
136 | 8 | G_LOCK (aliases); |
137 | 8 | charset = _g_locale_charset_unalias (raw_data); |
138 | 8 | G_UNLOCK (aliases); |
139 | | |
140 | 8 | if (charset && *charset) |
141 | 8 | { |
142 | 8 | *a = charset; |
143 | | |
144 | 8 | if (charset && strstr (charset, "UTF-8")) |
145 | 0 | return TRUE; |
146 | 8 | else |
147 | 8 | return FALSE; |
148 | 8 | } |
149 | | |
150 | | /* Assume this for compatibility at present. */ |
151 | 0 | *a = "US-ASCII"; |
152 | |
|
153 | 0 | return FALSE; |
154 | 8 | } |
155 | | |
156 | | typedef struct _GCharsetCache GCharsetCache; |
157 | | |
158 | | struct _GCharsetCache { |
159 | | gboolean is_utf8; |
160 | | gchar *raw; |
161 | | gchar *charset; |
162 | | }; |
163 | | |
164 | | static void |
165 | | charset_cache_free (gpointer data) |
166 | 0 | { |
167 | 0 | GCharsetCache *cache = data; |
168 | 0 | g_free (cache->raw); |
169 | 0 | g_free (cache->charset); |
170 | 0 | g_free (cache); |
171 | 0 | } |
172 | | |
173 | | /** |
174 | | * g_get_charset: |
175 | | * @charset: (out) (optional) (transfer none): return location for character set |
176 | | * name, or %NULL. |
177 | | * |
178 | | * Obtains the character set for the [current locale][setlocale]; you |
179 | | * might use this character set as an argument to g_convert(), to convert |
180 | | * from the current locale's encoding to some other encoding. (Frequently |
181 | | * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.) |
182 | | * |
183 | | * On Windows the character set returned by this function is the |
184 | | * so-called system default ANSI code-page. That is the character set |
185 | | * used by the "narrow" versions of C library and Win32 functions that |
186 | | * handle file names. It might be different from the character set |
187 | | * used by the C library's current locale. |
188 | | * |
189 | | * On Linux, the character set is found by consulting nl_langinfo() if |
190 | | * available. If not, the environment variables `LC_ALL`, `LC_CTYPE`, `LANG` |
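191 | | * and `CHARSET` are queried in order. In either case, a non-empty `CHARSET` environment variable, if set when the charset is computed, overrides the result. |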
192 | | * |
193 | | * The return value is %TRUE if the locale's encoding is UTF-8, in that |
194 | | * case you can perhaps avoid calling g_convert(). |
195 | | * |
196 | | * The string returned in @charset is not allocated, and should not be |
197 | | * freed. |
198 | | * |
199 | | * Returns: %TRUE if the returned charset is UTF-8 |
200 | | */ |
201 | | gboolean |
202 | | g_get_charset (const char **charset) |
203 | 1.23k | { |
204 | 1.23k | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
205 | 1.23k | GCharsetCache *cache = g_private_get (&cache_private); |
206 | 1.23k | const gchar *raw; |
207 | | |
208 | 1.23k | if (!cache) |
209 | 8 | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
210 | | |
211 | 1.23k | G_LOCK (aliases); |
212 | 1.23k | raw = _g_locale_charset_raw (); |
213 | 1.23k | G_UNLOCK (aliases); |
214 | | |
215 | 1.23k | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
216 | 8 | { |
217 | 8 | const gchar *new_charset; |
218 | | |
219 | 8 | g_free (cache->raw); |
220 | 8 | g_free (cache->charset); |
221 | 8 | cache->raw = g_strdup (raw); |
222 | 8 | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
223 | 8 | cache->charset = g_strdup (new_charset); |
224 | 8 | } |
225 | | |
226 | 1.23k | if (charset) |
227 | 1.23k | *charset = cache->charset; |
228 | | |
229 | 1.23k | return cache->is_utf8; |
230 | 1.23k | } |
231 | | |
232 | | /* |
233 | | * Do the same as g_get_charset() but it temporarily set locale (LC_ALL to |
234 | | * LC_TIME) to correctly check for charset about time conversion relatives. |
235 | | * |
236 | | * Returns: %TRUE if the returned charset is UTF-8 |
237 | | */ |
238 | | gboolean |
239 | | _g_get_time_charset (const char **charset) |
240 | 0 | { |
241 | 0 | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
242 | 0 | GCharsetCache *cache = g_private_get (&cache_private); |
243 | 0 | const gchar *raw; |
244 | |
|
245 | 0 | if (!cache) |
246 | 0 | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
247 | |
|
248 | 0 | #ifdef HAVE_LANGINFO_TIME_CODESET |
249 | 0 | raw = nl_langinfo (_NL_TIME_CODESET); |
250 | | #else |
251 | | G_LOCK (aliases); |
252 | | raw = _g_locale_charset_raw (); |
253 | | G_UNLOCK (aliases); |
254 | | #endif |
255 | |
|
256 | 0 | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
257 | 0 | { |
258 | 0 | const gchar *new_charset; |
259 | |
|
260 | 0 | g_free (cache->raw); |
261 | 0 | g_free (cache->charset); |
262 | 0 | cache->raw = g_strdup (raw); |
263 | 0 | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
264 | 0 | cache->charset = g_strdup (new_charset); |
265 | 0 | } |
266 | |
|
267 | 0 | if (charset) |
268 | 0 | *charset = cache->charset; |
269 | |
|
270 | 0 | return cache->is_utf8; |
271 | 0 | } |
272 | | /* |
273 | | * Do the same as g_get_charset() but it temporarily set locale (LC_ALL to |
274 | | * LC_CTYPE) to correctly check for charset about CTYPE conversion relatives. |
275 | | * |
276 | | * Returns: %TRUE if the returned charset is UTF-8 |
277 | | */ |
278 | | gboolean |
279 | | _g_get_ctype_charset (const char **charset) |
280 | 0 | { |
281 | 0 | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
282 | 0 | GCharsetCache *cache = g_private_get (&cache_private); |
283 | 0 | const gchar *raw; |
284 | |
|
285 | 0 | if (!cache) |
286 | 0 | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
287 | |
|
288 | 0 | #ifdef HAVE_LANGINFO_CODESET |
289 | 0 | raw = nl_langinfo (CODESET); |
290 | | #else |
291 | | G_LOCK (aliases); |
292 | | raw = _g_locale_charset_raw (); |
293 | | G_UNLOCK (aliases); |
294 | | #endif |
295 | |
|
296 | 0 | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
297 | 0 | { |
298 | 0 | const gchar *new_charset; |
299 | |
|
300 | 0 | g_free (cache->raw); |
301 | 0 | g_free (cache->charset); |
302 | 0 | cache->raw = g_strdup (raw); |
303 | 0 | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
304 | 0 | cache->charset = g_strdup (new_charset); |
305 | 0 | } |
306 | |
|
307 | 0 | if (charset) |
308 | 0 | *charset = cache->charset; |
309 | |
|
310 | 0 | return cache->is_utf8; |
311 | 0 | } |
312 | | |
313 | | /** |
314 | | * g_get_codeset: |
315 | | * |
316 | | * Gets the character set for the current locale. |
317 | | * |
318 | | * Returns: a newly allocated string containing the name |
319 | | * of the character set. This string must be freed with g_free(). |
320 | | */ |
321 | | gchar * |
322 | | g_get_codeset (void) |
323 | 0 | { |
324 | 0 | const gchar *charset; |
325 | |
|
326 | 0 | g_get_charset (&charset); |
327 | |
|
328 | 0 | return g_strdup (charset); |
329 | 0 | } |
330 | | |
331 | | /** |
332 | | * g_get_console_charset: |
333 | | * @charset: (out) (optional) (transfer none): return location for character set |
334 | | * name, or %NULL. |
335 | | * |
336 | | * Obtains the character set used by the console attached to the process, |
337 | | * which is suitable for printing output to the terminal. |
338 | | * |
339 | | * Usually this matches the result returned by g_get_charset(), but in |
340 | | * environments where the locale's character set does not match the encoding |
341 | | * of the console this function tries to guess a more suitable value instead. |
342 | | * |
343 | | * On Windows the character set returned by this function is the |
344 | | * output code page used by the console associated with the calling process. |
345 | | * If the codepage can't be determined (for example because there is no |
346 | | * console attached) UTF-8 is assumed. |
347 | | * |
348 | | * The return value is %TRUE if the locale's encoding is UTF-8, in that |
349 | | * case you can perhaps avoid calling g_convert(). |
350 | | * |
351 | | * The string returned in @charset is not allocated, and should not be |
352 | | * freed. |
353 | | * |
354 | | * Returns: %TRUE if the returned charset is UTF-8 |
355 | | * |
356 | | * Since: 2.62 |
357 | | */ |
358 | | gboolean |
359 | | g_get_console_charset (const char **charset) |
360 | 2 | { |
361 | | #ifdef G_OS_WIN32 |
362 | | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
363 | | GCharsetCache *cache = g_private_get (&cache_private); |
364 | | const gchar *locale; |
365 | | unsigned int cp; |
366 | | char buf[2 + 20 + 1]; /* "CP" + G_MAXUINT64 (to be safe) in decimal form (20 bytes) + "\0" */ |
367 | | const gchar *raw = NULL; |
368 | | |
369 | | if (!cache) |
370 | | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
371 | | |
372 | | /* first try to query $LANG (works for Cygwin/MSYS/MSYS2 and others using mintty) */ |
373 | | locale = g_getenv ("LANG"); |
374 | | if (locale != NULL && locale[0] != '\0') |
375 | | { |
376 | | /* If the locale name contains an encoding after the dot, return it. */ |
377 | | const char *dot = strchr (locale, '.'); |
378 | | |
379 | | if (dot != NULL) |
380 | | { |
381 | | const char *modifier; |
382 | | |
383 | | dot++; |
384 | | /* Look for the possible @... trailer and remove it, if any. */ |
385 | | modifier = strchr (dot, '@'); |
386 | | if (modifier == NULL) |
387 | | raw = dot; |
388 | | else if ((gsize) (modifier - dot) < sizeof (buf)) |
389 | | { |
390 | | memcpy (buf, dot, modifier - dot); |
391 | | buf[modifier - dot] = '\0'; |
392 | | raw = buf; |
393 | | } |
394 | | } |
395 | | } |
396 | | /* next try querying console codepage using native win32 API */ |
397 | | if (raw == NULL) |
398 | | { |
399 | | cp = GetConsoleOutputCP (); |
400 | | if (cp) |
401 | | { |
402 | | sprintf (buf, "CP%u", cp); |
403 | | raw = buf; |
404 | | } |
405 | | else if (GetLastError () != ERROR_INVALID_HANDLE) |
406 | | { |
407 | | gchar *emsg = g_win32_error_message (GetLastError ()); |
408 | | g_warning ("Failed to determine console output code page: %s. " |
409 | | "Falling back to UTF-8", emsg); |
410 | | g_free (emsg); |
411 | | } |
412 | | } |
413 | | /* fall-back to UTF-8 if the rest failed (it's a universal default) */ |
414 | | if (raw == NULL) |
415 | | raw = "UTF-8"; |
416 | | |
417 | | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
418 | | { |
419 | | const gchar *new_charset; |
420 | | |
421 | | g_free (cache->raw); |
422 | | g_free (cache->charset); |
423 | | cache->raw = g_strdup (raw); |
424 | | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
425 | | cache->charset = g_strdup (new_charset); |
426 | | } |
427 | | |
428 | | if (charset) |
429 | | *charset = cache->charset; |
430 | | |
431 | | return cache->is_utf8; |
432 | | #else |
433 | | /* assume the locale settings match the console encoding on non-Windows OSs */ |
434 | 2 | return g_get_charset (charset); |
435 | 2 | #endif |
436 | 2 | } |
437 | | |
438 | | #ifndef G_OS_WIN32 |
439 | | |
440 | | /* read an alias file for the locales */ |
441 | | static void |
442 | | read_aliases (const gchar *file, |
443 | | GHashTable *alias_table) |
444 | 0 | { |
445 | 0 | FILE *fp; |
446 | 0 | char buf[256]; |
447 | |
|
448 | 0 | fp = fopen (file,"r"); |
449 | 0 | if (!fp) |
450 | 0 | return; |
451 | 0 | while (fgets (buf, 256, fp)) |
452 | 0 | { |
453 | 0 | char *p, *q; |
454 | |
|
455 | 0 | g_strstrip (buf); |
456 | | |
457 | | /* Line is a comment */ |
458 | 0 | if ((buf[0] == '#') || (buf[0] == '\0')) |
459 | 0 | continue; |
460 | | |
461 | | /* Reads first column */ |
462 | 0 | for (p = buf, q = NULL; *p; p++) { |
463 | 0 | if ((*p == '\t') || (*p == ' ') || (*p == ':')) { |
464 | 0 | *p = '\0'; |
465 | 0 | q = p+1; |
466 | 0 | while ((*q == '\t') || (*q == ' ')) { |
467 | 0 | q++; |
468 | 0 | } |
469 | 0 | break; |
470 | 0 | } |
471 | 0 | } |
472 | | /* The line only had one column */ |
473 | 0 | if (!q || *q == '\0') |
474 | 0 | continue; |
475 | | |
476 | | /* Read second column */ |
477 | 0 | for (p = q; *p; p++) { |
478 | 0 | if ((*p == '\t') || (*p == ' ')) { |
479 | 0 | *p = '\0'; |
480 | 0 | break; |
481 | 0 | } |
482 | 0 | } |
483 | | |
484 | | /* Add to alias table if necessary */ |
485 | 0 | if (!g_hash_table_lookup (alias_table, buf)) { |
486 | 0 | g_hash_table_insert (alias_table, g_strdup (buf), g_strdup (q)); |
487 | 0 | } |
488 | 0 | } |
489 | 0 | fclose (fp); |
490 | 0 | } |
491 | | |
492 | | #endif |
493 | | |
494 | | static char * |
495 | | unalias_lang (char *lang) |
496 | 0 | { |
497 | 0 | #ifndef G_OS_WIN32 |
498 | 0 | static GHashTable *alias_table = NULL; |
499 | 0 | char *p; |
500 | 0 | int i; |
501 | |
|
502 | 0 | if (g_once_init_enter (&alias_table)) |
503 | 0 | { |
504 | 0 | GHashTable *table = g_hash_table_new (g_str_hash, g_str_equal); |
505 | 0 | read_aliases ("/usr/share/locale/locale.alias", table); |
506 | 0 | g_once_init_leave (&alias_table, table); |
507 | 0 | } |
508 | |
|
509 | 0 | i = 0; |
510 | 0 | while ((p = g_hash_table_lookup (alias_table, lang)) && (strcmp (p, lang) != 0)) |
511 | 0 | { |
512 | 0 | lang = p; |
513 | 0 | if (i++ == 30) |
514 | 0 | { |
515 | 0 | static gboolean said_before = FALSE; |
516 | 0 | if (!said_before) |
517 | 0 | g_warning ("Too many alias levels for a locale, " |
518 | 0 | "may indicate a loop"); |
519 | 0 | said_before = TRUE; |
520 | 0 | return lang; |
521 | 0 | } |
522 | 0 | } |
523 | 0 | #endif |
524 | 0 | return lang; |
525 | 0 | } |
526 | | |
/* Bit flags naming the optional components of a locale specification.
 * The ordering here is from least significant to most significant.
 */
enum
{
  COMPONENT_CODESET   = 1 << 0,  /* a ".codeset" part is present */
  COMPONENT_TERRITORY = 1 << 1,  /* a "_territory" part is present */
  COMPONENT_MODIFIER  = 1 << 2   /* an "@modifier" part is present */
};
536 | | |
537 | | /* Break an X/Open style locale specification into components |
538 | | */ |
539 | | static guint |
540 | | explode_locale (const gchar *locale, |
541 | | gchar **language, |
542 | | gchar **territory, |
543 | | gchar **codeset, |
544 | | gchar **modifier) |
545 | 0 | { |
546 | 0 | const gchar *uscore_pos; |
547 | 0 | const gchar *at_pos; |
548 | 0 | const gchar *dot_pos; |
549 | |
|
550 | 0 | guint mask = 0; |
551 | |
|
552 | 0 | uscore_pos = strchr (locale, '_'); |
553 | 0 | dot_pos = strchr (uscore_pos ? uscore_pos : locale, '.'); |
554 | 0 | at_pos = strchr (dot_pos ? dot_pos : (uscore_pos ? uscore_pos : locale), '@'); |
555 | |
|
556 | 0 | if (at_pos) |
557 | 0 | { |
558 | 0 | mask |= COMPONENT_MODIFIER; |
559 | 0 | *modifier = g_strdup (at_pos); |
560 | 0 | } |
561 | 0 | else |
562 | 0 | at_pos = locale + strlen (locale); |
563 | |
|
564 | 0 | if (dot_pos) |
565 | 0 | { |
566 | 0 | mask |= COMPONENT_CODESET; |
567 | 0 | *codeset = g_strndup (dot_pos, at_pos - dot_pos); |
568 | 0 | } |
569 | 0 | else |
570 | 0 | dot_pos = at_pos; |
571 | |
|
572 | 0 | if (uscore_pos) |
573 | 0 | { |
574 | 0 | mask |= COMPONENT_TERRITORY; |
575 | 0 | *territory = g_strndup (uscore_pos, dot_pos - uscore_pos); |
576 | 0 | } |
577 | 0 | else |
578 | 0 | uscore_pos = dot_pos; |
579 | |
|
580 | 0 | *language = g_strndup (locale, uscore_pos - locale); |
581 | |
|
582 | 0 | return mask; |
583 | 0 | } |
584 | | |
585 | | /* |
586 | | * Compute all interesting variants for a given locale name - |
587 | | * by stripping off different components of the value. |
588 | | * |
589 | | * For simplicity, we assume that the locale is in |
590 | | * X/Open format: language[_territory][.codeset][@modifier] |
591 | | * |
592 | | * TODO: Extend this to handle the CEN format (see the GNUlibc docs) |
593 | | * as well. We could just copy the code from glibc wholesale |
594 | | * but it is big, ugly, and complicated, so I'm reluctant |
595 | | * to do so when this should handle 99% of the time... |
596 | | */ |
597 | | static void |
598 | | append_locale_variants (GPtrArray *array, |
599 | | const gchar *locale) |
600 | 0 | { |
601 | 0 | gchar *language = NULL; |
602 | 0 | gchar *territory = NULL; |
603 | 0 | gchar *codeset = NULL; |
604 | 0 | gchar *modifier = NULL; |
605 | |
|
606 | 0 | guint mask; |
607 | 0 | guint i, j; |
608 | |
|
609 | 0 | g_return_if_fail (locale != NULL); |
610 | | |
611 | 0 | mask = explode_locale (locale, &language, &territory, &codeset, &modifier); |
612 | | |
613 | | /* Iterate through all possible combinations, from least attractive |
614 | | * to most attractive. |
615 | | */ |
616 | 0 | for (j = 0; j <= mask; ++j) |
617 | 0 | { |
618 | 0 | i = mask - j; |
619 | |
|
620 | 0 | if ((i & ~mask) == 0) |
621 | 0 | { |
622 | 0 | gchar *val = g_strconcat (language, |
623 | 0 | (i & COMPONENT_TERRITORY) ? territory : "", |
624 | 0 | (i & COMPONENT_CODESET) ? codeset : "", |
625 | 0 | (i & COMPONENT_MODIFIER) ? modifier : "", |
626 | 0 | NULL); |
627 | 0 | g_ptr_array_add (array, val); |
628 | 0 | } |
629 | 0 | } |
630 | |
|
631 | 0 | g_free (language); |
632 | 0 | if (mask & COMPONENT_CODESET) |
633 | 0 | g_free (codeset); |
634 | 0 | if (mask & COMPONENT_TERRITORY) |
635 | 0 | g_free (territory); |
636 | 0 | if (mask & COMPONENT_MODIFIER) |
637 | 0 | g_free (modifier); |
638 | 0 | } |
639 | | |
640 | | /** |
641 | | * g_get_locale_variants: |
642 | | * @locale: a locale identifier |
643 | | * |
644 | | * Returns a list of derived variants of @locale, which can be used to |
645 | | * e.g. construct locale-dependent filenames or search paths. The returned |
646 | | * list is sorted from most desirable to least desirable. |
647 | | * This function handles territory, charset and extra locale modifiers. See |
648 | | * [`setlocale(3)`](man:setlocale) for information about locales and their format. |
649 | | * |
650 | | * @locale itself is guaranteed to be returned in the output. |
651 | | * |
652 | | * For example, if @locale is `fr_BE`, then the returned list |
653 | | * is `fr_BE`, `fr`. If @locale is `en_GB.UTF-8@euro`, then the returned list |
654 | | * is `en_GB.UTF-8@euro`, `en_GB.UTF-8`, `en_GB@euro`, `en_GB`, `en.UTF-8@euro`, |
655 | | * `en.UTF-8`, `en@euro`, `en`. |
656 | | * |
657 | | * If you need the list of variants for the current locale, |
658 | | * use g_get_language_names(). |
659 | | * |
660 | | * Returns: (transfer full) (array zero-terminated=1) (element-type utf8): a newly |
661 | | * allocated array of newly allocated strings with the locale variants. Free with |
662 | | * g_strfreev(). |
663 | | * |
664 | | * Since: 2.28 |
665 | | */ |
666 | | gchar ** |
667 | | g_get_locale_variants (const gchar *locale) |
668 | 0 | { |
669 | 0 | GPtrArray *array; |
670 | |
|
671 | 0 | g_return_val_if_fail (locale != NULL, NULL); |
672 | | |
673 | 0 | array = g_ptr_array_sized_new (8); |
674 | 0 | append_locale_variants (array, locale); |
675 | 0 | g_ptr_array_add (array, NULL); |
676 | |
|
677 | 0 | return (gchar **) g_ptr_array_free (array, FALSE); |
678 | 0 | } |
679 | | |
680 | | /* The following is (partly) taken from the gettext package. |
681 | | Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. */ |
682 | | |
683 | | static const gchar * |
684 | | guess_category_value (const gchar *category_name) |
685 | 0 | { |
686 | 0 | const gchar *retval; |
687 | | |
688 | | /* The highest priority value is the 'LANGUAGE' environment |
689 | | variable. This is a GNU extension. */ |
690 | 0 | retval = g_getenv ("LANGUAGE"); |
691 | 0 | if ((retval != NULL) && (retval[0] != '\0')) |
692 | 0 | return retval; |
693 | | |
694 | | /* 'LANGUAGE' is not set. So we have to proceed with the POSIX |
695 | | methods of looking to 'LC_ALL', 'LC_xxx', and 'LANG'. On some |
696 | | systems this can be done by the 'setlocale' function itself. */ |
697 | | |
698 | | /* Setting of LC_ALL overwrites all other. */ |
699 | 0 | retval = g_getenv ("LC_ALL"); |
700 | 0 | if ((retval != NULL) && (retval[0] != '\0')) |
701 | 0 | return retval; |
702 | | |
703 | | /* Next comes the name of the desired category. */ |
704 | 0 | retval = g_getenv (category_name); |
705 | 0 | if ((retval != NULL) && (retval[0] != '\0')) |
706 | 0 | return retval; |
707 | | |
708 | | /* Last possibility is the LANG environment variable. */ |
709 | 0 | retval = g_getenv ("LANG"); |
710 | 0 | if ((retval != NULL) && (retval[0] != '\0')) |
711 | 0 | return retval; |
712 | | |
713 | | #ifdef G_PLATFORM_WIN32 |
714 | | /* g_win32_getlocale() first checks for LC_ALL, LC_MESSAGES and |
715 | | * LANG, which we already did above. Oh well. The main point of |
716 | | * calling g_win32_getlocale() is to get the thread's locale as used |
717 | | * by Windows and the Microsoft C runtime (in the "English_United |
718 | | * States" format) translated into the Unixish format. |
719 | | */ |
720 | | { |
721 | | char *locale = g_win32_getlocale (); |
722 | | retval = g_intern_string (locale); |
723 | | g_free (locale); |
724 | | return retval; |
725 | | } |
726 | | #endif |
727 | | |
728 | 0 | return NULL; |
729 | 0 | } |
730 | | |
731 | | typedef struct _GLanguageNamesCache GLanguageNamesCache; |
732 | | |
733 | | struct _GLanguageNamesCache { |
734 | | gchar *languages; |
735 | | gchar **language_names; |
736 | | }; |
737 | | |
738 | | static void |
739 | | language_names_cache_free (gpointer data) |
740 | 0 | { |
741 | 0 | GLanguageNamesCache *cache = data; |
742 | 0 | g_free (cache->languages); |
743 | 0 | g_strfreev (cache->language_names); |
744 | 0 | g_free (cache); |
745 | 0 | } |
746 | | |
747 | | /** |
748 | | * g_get_language_names: |
749 | | * |
750 | | * Computes a list of applicable locale names, which can be used to |
751 | | * e.g. construct locale-dependent filenames or search paths. The returned |
752 | | * list is sorted from most desirable to least desirable and always contains |
753 | | * the default locale "C". |
754 | | * |
755 | | * For example, if LANGUAGE=de:en_US, then the returned list is |
756 | | * "de", "en_US", "en", "C". |
757 | | * |
758 | | * This function consults the environment variables `LANGUAGE`, `LC_ALL`, |
759 | | * `LC_MESSAGES` and `LANG` to find the list of locales specified by the |
760 | | * user. |
761 | | * |
762 | | * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by GLib |
763 | | * that must not be modified or freed. |
764 | | * |
765 | | * Since: 2.6 |
766 | | */ |
767 | | const gchar * const * |
768 | | g_get_language_names (void) |
769 | 0 | { |
770 | 0 | return g_get_language_names_with_category ("LC_MESSAGES"); |
771 | 0 | } |
772 | | |
773 | | /** |
774 | | * g_get_language_names_with_category: |
775 | | * @category_name: a locale category name |
776 | | * |
777 | | * Computes a list of applicable locale names with a locale category name, |
778 | | * which can be used to construct the fallback locale-dependent filenames |
779 | | * or search paths. The returned list is sorted from most desirable to |
780 | | * least desirable and always contains the default locale "C". |
781 | | * |
782 | | * This function consults the environment variables `LANGUAGE`, `LC_ALL`, |
783 | | * @category_name, and `LANG` to find the list of locales specified by the |
784 | | * user. |
785 | | * |
786 | | * g_get_language_names() returns g_get_language_names_with_category("LC_MESSAGES"). |
787 | | * |
788 | | * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by |
789 | | * the thread g_get_language_names_with_category was called from. |
790 | | * It must not be modified or freed. It must be copied if planned to be used in another thread. |
791 | | * |
792 | | * Since: 2.58 |
793 | | */ |
794 | | const gchar * const * |
795 | | g_get_language_names_with_category (const gchar *category_name) |
796 | 0 | { |
797 | 0 | static GPrivate cache_private = G_PRIVATE_INIT ((void (*)(gpointer)) g_hash_table_unref); |
798 | 0 | GHashTable *cache = g_private_get (&cache_private); |
799 | 0 | const gchar *languages; |
800 | 0 | GLanguageNamesCache *name_cache; |
801 | |
|
802 | 0 | g_return_val_if_fail (category_name != NULL, NULL); |
803 | | |
804 | 0 | if (!cache) |
805 | 0 | { |
806 | 0 | cache = g_hash_table_new_full (g_str_hash, g_str_equal, |
807 | 0 | g_free, language_names_cache_free); |
808 | 0 | g_private_set (&cache_private, cache); |
809 | 0 | } |
810 | |
|
811 | 0 | languages = guess_category_value (category_name); |
812 | 0 | if (!languages) |
813 | 0 | languages = "C"; |
814 | |
|
815 | 0 | name_cache = (GLanguageNamesCache *) g_hash_table_lookup (cache, category_name); |
816 | 0 | if (!(name_cache && name_cache->languages && |
817 | 0 | strcmp (name_cache->languages, languages) == 0)) |
818 | 0 | { |
819 | 0 | GPtrArray *array; |
820 | 0 | gchar **alist, **a; |
821 | |
|
822 | 0 | g_hash_table_remove (cache, category_name); |
823 | |
|
824 | 0 | array = g_ptr_array_sized_new (8); |
825 | |
|
826 | 0 | alist = g_strsplit (languages, ":", 0); |
827 | 0 | for (a = alist; *a; a++) |
828 | 0 | append_locale_variants (array, unalias_lang (*a)); |
829 | 0 | g_strfreev (alist); |
830 | 0 | g_ptr_array_add (array, g_strdup ("C")); |
831 | 0 | g_ptr_array_add (array, NULL); |
832 | |
|
833 | 0 | name_cache = g_new0 (GLanguageNamesCache, 1); |
834 | 0 | name_cache->languages = g_strdup (languages); |
835 | 0 | name_cache->language_names = (gchar **) g_ptr_array_free (array, FALSE); |
836 | 0 | g_hash_table_insert (cache, g_strdup (category_name), name_cache); |
837 | 0 | } |
838 | |
|
839 | 0 | return (const gchar * const *) name_cache->language_names; |
840 | 0 | } |