/src/rauc/subprojects/glib-2.76.5/glib/gcharset.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* gcharset.c - Charset information |
2 | | * |
3 | | * Copyright (C) 2011 Red Hat, Inc. |
4 | | * |
5 | | * SPDX-License-Identifier: LGPL-2.1-or-later |
6 | | * |
7 | | * This library is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * This library is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public |
18 | | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "config.h" |
22 | | |
23 | | #include "gcharset.h" |
24 | | #include "gcharsetprivate.h" |
25 | | |
26 | | #include "garray.h" |
27 | | #include "genviron.h" |
28 | | #include "ghash.h" |
29 | | #include "gmessages.h" |
30 | | #include "gstrfuncs.h" |
31 | | #include "gthread.h" |
32 | | #include "gthreadprivate.h" |
33 | | #ifdef G_OS_WIN32 |
34 | | #include "gwin32.h" |
35 | | #endif |
36 | | |
37 | | #include "libcharset/libcharset.h" |
38 | | |
39 | | #include <string.h> |
40 | | #include <stdio.h> |
41 | | |
42 | | #if (HAVE_LANGINFO_TIME_CODESET || HAVE_LANGINFO_CODESET) |
43 | | #include <langinfo.h> |
44 | | #endif |
45 | | |
46 | | #include <locale.h> |
47 | | #ifdef G_OS_WIN32 |
48 | | #define WIN32_LEAN_AND_MEAN |
49 | | #include <windows.h> |
50 | | #endif |
51 | | |
52 | | G_LOCK_DEFINE_STATIC (aliases); |
53 | | |
54 | | static GHashTable * |
55 | | get_alias_hash (void) |
56 | 0 | { |
57 | 0 | static GHashTable *alias_hash = NULL; |
58 | 0 | const char *aliases; |
59 | |
|
60 | 0 | G_LOCK (aliases); |
61 | |
|
62 | 0 | if (!alias_hash) |
63 | 0 | { |
64 | 0 | alias_hash = g_hash_table_new (g_str_hash, g_str_equal); |
65 | |
|
66 | 0 | aliases = _g_locale_get_charset_aliases (); |
67 | 0 | while (*aliases != '\0') |
68 | 0 | { |
69 | 0 | const char *canonical; |
70 | 0 | const char *alias; |
71 | 0 | const char **alias_array; |
72 | 0 | int count = 0; |
73 | |
|
74 | 0 | alias = aliases; |
75 | 0 | aliases += strlen (aliases) + 1; |
76 | 0 | canonical = aliases; |
77 | 0 | aliases += strlen (aliases) + 1; |
78 | |
|
79 | 0 | alias_array = g_hash_table_lookup (alias_hash, canonical); |
80 | 0 | if (alias_array) |
81 | 0 | { |
82 | 0 | while (alias_array[count]) |
83 | 0 | count++; |
84 | 0 | } |
85 | |
|
86 | 0 | alias_array = g_renew (const char *, alias_array, count + 2); |
87 | 0 | alias_array[count] = alias; |
88 | 0 | alias_array[count + 1] = NULL; |
89 | |
|
90 | 0 | g_hash_table_insert (alias_hash, (char *)canonical, alias_array); |
91 | 0 | } |
92 | 0 | } |
93 | |
|
94 | 0 | G_UNLOCK (aliases); |
95 | |
|
96 | 0 | return alias_hash; |
97 | 0 | } |
98 | | |
/* The alias table is (ab)used here for the reverse question: given a
 * canonical charset name, which names are aliases for it?
 *
 * Returns the NULL-terminated alias array stored for @canonical_name, or
 * NULL when the table has no entry for it.
 */
const char **
_g_charset_get_aliases (const char *canonical_name)
{
  return g_hash_table_lookup (get_alias_hash (), canonical_name);
}
109 | | |
110 | | static gboolean |
111 | | g_utf8_get_charset_internal (const char *raw_data, |
112 | | const char **a) |
113 | 1 | { |
114 | | /* Allow CHARSET to override the charset of any locale category. Users should |
115 | | * probably never be setting this — instead, just add the charset after a `.` |
116 | | * in `LANGUAGE`/`LC_ALL`/`LC_*`/`LANG`. I can’t find any reference (in |
117 | | * `git log`, code comments, or man pages) to this environment variable being |
118 | | * standardised or documented or even used anywhere outside GLib. Perhaps it |
119 | | * should eventually be removed. */ |
120 | 1 | const char *charset = g_getenv ("CHARSET"); |
121 | | |
122 | 1 | if (charset && *charset) |
123 | 0 | { |
124 | 0 | *a = charset; |
125 | |
|
126 | 0 | if (charset && strstr (charset, "UTF-8")) |
127 | 0 | return TRUE; |
128 | 0 | else |
129 | 0 | return FALSE; |
130 | 0 | } |
131 | | |
132 | | /* The libcharset code tries to be thread-safe without |
133 | | * a lock, but has a memory leak and a missing memory |
134 | | * barrier, so we lock for it |
135 | | */ |
136 | 1 | G_LOCK (aliases); |
137 | 1 | charset = _g_locale_charset_unalias (raw_data); |
138 | 1 | G_UNLOCK (aliases); |
139 | | |
140 | 1 | if (charset && *charset) |
141 | 1 | { |
142 | 1 | *a = charset; |
143 | | |
144 | 1 | if (charset && strstr (charset, "UTF-8")) |
145 | 0 | return TRUE; |
146 | 1 | else |
147 | 1 | return FALSE; |
148 | 1 | } |
149 | | |
150 | | /* Assume this for compatibility at present. */ |
151 | 0 | *a = "US-ASCII"; |
152 | |
|
153 | 0 | return FALSE; |
154 | 1 | } |
155 | | |
156 | | typedef struct _GCharsetCache GCharsetCache; |
157 | | |
158 | | struct _GCharsetCache { |
159 | | gboolean is_utf8; |
160 | | gchar *raw; |
161 | | gchar *charset; |
162 | | }; |
163 | | |
164 | | static void |
165 | | charset_cache_free (gpointer data) |
166 | 0 | { |
167 | 0 | GCharsetCache *cache = data; |
168 | 0 | g_free (cache->raw); |
169 | 0 | g_free (cache->charset); |
170 | 0 | g_free (cache); |
171 | 0 | } |
172 | | |
173 | | /** |
174 | | * g_get_charset: |
175 | | * @charset: (out) (optional) (transfer none): return location for character set |
176 | | * name, or %NULL. |
177 | | * |
178 | | * Obtains the character set for the [current locale][setlocale]; you |
179 | | * might use this character set as an argument to g_convert(), to convert |
180 | | * from the current locale's encoding to some other encoding. (Frequently |
181 | | * g_locale_to_utf8() and g_locale_from_utf8() are nice shortcuts, though.) |
182 | | * |
183 | | * On Windows the character set returned by this function is the |
184 | | * so-called system default ANSI code-page. That is the character set |
185 | | * used by the "narrow" versions of C library and Win32 functions that |
186 | | * handle file names. It might be different from the character set |
187 | | * used by the C library's current locale. |
188 | | * |
189 | | * On Linux, the character set is found by consulting nl_langinfo() if |
190 | | * available. If not, the environment variables `LC_ALL`, `LC_CTYPE`, `LANG` |
191 | | * and `CHARSET` are queried in order. nl_langinfo() returns the C locale if |
192 | | * no locale has been loaded by setlocale(). |
193 | | * |
194 | | * The return value is %TRUE if the locale's encoding is UTF-8, in that |
195 | | * case you can perhaps avoid calling g_convert(). |
196 | | * |
197 | | * The string returned in @charset is not allocated, and should not be |
198 | | * freed. |
199 | | * |
200 | | * Returns: %TRUE if the returned charset is UTF-8 |
201 | | */ |
202 | | gboolean |
203 | | g_get_charset (const char **charset) |
204 | 6 | { |
205 | 6 | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
206 | 6 | GCharsetCache *cache = g_private_get (&cache_private); |
207 | 6 | const gchar *raw; |
208 | | |
209 | 6 | if (!cache) |
210 | 1 | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
211 | | |
212 | 6 | G_LOCK (aliases); |
213 | 6 | raw = _g_locale_charset_raw (); |
214 | 6 | G_UNLOCK (aliases); |
215 | | |
216 | 6 | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
217 | 1 | { |
218 | 1 | const gchar *new_charset; |
219 | | |
220 | 1 | g_free (cache->raw); |
221 | 1 | g_free (cache->charset); |
222 | 1 | cache->raw = g_strdup (raw); |
223 | 1 | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
224 | 1 | cache->charset = g_strdup (new_charset); |
225 | 1 | } |
226 | | |
227 | 6 | if (charset) |
228 | 4 | *charset = cache->charset; |
229 | | |
230 | 6 | return cache->is_utf8; |
231 | 6 | } |
232 | | |
233 | | /* |
234 | | * Do the same as g_get_charset() but it temporarily set locale (LC_ALL to |
235 | | * LC_TIME) to correctly check for charset about time conversion relatives. |
236 | | * |
237 | | * Returns: %TRUE if the returned charset is UTF-8 |
238 | | */ |
239 | | gboolean |
240 | | _g_get_time_charset (const char **charset) |
241 | 0 | { |
242 | 0 | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
243 | 0 | GCharsetCache *cache = g_private_get (&cache_private); |
244 | 0 | const gchar *raw; |
245 | |
|
246 | 0 | if (!cache) |
247 | 0 | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
248 | |
|
249 | 0 | #ifdef HAVE_LANGINFO_TIME_CODESET |
250 | 0 | raw = nl_langinfo (_NL_TIME_CODESET); |
251 | | #else |
252 | | G_LOCK (aliases); |
253 | | raw = _g_locale_charset_raw (); |
254 | | G_UNLOCK (aliases); |
255 | | #endif |
256 | |
|
257 | 0 | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
258 | 0 | { |
259 | 0 | const gchar *new_charset; |
260 | |
|
261 | 0 | g_free (cache->raw); |
262 | 0 | g_free (cache->charset); |
263 | 0 | cache->raw = g_strdup (raw); |
264 | 0 | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
265 | 0 | cache->charset = g_strdup (new_charset); |
266 | 0 | } |
267 | |
|
268 | 0 | if (charset) |
269 | 0 | *charset = cache->charset; |
270 | |
|
271 | 0 | return cache->is_utf8; |
272 | 0 | } |
273 | | /* |
274 | | * Do the same as g_get_charset() but it temporarily set locale (LC_ALL to |
275 | | * LC_CTYPE) to correctly check for charset about CTYPE conversion relatives. |
276 | | * |
277 | | * Returns: %TRUE if the returned charset is UTF-8 |
278 | | */ |
279 | | gboolean |
280 | | _g_get_ctype_charset (const char **charset) |
281 | 0 | { |
282 | 0 | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
283 | 0 | GCharsetCache *cache = g_private_get (&cache_private); |
284 | 0 | const gchar *raw; |
285 | |
|
286 | 0 | if (!cache) |
287 | 0 | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
288 | |
|
289 | 0 | #ifdef HAVE_LANGINFO_CODESET |
290 | 0 | raw = nl_langinfo (CODESET); |
291 | | #else |
292 | | G_LOCK (aliases); |
293 | | raw = _g_locale_charset_raw (); |
294 | | G_UNLOCK (aliases); |
295 | | #endif |
296 | |
|
297 | 0 | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
298 | 0 | { |
299 | 0 | const gchar *new_charset; |
300 | |
|
301 | 0 | g_free (cache->raw); |
302 | 0 | g_free (cache->charset); |
303 | 0 | cache->raw = g_strdup (raw); |
304 | 0 | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
305 | 0 | cache->charset = g_strdup (new_charset); |
306 | 0 | } |
307 | |
|
308 | 0 | if (charset) |
309 | 0 | *charset = cache->charset; |
310 | |
|
311 | 0 | return cache->is_utf8; |
312 | 0 | } |
313 | | |
314 | | /** |
315 | | * g_get_codeset: |
316 | | * |
317 | | * Gets the character set for the current locale. |
318 | | * |
319 | | * Returns: a newly allocated string containing the name |
320 | | * of the character set. This string must be freed with g_free(). |
321 | | */ |
322 | | gchar * |
323 | | g_get_codeset (void) |
324 | 0 | { |
325 | 0 | const gchar *charset; |
326 | |
|
327 | 0 | g_get_charset (&charset); |
328 | |
|
329 | 0 | return g_strdup (charset); |
330 | 0 | } |
331 | | |
332 | | /** |
333 | | * g_get_console_charset: |
334 | | * @charset: (out) (optional) (transfer none): return location for character set |
335 | | * name, or %NULL. |
336 | | * |
337 | | * Obtains the character set used by the console attached to the process, |
338 | | * which is suitable for printing output to the terminal. |
339 | | * |
340 | | * Usually this matches the result returned by g_get_charset(), but in |
341 | | * environments where the locale's character set does not match the encoding |
342 | | * of the console this function tries to guess a more suitable value instead. |
343 | | * |
344 | | * On Windows the character set returned by this function is the |
345 | | * output code page used by the console associated with the calling process. |
346 | | * If the codepage can't be determined (for example because there is no |
347 | | * console attached) UTF-8 is assumed. |
348 | | * |
349 | | * The return value is %TRUE if the locale's encoding is UTF-8, in that |
350 | | * case you can perhaps avoid calling g_convert(). |
351 | | * |
352 | | * The string returned in @charset is not allocated, and should not be |
353 | | * freed. |
354 | | * |
355 | | * Returns: %TRUE if the returned charset is UTF-8 |
356 | | * |
357 | | * Since: 2.62 |
358 | | */ |
359 | | gboolean |
360 | | g_get_console_charset (const char **charset) |
361 | 2 | { |
362 | | #ifdef G_OS_WIN32 |
363 | | static GPrivate cache_private = G_PRIVATE_INIT (charset_cache_free); |
364 | | GCharsetCache *cache = g_private_get (&cache_private); |
365 | | const gchar *locale; |
366 | | unsigned int cp; |
367 | | char buf[2 + 20 + 1]; /* "CP" + G_MAXUINT64 (to be safe) in decimal form (20 bytes) + "\0" */ |
368 | | const gchar *raw = NULL; |
369 | | |
370 | | if (!cache) |
371 | | cache = g_private_set_alloc0 (&cache_private, sizeof (GCharsetCache)); |
372 | | |
373 | | /* first try to query $LANG (works for Cygwin/MSYS/MSYS2 and others using mintty) */ |
374 | | locale = g_getenv ("LANG"); |
375 | | if (locale != NULL && locale[0] != '\0') |
376 | | { |
377 | | /* If the locale name contains an encoding after the dot, return it. */ |
378 | | const char *dot = strchr (locale, '.'); |
379 | | |
380 | | if (dot != NULL) |
381 | | { |
382 | | const char *modifier; |
383 | | |
384 | | dot++; |
385 | | /* Look for the possible @... trailer and remove it, if any. */ |
386 | | modifier = strchr (dot, '@'); |
387 | | if (modifier == NULL) |
388 | | raw = dot; |
389 | | else if ((gsize) (modifier - dot) < sizeof (buf)) |
390 | | { |
391 | | memcpy (buf, dot, modifier - dot); |
392 | | buf[modifier - dot] = '\0'; |
393 | | raw = buf; |
394 | | } |
395 | | } |
396 | | } |
397 | | /* next try querying console codepage using native win32 API */ |
398 | | if (raw == NULL) |
399 | | { |
400 | | cp = GetConsoleOutputCP (); |
401 | | if (cp) |
402 | | { |
403 | | sprintf (buf, "CP%u", cp); |
404 | | raw = buf; |
405 | | } |
406 | | else if (GetLastError () != ERROR_INVALID_HANDLE) |
407 | | { |
408 | | gchar *emsg = g_win32_error_message (GetLastError ()); |
409 | | g_warning ("Failed to determine console output code page: %s. " |
410 | | "Falling back to UTF-8", emsg); |
411 | | g_free (emsg); |
412 | | } |
413 | | } |
414 | | /* fall-back to UTF-8 if the rest failed (it's a universal default) */ |
415 | | if (raw == NULL) |
416 | | raw = "UTF-8"; |
417 | | |
418 | | if (cache->raw == NULL || strcmp (cache->raw, raw) != 0) |
419 | | { |
420 | | const gchar *new_charset; |
421 | | |
422 | | g_free (cache->raw); |
423 | | g_free (cache->charset); |
424 | | cache->raw = g_strdup (raw); |
425 | | cache->is_utf8 = g_utf8_get_charset_internal (raw, &new_charset); |
426 | | cache->charset = g_strdup (new_charset); |
427 | | } |
428 | | |
429 | | if (charset) |
430 | | *charset = cache->charset; |
431 | | |
432 | | return cache->is_utf8; |
433 | | #else |
434 | | /* assume the locale settings match the console encoding on non-Windows OSs */ |
435 | 2 | return g_get_charset (charset); |
436 | 2 | #endif |
437 | 2 | } |
438 | | |
439 | | #ifndef G_OS_WIN32 |
440 | | |
441 | | /* read an alias file for the locales */ |
442 | | static void |
443 | | read_aliases (const gchar *file, |
444 | | GHashTable *alias_table) |
445 | 2 | { |
446 | 2 | FILE *fp; |
447 | 2 | char buf[256]; |
448 | | |
449 | 2 | fp = fopen (file, "re"); |
450 | 2 | if (!fp) |
451 | 2 | return; |
452 | 0 | while (fgets (buf, 256, fp)) |
453 | 0 | { |
454 | 0 | char *p, *q; |
455 | |
|
456 | 0 | g_strstrip (buf); |
457 | | |
458 | | /* Line is a comment */ |
459 | 0 | if ((buf[0] == '#') || (buf[0] == '\0')) |
460 | 0 | continue; |
461 | | |
462 | | /* Reads first column */ |
463 | 0 | for (p = buf, q = NULL; *p; p++) { |
464 | 0 | if ((*p == '\t') || (*p == ' ') || (*p == ':')) { |
465 | 0 | *p = '\0'; |
466 | 0 | q = p+1; |
467 | 0 | while ((*q == '\t') || (*q == ' ')) { |
468 | 0 | q++; |
469 | 0 | } |
470 | 0 | break; |
471 | 0 | } |
472 | 0 | } |
473 | | /* The line only had one column */ |
474 | 0 | if (!q || *q == '\0') |
475 | 0 | continue; |
476 | | |
477 | | /* Read second column */ |
478 | 0 | for (p = q; *p; p++) { |
479 | 0 | if ((*p == '\t') || (*p == ' ')) { |
480 | 0 | *p = '\0'; |
481 | 0 | break; |
482 | 0 | } |
483 | 0 | } |
484 | | |
485 | | /* Add to alias table if necessary */ |
486 | 0 | if (!g_hash_table_lookup (alias_table, buf)) { |
487 | 0 | g_hash_table_insert (alias_table, g_strdup (buf), g_strdup (q)); |
488 | 0 | } |
489 | 0 | } |
490 | 0 | fclose (fp); |
491 | 0 | } |
492 | | |
493 | | #endif |
494 | | |
495 | | static char * |
496 | | unalias_lang (char *lang) |
497 | 2 | { |
498 | 2 | #ifndef G_OS_WIN32 |
499 | 2 | static GHashTable *alias_table = NULL; |
500 | 2 | char *p; |
501 | 2 | int i; |
502 | | |
503 | 2 | if (g_once_init_enter (&alias_table)) |
504 | 2 | { |
505 | 2 | GHashTable *table = g_hash_table_new (g_str_hash, g_str_equal); |
506 | 2 | read_aliases ("/usr/share/locale/locale.alias", table); |
507 | 2 | g_once_init_leave (&alias_table, table); |
508 | 2 | } |
509 | | |
510 | 2 | i = 0; |
511 | 2 | while ((p = g_hash_table_lookup (alias_table, lang)) && (strcmp (p, lang) != 0)) |
512 | 0 | { |
513 | 0 | lang = p; |
514 | 0 | if (i++ == 30) |
515 | 0 | { |
516 | 0 | static gboolean said_before = FALSE; |
517 | 0 | if (!said_before) |
518 | 0 | g_warning ("Too many alias levels for a locale, " |
519 | 0 | "may indicate a loop"); |
520 | 0 | said_before = TRUE; |
521 | 0 | return lang; |
522 | 0 | } |
523 | 0 | } |
524 | 2 | #endif |
525 | 2 | return lang; |
526 | 2 | } |
527 | | |
/* Bit flags naming the optional parts of a locale specification.
 * They are ordered from least significant to most significant.
 */
enum
{
  COMPONENT_CODESET   = 0x1,
  COMPONENT_TERRITORY = 0x2,
  COMPONENT_MODIFIER  = 0x4
};
537 | | |
538 | | /* Break an X/Open style locale specification into components |
539 | | */ |
540 | | static guint |
541 | | explode_locale (const gchar *locale, |
542 | | gchar **language, |
543 | | gchar **territory, |
544 | | gchar **codeset, |
545 | | gchar **modifier) |
546 | 2 | { |
547 | 2 | const gchar *uscore_pos; |
548 | 2 | const gchar *at_pos; |
549 | 2 | const gchar *dot_pos; |
550 | | |
551 | 2 | guint mask = 0; |
552 | | |
553 | 2 | uscore_pos = strchr (locale, '_'); |
554 | 2 | dot_pos = strchr (uscore_pos ? uscore_pos : locale, '.'); |
555 | 2 | at_pos = strchr (dot_pos ? dot_pos : (uscore_pos ? uscore_pos : locale), '@'); |
556 | | |
557 | 2 | if (at_pos) |
558 | 0 | { |
559 | 0 | mask |= COMPONENT_MODIFIER; |
560 | 0 | *modifier = g_strdup (at_pos); |
561 | 0 | } |
562 | 2 | else |
563 | 2 | at_pos = locale + strlen (locale); |
564 | | |
565 | 2 | if (dot_pos) |
566 | 0 | { |
567 | 0 | mask |= COMPONENT_CODESET; |
568 | 0 | *codeset = g_strndup (dot_pos, at_pos - dot_pos); |
569 | 0 | } |
570 | 2 | else |
571 | 2 | dot_pos = at_pos; |
572 | | |
573 | 2 | if (uscore_pos) |
574 | 0 | { |
575 | 0 | mask |= COMPONENT_TERRITORY; |
576 | 0 | *territory = g_strndup (uscore_pos, dot_pos - uscore_pos); |
577 | 0 | } |
578 | 2 | else |
579 | 2 | uscore_pos = dot_pos; |
580 | | |
581 | 2 | *language = g_strndup (locale, uscore_pos - locale); |
582 | | |
583 | 2 | return mask; |
584 | 2 | } |
585 | | |
586 | | /* |
587 | | * Compute all interesting variants for a given locale name - |
588 | | * by stripping off different components of the value. |
589 | | * |
590 | | * For simplicity, we assume that the locale is in |
591 | | * X/Open format: language[_territory][.codeset][@modifier] |
592 | | * |
593 | | * TODO: Extend this to handle the CEN format (see the GNUlibc docs) |
594 | | * as well. We could just copy the code from glibc wholesale |
595 | | * but it is big, ugly, and complicated, so I'm reluctant |
596 | | * to do so when this should handle 99% of the time... |
597 | | */ |
598 | | static void |
599 | | append_locale_variants (GPtrArray *array, |
600 | | const gchar *locale) |
601 | 2 | { |
602 | 2 | gchar *language = NULL; |
603 | 2 | gchar *territory = NULL; |
604 | 2 | gchar *codeset = NULL; |
605 | 2 | gchar *modifier = NULL; |
606 | | |
607 | 2 | guint mask; |
608 | 2 | guint i, j; |
609 | | |
610 | 2 | g_return_if_fail (locale != NULL); |
611 | | |
612 | 2 | mask = explode_locale (locale, &language, &territory, &codeset, &modifier); |
613 | | |
614 | | /* Iterate through all possible combinations, from least attractive |
615 | | * to most attractive. |
616 | | */ |
617 | 4 | for (j = 0; j <= mask; ++j) |
618 | 2 | { |
619 | 2 | i = mask - j; |
620 | | |
621 | 2 | if ((i & ~mask) == 0) |
622 | 2 | { |
623 | 2 | gchar *val = g_strconcat (language, |
624 | 2 | (i & COMPONENT_TERRITORY) ? territory : "", |
625 | 2 | (i & COMPONENT_CODESET) ? codeset : "", |
626 | 2 | (i & COMPONENT_MODIFIER) ? modifier : "", |
627 | 2 | NULL); |
628 | 2 | g_ptr_array_add (array, val); |
629 | 2 | } |
630 | 2 | } |
631 | | |
632 | 2 | g_free (language); |
633 | 2 | if (mask & COMPONENT_CODESET) |
634 | 0 | g_free (codeset); |
635 | 2 | if (mask & COMPONENT_TERRITORY) |
636 | 0 | g_free (territory); |
637 | 2 | if (mask & COMPONENT_MODIFIER) |
638 | 0 | g_free (modifier); |
639 | 2 | } |
640 | | |
641 | | /** |
642 | | * g_get_locale_variants: |
643 | | * @locale: a locale identifier |
644 | | * |
645 | | * Returns a list of derived variants of @locale, which can be used to |
646 | | * e.g. construct locale-dependent filenames or search paths. The returned |
647 | | * list is sorted from most desirable to least desirable. |
648 | | * This function handles territory, charset and extra locale modifiers. See |
649 | | * [`setlocale(3)`](man:setlocale) for information about locales and their format. |
650 | | * |
651 | | * @locale itself is guaranteed to be returned in the output. |
652 | | * |
653 | | * For example, if @locale is `fr_BE`, then the returned list |
654 | | * is `fr_BE`, `fr`. If @locale is `en_GB.UTF-8@euro`, then the returned list |
655 | | * is `en_GB.UTF-8@euro`, `en_GB.UTF-8`, `en_GB@euro`, `en_GB`, `en.UTF-8@euro`, |
656 | | * `en.UTF-8`, `en@euro`, `en`. |
657 | | * |
658 | | * If you need the list of variants for the current locale, |
659 | | * use g_get_language_names(). |
660 | | * |
661 | | * Returns: (transfer full) (array zero-terminated=1) (element-type utf8): a newly |
662 | | * allocated array of newly allocated strings with the locale variants. Free with |
663 | | * g_strfreev(). |
664 | | * |
665 | | * Since: 2.28 |
666 | | */ |
667 | | gchar ** |
668 | | g_get_locale_variants (const gchar *locale) |
669 | 0 | { |
670 | 0 | GPtrArray *array; |
671 | |
|
672 | 0 | g_return_val_if_fail (locale != NULL, NULL); |
673 | | |
674 | 0 | array = g_ptr_array_sized_new (8); |
675 | 0 | append_locale_variants (array, locale); |
676 | 0 | g_ptr_array_add (array, NULL); |
677 | |
|
678 | 0 | return (gchar **) g_ptr_array_free (array, FALSE); |
679 | 0 | } |
680 | | |
681 | | /* The following is (partly) taken from the gettext package. |
682 | | Copyright (C) 1995, 1996, 1997, 1998 Free Software Foundation, Inc. */ |
683 | | |
684 | | static const gchar * |
685 | | guess_category_value (const gchar *category_name) |
686 | 305 | { |
687 | 305 | const gchar *retval; |
688 | | |
689 | | /* The highest priority value is the 'LANGUAGE' environment |
690 | | variable. This is a GNU extension. */ |
691 | 305 | retval = g_getenv ("LANGUAGE"); |
692 | 305 | if ((retval != NULL) && (retval[0] != '\0')) |
693 | 0 | return retval; |
694 | | |
695 | | /* 'LANGUAGE' is not set. So we have to proceed with the POSIX |
696 | | methods of looking to 'LC_ALL', 'LC_xxx', and 'LANG'. On some |
697 | | systems this can be done by the 'setlocale' function itself. */ |
698 | | |
699 | | /* Setting of LC_ALL overwrites all other. */ |
700 | 305 | retval = g_getenv ("LC_ALL"); |
701 | 305 | if ((retval != NULL) && (retval[0] != '\0')) |
702 | 0 | return retval; |
703 | | |
704 | | /* Next comes the name of the desired category. */ |
705 | 305 | retval = g_getenv (category_name); |
706 | 305 | if ((retval != NULL) && (retval[0] != '\0')) |
707 | 0 | return retval; |
708 | | |
709 | | /* Last possibility is the LANG environment variable. */ |
710 | 305 | retval = g_getenv ("LANG"); |
711 | 305 | if ((retval != NULL) && (retval[0] != '\0')) |
712 | 0 | return retval; |
713 | | |
714 | | #ifdef G_PLATFORM_WIN32 |
715 | | /* g_win32_getlocale() first checks for LC_ALL, LC_MESSAGES and |
716 | | * LANG, which we already did above. Oh well. The main point of |
717 | | * calling g_win32_getlocale() is to get the thread's locale as used |
718 | | * by Windows and the Microsoft C runtime (in the "English_United |
719 | | * States" format) translated into the Unixish format. |
720 | | */ |
721 | | { |
722 | | char *locale = g_win32_getlocale (); |
723 | | retval = g_intern_string (locale); |
724 | | g_free (locale); |
725 | | return retval; |
726 | | } |
727 | | #endif |
728 | | |
729 | 305 | return NULL; |
730 | 305 | } |
731 | | |
732 | | typedef struct _GLanguageNamesCache GLanguageNamesCache; |
733 | | |
734 | | struct _GLanguageNamesCache { |
735 | | gchar *languages; |
736 | | gchar **language_names; |
737 | | }; |
738 | | |
739 | | static void |
740 | | language_names_cache_free (gpointer data) |
741 | 0 | { |
742 | 0 | GLanguageNamesCache *cache = data; |
743 | 0 | g_free (cache->languages); |
744 | 0 | g_strfreev (cache->language_names); |
745 | 0 | g_free (cache); |
746 | 0 | } |
747 | | |
748 | | /** |
749 | | * g_get_language_names: |
750 | | * |
751 | | * Computes a list of applicable locale names, which can be used to |
752 | | * e.g. construct locale-dependent filenames or search paths. The returned |
753 | | * list is sorted from most desirable to least desirable and always contains |
754 | | * the default locale "C". |
755 | | * |
756 | | * For example, if LANGUAGE=de:en_US, then the returned list is |
757 | | * "de", "en_US", "en", "C". |
758 | | * |
759 | | * This function consults the environment variables `LANGUAGE`, `LC_ALL`, |
760 | | * `LC_MESSAGES` and `LANG` to find the list of locales specified by the |
761 | | * user. |
762 | | * |
763 | | * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by GLib |
764 | | * that must not be modified or freed. |
765 | | * |
766 | | * Since: 2.6 |
767 | | */ |
768 | | const gchar * const * |
769 | | g_get_language_names (void) |
770 | 305 | { |
771 | 305 | return g_get_language_names_with_category ("LC_MESSAGES"); |
772 | 305 | } |
773 | | |
774 | | /** |
775 | | * g_get_language_names_with_category: |
776 | | * @category_name: a locale category name |
777 | | * |
778 | | * Computes a list of applicable locale names with a locale category name, |
779 | | * which can be used to construct the fallback locale-dependent filenames |
780 | | * or search paths. The returned list is sorted from most desirable to |
781 | | * least desirable and always contains the default locale "C". |
782 | | * |
783 | | * This function consults the environment variables `LANGUAGE`, `LC_ALL`, |
784 | | * @category_name, and `LANG` to find the list of locales specified by the |
785 | | * user. |
786 | | * |
787 | | * g_get_language_names() returns g_get_language_names_with_category("LC_MESSAGES"). |
788 | | * |
789 | | * Returns: (array zero-terminated=1) (transfer none): a %NULL-terminated array of strings owned by |
790 | | * the thread g_get_language_names_with_category was called from. |
791 | | * It must not be modified or freed. It must be copied if planned to be used in another thread. |
792 | | * |
793 | | * Since: 2.58 |
794 | | */ |
795 | | const gchar * const * |
796 | | g_get_language_names_with_category (const gchar *category_name) |
797 | 305 | { |
798 | 305 | static GPrivate cache_private = G_PRIVATE_INIT ((void (*)(gpointer)) g_hash_table_unref); |
799 | 305 | GHashTable *cache = g_private_get (&cache_private); |
800 | 305 | const gchar *languages; |
801 | 305 | GLanguageNamesCache *name_cache; |
802 | | |
803 | 305 | g_return_val_if_fail (category_name != NULL, NULL); |
804 | | |
805 | 305 | if (!cache) |
806 | 2 | { |
807 | 2 | cache = g_hash_table_new_full (g_str_hash, g_str_equal, |
808 | 2 | g_free, language_names_cache_free); |
809 | 2 | g_private_set (&cache_private, cache); |
810 | 2 | } |
811 | | |
812 | 305 | languages = guess_category_value (category_name); |
813 | 305 | if (!languages) |
814 | 305 | languages = "C"; |
815 | | |
816 | 305 | name_cache = (GLanguageNamesCache *) g_hash_table_lookup (cache, category_name); |
817 | 305 | if (!(name_cache && name_cache->languages && |
818 | 305 | strcmp (name_cache->languages, languages) == 0)) |
819 | 2 | { |
820 | 2 | GPtrArray *array; |
821 | 2 | gchar **alist, **a; |
822 | | |
823 | 2 | g_hash_table_remove (cache, category_name); |
824 | | |
825 | 2 | array = g_ptr_array_sized_new (8); |
826 | | |
827 | 2 | alist = g_strsplit (languages, ":", 0); |
828 | 4 | for (a = alist; *a; a++) |
829 | 2 | append_locale_variants (array, unalias_lang (*a)); |
830 | 2 | g_strfreev (alist); |
831 | 2 | g_ptr_array_add (array, g_strdup ("C")); |
832 | 2 | g_ptr_array_add (array, NULL); |
833 | | |
834 | 2 | name_cache = g_new0 (GLanguageNamesCache, 1); |
835 | 2 | name_cache->languages = g_strdup (languages); |
836 | 2 | name_cache->language_names = (gchar **) g_ptr_array_free (array, FALSE); |
837 | 2 | g_hash_table_insert (cache, g_strdup (category_name), name_cache); |
838 | 2 | } |
839 | | |
840 | 305 | return (const gchar * const *) name_cache->language_names; |
841 | 305 | } |