Line | Count | Source (jump to first uncovered line) |
1 | | /* toutf8.c --- Convert strings from system locale into UTF-8. |
2 | | Copyright (C) 2002-2024 Simon Josefsson |
3 | | |
4 | | This file is part of GNU Libidn. |
5 | | |
6 | | GNU Libidn is free software: you can redistribute it and/or |
7 | | modify it under the terms of either: |
8 | | |
9 | | * the GNU Lesser General Public License as published by the Free |
10 | | Software Foundation; either version 3 of the License, or (at |
11 | | your option) any later version. |
12 | | |
13 | | or |
14 | | |
15 | | * the GNU General Public License as published by the Free |
16 | | Software Foundation; either version 2 of the License, or (at |
17 | | your option) any later version. |
18 | | |
19 | | or both in parallel, as here. |
20 | | |
21 | | GNU Libidn is distributed in the hope that it will be useful, |
22 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
23 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
24 | | General Public License for more details. |
25 | | |
26 | | You should have received copies of the GNU General Public License and |
27 | | the GNU Lesser General Public License along with this program. If |
28 | | not, see <https://www.gnu.org/licenses/>. */ |
29 | | |
30 | | #ifdef HAVE_CONFIG_H |
31 | | # include "config.h" |
32 | | #endif |
33 | | |
34 | | /* Get prototypes. */ |
35 | | #include "stringprep.h" |
36 | | |
37 | | /* Get fprintf. */ |
38 | | #include <stdio.h> |
39 | | |
40 | | /* Get getenv. */ |
41 | | #include <stdlib.h> |
42 | | |
43 | | /* Get strlen. */ |
44 | | #include <string.h> |
45 | | |
46 | | /* Get iconv_string. */ |
47 | | #include "striconv.h" |
48 | | |
49 | | #ifdef _LIBC |
50 | | # define HAVE_ICONV 1 |
51 | | # define HAVE_LOCALE_H 1 |
52 | | # define HAVE_LANGINFO_CODESET 1 |
53 | | #endif |
54 | | |
55 | | #include <locale.h> |
56 | | |
57 | | #ifdef HAVE_LANGINFO_CODESET |
58 | | # include <langinfo.h> |
59 | | #endif |
60 | | |
61 | | #ifdef _LIBC |
62 | | # define stringprep_locale_charset() nl_langinfo (CODESET) |
63 | | #else |
64 | | /** |
65 | | * stringprep_locale_charset: |
66 | | * |
67 | | * Find out the current locale charset. The function respects the CHARSET |
68 | | * environment variable, but typically uses nl_langinfo(CODESET) when |
69 | | * it is supported. It falls back on "ASCII" if CHARSET is unset or empty |
70 | | * and nl_langinfo is not supported or returns nothing. |
71 | | * |
72 | | * Note that this function returns the application's locale's preferred |
73 | | * charset (or thread's locale's preferred charset, if your system |
74 | | * supports thread-specific locales). It does not return what the |
75 | | * system may be using. Thus, if you receive data from external |
76 | | * sources you cannot in general use this function to guess what |
77 | | * charset it is encoded in. Use stringprep_convert from the external |
78 | | * representation into the charset returned by this function, to have |
79 | | * data in the locale encoding. |
80 | | * |
81 | | * Return value: Returns the character set used by the current locale. |
82 | | * It will never return NULL, but uses "ASCII" as a fallback. |
83 | | **/ |
84 | | const char * |
85 | | stringprep_locale_charset (void) |
86 | 10.9k | { |
87 | 10.9k | const char *charset = getenv ("CHARSET"); /* flawfinder: ignore */ |
88 | | |
89 | 10.9k | if (charset && *charset) /* a non-empty CHARSET takes precedence */ |
90 | 0 | return charset; |
91 | | |
92 | 10.9k | # ifdef HAVE_LANGINFO_CODESET |
93 | 10.9k | charset = nl_langinfo (CODESET); |
94 | | |
95 | 10.9k | if (charset && *charset) /* skip a NULL or empty codeset name */ |
96 | 10.9k | return charset; |
97 | 0 | # endif |
98 | | |
99 | 0 | return "ASCII"; /* last resort when nothing above yielded a name */ |
100 | 10.9k | } |
101 | | #endif |
102 | | |
103 | | /** |
104 | | * stringprep_convert: |
105 | | * @str: input zero-terminated string. |
106 | | * @to_codeset: name of destination character set. |
107 | | * @from_codeset: name of origin character set, as used by @str. |
108 | | * |
109 | | * Convert the string from one character set to another using the system's |
110 | | * iconv() function (via str_iconv()). Built without HAVE_ICONV, it warns on stderr and only copies @str. |
111 | | * |
112 | | * Return value: Returns a newly allocated zero-terminated string which is @str |
113 | | * transcoded into @to_codeset. Without HAVE_ICONV: an unconverted copy, or NULL if malloc() fails. |
114 | | **/ |
115 | | char * |
116 | | stringprep_convert (const char *str, |
117 | | const char *to_codeset, const char *from_codeset) |
118 | 10.9k | { |
119 | 10.9k | #if HAVE_ICONV |
120 | 10.9k | return str_iconv (str, from_codeset, to_codeset); /* source charset is passed first here */ |
121 | | #else |
122 | | char *p; |
123 | | (void) to_codeset; /* unused in this branch */ |
124 | | (void) from_codeset; /* unused in this branch */ |
125 | | fprintf (stderr, "libidn: warning: libiconv not installed, cannot " |
126 | | "convert data to UTF-8\n"); |
127 | | p = malloc (strlen (str) + 1); /* +1 for the terminating NUL */ |
128 | | if (!p) |
129 | | return NULL; |
130 | | return strcpy (p, str); |
131 | | #endif |
132 | 10.9k | } |
133 | | |
134 | | /** |
135 | | * stringprep_locale_to_utf8: |
136 | | * @str: input zero-terminated string. |
137 | | * |
138 | | * Convert a string encoded in the locale's character set into UTF-8 by |
139 | | * using stringprep_convert(). |
140 | | * |
141 | | * Return value: Returns a newly allocated zero-terminated string which |
142 | | * is @str transcoded into UTF-8. |
143 | | **/ |
144 | | char * |
145 | | stringprep_locale_to_utf8 (const char *str) |
146 | 4.87k | { |
147 | 4.87k | return stringprep_convert (str, "UTF-8", stringprep_locale_charset ()); /* to UTF-8, from the locale charset */ |
148 | 4.87k | } |
149 | | |
150 | | /** |
151 | | * stringprep_utf8_to_locale: |
152 | | * @str: input zero-terminated string. |
153 | | * |
154 | | * Convert a string encoded in UTF-8 into the locale's character set by |
155 | | * using stringprep_convert(). |
156 | | * |
157 | | * Return value: Returns a newly allocated zero-terminated string which |
158 | | * is @str transcoded into the locale's character set. |
159 | | **/ |
160 | | char * |
161 | | stringprep_utf8_to_locale (const char *str) |
162 | 6.08k | { |
163 | 6.08k | return stringprep_convert (str, stringprep_locale_charset (), "UTF-8"); /* to the locale charset, from UTF-8 */ |
164 | 6.08k | } |