Line | Count | Source |
1 | | /*************************************************************************** |
2 | | * _ _ ____ _ |
3 | | * Project ___| | | | _ \| | |
4 | | * / __| | | | |_) | | |
5 | | * | (__| |_| | _ <| |___ |
6 | | * \___|\___/|_| \_\_____| |
7 | | * |
8 | | * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. |
9 | | * |
10 | | * This software is licensed as described in the file COPYING, which |
11 | | * you should have received as part of this distribution. The terms |
12 | | * are also available at https://curl.se/docs/copyright.html. |
13 | | * |
14 | | * You may opt to use, copy, modify, merge, publish, distribute and/or sell |
15 | | * copies of the Software, and permit persons to whom the Software is |
16 | | * furnished to do so, under the terms of the COPYING file. |
17 | | * |
18 | | * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY |
19 | | * KIND, either express or implied. |
20 | | * |
21 | | * SPDX-License-Identifier: curl |
22 | | * |
23 | | ***************************************************************************/ |
24 | | |
25 | | /* |
26 | | * IDN conversions |
27 | | */ |
28 | | |
29 | | #include "curl_setup.h" |
30 | | #include "urldata.h" |
31 | | #include "idn.h" |
32 | | #include "sendf.h" |
33 | | #include "curlx/warnless.h" |
34 | | |
35 | | #ifdef USE_LIBIDN2 |
36 | | #include <idn2.h> |
37 | | |
/*
 * IDN2_LOOKUP() selects the libidn2 lookup function for this build:
 * idn2_lookup_u8() on Windows UNICODE builds, idn2_lookup_ul() everywhere
 * else. The arguments are cast to the pointer types each function expects.
 * Every macro parameter is parenthesized so that any expression can be
 * passed safely.
 */
#if defined(_WIN32) && defined(UNICODE)
#define IDN2_LOOKUP(name, host, flags)                                  \
  idn2_lookup_u8((const uint8_t *)(name), (uint8_t **)(host), (flags))
#else
#define IDN2_LOOKUP(name, host, flags)                                  \
  idn2_lookup_ul((const char *)(name), (char **)(host), (flags))
#endif
45 | | #endif /* USE_LIBIDN2 */ |
46 | | |
47 | | /* for macOS and iOS targets */ |
48 | | #ifdef USE_APPLE_IDN |
49 | | #include <unicode/uidna.h> |
50 | | #include <iconv.h> |
51 | | #include <langinfo.h> |
52 | | |
53 | | #define MAX_HOST_LENGTH 512 |
54 | | |
55 | | static CURLcode iconv_to_utf8(const char *in, size_t inlen, |
56 | | char **out, size_t *outlen) |
57 | | { |
58 | | iconv_t cd = iconv_open("UTF-8", nl_langinfo(CODESET)); |
59 | | if(cd != (iconv_t)-1) { |
60 | | size_t iconv_outlen = *outlen; |
61 | | char *iconv_in = (char *)CURL_UNCONST(in); |
62 | | size_t iconv_inlen = inlen; |
63 | | size_t iconv_result = iconv(cd, &iconv_in, &iconv_inlen, |
64 | | out, &iconv_outlen); |
65 | | *outlen -= iconv_outlen; |
66 | | iconv_close(cd); |
67 | | if(iconv_result == (size_t)-1) { |
68 | | /* !checksrc! disable ERRNOVAR 1 */ |
69 | | if(errno == ENOMEM) |
70 | | return CURLE_OUT_OF_MEMORY; |
71 | | else |
72 | | return CURLE_URL_MALFORMAT; |
73 | | } |
74 | | |
75 | | return CURLE_OK; |
76 | | } |
77 | | else { |
78 | | /* !checksrc! disable ERRNOVAR 1 */ |
79 | | if(errno == ENOMEM) |
80 | | return CURLE_OUT_OF_MEMORY; |
81 | | else |
82 | | return CURLE_FAILED_INIT; |
83 | | } |
84 | | } |
85 | | |
86 | | static CURLcode mac_idn_to_ascii(const char *in, char **out) |
87 | | { |
88 | | size_t inlen = strlen(in); |
89 | | if(inlen < MAX_HOST_LENGTH) { |
90 | | char iconv_buffer[MAX_HOST_LENGTH] = {0}; |
91 | | char *iconv_outptr = iconv_buffer; |
92 | | size_t iconv_outlen = sizeof(iconv_buffer); |
93 | | CURLcode iconv_result = iconv_to_utf8(in, inlen, |
94 | | &iconv_outptr, &iconv_outlen); |
95 | | if(!iconv_result) { |
96 | | UErrorCode err = U_ZERO_ERROR; |
97 | | UIDNA* idna = uidna_openUTS46( |
98 | | UIDNA_CHECK_BIDI|UIDNA_NONTRANSITIONAL_TO_ASCII, &err); |
99 | | if(!U_FAILURE(err)) { |
100 | | UIDNAInfo info = UIDNA_INFO_INITIALIZER; |
101 | | char buffer[MAX_HOST_LENGTH] = {0}; |
102 | | (void)uidna_nameToASCII_UTF8(idna, iconv_buffer, (int)iconv_outlen, |
103 | | buffer, sizeof(buffer) - 1, &info, &err); |
104 | | uidna_close(idna); |
105 | | if(!U_FAILURE(err) && !info.errors) { |
106 | | *out = curlx_strdup(buffer); |
107 | | if(*out) |
108 | | return CURLE_OK; |
109 | | else |
110 | | return CURLE_OUT_OF_MEMORY; |
111 | | } |
112 | | } |
113 | | } |
114 | | else |
115 | | return iconv_result; |
116 | | } |
117 | | return CURLE_URL_MALFORMAT; |
118 | | } |
119 | | |
120 | | static CURLcode mac_ascii_to_idn(const char *in, char **out) |
121 | | { |
122 | | size_t inlen = strlen(in); |
123 | | if(inlen < MAX_HOST_LENGTH) { |
124 | | UErrorCode err = U_ZERO_ERROR; |
125 | | UIDNA* idna = uidna_openUTS46( |
126 | | UIDNA_CHECK_BIDI|UIDNA_NONTRANSITIONAL_TO_UNICODE, &err); |
127 | | if(!U_FAILURE(err)) { |
128 | | UIDNAInfo info = UIDNA_INFO_INITIALIZER; |
129 | | char buffer[MAX_HOST_LENGTH] = {0}; |
130 | | (void)uidna_nameToUnicodeUTF8(idna, in, -1, buffer, |
131 | | sizeof(buffer) - 1, &info, &err); |
132 | | uidna_close(idna); |
133 | | if(!U_FAILURE(err)) { |
134 | | *out = curlx_strdup(buffer); |
135 | | if(*out) |
136 | | return CURLE_OK; |
137 | | else |
138 | | return CURLE_OUT_OF_MEMORY; |
139 | | } |
140 | | } |
141 | | } |
142 | | return CURLE_URL_MALFORMAT; |
143 | | } |
144 | | #endif |
145 | | |
146 | | #ifdef USE_WIN32_IDN |
147 | | /* using Windows kernel32 and normaliz libraries. */ |
148 | | |
149 | | #if (!defined(_WIN32_WINNT) || _WIN32_WINNT < _WIN32_WINNT_VISTA) && \ |
150 | | (!defined(WINVER) || WINVER < 0x600) |
151 | | WINBASEAPI int WINAPI IdnToAscii(DWORD dwFlags, |
152 | | const WCHAR *lpUnicodeCharStr, |
153 | | int cchUnicodeChar, |
154 | | WCHAR *lpASCIICharStr, |
155 | | int cchASCIIChar); |
156 | | WINBASEAPI int WINAPI IdnToUnicode(DWORD dwFlags, |
157 | | const WCHAR *lpASCIICharStr, |
158 | | int cchASCIIChar, |
159 | | WCHAR *lpUnicodeCharStr, |
160 | | int cchUnicodeChar); |
161 | | #endif |
162 | | |
163 | | #define IDN_MAX_LENGTH 255 |
164 | | |
165 | | static wchar_t *idn_curlx_convert_UTF8_to_wchar(const char *str_utf8) |
166 | | { |
167 | | wchar_t *str_w = NULL; |
168 | | |
169 | | if(str_utf8) { |
170 | | int str_w_len = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, |
171 | | str_utf8, -1, NULL, 0); |
172 | | if(str_w_len > 0) { |
173 | | str_w = curlx_malloc(str_w_len * sizeof(wchar_t)); |
174 | | if(str_w) { |
175 | | if(MultiByteToWideChar(CP_UTF8, 0, str_utf8, -1, str_w, |
176 | | str_w_len) == 0) { |
177 | | curlx_free(str_w); |
178 | | return NULL; |
179 | | } |
180 | | } |
181 | | } |
182 | | } |
183 | | return str_w; |
184 | | } |
185 | | |
186 | | static char *idn_curlx_convert_wchar_to_UTF8(const wchar_t *str_w) |
187 | | { |
188 | | char *str_utf8 = NULL; |
189 | | |
190 | | if(str_w) { |
191 | | int bytes = WideCharToMultiByte(CP_UTF8, 0, str_w, -1, |
192 | | NULL, 0, NULL, NULL); |
193 | | if(bytes > 0) { |
194 | | str_utf8 = curlx_malloc(bytes); |
195 | | if(str_utf8) { |
196 | | if(WideCharToMultiByte(CP_UTF8, 0, str_w, -1, str_utf8, bytes, |
197 | | NULL, NULL) == 0) { |
198 | | curlx_free(str_utf8); |
199 | | return NULL; |
200 | | } |
201 | | } |
202 | | } |
203 | | } |
204 | | return str_utf8; |
205 | | } |
206 | | |
207 | | static CURLcode win32_idn_to_ascii(const char *in, char **out) |
208 | | { |
209 | | wchar_t *in_w = idn_curlx_convert_UTF8_to_wchar(in); |
210 | | *out = NULL; |
211 | | if(in_w) { |
212 | | wchar_t punycode[IDN_MAX_LENGTH]; |
213 | | int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode, |
214 | | IDN_MAX_LENGTH); |
215 | | curlx_free(in_w); |
216 | | if(chars) { |
217 | | *out = idn_curlx_convert_wchar_to_UTF8(punycode); |
218 | | if(!*out) |
219 | | return CURLE_OUT_OF_MEMORY; |
220 | | } |
221 | | else |
222 | | return CURLE_URL_MALFORMAT; |
223 | | } |
224 | | else |
225 | | return CURLE_URL_MALFORMAT; |
226 | | |
227 | | return CURLE_OK; |
228 | | } |
229 | | |
230 | | static CURLcode win32_ascii_to_idn(const char *in, char **out) |
231 | | { |
232 | | wchar_t *in_w = idn_curlx_convert_UTF8_to_wchar(in); |
233 | | *out = NULL; |
234 | | if(in_w) { |
235 | | WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */ |
236 | | int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn, |
237 | | IDN_MAX_LENGTH); |
238 | | curlx_free(in_w); |
239 | | if(chars) { /* 'chars' is "the number of characters retrieved" */ |
240 | | *out = idn_curlx_convert_wchar_to_UTF8(idn); |
241 | | if(!*out) |
242 | | return CURLE_OUT_OF_MEMORY; |
243 | | } |
244 | | else |
245 | | return CURLE_URL_MALFORMAT; |
246 | | } |
247 | | else |
248 | | return CURLE_URL_MALFORMAT; |
249 | | |
250 | | return CURLE_OK; |
251 | | } |
252 | | |
253 | | #endif /* USE_WIN32_IDN */ |
254 | | |
255 | | /* |
256 | | * Helpers for IDNA conversions. |
257 | | */ |
/*
 * Curl_is_ASCII_name() returns TRUE if every byte of 'hostname' has its high
 * bit clear, FALSE as soon as a byte with the high bit set is found. A NULL
 * pointer is bad input and is reported as ASCII.
 */
bool Curl_is_ASCII_name(const char *hostname)
{
  const unsigned char *p;

  if(!hostname) /* bad input, consider it ASCII! */
    return TRUE;

  /* walk the bytes as UNSIGNED so the comparison is not affected by the
     signedness of plain char */
  for(p = (const unsigned char *)hostname; *p; p++) {
    if(*p > 0x7f)
      return FALSE;
  }
  return TRUE;
}
272 | | |
273 | | #ifdef USE_IDN |
274 | | /* |
275 | | * Curl_idn_decode() returns an allocated IDN decoded string if it was |
276 | | * possible. NULL on error. |
277 | | * |
278 | | * CURLE_URL_MALFORMAT - the hostname could not be converted |
279 | | * CURLE_OUT_OF_MEMORY - memory problem |
280 | | * |
281 | | */ |
282 | | static CURLcode idn_decode(const char *input, char **output) |
283 | 0 | { |
284 | 0 | char *decoded = NULL; |
285 | 0 | CURLcode result = CURLE_OK; |
286 | 0 | #ifdef USE_LIBIDN2 |
287 | 0 | if(idn2_check_version(IDN2_VERSION)) { |
288 | 0 | int flags = IDN2_NFC_INPUT |
289 | 0 | #if IDN2_VERSION_NUMBER >= 0x00140000 |
290 | | /* IDN2_NFC_INPUT: Normalize input string using normalization form C. |
291 | | IDN2_NONTRANSITIONAL: Perform Unicode TR46 non-transitional |
292 | | processing. */ |
293 | 0 | | IDN2_NONTRANSITIONAL |
294 | 0 | #endif |
295 | 0 | ; |
296 | 0 | int rc = IDN2_LOOKUP(input, &decoded, flags); |
297 | 0 | if(rc != IDN2_OK) |
298 | | /* fallback to TR46 Transitional mode for better IDNA2003 |
299 | | compatibility */ |
300 | 0 | rc = IDN2_LOOKUP(input, &decoded, IDN2_TRANSITIONAL); |
301 | 0 | if(rc != IDN2_OK) |
302 | 0 | result = CURLE_URL_MALFORMAT; |
303 | 0 | } |
304 | 0 | else |
305 | | /* a too old libidn2 version */ |
306 | 0 | result = CURLE_NOT_BUILT_IN; |
307 | | #elif defined(USE_WIN32_IDN) |
308 | | result = win32_idn_to_ascii(input, &decoded); |
309 | | #elif defined(USE_APPLE_IDN) |
310 | | result = mac_idn_to_ascii(input, &decoded); |
311 | | #endif |
312 | 0 | if(!result) |
313 | 0 | *output = decoded; |
314 | 0 | return result; |
315 | 0 | } |
316 | | |
317 | | static CURLcode idn_encode(const char *puny, char **output) |
318 | 0 | { |
319 | 0 | char *enc = NULL; |
320 | 0 | #ifdef USE_LIBIDN2 |
321 | 0 | int rc = idn2_to_unicode_8z8z(puny, &enc, 0); |
322 | 0 | if(rc != IDNA_SUCCESS) |
323 | 0 | return rc == IDNA_MALLOC_ERROR ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT; |
324 | | #elif defined(USE_WIN32_IDN) |
325 | | CURLcode result = win32_ascii_to_idn(puny, &enc); |
326 | | if(result) |
327 | | return result; |
328 | | #elif defined(USE_APPLE_IDN) |
329 | | CURLcode result = mac_ascii_to_idn(puny, &enc); |
330 | | if(result) |
331 | | return result; |
332 | | #endif |
333 | 0 | *output = enc; |
334 | 0 | return CURLE_OK; |
335 | 0 | } |
336 | | |
337 | | CURLcode Curl_idn_decode(const char *input, char **output) |
338 | 0 | { |
339 | 0 | char *d = NULL; |
340 | 0 | CURLcode result = idn_decode(input, &d); |
341 | 0 | #ifdef USE_LIBIDN2 |
342 | 0 | if(!result) { |
343 | 0 | char *c = curlx_strdup(d); |
344 | 0 | idn2_free(d); |
345 | 0 | if(c) |
346 | 0 | d = c; |
347 | 0 | else |
348 | 0 | result = CURLE_OUT_OF_MEMORY; |
349 | 0 | } |
350 | 0 | #endif |
351 | 0 | if(!result) { |
352 | 0 | if(!d[0]) { /* ended up zero length, not acceptable */ |
353 | 0 | result = CURLE_URL_MALFORMAT; |
354 | 0 | curlx_free(d); |
355 | 0 | } |
356 | 0 | else |
357 | 0 | *output = d; |
358 | 0 | } |
359 | 0 | return result; |
360 | 0 | } |
361 | | |
362 | | CURLcode Curl_idn_encode(const char *puny, char **output) |
363 | 0 | { |
364 | 0 | char *d = NULL; |
365 | 0 | CURLcode result = idn_encode(puny, &d); |
366 | 0 | #ifdef USE_LIBIDN2 |
367 | 0 | if(!result) { |
368 | 0 | char *c = curlx_strdup(d); |
369 | 0 | idn2_free(d); |
370 | 0 | if(c) |
371 | 0 | d = c; |
372 | 0 | else |
373 | 0 | result = CURLE_OUT_OF_MEMORY; |
374 | 0 | } |
375 | 0 | #endif |
376 | 0 | if(!result) |
377 | 0 | *output = d; |
378 | 0 | return result; |
379 | 0 | } |
380 | | |
381 | | /* |
382 | | * Frees host->encalloc, the buffer Curl_idnconvert_hostname() stores a converted hostname in |
383 | | */ |
384 | | void Curl_free_idnconverted_hostname(struct hostname *host) |
385 | 0 | { |
386 | 0 | Curl_safefree(host->encalloc); |
387 | 0 | } |
388 | | |
389 | | #endif /* USE_IDN */ |
390 | | |
391 | | /* |
392 | | * Perform any necessary IDN conversion of hostname |
393 | | */ |
394 | | CURLcode Curl_idnconvert_hostname(struct hostname *host) |
395 | 0 | { |
396 | | /* set the name we use to display the hostname */ |
397 | 0 | host->dispname = host->name; |
398 | |
|
399 | 0 | #ifdef USE_IDN |
400 | | /* Check name for non-ASCII and convert hostname if we can */ |
401 | 0 | if(!Curl_is_ASCII_name(host->name)) { |
402 | 0 | char *decoded; |
403 | 0 | CURLcode result = Curl_idn_decode(host->name, &decoded); |
404 | 0 | if(result) |
405 | 0 | return result; |
406 | | /* successful */ |
407 | 0 | host->name = host->encalloc = decoded; |
408 | 0 | } |
409 | 0 | #endif |
410 | 0 | return CURLE_OK; |
411 | 0 | } |