Coverage Report

Created: 2026-01-10 07:08

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/curl/lib/idn.c
Line
Count
Source
1
/***************************************************************************
2
 *                                  _   _ ____  _
3
 *  Project                     ___| | | |  _ \| |
4
 *                             / __| | | | |_) | |
5
 *                            | (__| |_| |  _ <| |___
6
 *                             \___|\___/|_| \_\_____|
7
 *
8
 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9
 *
10
 * This software is licensed as described in the file COPYING, which
11
 * you should have received as part of this distribution. The terms
12
 * are also available at https://curl.se/docs/copyright.html.
13
 *
14
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15
 * copies of the Software, and permit persons to whom the Software is
16
 * furnished to do so, under the terms of the COPYING file.
17
 *
18
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19
 * KIND, either express or implied.
20
 *
21
 * SPDX-License-Identifier: curl
22
 *
23
 ***************************************************************************/
24
/*
25
 * IDN conversions
26
 */
27
#include "curl_setup.h"
28
29
#include "urldata.h"
30
#include "idn.h"
31
32
#ifdef USE_LIBIDN2
33
#include <idn2.h>
34
35
/*
 * IDN2_LOOKUP() wraps the libidn2 lookup call: the UTF-8 variant for
 * Windows UNICODE builds, the locale-encoded variant everywhere else.
 * Every macro argument is parenthesized so that the casts apply to the
 * whole argument expression and not only to its first operand.
 */
#if defined(_WIN32) && defined(UNICODE)
#define IDN2_LOOKUP(name, host, flags)                                  \
  idn2_lookup_u8((const uint8_t *)(name), (uint8_t **)(host), (flags))
#else
#define IDN2_LOOKUP(name, host, flags)                          \
  idn2_lookup_ul((const char *)(name), (char **)(host), (flags))
#endif
42
#endif /* USE_LIBIDN2 */
43
44
/* for macOS and iOS targets */
45
#ifdef USE_APPLE_IDN
46
#include <unicode/uidna.h>
47
#include <iconv.h>
48
#include <langinfo.h>
49
50
#define MAX_HOST_LENGTH 512
51
52
static CURLcode iconv_to_utf8(const char *in, size_t inlen,
53
                              char **out, size_t *outlen)
54
{
55
  iconv_t cd = iconv_open("UTF-8", nl_langinfo(CODESET));
56
  if(cd != (iconv_t)-1) {
57
    size_t iconv_outlen = *outlen;
58
    char *iconv_in = (char *)CURL_UNCONST(in);
59
    size_t iconv_inlen = inlen;
60
    size_t iconv_result = iconv(cd, &iconv_in, &iconv_inlen,
61
                                out, &iconv_outlen);
62
    *outlen -= iconv_outlen;
63
    iconv_close(cd);
64
    if(iconv_result == (size_t)-1) {
65
      /* !checksrc! disable ERRNOVAR 1 */
66
      if(errno == ENOMEM)
67
        return CURLE_OUT_OF_MEMORY;
68
      else
69
        return CURLE_URL_MALFORMAT;
70
    }
71
72
    return CURLE_OK;
73
  }
74
  else {
75
    /* !checksrc! disable ERRNOVAR 1 */
76
    if(errno == ENOMEM)
77
      return CURLE_OUT_OF_MEMORY;
78
    else
79
      return CURLE_FAILED_INIT;
80
  }
81
}
82
83
static CURLcode mac_idn_to_ascii(const char *in, char **out)
84
{
85
  size_t inlen = strlen(in);
86
  if(inlen < MAX_HOST_LENGTH) {
87
    char iconv_buffer[MAX_HOST_LENGTH] = { 0 };
88
    char *iconv_outptr = iconv_buffer;
89
    size_t iconv_outlen = sizeof(iconv_buffer);
90
    CURLcode iconv_result = iconv_to_utf8(in, inlen,
91
                                          &iconv_outptr, &iconv_outlen);
92
    if(!iconv_result) {
93
      UErrorCode err = U_ZERO_ERROR;
94
      UIDNA *idna = uidna_openUTS46(
95
        UIDNA_CHECK_BIDI | UIDNA_NONTRANSITIONAL_TO_ASCII, &err);
96
      if(!U_FAILURE(err)) {
97
        UIDNAInfo info = UIDNA_INFO_INITIALIZER;
98
        char buffer[MAX_HOST_LENGTH] = { 0 };
99
        (void)uidna_nameToASCII_UTF8(idna, iconv_buffer, (int)iconv_outlen,
100
                                     buffer, sizeof(buffer) - 1, &info, &err);
101
        uidna_close(idna);
102
        if(!U_FAILURE(err) && !info.errors) {
103
          *out = curlx_strdup(buffer);
104
          if(*out)
105
            return CURLE_OK;
106
          else
107
            return CURLE_OUT_OF_MEMORY;
108
        }
109
      }
110
    }
111
    else
112
      return iconv_result;
113
  }
114
  return CURLE_URL_MALFORMAT;
115
}
116
117
static CURLcode mac_ascii_to_idn(const char *in, char **out)
118
{
119
  size_t inlen = strlen(in);
120
  if(inlen < MAX_HOST_LENGTH) {
121
    UErrorCode err = U_ZERO_ERROR;
122
    UIDNA *idna = uidna_openUTS46(
123
      UIDNA_CHECK_BIDI | UIDNA_NONTRANSITIONAL_TO_UNICODE, &err);
124
    if(!U_FAILURE(err)) {
125
      UIDNAInfo info = UIDNA_INFO_INITIALIZER;
126
      char buffer[MAX_HOST_LENGTH] = { 0 };
127
      (void)uidna_nameToUnicodeUTF8(idna, in, -1, buffer,
128
                                    sizeof(buffer) - 1, &info, &err);
129
      uidna_close(idna);
130
      if(!U_FAILURE(err)) {
131
        *out = curlx_strdup(buffer);
132
        if(*out)
133
          return CURLE_OK;
134
        else
135
          return CURLE_OUT_OF_MEMORY;
136
      }
137
    }
138
  }
139
  return CURLE_URL_MALFORMAT;
140
}
141
#endif
142
143
#ifdef USE_WIN32_IDN
144
/* using Windows kernel32 and normaliz libraries. */
145
146
#if (!defined(_WIN32_WINNT) || _WIN32_WINNT < _WIN32_WINNT_VISTA) && \
147
  (!defined(WINVER) || WINVER < 0x600)
148
WINBASEAPI int WINAPI IdnToAscii(DWORD dwFlags,
149
                                 const WCHAR *lpUnicodeCharStr,
150
                                 int cchUnicodeChar,
151
                                 WCHAR *lpASCIICharStr,
152
                                 int cchASCIIChar);
153
WINBASEAPI int WINAPI IdnToUnicode(DWORD dwFlags,
154
                                   const WCHAR *lpASCIICharStr,
155
                                   int cchASCIIChar,
156
                                   WCHAR *lpUnicodeCharStr,
157
                                   int cchUnicodeChar);
158
#endif
159
160
#define IDN_MAX_LENGTH 255
161
162
static char *idn_curlx_convert_wchar_to_UTF8(const wchar_t *str_w, int chars)
163
{
164
  char *str_utf8 = NULL;
165
  int bytes = WideCharToMultiByte(CP_UTF8, 0, str_w, chars, NULL, 0,
166
                                  NULL, NULL);
167
  if(bytes > 0) {
168
    str_utf8 = curlx_malloc(bytes);
169
    if(str_utf8) {
170
      if(WideCharToMultiByte(CP_UTF8, 0, str_w, chars, str_utf8, bytes,
171
                             NULL, NULL) == 0) {
172
        curlx_free(str_utf8);
173
        return NULL;
174
      }
175
    }
176
  }
177
  return str_utf8;
178
}
179
180
static CURLcode win32_idn_to_ascii(const char *in, char **out)
181
{
182
  wchar_t in_w[IDN_MAX_LENGTH];
183
  int in_w_len;
184
  *out = NULL;
185
  /* Returned in_w_len includes the null-terminator, which then gets
186
     preserved across the calls that follow, ending up terminating
187
     the buffer returned to the caller. */
188
  in_w_len = MultiByteToWideChar(CP_UTF8, 0, in, -1, in_w, IDN_MAX_LENGTH);
189
  if(in_w_len) {
190
    wchar_t punycode[IDN_MAX_LENGTH];
191
    int chars = IdnToAscii(0, in_w, in_w_len, punycode, IDN_MAX_LENGTH);
192
    if(chars > 0) {
193
      *out = idn_curlx_convert_wchar_to_UTF8(punycode, chars);
194
      if(!*out)
195
        return CURLE_OUT_OF_MEMORY;
196
    }
197
    else
198
      return CURLE_URL_MALFORMAT;
199
  }
200
  else
201
    return CURLE_URL_MALFORMAT;
202
203
  return CURLE_OK;
204
}
205
206
static CURLcode win32_ascii_to_idn(const char *in, char **out)
207
{
208
  wchar_t in_w[IDN_MAX_LENGTH];
209
  int in_w_len;
210
  *out = NULL;
211
  /* Returned in_w_len includes the null-terminator, which then gets
212
     preserved across the calls that follow, ending up terminating
213
     the buffer returned to the caller. */
214
  in_w_len = MultiByteToWideChar(CP_UTF8, 0, in, -1, in_w, IDN_MAX_LENGTH);
215
  if(in_w_len) {
216
    WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */
217
    int chars = IdnToUnicode(0, in_w, in_w_len, idn, IDN_MAX_LENGTH);
218
    if(chars > 0) {  /* 'chars' is "the number of characters retrieved" */
219
      *out = idn_curlx_convert_wchar_to_UTF8(idn, chars);
220
      if(!*out)
221
        return CURLE_OUT_OF_MEMORY;
222
    }
223
    else
224
      return CURLE_URL_MALFORMAT;
225
  }
226
  else
227
    return CURLE_URL_MALFORMAT;
228
229
  return CURLE_OK;
230
}
231
232
#endif /* USE_WIN32_IDN */
233
234
/*
235
 * Helpers for IDNA conversions.
236
 */
237
/*
 * Curl_is_ASCII_name() tells whether 'hostname' consists of 7-bit bytes
 * only. A NULL pointer is bad input and is reported as ASCII, and so is the
 * empty string.
 */
bool Curl_is_ASCII_name(const char *hostname)
{
  /* inspect the bytes as unsigned so that the high bit can be tested */
  const unsigned char *p = (const unsigned char *)hostname;

  if(p) {
    /* skip ahead for as long as the bytes have the high bit clear */
    while(*p && !(*p & 0x80))
      p++;
  }

  /* ASCII if there was no name at all or the scan reached the terminator */
  return (bool)(!p || !*p);
}
251
252
#ifdef USE_IDN
253
/*
254
 * Curl_idn_decode() returns an allocated IDN decoded string if it was
255
 * possible. NULL on error.
256
 *
257
 * CURLE_URL_MALFORMAT - the hostname could not be converted
258
 * CURLE_OUT_OF_MEMORY - memory problem
259
 *
260
 */
261
static CURLcode idn_decode(const char *input, char **output)
262
1.86k
{
263
1.86k
  char *decoded = NULL;
264
1.86k
  CURLcode result = CURLE_OK;
265
1.86k
#ifdef USE_LIBIDN2
266
1.86k
  if(idn2_check_version(IDN2_VERSION)) {
267
1.86k
    int flags = IDN2_NFC_INPUT
268
1.86k
#if IDN2_VERSION_NUMBER >= 0x00140000
269
      /* IDN2_NFC_INPUT: Normalize input string using normalization form C.
270
         IDN2_NONTRANSITIONAL: Perform Unicode TR46 non-transitional
271
         processing. */
272
1.86k
      | IDN2_NONTRANSITIONAL
273
1.86k
#endif
274
1.86k
      ;
275
1.86k
    int rc = IDN2_LOOKUP(input, &decoded, flags);
276
1.86k
    if(rc != IDN2_OK)
277
      /* fallback to TR46 Transitional mode for better IDNA2003
278
         compatibility */
279
1.86k
      rc = IDN2_LOOKUP(input, &decoded, IDN2_TRANSITIONAL);
280
1.86k
    if(rc != IDN2_OK)
281
1.86k
      result = CURLE_URL_MALFORMAT;
282
1.86k
  }
283
0
  else
284
    /* a too old libidn2 version */
285
0
    result = CURLE_NOT_BUILT_IN;
286
#elif defined(USE_WIN32_IDN)
287
  result = win32_idn_to_ascii(input, &decoded);
288
#elif defined(USE_APPLE_IDN)
289
  result = mac_idn_to_ascii(input, &decoded);
290
#endif
291
1.86k
  if(!result)
292
0
    *output = decoded;
293
1.86k
  return result;
294
1.86k
}
295
296
static CURLcode idn_encode(const char *puny, char **output)
297
0
{
298
0
  char *enc = NULL;
299
0
#ifdef USE_LIBIDN2
300
0
  int rc = idn2_to_unicode_8z8z(puny, &enc, 0);
301
0
  if(rc != IDNA_SUCCESS)
302
0
    return rc == IDNA_MALLOC_ERROR ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
303
#elif defined(USE_WIN32_IDN)
304
  CURLcode result = win32_ascii_to_idn(puny, &enc);
305
  if(result)
306
    return result;
307
#elif defined(USE_APPLE_IDN)
308
  CURLcode result = mac_ascii_to_idn(puny, &enc);
309
  if(result)
310
    return result;
311
#endif
312
0
  *output = enc;
313
0
  return CURLE_OK;
314
0
}
315
316
CURLcode Curl_idn_decode(const char *input, char **output)
317
1.86k
{
318
1.86k
  char *d = NULL;
319
1.86k
  CURLcode result = idn_decode(input, &d);
320
1.86k
#ifdef USE_LIBIDN2
321
1.86k
  if(!result) {
322
0
    char *c = curlx_strdup(d);
323
0
    idn2_free(d);
324
0
    if(c)
325
0
      d = c;
326
0
    else
327
0
      result = CURLE_OUT_OF_MEMORY;
328
0
  }
329
1.86k
#endif
330
1.86k
  if(!result) {
331
0
    if(!d[0]) { /* ended up zero length, not acceptable */
332
0
      result = CURLE_URL_MALFORMAT;
333
0
      curlx_free(d);
334
0
    }
335
0
    else
336
0
      *output = d;
337
0
  }
338
1.86k
  return result;
339
1.86k
}
340
341
CURLcode Curl_idn_encode(const char *puny, char **output)
342
0
{
343
0
  char *d = NULL;
344
0
  CURLcode result = idn_encode(puny, &d);
345
0
#ifdef USE_LIBIDN2
346
0
  if(!result) {
347
0
    char *c = curlx_strdup(d);
348
0
    idn2_free(d);
349
0
    if(c)
350
0
      d = c;
351
0
    else
352
0
      result = CURLE_OUT_OF_MEMORY;
353
0
  }
354
0
#endif
355
0
  if(!result)
356
0
    *output = d;
357
0
  return result;
358
0
}
359
360
/*
361
 * Frees data allocated by idnconvert_hostname()
362
 */
363
void Curl_free_idnconverted_hostname(struct hostname *host)
364
698k
{
365
698k
  Curl_safefree(host->encalloc);
366
698k
}
367
368
#endif /* USE_IDN */
369
370
/*
371
 * Perform any necessary IDN conversion of hostname
372
 */
373
CURLcode Curl_idnconvert_hostname(struct hostname *host)
374
344k
{
375
  /* set the name we use to display the hostname */
376
344k
  host->dispname = host->name;
377
378
344k
#ifdef USE_IDN
379
  /* Check name for non-ASCII and convert hostname if we can */
380
344k
  if(!Curl_is_ASCII_name(host->name)) {
381
1.86k
    char *decoded;
382
1.86k
    CURLcode result = Curl_idn_decode(host->name, &decoded);
383
1.86k
    if(result)
384
1.86k
      return result;
385
    /* successful */
386
0
    host->name = host->encalloc = decoded;
387
0
  }
388
342k
#endif
389
342k
  return CURLE_OK;
390
344k
}