Coverage Report

Created: 2026-03-31 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn/lib/gl/striconv.c
Line
Count
Source
1
/* Charset conversion.
2
   Copyright (C) 2001-2007, 2010-2026 Free Software Foundation, Inc.
3
   Written by Bruno Haible and Simon Josefsson.
4
5
   This file is free software: you can redistribute it and/or modify
6
   it under the terms of the GNU Lesser General Public License as
7
   published by the Free Software Foundation; either version 2.1 of the
8
   License, or (at your option) any later version.
9
10
   This file is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU Lesser General Public License for more details.
14
15
   You should have received a copy of the GNU Lesser General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17
18
#include <config.h>
19
20
/* Specification.  */
21
#include "striconv.h"
22
23
#include <errno.h>
24
#include <stdlib.h>
25
#include <string.h>
26
27
#if HAVE_ICONV
28
# include <iconv.h>
29
/* Get MB_LEN_MAX, CHAR_BIT.  */
30
# include <limits.h>
31
#endif
32
33
#include "c-strcase.h"
34
35
#ifndef SIZE_MAX
36
17.0k
# define SIZE_MAX ((size_t) -1)
37
#endif
38
39
40
#if HAVE_ICONV
41
42
int
43
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
44
              char **resultp, size_t *lengthp)
45
0
{
46
0
# define tmpbufsize 4096
47
48
  /* Set to the initial state.  */
49
0
  iconv (cd, NULL, NULL, NULL, NULL);
50
51
  /* Determine the length we need.  */
52
0
  size_t length;
53
0
  {
54
0
    size_t count = 0;
55
    /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
56
       libiconv's UCS-4-INTERNAL encoding.  */
57
0
    union { unsigned int align; char buf[tmpbufsize]; } tmp;
58
0
# define tmpbuf tmp.buf
59
0
    const char *inptr = src;
60
0
    size_t insize = srclen;
61
62
0
    while (insize > 0)
63
0
      {
64
0
        char *outptr = tmpbuf;
65
0
        size_t outsize = tmpbufsize;
66
0
        size_t res = iconv (cd,
67
0
                            (ICONV_CONST char **) &inptr, &insize,
68
0
                            &outptr, &outsize);
69
70
0
        if (res == (size_t)(-1))
71
0
          {
72
0
            if (errno == E2BIG)
73
0
              ;
74
0
            else if (errno == EINVAL)
75
0
              break;
76
0
            else
77
0
              return -1;
78
0
          }
79
# if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
80
     && !(defined __GLIBC__ && !defined __UCLIBC__)
81
        /* NetBSD iconv() inserts a question mark if it cannot convert.
82
           Only GNU libiconv (excluding the bastard Apple iconv) and GNU libc
83
           are known to prefer to fail rather than doing a lossy conversion.  */
84
        else if (res > 0)
85
          {
86
            errno = EILSEQ;
87
            return -1;
88
          }
89
# endif
90
0
        count += outptr - tmpbuf;
91
0
      }
92
0
    {
93
0
      char *outptr = tmpbuf;
94
0
      size_t outsize = tmpbufsize;
95
0
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
96
97
0
      if (res == (size_t)(-1))
98
0
        return -1;
99
0
      count += outptr - tmpbuf;
100
0
    }
101
0
    length = count;
102
0
# undef tmpbuf
103
0
  }
104
105
0
  if (length == 0)
106
0
    {
107
0
      *lengthp = 0;
108
0
      return 0;
109
0
    }
110
111
0
  char *result;
112
0
  if (*resultp != NULL && *lengthp >= length)
113
0
    result = *resultp;
114
0
  else
115
0
    {
116
0
      result = (char *) malloc (length);
117
0
      if (result == NULL)
118
0
        {
119
0
          errno = ENOMEM;
120
0
          return -1;
121
0
        }
122
0
    }
123
124
  /* Return to the initial state.  */
125
0
  iconv (cd, NULL, NULL, NULL, NULL);
126
127
  /* Do the conversion for real.  */
128
0
  {
129
0
    const char *inptr = src;
130
0
    size_t insize = srclen;
131
0
    char *outptr = result;
132
0
    size_t outsize = length;
133
134
0
    while (insize > 0)
135
0
      {
136
0
        size_t res = iconv (cd,
137
0
                            (ICONV_CONST char **) &inptr, &insize,
138
0
                            &outptr, &outsize);
139
140
0
        if (res == (size_t)(-1))
141
0
          {
142
0
            if (errno == EINVAL)
143
0
              break;
144
0
            else
145
0
              goto fail;
146
0
          }
147
# if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
148
     && !(defined __GLIBC__ && !defined __UCLIBC__)
149
        /* NetBSD iconv() inserts a question mark if it cannot convert.
150
           Only GNU libiconv (excluding the bastard Apple iconv) and GNU libc
151
           are known to prefer to fail rather than doing a lossy conversion.  */
152
        else if (res > 0)
153
          {
154
            errno = EILSEQ;
155
            goto fail;
156
          }
157
# endif
158
0
      }
159
0
    {
160
0
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
161
162
0
      if (res == (size_t)(-1))
163
0
        goto fail;
164
0
    }
165
0
    if (outsize != 0)
166
0
      abort ();
167
0
  }
168
169
0
  *resultp = result;
170
0
  *lengthp = length;
171
172
0
  return 0;
173
174
0
 fail:
175
0
  {
176
0
    if (result != *resultp)
177
0
      free (result);
178
0
    return -1;
179
0
  }
180
0
# undef tmpbufsize
181
0
}
182
183
char *
184
str_cd_iconv (const char *src, iconv_t cd)
185
17.0k
{
186
  /* For most encodings, a trailing NUL byte in the input will be converted
187
     to a trailing NUL byte in the output.  But not for UTF-7.  So that this
188
     function is usable for UTF-7, we have to exclude the NUL byte from the
189
     conversion and add it by hand afterwards.  */
190
# if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
191
     && !(defined __GLIBC__ && !defined __UCLIBC__)
192
  /* NetBSD iconv() inserts a question mark if it cannot convert.
193
     Only GNU libiconv (excluding the bastard Apple iconv) and GNU libc are
194
     known to prefer to fail rather than doing a lossy conversion.  For other
195
     iconv() implementations, we have to look at the number of irreversible
196
     conversions returned; but this information is lost when iconv() returns
197
     for an E2BIG reason.  Therefore we cannot use the second, faster
198
     algorithm.  */
199
200
  char *result = NULL;
201
  size_t length = 0;
202
  int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
203
204
  if (retval < 0)
205
    {
206
      if (result != NULL)
207
        abort ();
208
      return NULL;
209
    }
210
211
  /* Add the terminating NUL byte.  */
212
  char *final_result =
213
    (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
214
  if (final_result == NULL)
215
    {
216
      free (result);
217
      errno = ENOMEM;
218
      return NULL;
219
    }
220
  final_result[length] = '\0';
221
222
  return final_result;
223
224
# else
225
  /* This algorithm is likely faster than the one above.  But it may produce
226
     iconv() returns for an E2BIG reason, when the output size guess is too
227
     small.  Therefore it can only be used when we don't need the number of
228
     irreversible conversions performed.  */
229
17.0k
  size_t inbytes_remaining = strlen (src);
230
231
  /* Make a guess for the worst-case output size, in order to avoid a
232
     realloc.  It's OK if the guess is wrong as long as it is not zero and
233
     doesn't lead to an integer overflow.  */
234
17.0k
  size_t result_size = inbytes_remaining;
235
17.0k
  {
236
17.0k
    size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
237
17.0k
    if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
238
17.0k
      result_size *= MB_LEN_MAX;
239
17.0k
  }
240
17.0k
  result_size += 1; /* for the terminating NUL */
241
242
17.0k
  char *result = (char *) malloc (result_size);
243
17.0k
  if (result == NULL)
244
0
    {
245
0
      errno = ENOMEM;
246
0
      return NULL;
247
0
    }
248
249
  /* Set to the initial state.  */
250
17.0k
  iconv (cd, NULL, NULL, NULL, NULL);
251
252
  /* Do the conversion.  */
253
17.0k
  size_t length;
254
17.0k
  const char *inptr = src;
255
17.0k
  {
256
17.0k
    char *outptr = result;
257
17.0k
    size_t outbytes_remaining = result_size - 1;
258
259
17.0k
    for (;;)
260
17.0k
      {
261
        /* Here inptr + inbytes_remaining = src + strlen (src),
262
                outptr + outbytes_remaining = result + result_size - 1.  */
263
17.0k
        size_t res = iconv (cd,
264
17.0k
                            (ICONV_CONST char **) &inptr, &inbytes_remaining,
265
17.0k
                            &outptr, &outbytes_remaining);
266
267
17.0k
        if (res == (size_t)(-1))
268
10.9k
          {
269
10.9k
            if (errno == EINVAL)
270
0
              break;
271
10.9k
            else if (errno == E2BIG)
272
0
              {
273
0
                size_t used = outptr - result;
274
0
                size_t newsize = result_size * 2;
275
0
                char *newresult;
276
277
0
                if (!(newsize > result_size))
278
0
                  {
279
0
                    errno = ENOMEM;
280
0
                    goto failed;
281
0
                  }
282
0
                newresult = (char *) realloc (result, newsize);
283
0
                if (newresult == NULL)
284
0
                  {
285
0
                    errno = ENOMEM;
286
0
                    goto failed;
287
0
                  }
288
0
                result = newresult;
289
0
                result_size = newsize;
290
0
                outptr = result + used;
291
0
                outbytes_remaining = result_size - 1 - used;
292
0
              }
293
10.9k
            else
294
10.9k
              goto failed;
295
10.9k
          }
296
6.07k
        else
297
6.07k
          break;
298
17.0k
      }
299
6.07k
    for (;;)
300
6.07k
      {
301
        /* Here outptr + outbytes_remaining = result + result_size - 1.  */
302
6.07k
        size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);
303
304
6.07k
        if (res == (size_t)(-1))
305
0
          {
306
0
            if (errno == E2BIG)
307
0
              {
308
0
                size_t used = outptr - result;
309
0
                size_t newsize = result_size * 2;
310
0
                char *newresult;
311
312
0
                if (!(newsize > result_size))
313
0
                  {
314
0
                    errno = ENOMEM;
315
0
                    goto failed;
316
0
                  }
317
0
                newresult = (char *) realloc (result, newsize);
318
0
                if (newresult == NULL)
319
0
                  {
320
0
                    errno = ENOMEM;
321
0
                    goto failed;
322
0
                  }
323
0
                result = newresult;
324
0
                result_size = newsize;
325
0
                outptr = result + used;
326
0
                outbytes_remaining = result_size - 1 - used;
327
0
              }
328
0
            else
329
0
              goto failed;
330
0
          }
331
6.07k
        else
332
6.07k
          break;
333
6.07k
      }
334
335
    /* Add the terminating NUL byte.  */
336
6.07k
    *outptr++ = '\0';
337
338
6.07k
    length = outptr - result;
339
6.07k
  }
340
341
  /* Give away unused memory.  */
342
6.07k
  if (length < result_size)
343
6.07k
    {
344
6.07k
      char *smaller_result = (char *) realloc (result, length);
345
346
6.07k
      if (smaller_result != NULL)
347
6.07k
        result = smaller_result;
348
6.07k
    }
349
350
6.07k
  return result;
351
352
10.9k
 failed:
353
10.9k
  free (result);
354
10.9k
  return NULL;
355
356
6.07k
# endif
357
6.07k
}
358
359
#endif
360
361
char *
362
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
363
17.4k
{
364
17.4k
  if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
365
454
    {
366
454
      char *result = strdup (src);
367
368
454
      if (result == NULL)
369
454
        errno = ENOMEM;
370
454
      return result;
371
454
    }
372
17.0k
  else
373
17.0k
    {
374
17.0k
#if HAVE_ICONV
375
17.0k
      iconv_t cd = iconv_open (to_codeset, from_codeset);
376
17.0k
      if (cd == (iconv_t) -1)
377
0
        return NULL;
378
379
17.0k
      char *result = str_cd_iconv (src, cd);
380
381
17.0k
      if (result == NULL)
382
10.9k
        {
383
          /* Close cd, but preserve the errno from str_cd_iconv.  */
384
10.9k
          int saved_errno = errno;
385
10.9k
          iconv_close (cd);
386
10.9k
          errno = saved_errno;
387
10.9k
        }
388
6.07k
      else
389
6.07k
        {
390
6.07k
          if (iconv_close (cd) < 0)
391
0
            {
392
0
              free (result);
393
0
              return NULL;
394
0
            }
395
6.07k
        }
396
17.0k
      return result;
397
#else
398
      /* This is a different error code than if iconv_open existed but didn't
399
         support from_codeset and to_codeset, so that the caller can emit
400
         an error message such as
401
           "iconv() is not supported. Installing GNU libiconv and
402
            then reinstalling this package would fix this."  */
403
      errno = ENOSYS;
404
      return NULL;
405
#endif
406
17.0k
    }
407
17.4k
}