Coverage Report

Created: 2025-11-09 06:06

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libidn/lib/gl/striconv.c
Line
Count
Source
1
/* Charset conversion.
2
   Copyright (C) 2001-2007, 2010-2025 Free Software Foundation, Inc.
3
   Written by Bruno Haible and Simon Josefsson.
4
5
   This file is free software: you can redistribute it and/or modify
6
   it under the terms of the GNU Lesser General Public License as
7
   published by the Free Software Foundation; either version 2.1 of the
8
   License, or (at your option) any later version.
9
10
   This file is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU Lesser General Public License for more details.
14
15
   You should have received a copy of the GNU Lesser General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17
18
#include <config.h>
19
20
/* Specification.  */
21
#include "striconv.h"
22
23
#include <errno.h>
24
#include <stdlib.h>
25
#include <string.h>
26
27
#if HAVE_ICONV
28
# include <iconv.h>
29
/* Get MB_LEN_MAX, CHAR_BIT.  */
30
# include <limits.h>
31
#endif
32
33
#include "c-strcase.h"
34
35
#ifndef SIZE_MAX
36
16.0k
# define SIZE_MAX ((size_t) -1)
37
#endif
38
39
40
#if HAVE_ICONV
41
42
int
43
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
44
              char **resultp, size_t *lengthp)
45
0
{
46
0
# define tmpbufsize 4096
47
0
  size_t length;
48
0
  char *result;
49
50
  /* Set to the initial state.  */
51
0
  iconv (cd, NULL, NULL, NULL, NULL);
52
53
  /* Determine the length we need.  */
54
0
  {
55
0
    size_t count = 0;
56
    /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
57
       libiconv's UCS-4-INTERNAL encoding.  */
58
0
    union { unsigned int align; char buf[tmpbufsize]; } tmp;
59
0
# define tmpbuf tmp.buf
60
0
    const char *inptr = src;
61
0
    size_t insize = srclen;
62
63
0
    while (insize > 0)
64
0
      {
65
0
        char *outptr = tmpbuf;
66
0
        size_t outsize = tmpbufsize;
67
0
        size_t res = iconv (cd,
68
0
                            (ICONV_CONST char **) &inptr, &insize,
69
0
                            &outptr, &outsize);
70
71
0
        if (res == (size_t)(-1))
72
0
          {
73
0
            if (errno == E2BIG)
74
0
              ;
75
0
            else if (errno == EINVAL)
76
0
              break;
77
0
            else
78
0
              return -1;
79
0
          }
80
# if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
81
     && !(defined __GLIBC__ && !defined __UCLIBC__)
82
        /* Irix iconv() inserts a NUL byte if it cannot convert.
83
           NetBSD iconv() inserts a question mark if it cannot convert.
84
           Only GNU libiconv (excluding the bastard Apple iconv) and GNU libc
85
           are known to prefer to fail rather than doing a lossy conversion.  */
86
        else if (res > 0)
87
          {
88
            errno = EILSEQ;
89
            return -1;
90
          }
91
# endif
92
0
        count += outptr - tmpbuf;
93
0
      }
94
0
    {
95
0
      char *outptr = tmpbuf;
96
0
      size_t outsize = tmpbufsize;
97
0
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
98
99
0
      if (res == (size_t)(-1))
100
0
        return -1;
101
0
      count += outptr - tmpbuf;
102
0
    }
103
0
    length = count;
104
0
# undef tmpbuf
105
0
  }
106
107
0
  if (length == 0)
108
0
    {
109
0
      *lengthp = 0;
110
0
      return 0;
111
0
    }
112
0
  if (*resultp != NULL && *lengthp >= length)
113
0
    result = *resultp;
114
0
  else
115
0
    {
116
0
      result = (char *) malloc (length);
117
0
      if (result == NULL)
118
0
        {
119
0
          errno = ENOMEM;
120
0
          return -1;
121
0
        }
122
0
    }
123
124
  /* Return to the initial state.  */
125
0
  iconv (cd, NULL, NULL, NULL, NULL);
126
127
  /* Do the conversion for real.  */
128
0
  {
129
0
    const char *inptr = src;
130
0
    size_t insize = srclen;
131
0
    char *outptr = result;
132
0
    size_t outsize = length;
133
134
0
    while (insize > 0)
135
0
      {
136
0
        size_t res = iconv (cd,
137
0
                            (ICONV_CONST char **) &inptr, &insize,
138
0
                            &outptr, &outsize);
139
140
0
        if (res == (size_t)(-1))
141
0
          {
142
0
            if (errno == EINVAL)
143
0
              break;
144
0
            else
145
0
              goto fail;
146
0
          }
147
# if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
148
     && !(defined __GLIBC__ && !defined __UCLIBC__)
149
        /* Irix iconv() inserts a NUL byte if it cannot convert.
150
           NetBSD iconv() inserts a question mark if it cannot convert.
151
           Only GNU libiconv (excluding the bastard Apple iconv) and GNU libc
152
           are known to prefer to fail rather than doing a lossy conversion.  */
153
        else if (res > 0)
154
          {
155
            errno = EILSEQ;
156
            goto fail;
157
          }
158
# endif
159
0
      }
160
0
    {
161
0
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
162
163
0
      if (res == (size_t)(-1))
164
0
        goto fail;
165
0
    }
166
0
    if (outsize != 0)
167
0
      abort ();
168
0
  }
169
170
0
  *resultp = result;
171
0
  *lengthp = length;
172
173
0
  return 0;
174
175
0
 fail:
176
0
  {
177
0
    if (result != *resultp)
178
0
      free (result);
179
0
    return -1;
180
0
  }
181
0
# undef tmpbufsize
182
0
}
183
184
char *
185
str_cd_iconv (const char *src, iconv_t cd)
186
16.0k
{
187
  /* For most encodings, a trailing NUL byte in the input will be converted
188
     to a trailing NUL byte in the output.  But not for UTF-7.  So that this
189
     function is usable for UTF-7, we have to exclude the NUL byte from the
190
     conversion and add it by hand afterwards.  */
191
# if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
192
     && !(defined __GLIBC__ && !defined __UCLIBC__)
193
  /* Irix iconv() inserts a NUL byte if it cannot convert.
194
     NetBSD iconv() inserts a question mark if it cannot convert.
195
     Only GNU libiconv (excluding the bastard Apple iconv) and GNU libc are
196
     known to prefer to fail rather than doing a lossy conversion.  For other
197
     iconv() implementations, we have to look at the number of irreversible
198
     conversions returned; but this information is lost when iconv() returns
199
     for an E2BIG reason.  Therefore we cannot use the second, faster
200
     algorithm.  */
201
202
  char *result = NULL;
203
  size_t length = 0;
204
  int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
205
  char *final_result;
206
207
  if (retval < 0)
208
    {
209
      if (result != NULL)
210
        abort ();
211
      return NULL;
212
    }
213
214
  /* Add the terminating NUL byte.  */
215
  final_result =
216
    (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
217
  if (final_result == NULL)
218
    {
219
      free (result);
220
      errno = ENOMEM;
221
      return NULL;
222
    }
223
  final_result[length] = '\0';
224
225
  return final_result;
226
227
# else
228
  /* This algorithm is likely faster than the one above.  But it may produce
229
     iconv() returns for an E2BIG reason, when the output size guess is too
230
     small.  Therefore it can only be used when we don't need the number of
231
     irreversible conversions performed.  */
232
16.0k
  char *result;
233
16.0k
  size_t result_size;
234
16.0k
  size_t length;
235
16.0k
  const char *inptr = src;
236
16.0k
  size_t inbytes_remaining = strlen (src);
237
238
  /* Make a guess for the worst-case output size, in order to avoid a
239
     realloc.  It's OK if the guess is wrong as long as it is not zero and
240
     doesn't lead to an integer overflow.  */
241
16.0k
  result_size = inbytes_remaining;
242
16.0k
  {
243
16.0k
    size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
244
16.0k
    if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
245
16.0k
      result_size *= MB_LEN_MAX;
246
16.0k
  }
247
16.0k
  result_size += 1; /* for the terminating NUL */
248
249
16.0k
  result = (char *) malloc (result_size);
250
16.0k
  if (result == NULL)
251
0
    {
252
0
      errno = ENOMEM;
253
0
      return NULL;
254
0
    }
255
256
  /* Set to the initial state.  */
257
16.0k
  iconv (cd, NULL, NULL, NULL, NULL);
258
259
  /* Do the conversion.  */
260
16.0k
  {
261
16.0k
    char *outptr = result;
262
16.0k
    size_t outbytes_remaining = result_size - 1;
263
264
16.0k
    for (;;)
265
16.0k
      {
266
        /* Here inptr + inbytes_remaining = src + strlen (src),
267
                outptr + outbytes_remaining = result + result_size - 1.  */
268
16.0k
        size_t res = iconv (cd,
269
16.0k
                            (ICONV_CONST char **) &inptr, &inbytes_remaining,
270
16.0k
                            &outptr, &outbytes_remaining);
271
272
16.0k
        if (res == (size_t)(-1))
273
10.3k
          {
274
10.3k
            if (errno == EINVAL)
275
0
              break;
276
10.3k
            else if (errno == E2BIG)
277
0
              {
278
0
                size_t used = outptr - result;
279
0
                size_t newsize = result_size * 2;
280
0
                char *newresult;
281
282
0
                if (!(newsize > result_size))
283
0
                  {
284
0
                    errno = ENOMEM;
285
0
                    goto failed;
286
0
                  }
287
0
                newresult = (char *) realloc (result, newsize);
288
0
                if (newresult == NULL)
289
0
                  {
290
0
                    errno = ENOMEM;
291
0
                    goto failed;
292
0
                  }
293
0
                result = newresult;
294
0
                result_size = newsize;
295
0
                outptr = result + used;
296
0
                outbytes_remaining = result_size - 1 - used;
297
0
              }
298
10.3k
            else
299
10.3k
              goto failed;
300
10.3k
          }
301
5.67k
        else
302
5.67k
          break;
303
16.0k
      }
304
5.67k
    for (;;)
305
5.67k
      {
306
        /* Here outptr + outbytes_remaining = result + result_size - 1.  */
307
5.67k
        size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);
308
309
5.67k
        if (res == (size_t)(-1))
310
0
          {
311
0
            if (errno == E2BIG)
312
0
              {
313
0
                size_t used = outptr - result;
314
0
                size_t newsize = result_size * 2;
315
0
                char *newresult;
316
317
0
                if (!(newsize > result_size))
318
0
                  {
319
0
                    errno = ENOMEM;
320
0
                    goto failed;
321
0
                  }
322
0
                newresult = (char *) realloc (result, newsize);
323
0
                if (newresult == NULL)
324
0
                  {
325
0
                    errno = ENOMEM;
326
0
                    goto failed;
327
0
                  }
328
0
                result = newresult;
329
0
                result_size = newsize;
330
0
                outptr = result + used;
331
0
                outbytes_remaining = result_size - 1 - used;
332
0
              }
333
0
            else
334
0
              goto failed;
335
0
          }
336
5.67k
        else
337
5.67k
          break;
338
5.67k
      }
339
340
    /* Add the terminating NUL byte.  */
341
5.67k
    *outptr++ = '\0';
342
343
5.67k
    length = outptr - result;
344
5.67k
  }
345
346
  /* Give away unused memory.  */
347
5.67k
  if (length < result_size)
348
5.67k
    {
349
5.67k
      char *smaller_result = (char *) realloc (result, length);
350
351
5.67k
      if (smaller_result != NULL)
352
5.67k
        result = smaller_result;
353
5.67k
    }
354
355
5.67k
  return result;
356
357
10.3k
 failed:
358
10.3k
  free (result);
359
10.3k
  return NULL;
360
361
5.67k
# endif
362
5.67k
}
363
364
#endif
365
366
char *
367
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
368
16.4k
{
369
16.4k
  if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
370
382
    {
371
382
      char *result = strdup (src);
372
373
382
      if (result == NULL)
374
382
        errno = ENOMEM;
375
382
      return result;
376
382
    }
377
16.0k
  else
378
16.0k
    {
379
16.0k
#if HAVE_ICONV
380
16.0k
      iconv_t cd;
381
16.0k
      char *result;
382
383
16.0k
      cd = iconv_open (to_codeset, from_codeset);
384
16.0k
      if (cd == (iconv_t) -1)
385
0
        return NULL;
386
387
16.0k
      result = str_cd_iconv (src, cd);
388
389
16.0k
      if (result == NULL)
390
10.3k
        {
391
          /* Close cd, but preserve the errno from str_cd_iconv.  */
392
10.3k
          int saved_errno = errno;
393
10.3k
          iconv_close (cd);
394
10.3k
          errno = saved_errno;
395
10.3k
        }
396
5.67k
      else
397
5.67k
        {
398
5.67k
          if (iconv_close (cd) < 0)
399
0
            {
400
0
              free (result);
401
0
              return NULL;
402
0
            }
403
5.67k
        }
404
16.0k
      return result;
405
#else
406
      /* This is a different error code than if iconv_open existed but didn't
407
         support from_codeset and to_codeset, so that the caller can emit
408
         an error message such as
409
           "iconv() is not supported. Installing GNU libiconv and
410
            then reinstalling this package would fix this."  */
411
      errno = ENOSYS;
412
      return NULL;
413
#endif
414
16.0k
    }
415
16.4k
}