Coverage Report

Created: 2023-09-25 06:05

/src/libidn/lib/gl/striconv.c
Line
Count
Source (jump to first uncovered line)
1
/* Charset conversion.
2
   Copyright (C) 2001-2007, 2010-2023 Free Software Foundation, Inc.
3
   Written by Bruno Haible and Simon Josefsson.
4
5
   This file is free software: you can redistribute it and/or modify
6
   it under the terms of the GNU Lesser General Public License as
7
   published by the Free Software Foundation; either version 2.1 of the
8
   License, or (at your option) any later version.
9
10
   This file is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU Lesser General Public License for more details.
14
15
   You should have received a copy of the GNU Lesser General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17
18
#include <config.h>
19
20
/* Specification.  */
21
#include "striconv.h"
22
23
#include <errno.h>
24
#include <stdlib.h>
25
#include <string.h>
26
27
#if HAVE_ICONV
28
# include <iconv.h>
29
/* Get MB_LEN_MAX, CHAR_BIT.  */
30
# include <limits.h>
31
#endif
32
33
#include "c-strcase.h"
34
35
#ifndef SIZE_MAX
36
11.0k
# define SIZE_MAX ((size_t) -1)
37
#endif
38
39
40
#if HAVE_ICONV
41
42
int
43
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
44
              char **resultp, size_t *lengthp)
45
0
{
46
0
# define tmpbufsize 4096
47
0
  size_t length;
48
0
  char *result;
49
50
  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
51
0
# if defined _LIBICONV_VERSION \
52
0
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
53
0
          || defined __sun)
54
  /* Set to the initial state.  */
55
0
  iconv (cd, NULL, NULL, NULL, NULL);
56
0
# endif
57
58
  /* Determine the length we need.  */
59
0
  {
60
0
    size_t count = 0;
61
    /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
62
       libiconv's UCS-4-INTERNAL encoding.  */
63
0
    union { unsigned int align; char buf[tmpbufsize]; } tmp;
64
0
# define tmpbuf tmp.buf
65
0
    const char *inptr = src;
66
0
    size_t insize = srclen;
67
68
0
    while (insize > 0)
69
0
      {
70
0
        char *outptr = tmpbuf;
71
0
        size_t outsize = tmpbufsize;
72
0
        size_t res = iconv (cd,
73
0
                            (ICONV_CONST char **) &inptr, &insize,
74
0
                            &outptr, &outsize);
75
76
0
        if (res == (size_t)(-1))
77
0
          {
78
0
            if (errno == E2BIG)
79
0
              ;
80
0
            else if (errno == EINVAL)
81
0
              break;
82
0
            else
83
0
              return -1;
84
0
          }
85
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
86
        /* Irix iconv() inserts a NUL byte if it cannot convert.
87
           NetBSD iconv() inserts a question mark if it cannot convert.
88
           Only GNU libiconv and GNU libc are known to prefer to fail rather
89
           than doing a lossy conversion.  */
90
        else if (res > 0)
91
          {
92
            errno = EILSEQ;
93
            return -1;
94
          }
95
# endif
96
0
        count += outptr - tmpbuf;
97
0
      }
98
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
99
0
# if defined _LIBICONV_VERSION \
100
0
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
101
0
          || defined __sun)
102
0
    {
103
0
      char *outptr = tmpbuf;
104
0
      size_t outsize = tmpbufsize;
105
0
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
106
107
0
      if (res == (size_t)(-1))
108
0
        return -1;
109
0
      count += outptr - tmpbuf;
110
0
    }
111
0
# endif
112
0
    length = count;
113
0
# undef tmpbuf
114
0
  }
115
116
0
  if (length == 0)
117
0
    {
118
0
      *lengthp = 0;
119
0
      return 0;
120
0
    }
121
0
  if (*resultp != NULL && *lengthp >= length)
122
0
    result = *resultp;
123
0
  else
124
0
    {
125
0
      result = (char *) malloc (length);
126
0
      if (result == NULL)
127
0
        {
128
0
          errno = ENOMEM;
129
0
          return -1;
130
0
        }
131
0
    }
132
133
  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
134
0
# if defined _LIBICONV_VERSION \
135
0
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
136
0
          || defined __sun)
137
  /* Return to the initial state.  */
138
0
  iconv (cd, NULL, NULL, NULL, NULL);
139
0
# endif
140
141
  /* Do the conversion for real.  */
142
0
  {
143
0
    const char *inptr = src;
144
0
    size_t insize = srclen;
145
0
    char *outptr = result;
146
0
    size_t outsize = length;
147
148
0
    while (insize > 0)
149
0
      {
150
0
        size_t res = iconv (cd,
151
0
                            (ICONV_CONST char **) &inptr, &insize,
152
0
                            &outptr, &outsize);
153
154
0
        if (res == (size_t)(-1))
155
0
          {
156
0
            if (errno == EINVAL)
157
0
              break;
158
0
            else
159
0
              goto fail;
160
0
          }
161
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
162
        /* Irix iconv() inserts a NUL byte if it cannot convert.
163
           NetBSD iconv() inserts a question mark if it cannot convert.
164
           Only GNU libiconv and GNU libc are known to prefer to fail rather
165
           than doing a lossy conversion.  */
166
        else if (res > 0)
167
          {
168
            errno = EILSEQ;
169
            goto fail;
170
          }
171
# endif
172
0
      }
173
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
174
0
# if defined _LIBICONV_VERSION \
175
0
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
176
0
          || defined __sun)
177
0
    {
178
0
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
179
180
0
      if (res == (size_t)(-1))
181
0
        goto fail;
182
0
    }
183
0
# endif
184
0
    if (outsize != 0)
185
0
      abort ();
186
0
  }
187
188
0
  *resultp = result;
189
0
  *lengthp = length;
190
191
0
  return 0;
192
193
0
 fail:
194
0
  {
195
0
    if (result != *resultp)
196
0
      free (result);
197
0
    return -1;
198
0
  }
199
0
# undef tmpbufsize
200
0
}
201
202
char *
203
str_cd_iconv (const char *src, iconv_t cd)
204
11.0k
{
205
  /* For most encodings, a trailing NUL byte in the input will be converted
206
     to a trailing NUL byte in the output.  But not for UTF-7.  So that this
207
     function is usable for UTF-7, we have to exclude the NUL byte from the
208
     conversion and add it by hand afterwards.  */
209
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
210
  /* Irix iconv() inserts a NUL byte if it cannot convert.
211
     NetBSD iconv() inserts a question mark if it cannot convert.
212
     Only GNU libiconv and GNU libc are known to prefer to fail rather
213
     than doing a lossy conversion.  For other iconv() implementations,
214
     we have to look at the number of irreversible conversions returned;
215
     but this information is lost when iconv() returns for an E2BIG reason.
216
     Therefore we cannot use the second, faster algorithm.  */
217
218
  char *result = NULL;
219
  size_t length = 0;
220
  int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
221
  char *final_result;
222
223
  if (retval < 0)
224
    {
225
      if (result != NULL)
226
        abort ();
227
      return NULL;
228
    }
229
230
  /* Add the terminating NUL byte.  */
231
  final_result =
232
    (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
233
  if (final_result == NULL)
234
    {
235
      free (result);
236
      errno = ENOMEM;
237
      return NULL;
238
    }
239
  final_result[length] = '\0';
240
241
  return final_result;
242
243
# else
244
  /* This algorithm is likely faster than the one above.  But it may produce
245
     iconv() returns for an E2BIG reason, when the output size guess is too
246
     small.  Therefore it can only be used when we don't need the number of
247
     irreversible conversions performed.  */
248
11.0k
  char *result;
249
11.0k
  size_t result_size;
250
11.0k
  size_t length;
251
11.0k
  const char *inptr = src;
252
11.0k
  size_t inbytes_remaining = strlen (src);
253
254
  /* Make a guess for the worst-case output size, in order to avoid a
255
     realloc.  It's OK if the guess is wrong as long as it is not zero and
256
     doesn't lead to an integer overflow.  */
257
11.0k
  result_size = inbytes_remaining;
258
11.0k
  {
259
11.0k
    size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
260
11.0k
    if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
261
11.0k
      result_size *= MB_LEN_MAX;
262
11.0k
  }
263
11.0k
  result_size += 1; /* for the terminating NUL */
264
265
11.0k
  result = (char *) malloc (result_size);
266
11.0k
  if (result == NULL)
267
0
    {
268
0
      errno = ENOMEM;
269
0
      return NULL;
270
0
    }
271
272
  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
273
11.0k
# if defined _LIBICONV_VERSION \
274
11.0k
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
275
11.0k
          || defined __sun)
276
  /* Set to the initial state.  */
277
11.0k
  iconv (cd, NULL, NULL, NULL, NULL);
278
11.0k
# endif
279
280
  /* Do the conversion.  */
281
11.0k
  {
282
11.0k
    char *outptr = result;
283
11.0k
    size_t outbytes_remaining = result_size - 1;
284
285
11.0k
    for (;;)
286
11.0k
      {
287
        /* Here inptr + inbytes_remaining = src + strlen (src),
288
                outptr + outbytes_remaining = result + result_size - 1.  */
289
11.0k
        size_t res = iconv (cd,
290
11.0k
                            (ICONV_CONST char **) &inptr, &inbytes_remaining,
291
11.0k
                            &outptr, &outbytes_remaining);
292
293
11.0k
        if (res == (size_t)(-1))
294
5.72k
          {
295
5.72k
            if (errno == EINVAL)
296
0
              break;
297
5.72k
            else if (errno == E2BIG)
298
0
              {
299
0
                size_t used = outptr - result;
300
0
                size_t newsize = result_size * 2;
301
0
                char *newresult;
302
303
0
                if (!(newsize > result_size))
304
0
                  {
305
0
                    errno = ENOMEM;
306
0
                    goto failed;
307
0
                  }
308
0
                newresult = (char *) realloc (result, newsize);
309
0
                if (newresult == NULL)
310
0
                  {
311
0
                    errno = ENOMEM;
312
0
                    goto failed;
313
0
                  }
314
0
                result = newresult;
315
0
                result_size = newsize;
316
0
                outptr = result + used;
317
0
                outbytes_remaining = result_size - 1 - used;
318
0
              }
319
5.72k
            else
320
5.72k
              goto failed;
321
5.72k
          }
322
5.31k
        else
323
5.31k
          break;
324
11.0k
      }
325
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
326
5.31k
# if defined _LIBICONV_VERSION \
327
5.31k
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
328
5.31k
          || defined __sun)
329
5.31k
    for (;;)
330
5.31k
      {
331
        /* Here outptr + outbytes_remaining = result + result_size - 1.  */
332
5.31k
        size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);
333
334
5.31k
        if (res == (size_t)(-1))
335
0
          {
336
0
            if (errno == E2BIG)
337
0
              {
338
0
                size_t used = outptr - result;
339
0
                size_t newsize = result_size * 2;
340
0
                char *newresult;
341
342
0
                if (!(newsize > result_size))
343
0
                  {
344
0
                    errno = ENOMEM;
345
0
                    goto failed;
346
0
                  }
347
0
                newresult = (char *) realloc (result, newsize);
348
0
                if (newresult == NULL)
349
0
                  {
350
0
                    errno = ENOMEM;
351
0
                    goto failed;
352
0
                  }
353
0
                result = newresult;
354
0
                result_size = newsize;
355
0
                outptr = result + used;
356
0
                outbytes_remaining = result_size - 1 - used;
357
0
              }
358
0
            else
359
0
              goto failed;
360
0
          }
361
5.31k
        else
362
5.31k
          break;
363
5.31k
      }
364
5.31k
# endif
365
366
    /* Add the terminating NUL byte.  */
367
5.31k
    *outptr++ = '\0';
368
369
5.31k
    length = outptr - result;
370
5.31k
  }
371
372
  /* Give away unused memory.  */
373
5.31k
  if (length < result_size)
374
5.31k
    {
375
5.31k
      char *smaller_result = (char *) realloc (result, length);
376
377
5.31k
      if (smaller_result != NULL)
378
5.31k
        result = smaller_result;
379
5.31k
    }
380
381
5.31k
  return result;
382
383
5.72k
 failed:
384
5.72k
  free (result);
385
5.72k
  return NULL;
386
387
5.31k
# endif
388
5.31k
}
389
390
#endif
391
392
/* Convert the NUL-terminated string SRC from FROM_CODESET to TO_CODESET.

   If SRC is empty, or if the two codeset names are equal according to
   c_strcasecmp, the result is simply a strdup copy of SRC.  Otherwise a
   conversion descriptor is opened with iconv_open, the string is converted
   with str_cd_iconv, and the descriptor is closed again.

   Return a malloc-allocated string, or NULL with errno set on failure.
   When the package is built without iconv support, any real conversion
   fails with errno set to ENOSYS.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      char *result = strdup (src);

      if (result == NULL)
        errno = ENOMEM;
      return result;
    }
  else
    {
#if HAVE_ICONV
      iconv_t cd;
      char *result;

      /* Avoid glibc-2.1 bug with EUC-KR.  */
# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     && !defined _LIBICONV_VERSION
      if (c_strcasecmp (from_codeset, "EUC-KR") == 0
          || c_strcasecmp (to_codeset, "EUC-KR") == 0)
        {
          errno = EINVAL;
          return NULL;
        }
# endif
      cd = iconv_open (to_codeset, from_codeset);
      if (cd == (iconv_t) -1)
        return NULL;

      result = str_cd_iconv (src, cd);

      if (result == NULL)
        {
          /* Close cd, but preserve the errno from str_cd_iconv.  */
          int saved_errno = errno;
          iconv_close (cd);
          errno = saved_errno;
        }
      else
        {
          if (iconv_close (cd) < 0)
            {
              /* Return NULL, but free the allocated memory, and while doing
                 that, preserve the errno from iconv_close: free() is not
                 guaranteed to leave errno alone on every platform.  */
              int saved_errno = errno;
              free (result);
              errno = saved_errno;
              return NULL;
            }
        }
      return result;
#else
      /* This is a different error code than if iconv_open existed but didn't
         support from_codeset and to_codeset, so that the caller can emit
         an error message such as
           "iconv() is not supported. Installing GNU libiconv and
            then reinstalling this package would fix this."  */
      errno = ENOSYS;
      return NULL;
#endif
    }
}