Coverage Report

Created: 2023-03-26 07:10

/src/libidn/lib/gl/striconv.c
Line
Count
Source (jump to first uncovered line)
1
/* Charset conversion.
2
   Copyright (C) 2001-2007, 2010-2023 Free Software Foundation, Inc.
3
   Written by Bruno Haible and Simon Josefsson.
4
5
   This file is free software: you can redistribute it and/or modify
6
   it under the terms of the GNU Lesser General Public License as
7
   published by the Free Software Foundation; either version 2.1 of the
8
   License, or (at your option) any later version.
9
10
   This file is distributed in the hope that it will be useful,
11
   but WITHOUT ANY WARRANTY; without even the implied warranty of
12
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13
   GNU Lesser General Public License for more details.
14
15
   You should have received a copy of the GNU Lesser General Public License
16
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
17
18
#include <config.h>
19
20
/* Specification.  */
21
#include "striconv.h"
22
23
#include <errno.h>
24
#include <stdlib.h>
25
#include <string.h>
26
27
#if HAVE_ICONV
28
# include <iconv.h>
29
/* Get MB_LEN_MAX, CHAR_BIT.  */
30
# include <limits.h>
31
#endif
32
33
#include "c-strcase.h"
34
35
#ifndef SIZE_MAX
36
2.56k
# define SIZE_MAX ((size_t) -1)
37
#endif
38
39
40
#if HAVE_ICONV
41
42
int
43
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
44
              char **resultp, size_t *lengthp)
45
0
{
46
0
# define tmpbufsize 4096
47
0
  size_t length;
48
0
  char *result;
49
50
  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
51
0
# if defined _LIBICONV_VERSION \
52
0
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
53
0
          || defined __sun)
54
  /* Set to the initial state.  */
55
0
  iconv (cd, NULL, NULL, NULL, NULL);
56
0
# endif
57
58
  /* Determine the length we need.  */
59
0
  {
60
0
    size_t count = 0;
61
    /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
62
       libiconv's UCS-4-INTERNAL encoding.  */
63
0
    union { unsigned int align; char buf[tmpbufsize]; } tmp;
64
0
# define tmpbuf tmp.buf
65
0
    const char *inptr = src;
66
0
    size_t insize = srclen;
67
68
0
    while (insize > 0)
69
0
      {
70
0
        char *outptr = tmpbuf;
71
0
        size_t outsize = tmpbufsize;
72
0
        size_t res = iconv (cd,
73
0
                            (ICONV_CONST char **) &inptr, &insize,
74
0
                            &outptr, &outsize);
75
76
0
        if (res == (size_t)(-1))
77
0
          {
78
0
            if (errno == E2BIG)
79
0
              ;
80
0
            else if (errno == EINVAL)
81
0
              break;
82
0
            else
83
0
              return -1;
84
0
          }
85
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
86
        /* Irix iconv() inserts a NUL byte if it cannot convert.
87
           NetBSD iconv() inserts a question mark if it cannot convert.
88
           Only GNU libiconv and GNU libc are known to prefer to fail rather
89
           than doing a lossy conversion.  */
90
        else if (res > 0)
91
          {
92
            errno = EILSEQ;
93
            return -1;
94
          }
95
# endif
96
0
        count += outptr - tmpbuf;
97
0
      }
98
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
99
0
# if defined _LIBICONV_VERSION \
100
0
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
101
0
          || defined __sun)
102
0
    {
103
0
      char *outptr = tmpbuf;
104
0
      size_t outsize = tmpbufsize;
105
0
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
106
107
0
      if (res == (size_t)(-1))
108
0
        return -1;
109
0
      count += outptr - tmpbuf;
110
0
    }
111
0
# endif
112
0
    length = count;
113
0
# undef tmpbuf
114
0
  }
115
116
0
  if (length == 0)
117
0
    {
118
0
      *lengthp = 0;
119
0
      return 0;
120
0
    }
121
0
  if (*resultp != NULL && *lengthp >= length)
122
0
    result = *resultp;
123
0
  else
124
0
    {
125
0
      result = (char *) malloc (length);
126
0
      if (result == NULL)
127
0
        {
128
0
          errno = ENOMEM;
129
0
          return -1;
130
0
        }
131
0
    }
132
133
  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
134
0
# if defined _LIBICONV_VERSION \
135
0
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
136
0
          || defined __sun)
137
  /* Return to the initial state.  */
138
0
  iconv (cd, NULL, NULL, NULL, NULL);
139
0
# endif
140
141
  /* Do the conversion for real.  */
142
0
  {
143
0
    const char *inptr = src;
144
0
    size_t insize = srclen;
145
0
    char *outptr = result;
146
0
    size_t outsize = length;
147
148
0
    while (insize > 0)
149
0
      {
150
0
        size_t res = iconv (cd,
151
0
                            (ICONV_CONST char **) &inptr, &insize,
152
0
                            &outptr, &outsize);
153
154
0
        if (res == (size_t)(-1))
155
0
          {
156
0
            if (errno == EINVAL)
157
0
              break;
158
0
            else
159
0
              goto fail;
160
0
          }
161
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
162
        /* Irix iconv() inserts a NUL byte if it cannot convert.
163
           NetBSD iconv() inserts a question mark if it cannot convert.
164
           Only GNU libiconv and GNU libc are known to prefer to fail rather
165
           than doing a lossy conversion.  */
166
        else if (res > 0)
167
          {
168
            errno = EILSEQ;
169
            goto fail;
170
          }
171
# endif
172
0
      }
173
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
174
0
# if defined _LIBICONV_VERSION \
175
0
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
176
0
          || defined __sun)
177
0
    {
178
0
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);
179
180
0
      if (res == (size_t)(-1))
181
0
        goto fail;
182
0
    }
183
0
# endif
184
0
    if (outsize != 0)
185
0
      abort ();
186
0
  }
187
188
0
  *resultp = result;
189
0
  *lengthp = length;
190
191
0
  return 0;
192
193
0
 fail:
194
0
  {
195
0
    if (result != *resultp)
196
0
      free (result);
197
0
    return -1;
198
0
  }
199
0
# undef tmpbufsize
200
0
}
201
202
char *
203
str_cd_iconv (const char *src, iconv_t cd)
204
2.56k
{
205
  /* For most encodings, a trailing NUL byte in the input will be converted
206
     to a trailing NUL byte in the output.  But not for UTF-7.  So that this
207
     function is usable for UTF-7, we have to exclude the NUL byte from the
208
     conversion and add it by hand afterwards.  */
209
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
210
  /* Irix iconv() inserts a NUL byte if it cannot convert.
211
     NetBSD iconv() inserts a question mark if it cannot convert.
212
     Only GNU libiconv and GNU libc are known to prefer to fail rather
213
     than doing a lossy conversion.  For other iconv() implementations,
214
     we have to look at the number of irreversible conversions returned;
215
     but this information is lost when iconv() returns for an E2BIG reason.
216
     Therefore we cannot use the second, faster algorithm.  */
217
218
  char *result = NULL;
219
  size_t length = 0;
220
  int retval = mem_cd_iconv (src, strlen (src), cd, &result, &length);
221
  char *final_result;
222
223
  if (retval < 0)
224
    {
225
      if (result != NULL)
226
        abort ();
227
      return NULL;
228
    }
229
230
  /* Add the terminating NUL byte.  */
231
  final_result =
232
    (result != NULL ? realloc (result, length + 1) : malloc (length + 1));
233
  if (final_result == NULL)
234
    {
235
      free (result);
236
      errno = ENOMEM;
237
      return NULL;
238
    }
239
  final_result[length] = '\0';
240
241
  return final_result;
242
243
# else
244
  /* This algorithm is likely faster than the one above.  But it may produce
245
     iconv() returns for an E2BIG reason, when the output size guess is too
246
     small.  Therefore it can only be used when we don't need the number of
247
     irreversible conversions performed.  */
248
2.56k
  char *result;
249
2.56k
  size_t result_size;
250
2.56k
  size_t length;
251
2.56k
  const char *inptr = src;
252
2.56k
  size_t inbytes_remaining = strlen (src);
253
254
  /* Make a guess for the worst-case output size, in order to avoid a
255
     realloc.  It's OK if the guess is wrong as long as it is not zero and
256
     doesn't lead to an integer overflow.  */
257
2.56k
  result_size = inbytes_remaining;
258
2.56k
  {
259
2.56k
    size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
260
2.56k
    if (result_size <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
261
2.56k
      result_size *= MB_LEN_MAX;
262
2.56k
  }
263
2.56k
  result_size += 1; /* for the terminating NUL */
264
265
2.56k
  result = (char *) malloc (result_size);
266
2.56k
  if (result == NULL)
267
0
    {
268
0
      errno = ENOMEM;
269
0
      return NULL;
270
0
    }
271
272
  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
273
2.56k
# if defined _LIBICONV_VERSION \
274
2.56k
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
275
2.56k
          || defined __sun)
276
  /* Set to the initial state.  */
277
2.56k
  iconv (cd, NULL, NULL, NULL, NULL);
278
2.56k
# endif
279
280
  /* Do the conversion.  */
281
2.56k
  {
282
2.56k
    char *outptr = result;
283
2.56k
    size_t outbytes_remaining = result_size - 1;
284
285
2.56k
    for (;;)
286
2.56k
      {
287
        /* Here inptr + inbytes_remaining = src + strlen (src),
288
                outptr + outbytes_remaining = result + result_size - 1.  */
289
2.56k
        size_t res = iconv (cd,
290
2.56k
                            (ICONV_CONST char **) &inptr, &inbytes_remaining,
291
2.56k
                            &outptr, &outbytes_remaining);
292
293
2.56k
        if (res == (size_t)(-1))
294
2.12k
          {
295
2.12k
            if (errno == EINVAL)
296
0
              break;
297
2.12k
            else if (errno == E2BIG)
298
0
              {
299
0
                size_t used = outptr - result;
300
0
                size_t newsize = result_size * 2;
301
0
                char *newresult;
302
303
0
                if (!(newsize > result_size))
304
0
                  {
305
0
                    errno = ENOMEM;
306
0
                    goto failed;
307
0
                  }
308
0
                newresult = (char *) realloc (result, newsize);
309
0
                if (newresult == NULL)
310
0
                  {
311
0
                    errno = ENOMEM;
312
0
                    goto failed;
313
0
                  }
314
0
                result = newresult;
315
0
                result_size = newsize;
316
0
                outptr = result + used;
317
0
                outbytes_remaining = result_size - 1 - used;
318
0
              }
319
2.12k
            else
320
2.12k
              goto failed;
321
2.12k
          }
322
436
        else
323
436
          break;
324
2.56k
      }
325
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
326
436
# if defined _LIBICONV_VERSION \
327
436
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
328
436
          || defined __sun)
329
436
    for (;;)
330
436
      {
331
        /* Here outptr + outbytes_remaining = result + result_size - 1.  */
332
436
        size_t res = iconv (cd, NULL, NULL, &outptr, &outbytes_remaining);
333
334
436
        if (res == (size_t)(-1))
335
0
          {
336
0
            if (errno == E2BIG)
337
0
              {
338
0
                size_t used = outptr - result;
339
0
                size_t newsize = result_size * 2;
340
0
                char *newresult;
341
342
0
                if (!(newsize > result_size))
343
0
                  {
344
0
                    errno = ENOMEM;
345
0
                    goto failed;
346
0
                  }
347
0
                newresult = (char *) realloc (result, newsize);
348
0
                if (newresult == NULL)
349
0
                  {
350
0
                    errno = ENOMEM;
351
0
                    goto failed;
352
0
                  }
353
0
                result = newresult;
354
0
                result_size = newsize;
355
0
                outptr = result + used;
356
0
                outbytes_remaining = result_size - 1 - used;
357
0
              }
358
0
            else
359
0
              goto failed;
360
0
          }
361
436
        else
362
436
          break;
363
436
      }
364
436
# endif
365
366
    /* Add the terminating NUL byte.  */
367
436
    *outptr++ = '\0';
368
369
436
    length = outptr - result;
370
436
  }
371
372
  /* Give away unused memory.  */
373
436
  if (length < result_size)
374
436
    {
375
436
      char *smaller_result = (char *) realloc (result, length);
376
377
436
      if (smaller_result != NULL)
378
436
        result = smaller_result;
379
436
    }
380
381
436
  return result;
382
383
2.12k
 failed:
384
2.12k
  free (result);
385
2.12k
  return NULL;
386
387
436
# endif
388
436
}
389
390
#endif
391
392
/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET and return the result as a freshly allocated
   string.  Return NULL with errno set on failure; errno is ENOSYS when the
   package was built without iconv support.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  /* An empty string, or identical source and target encodings (compared
     case-insensitively), needs no conversion: return a plain copy.  */
  if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      char *copy = strdup (src);

      if (copy == NULL)
        errno = ENOMEM;
      return copy;
    }

#if HAVE_ICONV
  {
    iconv_t cd;
    char *converted;

    /* Avoid glibc-2.1 bug with EUC-KR.  */
# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     && !defined _LIBICONV_VERSION
    if (c_strcasecmp (from_codeset, "EUC-KR") == 0
        || c_strcasecmp (to_codeset, "EUC-KR") == 0)
      {
        errno = EINVAL;
        return NULL;
      }
# endif
    cd = iconv_open (to_codeset, from_codeset);
    if (cd == (iconv_t) -1)
      return NULL;

    converted = str_cd_iconv (src, cd);

    if (converted != NULL)
      {
        /* A descriptor that cannot be closed turns the whole call into a
           failure, so the converted string is discarded.  */
        if (iconv_close (cd) < 0)
          {
            free (converted);
            return NULL;
          }
        return converted;
      }
    else
      {
        /* Release the descriptor, but report the errno that str_cd_iconv
           left behind rather than anything iconv_close might set.  */
        int saved_errno = errno;
        iconv_close (cd);
        errno = saved_errno;
        return NULL;
      }
  }
#else
  /* This is a different error code than if iconv_open existed but didn't
     support from_codeset and to_codeset, so that the caller can emit
     an error message such as
       "iconv() is not supported. Installing GNU libiconv and
        then reinstalling this package would fix this."  */
  errno = ENOSYS;
  return NULL;
#endif
}