/src/libidn/lib/gl/striconv.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Charset conversion. |
2 | | Copyright (C) 2001-2007, 2010-2024 Free Software Foundation, Inc. |
3 | | Written by Bruno Haible and Simon Josefsson. |
4 | | |
5 | | This file is free software: you can redistribute it and/or modify |
6 | | it under the terms of the GNU Lesser General Public License as |
7 | | published by the Free Software Foundation; either version 2.1 of the |
8 | | License, or (at your option) any later version. |
9 | | |
10 | | This file is distributed in the hope that it will be useful, |
11 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 | | GNU Lesser General Public License for more details. |
14 | | |
15 | | You should have received a copy of the GNU Lesser General Public License |
16 | | along with this program. If not, see <https://www.gnu.org/licenses/>. */ |
17 | | |
18 | | #include <config.h> |
19 | | |
20 | | /* Specification. */ |
21 | | #include "striconv.h" |
22 | | |
23 | | #include <errno.h> |
24 | | #include <stdlib.h> |
25 | | #include <string.h> |
26 | | |
27 | | #if HAVE_ICONV |
28 | | # include <iconv.h> |
29 | | /* Get MB_LEN_MAX, CHAR_BIT. */ |
30 | | # include <limits.h> |
31 | | #endif |
32 | | |
33 | | #include "c-strcase.h" |
34 | | |
35 | | #ifndef SIZE_MAX |
36 | 0 | # define SIZE_MAX ((size_t) -1) |
37 | | #endif |
38 | | |
39 | | |
40 | | #if HAVE_ICONV |
41 | | |
/* Convert the SRCLEN bytes starting at SRC through the conversion
   descriptor CD and hand back the converted bytes.

   On entry, *RESULTP is either NULL or points to a caller-owned buffer of
   *LENGTHP bytes.  That buffer is used for the output when it is large
   enough; otherwise a fresh buffer is obtained with malloc and the
   caller's buffer is left alone.

   Return 0 on success, with *RESULTP pointing to the output and *LENGTHP
   holding its size in bytes.  No terminating NUL byte is appended.  When
   the converted output is empty, only *LENGTHP is set (to 0) and *RESULTP
   is left unchanged.
   Return -1 on failure, with errno set by iconv, or to ENOMEM when the
   allocation fails, or to EILSEQ when a non-GNU iconv reports a lossy
   conversion.  *RESULTP and *LENGTHP are left unchanged in that case.

   The conversion runs twice: a first pass converts into a scratch buffer
   only to learn the exact output size, a second pass converts into the
   final buffer.  iconv failing with EINVAL (per POSIX, an incomplete
   multibyte sequence at the end of the input) ends the conversion without
   being treated as an error; the output produced up to that point is
   returned.  */
int
mem_cd_iconv (const char *src, size_t srclen, iconv_t cd,
              char **resultp, size_t *lengthp)
{
# define tmpbufsize 4096
  size_t length;
  char *result;

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Determine the length we need.  */
  {
    size_t count = 0;
    /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
       libiconv's UCS-4-INTERNAL encoding.  */
    union { unsigned int align; char buf[tmpbufsize]; } tmp;
# define tmpbuf tmp.buf
    const char *inptr = src;
    size_t insize = srclen;

    while (insize > 0)
      {
        /* Each round overwrites the scratch buffer from its start; only
           the number of bytes produced matters here.  */
        char *outptr = tmpbuf;
        size_t outsize = tmpbufsize;
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            if (errno == E2BIG)
              /* Scratch buffer full: count it and go around again.  */
              ;
            else if (errno == EINVAL)
              {
                /* The bytes converted ahead of the incomplete sequence
                   have already been written to the scratch buffer.  They
                   must be counted, otherwise the second pass would get a
                   buffer that is too small for them.  */
                count += outptr - tmpbuf;
                break;
              }
            else
              return -1;
          }
# if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
     && !(defined __GLIBC__ && !defined __UCLIBC__)
        /* Irix iconv() inserts a NUL byte if it cannot convert.
           NetBSD iconv() inserts a question mark if it cannot convert.
           Only GNU libiconv (excluding the bastard Apple iconv) and GNU libc
           are known to prefer to fail rather than doing a lossy conversion.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            return -1;
          }
# endif
        count += outptr - tmpbuf;
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
    {
      /* Flush the descriptor: count whatever bytes iconv emits to get
         back to the initial shift state.  */
      char *outptr = tmpbuf;
      size_t outsize = tmpbufsize;
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        return -1;
      count += outptr - tmpbuf;
    }
# endif
    length = count;
# undef tmpbuf
  }

  if (length == 0)
    {
      /* Nothing to store; *RESULTP is deliberately not touched.  */
      *lengthp = 0;
      return 0;
    }
  if (*resultp != NULL && *lengthp >= length)
    /* The caller's buffer is big enough.  */
    result = *resultp;
  else
    {
      result = (char *) malloc (length);
      if (result == NULL)
        {
          errno = ENOMEM;
          return -1;
        }
    }

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
  /* Return to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Do the conversion for real.  */
  {
    const char *inptr = src;
    size_t insize = srclen;
    char *outptr = result;
    size_t outsize = length;

    while (insize > 0)
      {
        size_t res = iconv (cd,
                            (ICONV_CONST char **) &inptr, &insize,
                            &outptr, &outsize);

        if (res == (size_t)(-1))
          {
            /* E2BIG is a failure here: the buffer was sized by the first
               pass and is supposed to fit exactly.  */
            if (errno == EINVAL)
              break;
            else
              goto fail;
          }
# if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
     && !(defined __GLIBC__ && !defined __UCLIBC__)
        /* Irix iconv() inserts a NUL byte if it cannot convert.
           NetBSD iconv() inserts a question mark if it cannot convert.
           Only GNU libiconv (excluding the bastard Apple iconv) and GNU libc
           are known to prefer to fail rather than doing a lossy conversion.  */
        else if (res > 0)
          {
            errno = EILSEQ;
            goto fail;
          }
# endif
      }
    /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
    {
      size_t res = iconv (cd, NULL, NULL, &outptr, &outsize);

      if (res == (size_t)(-1))
        goto fail;
    }
# endif
    /* Both passes must have produced the same number of bytes.  */
    if (outsize != 0)
      abort ();
  }

  *resultp = result;
  *lengthp = length;

  return 0;

 fail:
  {
    /* Only release memory allocated here, never the caller's buffer.  */
    if (result != *resultp)
      free (result);
    return -1;
  }
# undef tmpbufsize
}
203 | | |
# if (defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
     || (defined __GLIBC__ && !defined __UCLIBC__)
/* Helper for the single-pass algorithm of str_cd_iconv: double the size of
   the output buffer *BUFP (currently *CAPACITYP bytes) and re-base the
   write cursor *OUT_CURSORP and the free-space counter *OUT_LEFTP on the
   new buffer, still keeping one byte in reserve for the terminating NUL.
   Return 0 on success.  Return -1 with errno set to ENOMEM if the doubled
   size wraps around or realloc fails; the buffer is untouched then.  */
static int
str_cd_iconv_grow (char **bufp, size_t *capacityp,
                   char **out_cursorp, size_t *out_leftp)
{
  size_t filled = *out_cursorp - *bufp;
  size_t doubled = *capacityp * 2;
  char *bigger;

  if (!(doubled > *capacityp))
    {
      errno = ENOMEM;
      return -1;
    }
  bigger = (char *) realloc (*bufp, doubled);
  if (bigger == NULL)
    {
      errno = ENOMEM;
      return -1;
    }
  *bufp = bigger;
  *capacityp = doubled;
  *out_cursorp = bigger + filled;
  *out_leftp = doubled - 1 - filled;
  return 0;
}
# endif

/* Convert the NUL-terminated string SRC through the conversion descriptor
   CD.  Return a freshly allocated, NUL-terminated string that the caller
   must free, or NULL with errno set on failure.

   The terminating NUL is never passed to iconv: for most encodings it
   would simply be converted to a NUL, but not for UTF-7.  It is appended
   by hand once the conversion is done.  */
char *
str_cd_iconv (const char *src, iconv_t cd)
{
# if !(defined _LIBICONV_VERSION && !(_LIBICONV_VERSION == 0x10b && defined __APPLE__)) \
     && !(defined __GLIBC__ && !defined __UCLIBC__)
  /* Irix iconv() inserts a NUL byte if it cannot convert.
     NetBSD iconv() inserts a question mark if it cannot convert.
     Only GNU libiconv (excluding the bastard Apple iconv) and GNU libc are
     known to fail instead of converting lossily.  With any other iconv()
     the count of irreversible conversions has to be inspected, and that
     count is lost whenever iconv() stops for an E2BIG reason.  So the
     exact-size, two-pass mem_cd_iconv is used here rather than the faster
     single-pass algorithm below.  */

  char *converted = NULL;
  size_t converted_len = 0;
  char *terminated;

  if (mem_cd_iconv (src, strlen (src), cd, &converted, &converted_len) < 0)
    {
      /* mem_cd_iconv must not hand back a buffer when it fails.  */
      if (converted != NULL)
        abort ();
      return NULL;
    }

  /* Make room for, and add, the terminating NUL byte.  CONVERTED is still
     NULL when the conversion produced no output at all.  */
  if (converted == NULL)
    terminated = malloc (converted_len + 1);
  else
    terminated = realloc (converted, converted_len + 1);
  if (terminated == NULL)
    {
      free (converted);
      errno = ENOMEM;
      return NULL;
    }
  terminated[converted_len] = '\0';

  return terminated;

# else
  /* Single-pass algorithm, likely faster than the one above.  It can make
     iconv() return for an E2BIG reason when the guessed output size is too
     small, so it is only usable where the number of irreversible
     conversions is not needed.  */
  const char *in_cursor = src;
  size_t in_left = strlen (src);
  size_t approx_sqrt_SIZE_MAX = SIZE_MAX >> (sizeof (size_t) * CHAR_BIT / 2);
  size_t capacity = in_left;
  char *buf;
  char *out_cursor;
  size_t out_left;
  size_t total;

  /* Guess the worst-case output size so that a realloc is rarely needed.
     A wrong guess is harmless as long as it is non-zero and cannot
     overflow, hence the multiplication only for moderate lengths.  */
  if (capacity <= approx_sqrt_SIZE_MAX / MB_LEN_MAX)
    capacity *= MB_LEN_MAX;
  capacity += 1;  /* for the terminating NUL */

  buf = (char *) malloc (capacity);
  if (buf == NULL)
    {
      errno = ENOMEM;
      return NULL;
    }

  /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
  /* Set to the initial state.  */
  iconv (cd, NULL, NULL, NULL, NULL);
# endif

  /* Invariant from here on:
     in_cursor + in_left = src + strlen (src),
     out_cursor + out_left = buf + capacity - 1.  */
  out_cursor = buf;
  out_left = capacity - 1;

  /* Convert the input, enlarging the buffer whenever it fills up.  */
  for (;;)
    {
      size_t res = iconv (cd,
                          (ICONV_CONST char **) &in_cursor, &in_left,
                          &out_cursor, &out_left);

      /* Done on success; EINVAL also just ends the conversion.  */
      if (res != (size_t)(-1) || errno == EINVAL)
        break;
      if (errno != E2BIG)
        goto failed;
      if (str_cd_iconv_grow (&buf, &capacity, &out_cursor, &out_left) < 0)
        goto failed;
    }

  /* Avoid glibc-2.1 bug and Solaris 2.7 bug.  */
# if defined _LIBICONV_VERSION \
     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
          || defined __sun)
  /* Flush the descriptor, again enlarging the buffer on demand.  */
  for (;;)
    {
      size_t res = iconv (cd, NULL, NULL, &out_cursor, &out_left);

      if (res != (size_t)(-1))
        break;
      if (errno != E2BIG)
        goto failed;
      if (str_cd_iconv_grow (&buf, &capacity, &out_cursor, &out_left) < 0)
        goto failed;
    }
# endif

  /* Add the terminating NUL byte; one byte was held back for it.  */
  *out_cursor++ = '\0';
  total = out_cursor - buf;

  /* Give away unused memory.  If shrinking fails, the larger buffer is
     returned as it is.  */
  if (total < capacity)
    {
      char *trimmed = (char *) realloc (buf, total);

      if (trimmed != NULL)
        buf = trimmed;
    }

  return buf;

 failed:
  free (buf);
  return NULL;

# endif
}
393 | | |
394 | | #endif |
395 | | |
/* Convert the NUL-terminated string SRC from the encoding FROM_CODESET to
   the encoding TO_CODESET.  Return a freshly allocated, NUL-terminated
   string that the caller must free, or NULL with errno set on failure.
   Without iconv support, any real conversion fails with errno ENOSYS.  */
char *
str_iconv (const char *src, const char *from_codeset, const char *to_codeset)
{
  /* An empty string, or source and target encodings that are the same
     (compared without regard to case), needs no conversion: return a
     plain copy.  */
  if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
    {
      char *copy = strdup (src);

      if (copy == NULL)
        errno = ENOMEM;
      return copy;
    }

#if HAVE_ICONV
  {
    iconv_t cd;
    char *converted;

    /* Avoid glibc-2.1 bug with EUC-KR.  */
# if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
     && !defined _LIBICONV_VERSION
    if (c_strcasecmp (from_codeset, "EUC-KR") == 0
        || c_strcasecmp (to_codeset, "EUC-KR") == 0)
      {
        errno = EINVAL;
        return NULL;
      }
# endif

    cd = iconv_open (to_codeset, from_codeset);
    if (cd == (iconv_t) -1)
      return NULL;

    converted = str_cd_iconv (src, cd);
    if (converted != NULL)
      {
        /* A failure to close the descriptor is reported to the caller,
           and the converted string is discarded.  */
        if (iconv_close (cd) < 0)
          {
            free (converted);
            return NULL;
          }
        return converted;
      }
    else
      {
        /* Close cd, but report the errno of the failed conversion rather
           than whatever iconv_close leaves behind.  */
        int conversion_errno = errno;

        iconv_close (cd);
        errno = conversion_errno;
        return NULL;
      }
  }
#else
  /* This is a different error code than if iconv_open existed but didn't
     support from_codeset and to_codeset, so that the caller can emit
     an error message such as
       "iconv() is not supported. Installing GNU libiconv and
        then reinstalling this package would fix this."  */
  errno = ENOSYS;
  return NULL;
#endif
}