/src/libunistring/lib/striconveh.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /* Character set conversion with error handling.  | 
2  |  |    Copyright (C) 2001-2023 Free Software Foundation, Inc.  | 
3  |  |    Written by Bruno Haible and Simon Josefsson.  | 
4  |  |  | 
5  |  |    This file is free software: you can redistribute it and/or modify  | 
6  |  |    it under the terms of the GNU Lesser General Public License as  | 
7  |  |    published by the Free Software Foundation; either version 2.1 of the  | 
8  |  |    License, or (at your option) any later version.  | 
9  |  |  | 
10  |  |    This file is distributed in the hope that it will be useful,  | 
11  |  |    but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
12  |  |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  | 
13  |  |    GNU Lesser General Public License for more details.  | 
14  |  |  | 
15  |  |    You should have received a copy of the GNU Lesser General Public License  | 
16  |  |    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */  | 
17  |  |  | 
18  |  | #include <config.h>  | 
19  |  |  | 
20  |  | /* Specification.  */  | 
21  |  | #include "striconveh.h"  | 
22  |  |  | 
23  |  | #include <errno.h>  | 
24  |  | #include <stdlib.h>  | 
25  |  | #include <string.h>  | 
26  |  |  | 
27  |  | #if HAVE_ICONV  | 
28  |  | # include <iconv.h>  | 
29  |  | # include "unistr.h"  | 
30  |  | #endif  | 
31  |  |  | 
32  |  | #include "c-strcase.h"  | 
33  |  | #include "c-strcaseeq.h"  | 
34  |  |  | 
35  |  | #ifndef SIZE_MAX  | 
36  |  | # define SIZE_MAX ((size_t) -1)  | 
37  |  | #endif  | 
38  |  |  | 
39  |  |  | 
40  |  | #if HAVE_ICONV  | 
41  |  |  | 
42  |  | /* The caller must provide an iconveh_t, not just an iconv_t, because when a  | 
43  |  |    conversion error occurs, we may have to determine the Unicode representation  | 
44  |  |    of the inconvertible character.  */  | 
45  |  |  | 
46  |  | int  | 
47  |  | iconveh_open (const char *to_codeset, const char *from_codeset, iconveh_t *cdp)  | 
48  | 0  | { | 
49  | 0  |   iconv_t cd;  | 
50  | 0  |   iconv_t cd1;  | 
51  | 0  |   iconv_t cd2;  | 
52  |  |  | 
53  |  |   /* Avoid glibc-2.1 bug with EUC-KR.  */  | 
54  |  | # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \  | 
55  |  |      && !defined _LIBICONV_VERSION  | 
56  |  |   if (c_strcasecmp (from_codeset, "EUC-KR") == 0  | 
57  |  |       || c_strcasecmp (to_codeset, "EUC-KR") == 0)  | 
58  |  |     { | 
59  |  |       errno = EINVAL;  | 
60  |  |       return -1;  | 
61  |  |     }  | 
62  |  | # endif  | 
63  |  | 
  | 
64  | 0  |   cd = iconv_open (to_codeset, from_codeset);  | 
65  |  | 
  | 
66  | 0  |   if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))  | 
67  | 0  |     cd1 = (iconv_t)(-1);  | 
68  | 0  |   else  | 
69  | 0  |     { | 
70  | 0  |       cd1 = iconv_open ("UTF-8", from_codeset); | 
71  | 0  |       if (cd1 == (iconv_t)(-1))  | 
72  | 0  |         { | 
73  | 0  |           int saved_errno = errno;  | 
74  | 0  |           if (cd != (iconv_t)(-1))  | 
75  | 0  |             iconv_close (cd);  | 
76  | 0  |           errno = saved_errno;  | 
77  | 0  |           return -1;  | 
78  | 0  |         }  | 
79  | 0  |     }  | 
80  |  |  | 
81  | 0  |   if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)  | 
82  | 0  | # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \  | 
83  | 0  |       && !defined __UCLIBC__) \  | 
84  | 0  |      || _LIBICONV_VERSION >= 0x0105  | 
85  | 0  |       || c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0  | 
86  | 0  | # endif  | 
87  | 0  |      )  | 
88  | 0  |     cd2 = (iconv_t)(-1);  | 
89  | 0  |   else  | 
90  | 0  |     { | 
91  | 0  |       cd2 = iconv_open (to_codeset, "UTF-8");  | 
92  | 0  |       if (cd2 == (iconv_t)(-1))  | 
93  | 0  |         { | 
94  | 0  |           int saved_errno = errno;  | 
95  | 0  |           if (cd1 != (iconv_t)(-1))  | 
96  | 0  |             iconv_close (cd1);  | 
97  | 0  |           if (cd != (iconv_t)(-1))  | 
98  | 0  |             iconv_close (cd);  | 
99  | 0  |           errno = saved_errno;  | 
100  | 0  |           return -1;  | 
101  | 0  |         }  | 
102  | 0  |     }  | 
103  |  |  | 
104  | 0  |   cdp->cd = cd;  | 
105  | 0  |   cdp->cd1 = cd1;  | 
106  | 0  |   cdp->cd2 = cd2;  | 
107  | 0  |   return 0;  | 
108  | 0  | }  | 
109  |  |  | 
110  |  | int  | 
111  |  | iconveh_close (const iconveh_t *cd)  | 
112  | 0  | { | 
113  | 0  |   if (cd->cd2 != (iconv_t)(-1) && iconv_close (cd->cd2) < 0)  | 
114  | 0  |     { | 
115  |  |       /* Return -1, but preserve the errno from iconv_close.  */  | 
116  | 0  |       int saved_errno = errno;  | 
117  | 0  |       if (cd->cd1 != (iconv_t)(-1))  | 
118  | 0  |         iconv_close (cd->cd1);  | 
119  | 0  |       if (cd->cd != (iconv_t)(-1))  | 
120  | 0  |         iconv_close (cd->cd);  | 
121  | 0  |       errno = saved_errno;  | 
122  | 0  |       return -1;  | 
123  | 0  |     }  | 
124  | 0  |   if (cd->cd1 != (iconv_t)(-1) && iconv_close (cd->cd1) < 0)  | 
125  | 0  |     { | 
126  |  |       /* Return -1, but preserve the errno from iconv_close.  */  | 
127  | 0  |       int saved_errno = errno;  | 
128  | 0  |       if (cd->cd != (iconv_t)(-1))  | 
129  | 0  |         iconv_close (cd->cd);  | 
130  | 0  |       errno = saved_errno;  | 
131  | 0  |       return -1;  | 
132  | 0  |     }  | 
133  | 0  |   if (cd->cd != (iconv_t)(-1) && iconv_close (cd->cd) < 0)  | 
134  | 0  |     return -1;  | 
135  | 0  |   return 0;  | 
136  | 0  | }  | 
137  |  |  | 
/* iconv_carefully behaves like iconv, with two differences: it stops at the
   first conversion error, and it stores in *INCREMENTED whether the input
   pointers have already been moved past the error location.  */
# if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
/* Irix iconv() inserts a NUL byte if it cannot convert.
   NetBSD iconv() inserts a question mark if it cannot convert.
   Only GNU libiconv and GNU libc are known to prefer to fail rather
   than doing a lossy conversion.
   Therefore, on other implementations, feed iconv() one character at a
   time and treat a positive return value (irreversible conversions) as an
   EILSEQ error.  */
static size_t
iconv_carefully (iconv_t cd,
                 const char **inbuf, size_t *inbytesleft,
                 char **outbuf, size_t *outbytesleft,
                 bool *incremented)
{
  const char *cursor = *inbuf;
  const char *const limit = cursor + *inbytesleft;
  char *dst = *outbuf;
  size_t dst_room = *outbytesleft;
  const char *char_start;
  size_t rc;

  for (;;)
    {
      size_t chunk = 1;

      char_start = cursor;
      rc = (size_t)(-1);

      /* Offer 1, 2, 3, ... bytes until iconv() stops complaining about an
         incomplete input sequence.  */
      while (cursor + chunk <= limit)
        {
          rc = iconv (cd,
                      (ICONV_CONST char **) &cursor, &chunk,
                      &dst, &dst_room);
          if (rc != (size_t)(-1) || errno != EINVAL)
            break;
          /* iconv can eat up a shift sequence but give EINVAL while attempting
             to convert the first character.  E.g. libiconv does this.  */
          if (cursor > char_start)
            {
              rc = 0;
              break;
            }
          chunk++;
        }

      if (rc != 0)
        break;

      /* One unit converted cleanly: commit the output position.  */
      *outbuf = dst;
      *outbytesleft = dst_room;

      if (cursor >= limit)
        break;
    }

  *inbuf = cursor;
  *inbytesleft = limit - cursor;

  if (rc != (size_t)(-1) && rc > 0)
    {
      /* iconv() has already incremented the input pointer.  We cannot go
         back to a previous position, otherwise the state inside CD would
         become invalid, if FROM_CODESET is a stateful encoding.  So, tell
         the caller that *INBUF has already been incremented.  */
      *incremented = (cursor > char_start);
      errno = EILSEQ;
      return (size_t)(-1);
    }

  *incremented = false;
  return rc;
}
# else
#  define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \
     (*(incremented) = false, \
      iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
# endif
213  |  |  | 
214  |  | /* iconv_carefully_1 is like iconv_carefully, except that it stops after  | 
215  |  |    converting one character or one shift sequence.  */  | 
216  |  | static size_t  | 
217  |  | iconv_carefully_1 (iconv_t cd,  | 
218  |  |                    const char **inbuf, size_t *inbytesleft,  | 
219  |  |                    char **outbuf, size_t *outbytesleft,  | 
220  |  |                    bool *incremented)  | 
221  | 0  | { | 
222  | 0  |   const char *inptr_before = *inbuf;  | 
223  | 0  |   const char *inptr = inptr_before;  | 
224  | 0  |   const char *inptr_end = inptr_before + *inbytesleft;  | 
225  | 0  |   char *outptr = *outbuf;  | 
226  | 0  |   size_t outsize = *outbytesleft;  | 
227  | 0  |   size_t res = (size_t)(-1);  | 
228  | 0  |   size_t insize;  | 
229  |  | 
  | 
230  | 0  |   for (insize = 1; inptr_before + insize <= inptr_end; insize++)  | 
231  | 0  |     { | 
232  | 0  |       inptr = inptr_before;  | 
233  | 0  |       res = iconv (cd,  | 
234  | 0  |                    (ICONV_CONST char **) &inptr, &insize,  | 
235  | 0  |                    &outptr, &outsize);  | 
236  | 0  |       if (!(res == (size_t)(-1) && errno == EINVAL))  | 
237  | 0  |         break;  | 
238  |  |       /* iconv can eat up a shift sequence but give EINVAL while attempting  | 
239  |  |          to convert the first character.  E.g. libiconv does this.  */  | 
240  | 0  |       if (inptr > inptr_before)  | 
241  | 0  |         { | 
242  | 0  |           res = 0;  | 
243  | 0  |           break;  | 
244  | 0  |         }  | 
245  | 0  |     }  | 
246  |  | 
  | 
247  | 0  |   *inbuf = inptr;  | 
248  | 0  |   *inbytesleft = inptr_end - inptr;  | 
249  |  | # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)  | 
250  |  |   /* Irix iconv() inserts a NUL byte if it cannot convert.  | 
251  |  |      NetBSD iconv() inserts a question mark if it cannot convert.  | 
252  |  |      Only GNU libiconv and GNU libc are known to prefer to fail rather  | 
253  |  |      than doing a lossy conversion.  */  | 
254  |  |   if (res != (size_t)(-1) && res > 0)  | 
255  |  |     { | 
256  |  |       /* iconv() has already incremented INPTR.  We cannot go back to a  | 
257  |  |          previous INPTR, otherwise the state inside CD would become invalid,  | 
258  |  |          if FROM_CODESET is a stateful encoding.  So, tell the caller that  | 
259  |  |          *INBUF has already been incremented.  */  | 
260  |  |       *incremented = (inptr > inptr_before);  | 
261  |  |       errno = EILSEQ;  | 
262  |  |       return (size_t)(-1);  | 
263  |  |     }  | 
264  |  | # endif  | 
265  |  | 
  | 
266  | 0  |   if (res != (size_t)(-1))  | 
267  | 0  |     { | 
268  | 0  |       *outbuf = outptr;  | 
269  | 0  |       *outbytesleft = outsize;  | 
270  | 0  |     }  | 
271  | 0  |   *incremented = false;  | 
272  | 0  |   return res;  | 
273  | 0  | }  | 
274  |  |  | 
275  |  | /* utf8conv_carefully is like iconv, except that  | 
276  |  |      - it converts from UTF-8 to UTF-8,  | 
277  |  |      - it stops as soon as it encounters a conversion error, and it returns  | 
278  |  |        in *INCREMENTED a boolean telling whether it has incremented the input  | 
279  |  |        pointers past the error location,  | 
280  |  |      - if one_character_only is true, it stops after converting one  | 
281  |  |        character.  */  | 
282  |  | static size_t  | 
283  |  | utf8conv_carefully (bool one_character_only,  | 
284  |  |                     const char **inbuf, size_t *inbytesleft,  | 
285  |  |                     char **outbuf, size_t *outbytesleft,  | 
286  |  |                     bool *incremented)  | 
287  | 0  | { | 
288  | 0  |   const char *inptr = *inbuf;  | 
289  | 0  |   size_t insize = *inbytesleft;  | 
290  | 0  |   char *outptr = *outbuf;  | 
291  | 0  |   size_t outsize = *outbytesleft;  | 
292  | 0  |   size_t res;  | 
293  |  | 
  | 
294  | 0  |   res = 0;  | 
295  | 0  |   do  | 
296  | 0  |     { | 
297  | 0  |       ucs4_t uc;  | 
298  | 0  |       int n;  | 
299  | 0  |       int m;  | 
300  |  | 
  | 
301  | 0  |       n = u8_mbtoucr (&uc, (const uint8_t *) inptr, insize);  | 
302  | 0  |       if (n < 0)  | 
303  | 0  |         { | 
304  | 0  |           errno = (n == -2 ? EINVAL : EILSEQ);  | 
305  | 0  |           n = u8_mbtouc (&uc, (const uint8_t *) inptr, insize);  | 
306  | 0  |           inptr += n;  | 
307  | 0  |           insize -= n;  | 
308  | 0  |           res = (size_t)(-1);  | 
309  | 0  |           *incremented = true;  | 
310  | 0  |           break;  | 
311  | 0  |         }  | 
312  | 0  |       if (outsize == 0)  | 
313  | 0  |         { | 
314  | 0  |           errno = E2BIG;  | 
315  | 0  |           res = (size_t)(-1);  | 
316  | 0  |           *incremented = false;  | 
317  | 0  |           break;  | 
318  | 0  |         }  | 
319  | 0  |       m = u8_uctomb ((uint8_t *) outptr, uc, outsize);  | 
320  | 0  |       if (m == -2)  | 
321  | 0  |         { | 
322  | 0  |           errno = E2BIG;  | 
323  | 0  |           res = (size_t)(-1);  | 
324  | 0  |           *incremented = false;  | 
325  | 0  |           break;  | 
326  | 0  |         }  | 
327  | 0  |       inptr += n;  | 
328  | 0  |       insize -= n;  | 
329  | 0  |       if (m == -1)  | 
330  | 0  |         { | 
331  | 0  |           errno = EILSEQ;  | 
332  | 0  |           res = (size_t)(-1);  | 
333  | 0  |           *incremented = true;  | 
334  | 0  |           break;  | 
335  | 0  |         }  | 
336  | 0  |       outptr += m;  | 
337  | 0  |       outsize -= m;  | 
338  | 0  |     }  | 
339  | 0  |   while (!one_character_only && insize > 0);  | 
340  |  |  | 
341  | 0  |   *inbuf = inptr;  | 
342  | 0  |   *inbytesleft = insize;  | 
343  | 0  |   *outbuf = outptr;  | 
344  | 0  |   *outbytesleft = outsize;  | 
345  | 0  |   return res;  | 
346  | 0  | }  | 
347  |  |  | 
348  |  | static int  | 
349  |  | mem_cd_iconveh_internal (const char *src, size_t srclen,  | 
350  |  |                          iconv_t cd, iconv_t cd1, iconv_t cd2,  | 
351  |  |                          enum iconv_ilseq_handler handler,  | 
352  |  |                          size_t extra_alloc,  | 
353  |  |                          size_t *offsets,  | 
354  |  |                          char **resultp, size_t *lengthp)  | 
355  | 0  | { | 
356  |  |   /* When a conversion error occurs, we cannot start using CD1 and CD2 at  | 
357  |  |      this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.  | 
358  |  |      Instead, we have to start afresh from the beginning of SRC.  */  | 
359  |  |   /* Use a temporary buffer, so that for small strings, a single malloc()  | 
360  |  |      call will be sufficient.  */  | 
361  | 0  | # define tmpbufsize 4096  | 
362  |  |   /* The alignment is needed when converting e.g. to glibc's WCHAR_T or  | 
363  |  |      libiconv's UCS-4-INTERNAL encoding.  */  | 
364  | 0  |   union { unsigned int align; char buf[tmpbufsize]; } tmp; | 
365  | 0  | # define tmpbuf tmp.buf  | 
366  |  | 
  | 
367  | 0  |   char *initial_result;  | 
368  | 0  |   char *result;  | 
369  | 0  |   size_t allocated;  | 
370  | 0  |   size_t length;  | 
371  | 0  |   size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */  | 
372  |  | 
  | 
373  | 0  |   if (*resultp != NULL && *lengthp >= sizeof (tmpbuf))  | 
374  | 0  |     { | 
375  | 0  |       initial_result = *resultp;  | 
376  | 0  |       allocated = *lengthp;  | 
377  | 0  |     }  | 
378  | 0  |   else  | 
379  | 0  |     { | 
380  | 0  |       initial_result = tmpbuf;  | 
381  | 0  |       allocated = sizeof (tmpbuf);  | 
382  | 0  |     }  | 
383  | 0  |   result = initial_result;  | 
384  |  |  | 
385  |  |   /* Test whether a direct conversion is possible at all.  */  | 
386  | 0  |   if (cd == (iconv_t)(-1))  | 
387  | 0  |     goto indirectly;  | 
388  |  |  | 
389  | 0  |   if (offsets != NULL)  | 
390  | 0  |     { | 
391  | 0  |       size_t i;  | 
392  |  | 
  | 
393  | 0  |       for (i = 0; i < srclen; i++)  | 
394  | 0  |         offsets[i] = (size_t)(-1);  | 
395  |  | 
  | 
396  | 0  |       last_length = (size_t)(-1);  | 
397  | 0  |     }  | 
398  | 0  |   length = 0;  | 
399  |  |  | 
400  |  |   /* First, try a direct conversion, and see whether a conversion error  | 
401  |  |      occurs at all.  */  | 
402  | 0  |   { | 
403  | 0  |     const char *inptr = src;  | 
404  | 0  |     size_t insize = srclen;  | 
405  |  |  | 
406  |  |     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */  | 
407  | 0  | # if defined _LIBICONV_VERSION \  | 
408  | 0  |      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \  | 
409  | 0  |           || defined __sun)  | 
410  |  |     /* Set to the initial state.  */  | 
411  | 0  |     iconv (cd, NULL, NULL, NULL, NULL);  | 
412  | 0  | # endif  | 
413  |  | 
  | 
414  | 0  |     while (insize > 0)  | 
415  | 0  |       { | 
416  | 0  |         char *outptr = result + length;  | 
417  | 0  |         size_t outsize = allocated - extra_alloc - length;  | 
418  | 0  |         bool incremented;  | 
419  | 0  |         size_t res;  | 
420  | 0  |         bool grow;  | 
421  |  | 
  | 
422  | 0  |         if (offsets != NULL)  | 
423  | 0  |           { | 
424  | 0  |             if (length != last_length) /* ensure that offset[] be increasing */  | 
425  | 0  |               { | 
426  | 0  |                 offsets[inptr - src] = length;  | 
427  | 0  |                 last_length = length;  | 
428  | 0  |               }  | 
429  | 0  |             res = iconv_carefully_1 (cd,  | 
430  | 0  |                                      &inptr, &insize,  | 
431  | 0  |                                      &outptr, &outsize,  | 
432  | 0  |                                      &incremented);  | 
433  | 0  |           }  | 
434  | 0  |         else  | 
435  |  |           /* Use iconv_carefully instead of iconv here, because:  | 
436  |  |              - If TO_CODESET is UTF-8, we can do the error handling in this  | 
437  |  |                loop, no need for a second loop,  | 
438  |  |              - With iconv() implementations other than GNU libiconv and GNU  | 
439  |  |                libc, if we use iconv() in a big swoop, checking for an E2BIG  | 
440  |  |                return, we lose the number of irreversible conversions.  */  | 
441  | 0  |           res = iconv_carefully (cd,  | 
442  | 0  |                                  &inptr, &insize,  | 
443  | 0  |                                  &outptr, &outsize,  | 
444  | 0  |                                  &incremented);  | 
445  |  | 
  | 
446  | 0  |         length = outptr - result;  | 
447  | 0  |         grow = (length + extra_alloc > allocated / 2);  | 
448  | 0  |         if (res == (size_t)(-1))  | 
449  | 0  |           { | 
450  | 0  |             if (errno == E2BIG)  | 
451  | 0  |               grow = true;  | 
452  | 0  |             else if (errno == EINVAL)  | 
453  | 0  |               break;  | 
454  | 0  |             else if (errno == EILSEQ && handler != iconveh_error)  | 
455  | 0  |               { | 
456  | 0  |                 if (cd2 == (iconv_t)(-1))  | 
457  | 0  |                   { | 
458  |  |                     /* TO_CODESET is UTF-8.  */  | 
459  |  |                     /* Error handling can produce up to 1 or 3 bytes of  | 
460  |  |                        output.  */  | 
461  | 0  |                     size_t extra_need =  | 
462  | 0  |                       (handler == iconveh_replacement_character ? 3 : 1);  | 
463  | 0  |                     if (length + extra_need + extra_alloc > allocated)  | 
464  | 0  |                       { | 
465  | 0  |                         char *memory;  | 
466  |  | 
  | 
467  | 0  |                         allocated = 2 * allocated;  | 
468  | 0  |                         if (length + extra_need + extra_alloc > allocated)  | 
469  | 0  |                           allocated = 2 * allocated;  | 
470  | 0  |                         if (length + extra_need + extra_alloc > allocated)  | 
471  | 0  |                           abort ();  | 
472  | 0  |                         if (result == initial_result)  | 
473  | 0  |                           memory = (char *) malloc (allocated);  | 
474  | 0  |                         else  | 
475  | 0  |                           memory = (char *) realloc (result, allocated);  | 
476  | 0  |                         if (memory == NULL)  | 
477  | 0  |                           { | 
478  | 0  |                             if (result != initial_result)  | 
479  | 0  |                               free (result);  | 
480  | 0  |                             errno = ENOMEM;  | 
481  | 0  |                             return -1;  | 
482  | 0  |                           }  | 
483  | 0  |                         if (result == initial_result)  | 
484  | 0  |                           memcpy (memory, initial_result, length);  | 
485  | 0  |                         result = memory;  | 
486  | 0  |                         grow = false;  | 
487  | 0  |                       }  | 
488  |  |                     /* The input is invalid in FROM_CODESET.  Eat up one byte  | 
489  |  |                        and emit a replacement character or a question mark.  */  | 
490  | 0  |                     if (!incremented)  | 
491  | 0  |                       { | 
492  | 0  |                         if (insize == 0)  | 
493  | 0  |                           abort ();  | 
494  | 0  |                         inptr++;  | 
495  | 0  |                         insize--;  | 
496  | 0  |                       }  | 
497  | 0  |                     if (handler == iconveh_replacement_character)  | 
498  | 0  |                       { | 
499  |  |                         /* U+FFFD in UTF-8 encoding.  */  | 
500  | 0  |                         result[length+0] = '\357';  | 
501  | 0  |                         result[length+1] = '\277';  | 
502  | 0  |                         result[length+2] = '\275';  | 
503  | 0  |                         length += 3;  | 
504  | 0  |                       }  | 
505  | 0  |                     else  | 
506  | 0  |                       { | 
507  | 0  |                         result[length] = '?';  | 
508  | 0  |                         length++;  | 
509  | 0  |                       }  | 
510  | 0  |                   }  | 
511  | 0  |                 else  | 
512  | 0  |                   goto indirectly;  | 
513  | 0  |               }  | 
514  | 0  |             else  | 
515  | 0  |               { | 
516  | 0  |                 if (result != initial_result)  | 
517  | 0  |                   free (result);  | 
518  | 0  |                 return -1;  | 
519  | 0  |               }  | 
520  | 0  |           }  | 
521  | 0  |         if (insize == 0)  | 
522  | 0  |           break;  | 
523  | 0  |         if (grow)  | 
524  | 0  |           { | 
525  | 0  |             char *memory;  | 
526  |  | 
  | 
527  | 0  |             allocated = 2 * allocated;  | 
528  | 0  |             if (result == initial_result)  | 
529  | 0  |               memory = (char *) malloc (allocated);  | 
530  | 0  |             else  | 
531  | 0  |               memory = (char *) realloc (result, allocated);  | 
532  | 0  |             if (memory == NULL)  | 
533  | 0  |               { | 
534  | 0  |                 if (result != initial_result)  | 
535  | 0  |                   free (result);  | 
536  | 0  |                 errno = ENOMEM;  | 
537  | 0  |                 return -1;  | 
538  | 0  |               }  | 
539  | 0  |             if (result == initial_result)  | 
540  | 0  |               memcpy (memory, initial_result, length);  | 
541  | 0  |             result = memory;  | 
542  | 0  |           }  | 
543  | 0  |       }  | 
544  | 0  |   }  | 
545  |  |  | 
546  |  |   /* Now get the conversion state back to the initial state.  | 
547  |  |      But avoid glibc-2.1 bug and Solaris 2.7 bug.  */  | 
548  | 0  | #if defined _LIBICONV_VERSION \  | 
549  | 0  |     || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \  | 
550  | 0  |          || defined __sun)  | 
551  | 0  |   for (;;)  | 
552  | 0  |     { | 
553  | 0  |       char *outptr = result + length;  | 
554  | 0  |       size_t outsize = allocated - extra_alloc - length;  | 
555  | 0  |       size_t res;  | 
556  |  | 
  | 
557  | 0  |       res = iconv (cd, NULL, NULL, &outptr, &outsize);  | 
558  | 0  |       length = outptr - result;  | 
559  | 0  |       if (res == (size_t)(-1))  | 
560  | 0  |         { | 
561  | 0  |           if (errno == E2BIG)  | 
562  | 0  |             { | 
563  | 0  |               char *memory;  | 
564  |  | 
  | 
565  | 0  |               allocated = 2 * allocated;  | 
566  | 0  |               if (result == initial_result)  | 
567  | 0  |                 memory = (char *) malloc (allocated);  | 
568  | 0  |               else  | 
569  | 0  |                 memory = (char *) realloc (result, allocated);  | 
570  | 0  |               if (memory == NULL)  | 
571  | 0  |                 { | 
572  | 0  |                   if (result != initial_result)  | 
573  | 0  |                     free (result);  | 
574  | 0  |                   errno = ENOMEM;  | 
575  | 0  |                   return -1;  | 
576  | 0  |                 }  | 
577  | 0  |               if (result == initial_result)  | 
578  | 0  |                 memcpy (memory, initial_result, length);  | 
579  | 0  |               result = memory;  | 
580  | 0  |             }  | 
581  | 0  |           else  | 
582  | 0  |             { | 
583  | 0  |               if (result != initial_result)  | 
584  | 0  |                 free (result);  | 
585  | 0  |               return -1;  | 
586  | 0  |             }  | 
587  | 0  |         }  | 
588  | 0  |       else  | 
589  | 0  |         break;  | 
590  | 0  |     }  | 
591  | 0  | #endif  | 
592  |  |  | 
593  |  |   /* The direct conversion succeeded.  */  | 
594  | 0  |   goto done;  | 
595  |  |  | 
596  | 0  |  indirectly:  | 
597  |  |   /* The direct conversion failed.  | 
598  |  |      Use a conversion through UTF-8.  */  | 
599  | 0  |   if (offsets != NULL)  | 
600  | 0  |     { | 
601  | 0  |       size_t i;  | 
602  |  | 
  | 
603  | 0  |       for (i = 0; i < srclen; i++)  | 
604  | 0  |         offsets[i] = (size_t)(-1);  | 
605  |  | 
  | 
606  | 0  |       last_length = (size_t)(-1);  | 
607  | 0  |     }  | 
608  | 0  |   length = 0;  | 
609  | 0  |   { | 
610  | 0  |     const bool slowly = (offsets != NULL || handler == iconveh_error);  | 
611  | 0  | # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */  | 
612  | 0  |     char utf8buf[utf8bufsize + 3];  | 
613  | 0  |     size_t utf8len = 0;  | 
614  | 0  |     const char *in1ptr = src;  | 
615  | 0  |     size_t in1size = srclen;  | 
616  | 0  |     bool do_final_flush1 = true;  | 
617  | 0  |     bool do_final_flush2 = true;  | 
618  |  |  | 
619  |  |     /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug.  */  | 
620  | 0  | # if defined _LIBICONV_VERSION \  | 
621  | 0  |      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \  | 
622  | 0  |           || defined __sun)  | 
623  |  |     /* Set to the initial state.  */  | 
624  | 0  |     if (cd1 != (iconv_t)(-1))  | 
625  | 0  |       iconv (cd1, NULL, NULL, NULL, NULL);  | 
626  | 0  |     if (cd2 != (iconv_t)(-1))  | 
627  | 0  |       iconv (cd2, NULL, NULL, NULL, NULL);  | 
628  | 0  | # endif  | 
629  |  | 
  | 
630  | 0  |     while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2)  | 
631  | 0  |       { | 
632  | 0  |         char *out1ptr = utf8buf + utf8len;  | 
633  | 0  |         size_t out1size = utf8bufsize - utf8len;  | 
634  | 0  |         bool incremented1;  | 
635  | 0  |         size_t res1;  | 
636  | 0  |         int errno1;  | 
637  |  |  | 
638  |  |         /* Conversion step 1: from FROM_CODESET to UTF-8.  */  | 
639  | 0  |         if (in1size > 0)  | 
640  | 0  |           { | 
641  | 0  |             if (offsets != NULL  | 
642  | 0  |                 && length != last_length) /* ensure that offset[] be increasing */  | 
643  | 0  |               { | 
644  | 0  |                 offsets[in1ptr - src] = length;  | 
645  | 0  |                 last_length = length;  | 
646  | 0  |               }  | 
647  | 0  |             if (cd1 != (iconv_t)(-1))  | 
648  | 0  |               { | 
649  | 0  |                 if (slowly)  | 
650  | 0  |                   res1 = iconv_carefully_1 (cd1,  | 
651  | 0  |                                             &in1ptr, &in1size,  | 
652  | 0  |                                             &out1ptr, &out1size,  | 
653  | 0  |                                             &incremented1);  | 
654  | 0  |                 else  | 
655  | 0  |                   res1 = iconv_carefully (cd1,  | 
656  | 0  |                                           &in1ptr, &in1size,  | 
657  | 0  |                                           &out1ptr, &out1size,  | 
658  | 0  |                                           &incremented1);  | 
659  | 0  |               }  | 
660  | 0  |             else  | 
661  | 0  |               { | 
662  |  |                 /* FROM_CODESET is UTF-8.  */  | 
663  | 0  |                 res1 = utf8conv_carefully (slowly,  | 
664  | 0  |                                            &in1ptr, &in1size,  | 
665  | 0  |                                            &out1ptr, &out1size,  | 
666  | 0  |                                            &incremented1);  | 
667  | 0  |               }  | 
668  | 0  |           }  | 
669  | 0  |         else if (do_final_flush1)  | 
670  | 0  |           { | 
671  |  |             /* Now get the conversion state of CD1 back to the initial state.  | 
672  |  |                But avoid glibc-2.1 bug and Solaris 2.7 bug.  */  | 
673  | 0  | # if defined _LIBICONV_VERSION \  | 
674  | 0  |      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \  | 
675  | 0  |           || defined __sun)  | 
676  | 0  |             if (cd1 != (iconv_t)(-1))  | 
677  | 0  |               res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size);  | 
678  | 0  |             else  | 
679  | 0  | # endif  | 
680  | 0  |               res1 = 0;  | 
681  | 0  |             do_final_flush1 = false;  | 
682  | 0  |             incremented1 = true;  | 
683  | 0  |           }  | 
684  | 0  |         else  | 
685  | 0  |           { | 
686  | 0  |             res1 = 0;  | 
687  | 0  |             incremented1 = true;  | 
688  | 0  |           }  | 
689  | 0  |         if (res1 == (size_t)(-1)  | 
690  | 0  |             && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))  | 
691  | 0  |           { | 
692  | 0  |             if (result != initial_result)  | 
693  | 0  |               free (result);  | 
694  | 0  |             return -1;  | 
695  | 0  |           }  | 
696  | 0  |         if (res1 == (size_t)(-1)  | 
697  | 0  |             && errno == EILSEQ && handler != iconveh_error)  | 
698  | 0  |           { | 
699  |  |             /* The input is invalid in FROM_CODESET.  Eat up one byte and  | 
700  |  |                emit a U+FFFD character or a question mark.  Room for this  | 
701  |  |                character was allocated at the end of utf8buf.  */  | 
702  | 0  |             if (!incremented1)  | 
703  | 0  |               { | 
704  | 0  |                 if (in1size == 0)  | 
705  | 0  |                   abort ();  | 
706  | 0  |                 in1ptr++;  | 
707  | 0  |                 in1size--;  | 
708  | 0  |               }  | 
709  | 0  |             if (handler == iconveh_replacement_character)  | 
710  | 0  |               { | 
711  |  |                 /* U+FFFD in UTF-8 encoding.  */  | 
712  | 0  |                 out1ptr[0] = '\357';  | 
713  | 0  |                 out1ptr[1] = '\277';  | 
714  | 0  |                 out1ptr[2] = '\275';  | 
715  | 0  |                 out1ptr += 3;  | 
716  | 0  |               }  | 
717  | 0  |             else  | 
718  | 0  |               *out1ptr++ = '?';  | 
719  | 0  |             res1 = 0;  | 
720  | 0  |           }  | 
721  | 0  |         errno1 = errno;  | 
722  | 0  |         utf8len = out1ptr - utf8buf;  | 
723  |  | 
  | 
724  | 0  |         if (offsets != NULL  | 
725  | 0  |             || in1size == 0  | 
726  | 0  |             || utf8len > utf8bufsize / 2  | 
727  | 0  |             || (res1 == (size_t)(-1) && errno1 == E2BIG))  | 
728  | 0  |           { | 
729  |  |             /* Conversion step 2: from UTF-8 to TO_CODESET.  */  | 
730  | 0  |             const char *in2ptr = utf8buf;  | 
731  | 0  |             size_t in2size = utf8len;  | 
732  |  | 
  | 
733  | 0  |             while (in2size > 0  | 
734  | 0  |                    || (in1size == 0 && !do_final_flush1 && do_final_flush2))  | 
735  | 0  |               { | 
736  | 0  |                 char *out2ptr = result + length;  | 
737  | 0  |                 size_t out2size = allocated - extra_alloc - length;  | 
738  | 0  |                 bool incremented2;  | 
739  | 0  |                 size_t res2;  | 
740  | 0  |                 bool grow;  | 
741  |  | 
  | 
742  | 0  |                 if (in2size > 0)  | 
743  | 0  |                   { | 
744  | 0  |                     if (cd2 != (iconv_t)(-1))  | 
745  | 0  |                       res2 = iconv_carefully (cd2,  | 
746  | 0  |                                               &in2ptr, &in2size,  | 
747  | 0  |                                               &out2ptr, &out2size,  | 
748  | 0  |                                               &incremented2);  | 
749  | 0  |                     else  | 
750  |  |                       /* TO_CODESET is UTF-8.  */  | 
751  | 0  |                       res2 = utf8conv_carefully (false,  | 
752  | 0  |                                                  &in2ptr, &in2size,  | 
753  | 0  |                                                  &out2ptr, &out2size,  | 
754  | 0  |                                                  &incremented2);  | 
755  | 0  |                   }  | 
756  | 0  |                 else /* in1size == 0 && !do_final_flush1  | 
757  |  |                         && in2size == 0 && do_final_flush2 */  | 
758  | 0  |                   { | 
759  |  |                     /* Now get the conversion state of CD2 back to the initial  | 
760  |  |                        state.  But avoid glibc-2.1 bug and Solaris 2.7 bug.  */  | 
761  | 0  | # if defined _LIBICONV_VERSION \  | 
762  | 0  |      || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \  | 
763  | 0  |           || defined __sun)  | 
764  | 0  |                     if (cd2 != (iconv_t)(-1))  | 
765  | 0  |                       res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);  | 
766  | 0  |                     else  | 
767  | 0  | # endif  | 
768  | 0  |                       res2 = 0;  | 
769  | 0  |                     do_final_flush2 = false;  | 
770  | 0  |                     incremented2 = true;  | 
771  | 0  |                   }  | 
772  |  | 
  | 
773  | 0  |                 length = out2ptr - result;  | 
774  | 0  |                 grow = (length + extra_alloc > allocated / 2);  | 
775  | 0  |                 if (res2 == (size_t)(-1))  | 
776  | 0  |                   { | 
777  | 0  |                     if (errno == E2BIG)  | 
778  | 0  |                       grow = true;  | 
779  | 0  |                     else if (errno == EINVAL)  | 
780  | 0  |                       break;  | 
781  | 0  |                     else if (errno == EILSEQ && handler != iconveh_error)  | 
782  | 0  |                       { | 
783  |  |                         /* Error handling can produce up to 10 bytes of UTF-8  | 
784  |  |                            output.  But TO_CODESET may be UCS-2, UTF-16 or  | 
785  |  |                            UCS-4, so use CD2 here as well.  */  | 
786  | 0  |                         char scratchbuf[10];  | 
787  | 0  |                         size_t scratchlen;  | 
788  | 0  |                         ucs4_t uc;  | 
789  | 0  |                         const char *inptr;  | 
790  | 0  |                         size_t insize;  | 
791  | 0  |                         size_t res;  | 
792  |  | 
  | 
793  | 0  |                         if (incremented2)  | 
794  | 0  |                           { | 
795  | 0  |                             if (u8_prev (&uc, (const uint8_t *) in2ptr,  | 
796  | 0  |                                          (const uint8_t *) utf8buf)  | 
797  | 0  |                                 == NULL)  | 
798  | 0  |                               abort ();  | 
799  | 0  |                           }  | 
800  | 0  |                         else  | 
801  | 0  |                           { | 
802  | 0  |                             int n;  | 
803  | 0  |                             if (in2size == 0)  | 
804  | 0  |                               abort ();  | 
805  | 0  |                             n = u8_mbtouc_unsafe (&uc, (const uint8_t *) in2ptr,  | 
806  | 0  |                                                   in2size);  | 
807  | 0  |                             in2ptr += n;  | 
808  | 0  |                             in2size -= n;  | 
809  | 0  |                           }  | 
810  |  |  | 
811  | 0  |                         if (handler == iconveh_escape_sequence)  | 
812  | 0  |                           { | 
813  | 0  |                             static char hex[16] = "0123456789ABCDEF";  | 
814  | 0  |                             scratchlen = 0;  | 
815  | 0  |                             scratchbuf[scratchlen++] = '\\';  | 
816  | 0  |                             if (uc < 0x10000)  | 
817  | 0  |                               scratchbuf[scratchlen++] = 'u';  | 
818  | 0  |                             else  | 
819  | 0  |                               { | 
820  | 0  |                                 scratchbuf[scratchlen++] = 'U';  | 
821  | 0  |                                 scratchbuf[scratchlen++] = hex[(uc>>28) & 15];  | 
822  | 0  |                                 scratchbuf[scratchlen++] = hex[(uc>>24) & 15];  | 
823  | 0  |                                 scratchbuf[scratchlen++] = hex[(uc>>20) & 15];  | 
824  | 0  |                                 scratchbuf[scratchlen++] = hex[(uc>>16) & 15];  | 
825  | 0  |                               }  | 
826  | 0  |                             scratchbuf[scratchlen++] = hex[(uc>>12) & 15];  | 
827  | 0  |                             scratchbuf[scratchlen++] = hex[(uc>>8) & 15];  | 
828  | 0  |                             scratchbuf[scratchlen++] = hex[(uc>>4) & 15];  | 
829  | 0  |                             scratchbuf[scratchlen++] = hex[uc & 15];  | 
830  | 0  |                           }  | 
831  | 0  |                         else if (handler == iconveh_replacement_character)  | 
832  | 0  |                           { | 
833  |  |                             /* U+FFFD in UTF-8 encoding.  */  | 
834  | 0  |                             scratchbuf[0] = '\357';  | 
835  | 0  |                             scratchbuf[1] = '\277';  | 
836  | 0  |                             scratchbuf[2] = '\275';  | 
837  | 0  |                             scratchlen = 3;  | 
838  | 0  |                           }  | 
839  | 0  |                         else  | 
840  | 0  |                           { | 
841  | 0  |                             scratchbuf[0] = '?';  | 
842  | 0  |                             scratchlen = 1;  | 
843  | 0  |                           }  | 
844  |  | 
  | 
845  | 0  |                         inptr = scratchbuf;  | 
846  | 0  |                         insize = scratchlen;  | 
847  | 0  |                         if (cd2 != (iconv_t)(-1))  | 
848  | 0  |                           { | 
849  | 0  |                             char *out2ptr_try = out2ptr;  | 
850  | 0  |                             size_t out2size_try = out2size;  | 
851  | 0  |                             res = iconv (cd2,  | 
852  | 0  |                                          (ICONV_CONST char **) &inptr, &insize,  | 
853  | 0  |                                          &out2ptr_try, &out2size_try);  | 
854  | 0  |                             if (handler == iconveh_replacement_character  | 
855  | 0  |                                 && (res == (size_t)(-1)  | 
856  | 0  |                                     ? errno == EILSEQ  | 
857  |  |                                     /* FreeBSD iconv(), NetBSD iconv(), and  | 
858  |  |                                        Solaris 11 iconv() insert a '?' if they  | 
859  |  |                                        cannot convert.  This is what we want.  | 
860  |  |                                        But IRIX iconv() inserts a NUL byte if it  | 
861  |  |                                        cannot convert.  | 
862  |  |                                        And musl libc iconv() inserts a '*' if it  | 
863  |  |                                        cannot convert.  */  | 
864  | 0  |                                     : (res > 0  | 
865  | 0  |                                        && !(out2ptr_try - out2ptr == 1  | 
866  | 0  |                                             && *out2ptr == '?'))))  | 
867  | 0  |                               { | 
868  |  |                                 /* The iconv() call failed.  | 
869  |  |                                    U+FFFD can't be converted to TO_CODESET.  | 
870  |  |                                    Use '?' instead.  */  | 
871  | 0  |                                 scratchbuf[0] = '?';  | 
872  | 0  |                                 scratchlen = 1;  | 
873  | 0  |                                 inptr = scratchbuf;  | 
874  | 0  |                                 insize = scratchlen;  | 
875  | 0  |                                 res = iconv (cd2,  | 
876  | 0  |                                              (ICONV_CONST char **) &inptr, &insize,  | 
877  | 0  |                                              &out2ptr, &out2size);  | 
878  | 0  |                               }  | 
879  | 0  |                             else  | 
880  | 0  |                               { | 
881  |  |                                 /* Accept the results of the iconv() call.  */  | 
882  | 0  |                                 out2ptr = out2ptr_try;  | 
883  | 0  |                                 out2size = out2size_try;  | 
884  | 0  |                                 res = 0;  | 
885  | 0  |                               }  | 
886  | 0  |                           }  | 
887  | 0  |                         else  | 
888  | 0  |                           { | 
889  |  |                             /* TO_CODESET is UTF-8.  */  | 
890  | 0  |                             if (out2size >= insize)  | 
891  | 0  |                               { | 
892  | 0  |                                 memcpy (out2ptr, inptr, insize);  | 
893  | 0  |                                 out2ptr += insize;  | 
894  | 0  |                                 out2size -= insize;  | 
895  | 0  |                                 inptr += insize;  | 
896  | 0  |                                 insize = 0;  | 
897  | 0  |                                 res = 0;  | 
898  | 0  |                               }  | 
899  | 0  |                             else  | 
900  | 0  |                               { | 
901  | 0  |                                 errno = E2BIG;  | 
902  | 0  |                                 res = (size_t)(-1);  | 
903  | 0  |                               }  | 
904  | 0  |                           }  | 
905  | 0  |                         length = out2ptr - result;  | 
906  | 0  |                         if (res == (size_t)(-1) && errno == E2BIG)  | 
907  | 0  |                           { | 
908  | 0  |                             char *memory;  | 
909  |  | 
  | 
910  | 0  |                             allocated = 2 * allocated;  | 
911  | 0  |                             if (length + 1 + extra_alloc > allocated)  | 
912  | 0  |                               abort ();  | 
913  | 0  |                             if (result == initial_result)  | 
914  | 0  |                               memory = (char *) malloc (allocated);  | 
915  | 0  |                             else  | 
916  | 0  |                               memory = (char *) realloc (result, allocated);  | 
917  | 0  |                             if (memory == NULL)  | 
918  | 0  |                               { | 
919  | 0  |                                 if (result != initial_result)  | 
920  | 0  |                                   free (result);  | 
921  | 0  |                                 errno = ENOMEM;  | 
922  | 0  |                                 return -1;  | 
923  | 0  |                               }  | 
924  | 0  |                             if (result == initial_result)  | 
925  | 0  |                               memcpy (memory, initial_result, length);  | 
926  | 0  |                             result = memory;  | 
927  | 0  |                             grow = false;  | 
928  |  | 
  | 
929  | 0  |                             out2ptr = result + length;  | 
930  | 0  |                             out2size = allocated - extra_alloc - length;  | 
931  | 0  |                             if (cd2 != (iconv_t)(-1))  | 
932  | 0  |                               res = iconv (cd2,  | 
933  | 0  |                                            (ICONV_CONST char **) &inptr,  | 
934  | 0  |                                            &insize,  | 
935  | 0  |                                            &out2ptr, &out2size);  | 
936  | 0  |                             else  | 
937  | 0  |                               { | 
938  |  |                                 /* TO_CODESET is UTF-8.  */  | 
939  | 0  |                                 if (!(out2size >= insize))  | 
940  | 0  |                                   abort ();  | 
941  | 0  |                                 memcpy (out2ptr, inptr, insize);  | 
942  | 0  |                                 out2ptr += insize;  | 
943  | 0  |                                 out2size -= insize;  | 
944  | 0  |                                 inptr += insize;  | 
945  | 0  |                                 insize = 0;  | 
946  | 0  |                                 res = 0;  | 
947  | 0  |                               }  | 
948  | 0  |                             length = out2ptr - result;  | 
949  | 0  |                           }  | 
950  |  | # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)  | 
951  |  |                         /* IRIX iconv() inserts a NUL byte if it cannot convert.  | 
952  |  |                            FreeBSD iconv(), NetBSD iconv(), and Solaris 11  | 
953  |  |                            iconv() insert a '?' if they cannot convert.  | 
954  |  |                            musl libc iconv() inserts a '*' if it cannot convert.  | 
955  |  |                            Only GNU libiconv and GNU libc are known to prefer  | 
956  |  |                            to fail rather than doing a lossy conversion.  */  | 
957  |  |                         if (res != (size_t)(-1) && res > 0)  | 
958  |  |                           { | 
959  |  |                             errno = EILSEQ;  | 
960  |  |                             res = (size_t)(-1);  | 
961  |  |                           }  | 
962  |  | # endif  | 
963  | 0  |                         if (res == (size_t)(-1))  | 
964  | 0  |                           { | 
965  |  |                             /* Failure converting the ASCII replacement.  */  | 
966  | 0  |                             if (result != initial_result)  | 
967  | 0  |                               free (result);  | 
968  | 0  |                             return -1;  | 
969  | 0  |                           }  | 
970  | 0  |                       }  | 
971  | 0  |                     else  | 
972  | 0  |                       { | 
973  | 0  |                         if (result != initial_result)  | 
974  | 0  |                           free (result);  | 
975  | 0  |                         return -1;  | 
976  | 0  |                       }  | 
977  | 0  |                   }  | 
978  | 0  |                 if (!(in2size > 0  | 
979  | 0  |                       || (in1size == 0 && !do_final_flush1 && do_final_flush2)))  | 
980  | 0  |                   break;  | 
981  | 0  |                 if (grow)  | 
982  | 0  |                   { | 
983  | 0  |                     char *memory;  | 
984  |  | 
  | 
985  | 0  |                     allocated = 2 * allocated;  | 
986  | 0  |                     if (result == initial_result)  | 
987  | 0  |                       memory = (char *) malloc (allocated);  | 
988  | 0  |                     else  | 
989  | 0  |                       memory = (char *) realloc (result, allocated);  | 
990  | 0  |                     if (memory == NULL)  | 
991  | 0  |                       { | 
992  | 0  |                         if (result != initial_result)  | 
993  | 0  |                           free (result);  | 
994  | 0  |                         errno = ENOMEM;  | 
995  | 0  |                         return -1;  | 
996  | 0  |                       }  | 
997  | 0  |                     if (result == initial_result)  | 
998  | 0  |                       memcpy (memory, initial_result, length);  | 
999  | 0  |                     result = memory;  | 
1000  | 0  |                   }  | 
1001  | 0  |               }  | 
1002  |  |  | 
1003  |  |             /* Move the remaining bytes to the beginning of utf8buf.  */  | 
1004  | 0  |             if (in2size > 0)  | 
1005  | 0  |               memmove (utf8buf, in2ptr, in2size);  | 
1006  | 0  |             utf8len = in2size;  | 
1007  | 0  |           }  | 
1008  |  |  | 
1009  | 0  |         if (res1 == (size_t)(-1))  | 
1010  | 0  |           { | 
1011  | 0  |             if (errno1 == EINVAL)  | 
1012  | 0  |               in1size = 0;  | 
1013  | 0  |             else if (errno1 == EILSEQ)  | 
1014  | 0  |               { | 
1015  | 0  |                 if (result != initial_result)  | 
1016  | 0  |                   free (result);  | 
1017  | 0  |                 errno = errno1;  | 
1018  | 0  |                 return -1;  | 
1019  | 0  |               }  | 
1020  | 0  |           }  | 
1021  | 0  |       }  | 
1022  | 0  | # undef utf8bufsize  | 
1023  | 0  |   }  | 
1024  |  |  | 
1025  | 0  |  done:  | 
1026  |  |   /* Now the final memory allocation.  */  | 
1027  | 0  |   if (result == tmpbuf)  | 
1028  | 0  |     { | 
1029  | 0  |       size_t memsize = length + extra_alloc;  | 
1030  |  | 
  | 
1031  | 0  |       if (*resultp != NULL && *lengthp >= memsize)  | 
1032  | 0  |         result = *resultp;  | 
1033  | 0  |       else  | 
1034  | 0  |         { | 
1035  | 0  |           char *memory;  | 
1036  |  | 
  | 
1037  | 0  |           memory = (char *) malloc (memsize > 0 ? memsize : 1);  | 
1038  | 0  |           if (memory != NULL)  | 
1039  | 0  |             result = memory;  | 
1040  | 0  |           else  | 
1041  | 0  |             { | 
1042  | 0  |               errno = ENOMEM;  | 
1043  | 0  |               return -1;  | 
1044  | 0  |             }  | 
1045  | 0  |         }  | 
1046  | 0  |       memcpy (result, tmpbuf, length);  | 
1047  | 0  |     }  | 
1048  | 0  |   else if (result != *resultp && length + extra_alloc < allocated)  | 
1049  | 0  |     { | 
1050  |  |       /* Shrink the allocated memory if possible.  */  | 
1051  | 0  |       size_t memsize = length + extra_alloc;  | 
1052  | 0  |       char *memory;  | 
1053  |  | 
  | 
1054  | 0  |       memory = (char *) realloc (result, memsize > 0 ? memsize : 1);  | 
1055  | 0  |       if (memory != NULL)  | 
1056  | 0  |         result = memory;  | 
1057  | 0  |     }  | 
1058  | 0  |   *resultp = result;  | 
1059  | 0  |   *lengthp = length;  | 
1060  | 0  |   return 0;  | 
1061  | 0  | # undef tmpbuf  | 
1062  | 0  | # undef tmpbufsize  | 
1063  | 0  | }  | 
1064  |  |  | 
1065  |  | int  | 
1066  |  | mem_cd_iconveh (const char *src, size_t srclen,  | 
1067  |  |                 const iconveh_t *cd,  | 
1068  |  |                 enum iconv_ilseq_handler handler,  | 
1069  |  |                 size_t *offsets,  | 
1070  |  |                 char **resultp, size_t *lengthp)  | 
1071  | 0  | { | 
1072  | 0  |   return mem_cd_iconveh_internal (src, srclen, cd->cd, cd->cd1, cd->cd2,  | 
1073  | 0  |                                   handler, 0, offsets, resultp, lengthp);  | 
1074  | 0  | }  | 
1075  |  |  | 
1076  |  | char *  | 
1077  |  | str_cd_iconveh (const char *src,  | 
1078  |  |                 const iconveh_t *cd,  | 
1079  |  |                 enum iconv_ilseq_handler handler)  | 
1080  | 0  | { | 
1081  |  |   /* For most encodings, a trailing NUL byte in the input will be converted  | 
1082  |  |      to a trailing NUL byte in the output.  But not for UTF-7.  So that this  | 
1083  |  |      function is usable for UTF-7, we have to exclude the NUL byte from the  | 
1084  |  |      conversion and add it by hand afterwards.  */  | 
1085  | 0  |   char *result = NULL;  | 
1086  | 0  |   size_t length = 0;  | 
1087  | 0  |   int retval = mem_cd_iconveh_internal (src, strlen (src),  | 
1088  | 0  |                                         cd->cd, cd->cd1, cd->cd2, handler, 1,  | 
1089  | 0  |                                         NULL, &result, &length);  | 
1090  |  | 
  | 
1091  | 0  |   if (retval < 0)  | 
1092  | 0  |     { | 
1093  | 0  |       free (result);  | 
1094  | 0  |       return NULL;  | 
1095  | 0  |     }  | 
1096  |  |  | 
1097  |  |   /* Add the terminating NUL byte.  */  | 
1098  | 0  |   result[length] = '\0';  | 
1099  |  | 
  | 
1100  | 0  |   return result;  | 
1101  | 0  | }  | 
1102  |  |  | 
1103  |  | #endif  | 
1104  |  |  | 
1105  |  | int  | 
1106  |  | mem_iconveh (const char *src, size_t srclen,  | 
1107  |  |              const char *from_codeset, const char *to_codeset,  | 
1108  |  |              enum iconv_ilseq_handler handler,  | 
1109  |  |              size_t *offsets,  | 
1110  |  |              char **resultp, size_t *lengthp)  | 
1111  | 0  | { | 
1112  | 0  |   if (srclen == 0)  | 
1113  | 0  |     { | 
1114  |  |       /* Nothing to convert.  */  | 
1115  | 0  |       *lengthp = 0;  | 
1116  | 0  |       return 0;  | 
1117  | 0  |     }  | 
1118  | 0  |   else if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0)  | 
1119  | 0  |     { | 
1120  | 0  |       char *result;  | 
1121  |  | 
  | 
1122  | 0  |       if (*resultp != NULL && *lengthp >= srclen)  | 
1123  | 0  |         result = *resultp;  | 
1124  | 0  |       else  | 
1125  | 0  |         { | 
1126  | 0  |           result = (char *) malloc (srclen);  | 
1127  | 0  |           if (result == NULL)  | 
1128  | 0  |             { | 
1129  | 0  |               errno = ENOMEM;  | 
1130  | 0  |               return -1;  | 
1131  | 0  |             }  | 
1132  | 0  |         }  | 
1133  | 0  |       memcpy (result, src, srclen);  | 
1134  | 0  |       *resultp = result;  | 
1135  | 0  |       *lengthp = srclen;  | 
1136  | 0  |       return 0;  | 
1137  | 0  |     }  | 
1138  | 0  |   else  | 
1139  | 0  |     { | 
1140  | 0  | #if HAVE_ICONV  | 
1141  | 0  |       iconveh_t cd;  | 
1142  | 0  |       char *result;  | 
1143  | 0  |       size_t length;  | 
1144  | 0  |       int retval;  | 
1145  |  | 
  | 
1146  | 0  |       if (iconveh_open (to_codeset, from_codeset, &cd) < 0)  | 
1147  | 0  |         return -1;  | 
1148  |  |  | 
1149  | 0  |       result = *resultp;  | 
1150  | 0  |       length = *lengthp;  | 
1151  | 0  |       retval = mem_cd_iconveh (src, srclen, &cd, handler, offsets,  | 
1152  | 0  |                                &result, &length);  | 
1153  |  | 
  | 
1154  | 0  |       if (retval < 0)  | 
1155  | 0  |         { | 
1156  |  |           /* Close cd, but preserve the errno from str_cd_iconv.  */  | 
1157  | 0  |           int saved_errno = errno;  | 
1158  | 0  |           iconveh_close (&cd);  | 
1159  | 0  |           errno = saved_errno;  | 
1160  | 0  |         }  | 
1161  | 0  |       else  | 
1162  | 0  |         { | 
1163  | 0  |           if (iconveh_close (&cd) < 0)  | 
1164  | 0  |             { | 
1165  | 0  |               if (result != *resultp)  | 
1166  | 0  |                 free (result);  | 
1167  | 0  |               return -1;  | 
1168  | 0  |             }  | 
1169  | 0  |           *resultp = result;  | 
1170  | 0  |           *lengthp = length;  | 
1171  | 0  |         }  | 
1172  | 0  |       return retval;  | 
1173  |  | #else  | 
1174  |  |       /* This is a different error code than if iconv_open existed but didn't  | 
1175  |  |          support from_codeset and to_codeset, so that the caller can emit  | 
1176  |  |          an error message such as  | 
1177  |  |            "iconv() is not supported. Installing GNU libiconv and  | 
1178  |  |             then reinstalling this package would fix this."  */  | 
1179  |  |       errno = ENOSYS;  | 
1180  |  |       return -1;  | 
1181  |  | #endif  | 
1182  | 0  |     }  | 
1183  | 0  | }  | 
1184  |  |  | 
1185  |  | char *  | 
1186  |  | str_iconveh (const char *src,  | 
1187  |  |              const char *from_codeset, const char *to_codeset,  | 
1188  |  |              enum iconv_ilseq_handler handler)  | 
1189  | 0  | { | 
1190  | 0  |   if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)  | 
1191  | 0  |     { | 
1192  | 0  |       char *result = strdup (src);  | 
1193  |  | 
  | 
1194  | 0  |       if (result == NULL)  | 
1195  | 0  |         errno = ENOMEM;  | 
1196  | 0  |       return result;  | 
1197  | 0  |     }  | 
1198  | 0  |   else  | 
1199  | 0  |     { | 
1200  | 0  | #if HAVE_ICONV  | 
1201  | 0  |       iconveh_t cd;  | 
1202  | 0  |       char *result;  | 
1203  |  | 
  | 
1204  | 0  |       if (iconveh_open (to_codeset, from_codeset, &cd) < 0)  | 
1205  | 0  |         return NULL;  | 
1206  |  |  | 
1207  | 0  |       result = str_cd_iconveh (src, &cd, handler);  | 
1208  |  | 
  | 
1209  | 0  |       if (result == NULL)  | 
1210  | 0  |         { | 
1211  |  |           /* Close cd, but preserve the errno from str_cd_iconv.  */  | 
1212  | 0  |           int saved_errno = errno;  | 
1213  | 0  |           iconveh_close (&cd);  | 
1214  | 0  |           errno = saved_errno;  | 
1215  | 0  |         }  | 
1216  | 0  |       else  | 
1217  | 0  |         { | 
1218  | 0  |           if (iconveh_close (&cd) < 0)  | 
1219  | 0  |             { | 
1220  | 0  |               free (result);  | 
1221  | 0  |               return NULL;  | 
1222  | 0  |             }  | 
1223  | 0  |         }  | 
1224  | 0  |       return result;  | 
1225  |  | #else  | 
1226  |  |       /* This is a different error code than if iconv_open existed but didn't  | 
1227  |  |          support from_codeset and to_codeset, so that the caller can emit  | 
1228  |  |          an error message such as  | 
1229  |  |            "iconv() is not supported. Installing GNU libiconv and  | 
1230  |  |             then reinstalling this package would fix this."  */  | 
1231  |  |       errno = ENOSYS;  | 
1232  |  |       return NULL;  | 
1233  |  | #endif  | 
1234  | 0  |     }  | 
1235  | 0  | }  |