Coverage Report

Created: 2026-03-03 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gnupg/common/utf8conv.c
Line
Count
Source
1
/* utf8conv.c -  UTF8 character set conversion
2
 * Copyright (C) 1994, 1998, 1999, 2000, 2001, 2003, 2006,
3
 *               2008, 2010  Free Software Foundation, Inc.
4
 *
5
 * This file is part of GnuPG.
6
 *
7
 * GnuPG is free software; you can redistribute and/or modify this
8
 * part of GnuPG under the terms of either
9
 *
10
 *   - the GNU Lesser General Public License as published by the Free
11
 *     Software Foundation; either version 3 of the License, or (at
12
 *     your option) any later version.
13
 *
14
 * or
15
 *
16
 *   - the GNU General Public License as published by the Free
17
 *     Software Foundation; either version 2 of the License, or (at
18
 *     your option) any later version.
19
 *
20
 * or both in parallel, as here.
21
 *
22
 * GnuPG is distributed in the hope that it will be useful, but
23
 * WITHOUT ANY WARRANTY; without even the implied warranty of
24
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25
 * General Public License for more details.
26
 *
27
 * You should have received a copies of the GNU General Public License
28
 * and the GNU Lesser General Public License along with this program;
29
 * if not, see <https://www.gnu.org/licenses/>.
30
 */
31
32
#include <config.h>
33
#include <stdlib.h>
34
#include <string.h>
35
#include <stdarg.h>
36
#include <ctype.h>
37
#ifdef HAVE_LANGINFO_CODESET
38
#include <langinfo.h>
39
#endif
40
#include <errno.h>
41
42
#if HAVE_W32_SYSTEM
43
# /* Tell libgpg-error to provide the iconv macros.  */
44
# define GPGRT_ENABLE_W32_ICONV_MACROS 1
45
#elif HAVE_ANDROID_SYSTEM
46
# /* No iconv support.  */
47
#else
48
# include <iconv.h>
49
#endif
50
51
52
#include "util.h"
53
#include "common-defs.h"
54
#include "i18n.h"
55
#include "stringhelp.h"
56
#include "utf8conv.h"
57
58
#ifdef HAVE_W32_SYSTEM
59
#include <windows.h>
60
#endif
61
62
#ifndef MB_LEN_MAX
63
0
#define MB_LEN_MAX 16
64
#endif
65
66
static const char *active_charset_name = "iso-8859-1";
67
static int no_translation;     /* Set to true if we let simply pass through. */
68
static int use_iconv;          /* iconv conversion functions required. */
69
70
71
#ifdef HAVE_ANDROID_SYSTEM
72
/* Android has no iconv support.  The definitions below are
   placeholders which merely allow the rest of this file to build.  */
typedef void *iconv_t;
#define ICONV_CONST

/* Placeholder for iconv_open: ignores TOCODE and FROMCODE and always
   returns the failure value (iconv_t)(-1).  */
static iconv_t
iconv_open (const char *tocode, const char *fromcode)
{
  iconv_t failure = (iconv_t)(-1);

  (void)fromcode;
  (void)tocode;
  return failure;
}

/* Placeholder for iconv: ignores all arguments, converts nothing and
   returns 0.  */
static size_t
iconv (iconv_t cd, char **inbuf, size_t *inbytesleft,
       char **outbuf, size_t *outbytesleft)
{
  size_t nconverted = 0;

  (void)outbytesleft;
  (void)outbuf;
  (void)inbytesleft;
  (void)inbuf;
  (void)cd;
  return nconverted;
}

/* Placeholder for iconv_close: ignores CD and returns 0.  */
static int
iconv_close (iconv_t cd)
{
  (void)cd;

  return 0;
}
102
#endif /*HAVE_ANDROID_SYSTEM*/
103
104
105
/* Error handler for iconv failures. This is needed to not clutter the
106
   output with repeated diagnostics about a missing conversion. */
107
static void
108
handle_iconv_error (const char *to, const char *from, int use_fallback)
109
0
{
110
0
  if (errno == EINVAL)
111
0
    {
112
0
      static int shown1, shown2;
113
0
      int x;
114
115
0
      if (to && !strcmp (to, "utf-8"))
116
0
        {
117
0
          x = shown1;
118
0
          shown1 = 1;
119
0
        }
120
0
      else
121
0
        {
122
0
          x = shown2;
123
0
          shown2 = 1;
124
0
        }
125
126
0
      if (!x)
127
0
        log_info (_("conversion from '%s' to '%s' not available\n"),
128
0
                  from, to);
129
0
    }
130
0
  else
131
0
    {
132
0
      static int shown;
133
134
0
      if (!shown)
135
0
        log_info (_("iconv_open failed: %s\n"), strerror (errno));
136
0
      shown = 1;
137
0
    }
138
139
0
  if (use_fallback)
140
0
    {
141
      /* To avoid further error messages we fallback to UTF-8 for the
142
         native encoding.  Nowadays this seems to be the best bet in
143
         case of errors from iconv or nl_langinfo.  */
144
0
      active_charset_name = "utf-8";
145
0
      no_translation = 1;
146
0
      use_iconv = 0;
147
0
    }
148
0
}
149
150
151
152
int
153
set_native_charset (const char *newset)
154
0
{
155
0
  const char *full_newset;
156
157
0
  if (!newset)
158
0
    {
159
#ifdef HAVE_ANDROID_SYSTEM
160
      newset = "utf-8";
161
#elif defined HAVE_W32_SYSTEM
162
      static char codepage[30];
163
      unsigned int cpno;
164
      const char *aliases;
165
166
      /* We are a console program thus we need to use the
167
         GetConsoleOutputCP function and not the GetACP which
168
         would give the codepage for a GUI program.  Note this is not
169
         a bulletproof detection because GetConsoleCP might return a
170
         different one for console input.  Not sure how to cope with
171
         that.  If the console Code page is not known we fall back to
172
         the system code page.  */
173
      cpno = GetConsoleOutputCP ();
174
      if (!cpno)
175
        cpno = GetACP ();
176
      sprintf (codepage, "CP%u", cpno );
177
      /* Resolve alias.  We use a long string string and not the usual
178
         array to optimize if the code is taken to a DSO.  Taken from
179
         libiconv 1.9.2. */
180
      newset = codepage;
181
      for (aliases = ("CP936"   "\0" "GBK" "\0"
182
                      "CP1361"  "\0" "JOHAB" "\0"
183
                      "CP20127" "\0" "ASCII" "\0"
184
                      "CP20866" "\0" "KOI8-R" "\0"
185
                      "CP21866" "\0" "KOI8-RU" "\0"
186
                      "CP28591" "\0" "ISO-8859-1" "\0"
187
                      "CP28592" "\0" "ISO-8859-2" "\0"
188
                      "CP28593" "\0" "ISO-8859-3" "\0"
189
                      "CP28594" "\0" "ISO-8859-4" "\0"
190
                      "CP28595" "\0" "ISO-8859-5" "\0"
191
                      "CP28596" "\0" "ISO-8859-6" "\0"
192
                      "CP28597" "\0" "ISO-8859-7" "\0"
193
                      "CP28598" "\0" "ISO-8859-8" "\0"
194
                      "CP28599" "\0" "ISO-8859-9" "\0"
195
                      "CP28605" "\0" "ISO-8859-15" "\0"
196
                      "CP65001" "\0" "UTF-8" "\0");
197
           *aliases;
198
           aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
199
        {
200
          if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
201
            {
202
              newset = aliases + strlen (aliases) + 1;
203
              break;
204
            }
205
        }
206
207
#else /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
208
209
0
#ifdef HAVE_LANGINFO_CODESET
210
0
      newset = nl_langinfo (CODESET);
211
#else /*!HAVE_LANGINFO_CODESET*/
212
      /* Try to get the used charset from environment variables.  */
213
      static char codepage[30];
214
      const char *lc, *dot, *mod;
215
216
      strcpy (codepage, "iso-8859-1");
217
      lc = getenv ("LC_ALL");
218
      if (!lc || !*lc)
219
        {
220
          lc = getenv ("LC_CTYPE");
221
          if (!lc || !*lc)
222
            lc = getenv ("LANG");
223
        }
224
      if (lc && *lc)
225
        {
226
          dot = strchr (lc, '.');
227
          if (dot)
228
            {
229
              mod = strchr (++dot, '@');
230
              if (!mod)
231
                mod = dot + strlen (dot);
232
              if (mod - dot < sizeof codepage && dot != mod)
233
                {
234
                  memcpy (codepage, dot, mod - dot);
235
                  codepage [mod - dot] = 0;
236
                }
237
            }
238
        }
239
      newset = codepage;
240
#endif /*!HAVE_LANGINFO_CODESET*/
241
0
#endif /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
242
0
    }
243
244
0
  full_newset = newset;
245
0
  if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3))
246
0
    {
247
0
      newset += 3;
248
0
      if (*newset == '-' || *newset == '_')
249
0
        newset++;
250
0
    }
251
252
  /* Note that we silently assume that plain ASCII is actually meant
253
     as Latin-1.  This makes sense because many Unix system don't have
254
     their locale set up properly and thus would get annoying error
255
     messages and we have to handle all the "bug" reports. Latin-1 has
256
     traditionally been the character set used for 8 bit characters on
257
     Unix systems. */
258
0
  if ( !*newset
259
0
       || !ascii_strcasecmp (newset, "8859-1" )
260
0
       || !ascii_strcasecmp (newset, "646" )
261
0
       || !ascii_strcasecmp (newset, "ASCII" )
262
0
       || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
263
0
       )
264
0
    {
265
0
      active_charset_name = "iso-8859-1";
266
0
      no_translation = 0;
267
0
      use_iconv = 0;
268
0
    }
269
0
  else if ( !ascii_strcasecmp (newset, "utf8" )
270
0
            || !ascii_strcasecmp(newset, "utf-8") )
271
0
    {
272
0
      active_charset_name = "utf-8";
273
0
      no_translation = 1;
274
0
      use_iconv = 0;
275
0
    }
276
0
  else
277
0
    {
278
0
      iconv_t cd;
279
280
0
      cd = iconv_open (full_newset, "utf-8");
281
0
      if (cd == (iconv_t)-1)
282
0
        {
283
0
          handle_iconv_error (full_newset, "utf-8", 0);
284
0
          return -1;
285
0
        }
286
0
      iconv_close (cd);
287
0
      cd = iconv_open ("utf-8", full_newset);
288
0
      if (cd == (iconv_t)-1)
289
0
        {
290
0
          handle_iconv_error ("utf-8", full_newset, 0);
291
0
          return -1;
292
0
        }
293
0
      iconv_close (cd);
294
0
      active_charset_name = full_newset;
295
0
      no_translation = 0;
296
0
      use_iconv = 1;
297
0
    }
298
0
  return 0;
299
0
}
300
301
const char *
302
get_native_charset (void)
303
0
{
304
0
  return active_charset_name;
305
0
}
306
307
/* Return true if the native charset is utf-8.  */
308
int
309
is_native_utf8 (void)
310
0
{
311
0
  return no_translation;
312
0
}
313
314
315
/* Convert string, which is in native encoding to UTF8 and return a
316
   new allocated UTF-8 string.  This function terminates the process
317
   on memory shortage.  */
318
char *
319
native_to_utf8 (const char *orig_string)
320
0
{
321
0
  const unsigned char *string = (const unsigned char *)orig_string;
322
0
  const unsigned char *s;
323
0
  char *buffer;
324
0
  unsigned char *p;
325
0
  size_t length = 0;
326
327
0
  if (no_translation)
328
0
    {
329
      /* Already utf-8 encoded. */
330
0
      buffer = xstrdup (orig_string);
331
0
    }
332
0
  else if (!use_iconv)
333
0
    {
334
      /* For Latin-1 we can avoid the iconv overhead. */
335
0
      for (s = string; *s; s++)
336
0
  {
337
0
    length++;
338
0
    if (*s & 0x80)
339
0
      length++;
340
0
  }
341
0
      buffer = xmalloc (length + 1);
342
0
      for (p = (unsigned char *)buffer, s = string; *s; s++)
343
0
  {
344
0
    if ( (*s & 0x80 ))
345
0
      {
346
0
        *p++ = 0xc0 | ((*s >> 6) & 3);
347
0
        *p++ = 0x80 | (*s & 0x3f);
348
0
      }
349
0
    else
350
0
      *p++ = *s;
351
0
  }
352
0
      *p = 0;
353
0
    }
354
0
  else
355
0
    {
356
      /* Need to use iconv.  */
357
0
      iconv_t cd;
358
0
      const char *inptr;
359
0
      char *outptr;
360
0
      size_t inbytes, outbytes;
361
362
0
      cd = iconv_open ("utf-8", active_charset_name);
363
0
      if (cd == (iconv_t)-1)
364
0
        {
365
0
          handle_iconv_error ("utf-8", active_charset_name, 1);
366
0
          return native_to_utf8 (string);
367
0
        }
368
369
0
      for (s=string; *s; s++ )
370
0
        {
371
0
          length++;
372
0
          if ((*s & 0x80))
373
0
            length += 5; /* We may need up to 6 bytes for the utf8 output. */
374
0
        }
375
0
      buffer = xmalloc (length + 1);
376
377
0
      inptr = string;
378
0
      inbytes = strlen (string);
379
0
      outptr = buffer;
380
0
      outbytes = length;
381
0
      if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
382
0
                  &outptr, &outbytes) == (size_t)-1)
383
0
        {
384
0
          static int shown;
385
386
0
          if (!shown)
387
0
            log_info (_("conversion from '%s' to '%s' failed: %s\n"),
388
0
                      active_charset_name, "utf-8", strerror (errno));
389
0
          shown = 1;
390
          /* We don't do any conversion at all but use the strings as is. */
391
0
          strcpy (buffer, string);
392
0
        }
393
0
      else /* Success.  */
394
0
        {
395
0
          *outptr = 0;
396
          /* We could realloc the buffer now but I doubt that it makes
397
             much sense given that it will get freed anyway soon
398
             after.  */
399
0
        }
400
0
      iconv_close (cd);
401
0
    }
402
0
  return buffer;
403
0
}
404
405
406
407
static char *
408
do_utf8_to_native (const char *string, size_t length, int delim,
409
                   int with_iconv)
410
5.72k
{
411
5.72k
  int nleft;
412
5.72k
  int i;
413
5.72k
  unsigned char encbuf[8];
414
5.72k
  int encidx;
415
5.72k
  const unsigned char *s;
416
5.72k
  size_t n;
417
5.72k
  char *buffer = NULL;
418
5.72k
  char *p = NULL;
419
5.72k
  unsigned long val = 0;
420
5.72k
  size_t slen;
421
5.72k
  int resync = 0;
422
423
  /* First pass (p==NULL): count the extended utf-8 characters.  */
424
  /* Second pass (p!=NULL): create string.  */
425
5.72k
  for (;;)
426
11.4k
    {
427
11.4k
      for (slen = length, nleft = encidx = 0, n = 0,
428
11.4k
             s = (const unsigned char *)string;
429
366k
           slen;
430
355k
     s++, slen--)
431
355k
  {
432
355k
    if (resync)
433
178k
      {
434
178k
        if (!(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)))
435
147k
    {
436
      /* Still invalid. */
437
147k
      if (p)
438
74.0k
        {
439
74.0k
          sprintf (p, "\\x%02x", *s);
440
74.0k
          p += 4;
441
74.0k
        }
442
147k
      n += 4;
443
147k
      continue;
444
147k
    }
445
31.1k
        resync = 0;
446
31.1k
      }
447
207k
    if (!nleft)
448
166k
      {
449
166k
        if (!(*s & 0x80))
450
126k
    {
451
                  /* Plain ascii. */
452
126k
      if ( delim != -1
453
126k
                       && (*s < 0x20 || *s == 0x7f || *s == delim
454
92.4k
                           || (delim && *s == '\\')))
455
33.7k
        {
456
33.7k
          n++;
457
33.7k
          if (p)
458
16.8k
      *p++ = '\\';
459
33.7k
          switch (*s)
460
33.7k
      {
461
1.81k
                        case '\n': n++; if ( p ) *p++ = 'n'; break;
462
1.85k
                        case '\r': n++; if ( p ) *p++ = 'r'; break;
463
1.25k
                        case '\f': n++; if ( p ) *p++ = 'f'; break;
464
3.38k
                        case '\v': n++; if ( p ) *p++ = 'v'; break;
465
1.18k
                        case '\b': n++; if ( p ) *p++ = 'b'; break;
466
9.99k
                        case    0: n++; if ( p ) *p++ = '0'; break;
467
14.2k
      default:
468
14.2k
        n += 3;
469
14.2k
        if (p)
470
7.14k
          {
471
7.14k
            sprintf (p, "x%02x", *s);
472
7.14k
            p += 3;
473
7.14k
          }
474
14.2k
        break;
475
33.7k
      }
476
33.7k
        }
477
92.4k
      else
478
92.4k
        {
479
92.4k
          if (p)
480
46.2k
      *p++ = *s;
481
92.4k
          n++;
482
92.4k
        }
483
126k
    }
484
40.2k
        else if ((*s & 0xe0) == 0xc0) /* 110x xxxx */
485
14.7k
    {
486
14.7k
      val = *s & 0x1f;
487
14.7k
      nleft = 1;
488
14.7k
      encidx = 0;
489
14.7k
      encbuf[encidx++] = *s;
490
14.7k
    }
491
25.5k
        else if ((*s & 0xf0) == 0xe0) /* 1110 xxxx */
492
4.16k
    {
493
4.16k
      val = *s & 0x0f;
494
4.16k
      nleft = 2;
495
4.16k
      encidx = 0;
496
4.16k
      encbuf[encidx++] = *s;
497
4.16k
    }
498
21.3k
        else if ((*s & 0xf8) == 0xf0) /* 1111 0xxx */
499
4.02k
    {
500
4.02k
      val = *s & 0x07;
501
4.02k
      nleft = 3;
502
4.02k
      encidx = 0;
503
4.02k
      encbuf[encidx++] = *s;
504
4.02k
    }
505
17.3k
        else if ((*s & 0xfc) == 0xf8) /* 1111 10xx */
506
1.81k
    {
507
1.81k
      val = *s & 0x03;
508
1.81k
      nleft = 4;
509
1.81k
      encidx = 0;
510
1.81k
      encbuf[encidx++] = *s;
511
1.81k
    }
512
15.5k
        else if ((*s & 0xfe) == 0xfc) /* 1111 110x */
513
2.62k
    {
514
2.62k
      val = *s & 0x01;
515
2.62k
      nleft = 5;
516
2.62k
      encidx = 0;
517
2.62k
      encbuf[encidx++] = *s;
518
2.62k
    }
519
12.9k
        else /* Invalid encoding: print as \xNN. */
520
12.9k
    {
521
12.9k
      if (p)
522
6.10k
        {
523
6.10k
          sprintf (p, "\\x%02x", *s);
524
6.10k
          p += 4;
525
6.10k
        }
526
12.9k
      n += 4;
527
12.9k
      resync = 1;
528
12.9k
    }
529
166k
      }
530
41.3k
    else if (*s < 0x80 || *s >= 0xc0) /* Invalid utf-8 */
531
20.5k
      {
532
20.5k
        if (p)
533
10.2k
    {
534
24.3k
      for (i = 0; i < encidx; i++)
535
14.0k
        {
536
14.0k
          sprintf (p, "\\x%02x", encbuf[i]);
537
14.0k
          p += 4;
538
14.0k
        }
539
10.2k
      sprintf (p, "\\x%02x", *s);
540
10.2k
      p += 4;
541
10.2k
    }
542
20.5k
        n += 4 + 4 * encidx;
543
20.5k
        nleft = 0;
544
20.5k
        encidx = 0;
545
20.5k
        resync = 1;
546
20.5k
      }
547
20.8k
    else
548
20.8k
      {
549
20.8k
        encbuf[encidx++] = *s;
550
20.8k
        val <<= 6;
551
20.8k
        val |= *s & 0x3f;
552
20.8k
        if (!--nleft)  /* Ready. */
553
6.64k
    {
554
6.64k
      if (no_translation)
555
0
        {
556
0
          if (p)
557
0
      {
558
0
        for (i = 0; i < encidx; i++)
559
0
          *p++ = encbuf[i];
560
0
      }
561
0
          n += encidx;
562
0
          encidx = 0;
563
0
        }
564
6.64k
                  else if (with_iconv)
565
0
                    {
566
                      /* Our strategy for using iconv is a bit strange
567
                         but it better keeps compatibility with
568
                         previous versions in regard to how invalid
569
                         encodings are displayed.  What we do is to
570
                         keep the utf-8 as is and have the real
571
                         translation step then at the end.  Yes, I
572
                         know that this is ugly.  However we are short
573
                         of the 1.4 release and for this branch we
574
                         should not mess too much around with iconv
575
                         things.  One reason for this is that we don't
576
                         know enough about non-GNU iconv
577
                         implementation and want to minimize the risk
578
                         of breaking the code on too many platforms.  */
579
0
                        if ( p )
580
0
                          {
581
0
                            for (i=0; i < encidx; i++ )
582
0
                              *p++ = encbuf[i];
583
0
                          }
584
0
                        n += encidx;
585
0
                        encidx = 0;
586
0
                    }
587
6.64k
      else  /* Latin-1 case. */
588
6.64k
                    {
589
6.64k
          if (val >= 0x80 && val < 256)
590
1.08k
      {
591
                          /* We can simply print this character */
592
1.08k
        n++;
593
1.08k
        if (p)
594
543
          *p++ = val;
595
1.08k
      }
596
5.55k
          else
597
5.55k
      {
598
                          /* We do not have a translation: print utf8. */
599
5.55k
        if (p)
600
2.77k
          {
601
11.5k
            for (i = 0; i < encidx; i++)
602
8.81k
        {
603
8.81k
          sprintf (p, "\\x%02x", encbuf[i]);
604
8.81k
          p += 4;
605
8.81k
        }
606
2.77k
          }
607
5.55k
        n += encidx * 4;
608
5.55k
        encidx = 0;
609
5.55k
      }
610
6.64k
        }
611
6.64k
    }
612
613
20.8k
      }
614
207k
  }
615
11.4k
      if (!buffer)
616
5.72k
  {
617
          /* Allocate the buffer after the first pass. */
618
5.72k
    buffer = p = xmalloc (n + 1);
619
5.72k
  }
620
5.72k
      else if (with_iconv)
621
0
        {
622
          /* Note: See above for comments.  */
623
0
          iconv_t cd;
624
0
          const char *inptr;
625
0
          char *outbuf, *outptr;
626
0
          size_t inbytes, outbytes;
627
628
0
          *p = 0;  /* Terminate the buffer. */
629
630
0
          cd = iconv_open (active_charset_name, "utf-8");
631
0
          if (cd == (iconv_t)-1)
632
0
            {
633
0
              handle_iconv_error (active_charset_name, "utf-8", 1);
634
0
              xfree (buffer);
635
0
              return utf8_to_native (string, length, delim);
636
0
            }
637
638
          /* Allocate a new buffer large enough to hold all possible
639
             encodings. */
640
0
          n = p - buffer + 1;
641
0
          inbytes = n - 1;;
642
0
          inptr = buffer;
643
0
          outbytes = n * MB_LEN_MAX;
644
0
          if (outbytes / MB_LEN_MAX != n)
645
0
            BUG (); /* Actually an overflow. */
646
0
          outbuf = outptr = xmalloc (outbytes);
647
0
          if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
648
0
                      &outptr, &outbytes) == (size_t)-1)
649
0
            {
650
0
              static int shown;
651
652
0
              if (!shown)
653
0
                log_info (_("conversion from '%s' to '%s' failed: %s\n"),
654
0
                          "utf-8", active_charset_name, strerror (errno));
655
0
              shown = 1;
656
              /* Didn't worked out.  Try again but without iconv.  */
657
0
              xfree (buffer);
658
0
              buffer = NULL;
659
0
              xfree (outbuf);
660
0
              outbuf = do_utf8_to_native (string, length, delim, 0);
661
0
            }
662
0
            else /* Success.  */
663
0
              {
664
0
                *outptr = 0; /* Make sure it is a string. */
665
                /* We could realloc the buffer now but I doubt that it
666
                   makes much sense given that it will get freed
667
                   anyway soon after.  */
668
0
                xfree (buffer);
669
0
              }
670
0
          iconv_close (cd);
671
0
          return outbuf;
672
0
        }
673
5.72k
      else /* Not using iconv. */
674
5.72k
  {
675
5.72k
    *p = 0; /* Make sure it is a string. */
676
5.72k
    return buffer;
677
5.72k
  }
678
11.4k
    }
679
5.72k
}
680
681
/* Convert string, which is in UTF-8 to native encoding.  Replace
682
   illegal encodings by some "\xnn" and quote all control
683
   characters. A character with value DELIM will always be quoted, it
684
   must be a vanilla ASCII character.  A DELIM value of -1 is special:
685
   it disables all quoting of control characters.  This function
686
   terminates the process on memory shortage.  */
687
char *
688
utf8_to_native (const char *string, size_t length, int delim)
689
5.72k
{
690
5.72k
  return do_utf8_to_native (string, length, delim, use_iconv);
691
5.72k
}
692
693
694
695
696
/* Wrapper function for iconv_open, required for W32 as we dlopen that
697
   library on that system.  */
698
jnlib_iconv_t
699
jnlib_iconv_open (const char *tocode, const char *fromcode)
700
0
{
701
0
  return (jnlib_iconv_t)iconv_open (tocode, fromcode);
702
0
}
703
704
705
/* Wrapper function for iconv, required for W32 as we dlopen that
706
   library on that system.  */
707
size_t
708
jnlib_iconv (jnlib_iconv_t cd,
709
             const char **inbuf, size_t *inbytesleft,
710
             char **outbuf, size_t *outbytesleft)
711
0
{
712
0
  return iconv ((iconv_t)cd, (ICONV_CONST char**)inbuf, inbytesleft,
713
0
                outbuf, outbytesleft);
714
0
}
715
716
/* Wrapper function for iconv_close, required for W32 as we dlopen that
717
   library on that system.  */
718
int
719
jnlib_iconv_close (jnlib_iconv_t cd)
720
0
{
721
0
  return iconv_close ((iconv_t)cd);
722
0
}
723
724
725
#ifdef HAVE_W32_SYSTEM
726
/* Return a malloced string encoded for CODEPAGE from the wide char
   input string STRING.  Caller must free this value.  Returns NULL
   and sets ERRNO on failure.  Calling this function with STRING set
   to NULL is not defined.  */
static char *
wchar_to_cp (const wchar_t *string, unsigned int codepage)
{
  int n;
  char *result;

  /* First call: ask for the required buffer size.  Because the nul
     terminator of STRING is included (length -1) a successful call
     returns at least 1; WideCharToMultiByte returns 0 on failure.  */
  n = WideCharToMultiByte (codepage, 0, string, -1, NULL, 0, NULL, NULL);
  if (n <= 0)
    {
      gpg_err_set_errno (EINVAL);
      return NULL;
    }

  result = xtrymalloc (n+1);
  if (!result)
    return NULL;

  /* Second call: do the actual conversion into RESULT.  */
  n = WideCharToMultiByte (codepage, 0, string, -1, result, n, NULL, NULL);
  if (n <= 0)
    {
      /* Do not return a buffer with undefined content.  */
      xfree (result);
      gpg_err_set_errno (EINVAL);
      result = NULL;
    }
  return result;
}
756
757
758
/* Return a malloced wide char string from a CODEPAGE encoded input
   string STRING.  Caller must free this value.  Returns NULL and sets
   ERRNO on failure.  Calling this function with STRING set to NULL is
   not defined.  */
static wchar_t *
cp_to_wchar (const char *string, unsigned int codepage)
{
  int n;
  size_t nbytes;
  wchar_t *result;

  /* First call: ask for the required number of wide characters.
     Because the nul terminator of STRING is included (length -1) a
     successful call returns at least 1; MultiByteToWideChar returns 0
     on failure.  */
  n = MultiByteToWideChar (codepage, 0, string, -1, NULL, 0);
  if (n <= 0)
    {
      gpg_err_set_errno (EINVAL);
      return NULL;
    }

  /* Compute the allocation size and guard against an overflow of the
     multiplication.  */
  nbytes = (size_t)(n+1) * sizeof(*result);
  if (nbytes / sizeof(*result) != (n+1))
    {
      gpg_err_set_errno (ENOMEM);
      return NULL;
    }
  result = xtrymalloc (nbytes);
  if (!result)
    return NULL;

  /* Second call: do the actual conversion into RESULT.  */
  n = MultiByteToWideChar (codepage, 0, string, -1, result, n);
  if (n <= 0)
    {
      /* Do not return a buffer with undefined content.  */
      xfree (result);
      gpg_err_set_errno (EINVAL);
      result = NULL;
    }
  return result;
}
795
796
797
/* Get the current codepage as used by wchar_to_native and
 * native_to_wchar.  The console output code page is preferred; if it
 * is not known the system code page is used.  A non-zero result is
 * cached for all further calls.  Note that these functions
 * intentionally do not use iconv based conversion machinery.  */
static unsigned int
get_w32_codepage (void)
{
  static unsigned int cached_cp;

  if (cached_cp)
    return cached_cp;

  cached_cp = GetConsoleOutputCP ();
  if (!cached_cp)
    cached_cp = GetACP ();
  return cached_cp;
}
813
814
/* Convert the wide char input string STRING to the code page
 * returned by get_w32_codepage and return it as a malloced string.
 * Caller must free this value.  Returns NULL and sets ERRNO on
 * failure.  Calling this function with STRING set to NULL is not
 * defined.  */
char *
wchar_to_native (const wchar_t *string)
{
  unsigned int codepage = get_w32_codepage ();

  return wchar_to_cp (string, codepage);
}
823
824
825
/* Convert the input string STRING, which is encoded in the code page
 * returned by get_w32_codepage, to a malloced wide char string.
 * Caller must free this value.  Returns NULL and sets ERRNO on
 * failure.  Calling this function with STRING set to NULL is not
 * defined.  */
wchar_t *
native_to_wchar (const char *string)
{
  unsigned int codepage = get_w32_codepage ();

  return cp_to_wchar (string, codepage);
}
834
835
836
/* Return a malloced string encoded in UTF-8 from the wide char input
837
 * string STRING.  Caller must free this value.  Returns NULL and sets
838
 * ERRNO on failure.  Calling this function with STRING set to NULL is
839
 * not defined.  */
840
char *
841
wchar_to_utf8 (const wchar_t *string)
842
{
843
  return wchar_to_cp (string, CP_UTF8);
844
}
845
846
847
/* Return a malloced wide char string from an UTF-8 encoded input
848
 * string STRING.  Caller must free this value.  Returns NULL and sets
849
 * ERRNO on failure.  Calling this function with STRING set to NULL is
850
 * not defined.  */
851
wchar_t *
852
utf8_to_wchar (const char *string)
853
{
854
  return cp_to_wchar (string, CP_UTF8);
855
}
856
857
#endif /*HAVE_W32_SYSTEM*/