Coverage Report

Created: 2026-02-26 06:54

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gnupg/common/utf8conv.c
Line
Count
Source
1
/* utf8conv.c -  UTF8 character set conversion
2
 * Copyright (C) 1994, 1998, 1999, 2000, 2001, 2003, 2006,
3
 *               2008, 2010  Free Software Foundation, Inc.
4
 *
5
 * This file is part of GnuPG.
6
 *
7
 * GnuPG is free software; you can redistribute and/or modify this
8
 * part of GnuPG under the terms of either
9
 *
10
 *   - the GNU Lesser General Public License as published by the Free
11
 *     Software Foundation; either version 3 of the License, or (at
12
 *     your option) any later version.
13
 *
14
 * or
15
 *
16
 *   - the GNU General Public License as published by the Free
17
 *     Software Foundation; either version 2 of the License, or (at
18
 *     your option) any later version.
19
 *
20
 * or both in parallel, as here.
21
 *
22
 * GnuPG is distributed in the hope that it will be useful, but
23
 * WITHOUT ANY WARRANTY; without even the implied warranty of
24
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25
 * General Public License for more details.
26
 *
27
 * You should have received copies of the GNU General Public License
28
 * and the GNU Lesser General Public License along with this program;
29
 * if not, see <https://www.gnu.org/licenses/>.
30
 */
31
32
#include <config.h>
33
#include <stdlib.h>
34
#include <string.h>
35
#include <stdarg.h>
36
#include <ctype.h>
37
#ifdef HAVE_LANGINFO_CODESET
38
#include <langinfo.h>
39
#endif
40
#include <errno.h>
41
42
#if HAVE_W32_SYSTEM
43
# /* Tell libgpg-error to provide the iconv macros.  */
44
# define GPGRT_ENABLE_W32_ICONV_MACROS 1
45
#elif HAVE_ANDROID_SYSTEM
46
# /* No iconv support.  */
47
#else
48
# include <iconv.h>
49
#endif
50
51
52
#include "util.h"
53
#include "common-defs.h"
54
#include "i18n.h"
55
#include "stringhelp.h"
56
#include "utf8conv.h"
57
58
#ifdef HAVE_W32_SYSTEM
59
#include <windows.h>
60
#endif
61
62
#ifndef MB_LEN_MAX
63
0
#define MB_LEN_MAX 16
64
#endif
65
66
/* Name of the currently active native character set.  It starts out
   as Latin-1 and is changed by set_native_charset or by the fallback
   in handle_iconv_error.  */
static const char *active_charset_name = "iso-8859-1";
67
static int no_translation;     /* Set to true if we let simply pass through. */
68
static int use_iconv;          /* iconv conversion functions required. */
69
70
71
#ifdef HAVE_ANDROID_SYSTEM
72
/* Fake stuff to get things building.  */
73
typedef void *iconv_t;
74
#define ICONV_CONST
75
76
static iconv_t
77
iconv_open (const char *tocode, const char *fromcode)
78
{
79
  (void)tocode;
80
  (void)fromcode;
81
  return (iconv_t)(-1);
82
}
83
84
static size_t
85
iconv (iconv_t cd, char **inbuf, size_t *inbytesleft,
86
       char **outbuf, size_t *outbytesleft)
87
{
88
  (void)cd;
89
  (void)inbuf;
90
  (void)inbytesleft;
91
  (void)outbuf;
92
  (void)outbytesleft;
93
  return (size_t)(0);
94
}
95
96
static int
97
iconv_close (iconv_t cd)
98
{
99
  (void)cd;
100
  return 0;
101
}
102
#endif /*HAVE_ANDROID_SYSTEM*/
103
104
105
/* Error handler for iconv failures. This is needed to not clutter the
106
   output with repeated diagnostics about a missing conversion. */
107
static void
108
handle_iconv_error (const char *to, const char *from, int use_fallback)
109
0
{
110
0
  if (errno == EINVAL)
111
0
    {
112
0
      static int shown1, shown2;
113
0
      int x;
114
115
0
      if (to && !strcmp (to, "utf-8"))
116
0
        {
117
0
          x = shown1;
118
0
          shown1 = 1;
119
0
        }
120
0
      else
121
0
        {
122
0
          x = shown2;
123
0
          shown2 = 1;
124
0
        }
125
126
0
      if (!x)
127
0
        log_info (_("conversion from '%s' to '%s' not available\n"),
128
0
                  from, to);
129
0
    }
130
0
  else
131
0
    {
132
0
      static int shown;
133
134
0
      if (!shown)
135
0
        log_info (_("iconv_open failed: %s\n"), strerror (errno));
136
0
      shown = 1;
137
0
    }
138
139
0
  if (use_fallback)
140
0
    {
141
      /* To avoid further error messages we fallback to UTF-8 for the
142
         native encoding.  Nowadays this seems to be the best bet in
143
         case of errors from iconv or nl_langinfo.  */
144
0
      active_charset_name = "utf-8";
145
0
      no_translation = 1;
146
0
      use_iconv = 0;
147
0
    }
148
0
}
149
150
151
152
int
153
set_native_charset (const char *newset)
154
0
{
155
0
  const char *full_newset;
156
157
0
  if (!newset)
158
0
    {
159
#ifdef HAVE_ANDROID_SYSTEM
160
      newset = "utf-8";
161
#elif defined HAVE_W32_SYSTEM
162
      static char codepage[30];
163
      unsigned int cpno;
164
      const char *aliases;
165
166
      /* We are a console program thus we need to use the
167
         GetConsoleOutputCP function and not the GetACP which
168
         would give the codepage for a GUI program.  Note this is not
169
         a bulletproof detection because GetConsoleCP might return a
170
         different one for console input.  Not sure how to cope with
171
         that.  If the console Code page is not known we fall back to
172
         the system code page.  */
173
      cpno = GetConsoleOutputCP ();
174
      if (!cpno)
175
        cpno = GetACP ();
176
      sprintf (codepage, "CP%u", cpno );
177
      /* Resolve alias.  We use a long string string and not the usual
178
         array to optimize if the code is taken to a DSO.  Taken from
179
         libiconv 1.9.2. */
180
      newset = codepage;
181
      for (aliases = ("CP936"   "\0" "GBK" "\0"
182
                      "CP1361"  "\0" "JOHAB" "\0"
183
                      "CP20127" "\0" "ASCII" "\0"
184
                      "CP20866" "\0" "KOI8-R" "\0"
185
                      "CP21866" "\0" "KOI8-RU" "\0"
186
                      "CP28591" "\0" "ISO-8859-1" "\0"
187
                      "CP28592" "\0" "ISO-8859-2" "\0"
188
                      "CP28593" "\0" "ISO-8859-3" "\0"
189
                      "CP28594" "\0" "ISO-8859-4" "\0"
190
                      "CP28595" "\0" "ISO-8859-5" "\0"
191
                      "CP28596" "\0" "ISO-8859-6" "\0"
192
                      "CP28597" "\0" "ISO-8859-7" "\0"
193
                      "CP28598" "\0" "ISO-8859-8" "\0"
194
                      "CP28599" "\0" "ISO-8859-9" "\0"
195
                      "CP28605" "\0" "ISO-8859-15" "\0"
196
                      "CP65001" "\0" "UTF-8" "\0");
197
           *aliases;
198
           aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
199
        {
200
          if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
201
            {
202
              newset = aliases + strlen (aliases) + 1;
203
              break;
204
            }
205
        }
206
207
#else /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
208
209
0
#ifdef HAVE_LANGINFO_CODESET
210
0
      newset = nl_langinfo (CODESET);
211
#else /*!HAVE_LANGINFO_CODESET*/
212
      /* Try to get the used charset from environment variables.  */
213
      static char codepage[30];
214
      const char *lc, *dot, *mod;
215
216
      strcpy (codepage, "iso-8859-1");
217
      lc = getenv ("LC_ALL");
218
      if (!lc || !*lc)
219
        {
220
          lc = getenv ("LC_CTYPE");
221
          if (!lc || !*lc)
222
            lc = getenv ("LANG");
223
        }
224
      if (lc && *lc)
225
        {
226
          dot = strchr (lc, '.');
227
          if (dot)
228
            {
229
              mod = strchr (++dot, '@');
230
              if (!mod)
231
                mod = dot + strlen (dot);
232
              if (mod - dot < sizeof codepage && dot != mod)
233
                {
234
                  memcpy (codepage, dot, mod - dot);
235
                  codepage [mod - dot] = 0;
236
                }
237
            }
238
        }
239
      newset = codepage;
240
#endif /*!HAVE_LANGINFO_CODESET*/
241
0
#endif /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
242
0
    }
243
244
0
  full_newset = newset;
245
0
  if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3))
246
0
    {
247
0
      newset += 3;
248
0
      if (*newset == '-' || *newset == '_')
249
0
        newset++;
250
0
    }
251
252
  /* Note that we silently assume that plain ASCII is actually meant
253
     as Latin-1.  This makes sense because many Unix system don't have
254
     their locale set up properly and thus would get annoying error
255
     messages and we have to handle all the "bug" reports. Latin-1 has
256
     traditionally been the character set used for 8 bit characters on
257
     Unix systems. */
258
0
  if ( !*newset
259
0
       || !ascii_strcasecmp (newset, "8859-1" )
260
0
       || !ascii_strcasecmp (newset, "646" )
261
0
       || !ascii_strcasecmp (newset, "ASCII" )
262
0
       || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
263
0
       )
264
0
    {
265
0
      active_charset_name = "iso-8859-1";
266
0
      no_translation = 0;
267
0
      use_iconv = 0;
268
0
    }
269
0
  else if ( !ascii_strcasecmp (newset, "utf8" )
270
0
            || !ascii_strcasecmp(newset, "utf-8") )
271
0
    {
272
0
      active_charset_name = "utf-8";
273
0
      no_translation = 1;
274
0
      use_iconv = 0;
275
0
    }
276
0
  else
277
0
    {
278
0
      iconv_t cd;
279
280
0
      cd = iconv_open (full_newset, "utf-8");
281
0
      if (cd == (iconv_t)-1)
282
0
        {
283
0
          handle_iconv_error (full_newset, "utf-8", 0);
284
0
          return -1;
285
0
        }
286
0
      iconv_close (cd);
287
0
      cd = iconv_open ("utf-8", full_newset);
288
0
      if (cd == (iconv_t)-1)
289
0
        {
290
0
          handle_iconv_error ("utf-8", full_newset, 0);
291
0
          return -1;
292
0
        }
293
0
      iconv_close (cd);
294
0
      active_charset_name = full_newset;
295
0
      no_translation = 0;
296
0
      use_iconv = 1;
297
0
    }
298
0
  return 0;
299
0
}
300
301
const char *
302
get_native_charset (void)
303
0
{
304
0
  return active_charset_name;
305
0
}
306
307
/* Return true if the native charset is utf-8.  */
308
int
309
is_native_utf8 (void)
310
0
{
311
0
  return no_translation;
312
0
}
313
314
315
/* Convert string, which is in native encoding to UTF8 and return a
316
   new allocated UTF-8 string.  This function terminates the process
317
   on memory shortage.  */
318
char *
319
native_to_utf8 (const char *orig_string)
320
0
{
321
0
  const unsigned char *string = (const unsigned char *)orig_string;
322
0
  const unsigned char *s;
323
0
  char *buffer;
324
0
  unsigned char *p;
325
0
  size_t length = 0;
326
327
0
  if (no_translation)
328
0
    {
329
      /* Already utf-8 encoded. */
330
0
      buffer = xstrdup (orig_string);
331
0
    }
332
0
  else if (!use_iconv)
333
0
    {
334
      /* For Latin-1 we can avoid the iconv overhead. */
335
0
      for (s = string; *s; s++)
336
0
  {
337
0
    length++;
338
0
    if (*s & 0x80)
339
0
      length++;
340
0
  }
341
0
      buffer = xmalloc (length + 1);
342
0
      for (p = (unsigned char *)buffer, s = string; *s; s++)
343
0
  {
344
0
    if ( (*s & 0x80 ))
345
0
      {
346
0
        *p++ = 0xc0 | ((*s >> 6) & 3);
347
0
        *p++ = 0x80 | (*s & 0x3f);
348
0
      }
349
0
    else
350
0
      *p++ = *s;
351
0
  }
352
0
      *p = 0;
353
0
    }
354
0
  else
355
0
    {
356
      /* Need to use iconv.  */
357
0
      iconv_t cd;
358
0
      const char *inptr;
359
0
      char *outptr;
360
0
      size_t inbytes, outbytes;
361
362
0
      cd = iconv_open ("utf-8", active_charset_name);
363
0
      if (cd == (iconv_t)-1)
364
0
        {
365
0
          handle_iconv_error ("utf-8", active_charset_name, 1);
366
0
          return native_to_utf8 (string);
367
0
        }
368
369
0
      for (s=string; *s; s++ )
370
0
        {
371
0
          length++;
372
0
          if ((*s & 0x80))
373
0
            length += 5; /* We may need up to 6 bytes for the utf8 output. */
374
0
        }
375
0
      buffer = xmalloc (length + 1);
376
377
0
      inptr = string;
378
0
      inbytes = strlen (string);
379
0
      outptr = buffer;
380
0
      outbytes = length;
381
0
      if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
382
0
                  &outptr, &outbytes) == (size_t)-1)
383
0
        {
384
0
          static int shown;
385
386
0
          if (!shown)
387
0
            log_info (_("conversion from '%s' to '%s' failed: %s\n"),
388
0
                      active_charset_name, "utf-8", strerror (errno));
389
0
          shown = 1;
390
          /* We don't do any conversion at all but use the strings as is. */
391
0
          strcpy (buffer, string);
392
0
        }
393
0
      else /* Success.  */
394
0
        {
395
0
          *outptr = 0;
396
          /* We could realloc the buffer now but I doubt that it makes
397
             much sense given that it will get freed anyway soon
398
             after.  */
399
0
        }
400
0
      iconv_close (cd);
401
0
    }
402
0
  return buffer;
403
0
}
404
405
406
407
static char *
408
do_utf8_to_native (const char *string, size_t length, int delim,
409
                   int with_iconv)
410
5.66k
{
411
5.66k
  int nleft;
412
5.66k
  int i;
413
5.66k
  unsigned char encbuf[8];
414
5.66k
  int encidx;
415
5.66k
  const unsigned char *s;
416
5.66k
  size_t n;
417
5.66k
  char *buffer = NULL;
418
5.66k
  char *p = NULL;
419
5.66k
  unsigned long val = 0;
420
5.66k
  size_t slen;
421
5.66k
  int resync = 0;
422
423
  /* First pass (p==NULL): count the extended utf-8 characters.  */
424
  /* Second pass (p!=NULL): create string.  */
425
5.66k
  for (;;)
426
11.3k
    {
427
11.3k
      for (slen = length, nleft = encidx = 0, n = 0,
428
11.3k
             s = (const unsigned char *)string;
429
358k
           slen;
430
347k
     s++, slen--)
431
347k
  {
432
347k
    if (resync)
433
176k
      {
434
176k
        if (!(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)))
435
146k
    {
436
      /* Still invalid. */
437
146k
      if (p)
438
73.7k
        {
439
73.7k
          sprintf (p, "\\x%02x", *s);
440
73.7k
          p += 4;
441
73.7k
        }
442
146k
      n += 4;
443
146k
      continue;
444
146k
    }
445
29.8k
        resync = 0;
446
29.8k
      }
447
200k
    if (!nleft)
448
156k
      {
449
156k
        if (!(*s & 0x80))
450
116k
    {
451
                  /* Plain ascii. */
452
116k
      if ( delim != -1
453
116k
                       && (*s < 0x20 || *s == 0x7f || *s == delim
454
88.4k
                           || (delim && *s == '\\')))
455
28.0k
        {
456
28.0k
          n++;
457
28.0k
          if (p)
458
14.0k
      *p++ = '\\';
459
28.0k
          switch (*s)
460
28.0k
      {
461
1.49k
                        case '\n': n++; if ( p ) *p++ = 'n'; break;
462
1.63k
                        case '\r': n++; if ( p ) *p++ = 'r'; break;
463
1.24k
                        case '\f': n++; if ( p ) *p++ = 'f'; break;
464
2.41k
                        case '\v': n++; if ( p ) *p++ = 'v'; break;
465
830
                        case '\b': n++; if ( p ) *p++ = 'b'; break;
466
7.09k
                        case    0: n++; if ( p ) *p++ = '0'; break;
467
13.3k
      default:
468
13.3k
        n += 3;
469
13.3k
        if (p)
470
6.68k
          {
471
6.68k
            sprintf (p, "x%02x", *s);
472
6.68k
            p += 3;
473
6.68k
          }
474
13.3k
        break;
475
28.0k
      }
476
28.0k
        }
477
88.4k
      else
478
88.4k
        {
479
88.4k
          if (p)
480
44.2k
      *p++ = *s;
481
88.4k
          n++;
482
88.4k
        }
483
116k
    }
484
39.6k
        else if ((*s & 0xe0) == 0xc0) /* 110x xxxx */
485
15.5k
    {
486
15.5k
      val = *s & 0x1f;
487
15.5k
      nleft = 1;
488
15.5k
      encidx = 0;
489
15.5k
      encbuf[encidx++] = *s;
490
15.5k
    }
491
24.0k
        else if ((*s & 0xf0) == 0xe0) /* 1110 xxxx */
492
4.31k
    {
493
4.31k
      val = *s & 0x0f;
494
4.31k
      nleft = 2;
495
4.31k
      encidx = 0;
496
4.31k
      encbuf[encidx++] = *s;
497
4.31k
    }
498
19.7k
        else if ((*s & 0xf8) == 0xf0) /* 1111 0xxx */
499
3.94k
    {
500
3.94k
      val = *s & 0x07;
501
3.94k
      nleft = 3;
502
3.94k
      encidx = 0;
503
3.94k
      encbuf[encidx++] = *s;
504
3.94k
    }
505
15.8k
        else if ((*s & 0xfc) == 0xf8) /* 1111 10xx */
506
2.21k
    {
507
2.21k
      val = *s & 0x03;
508
2.21k
      nleft = 4;
509
2.21k
      encidx = 0;
510
2.21k
      encbuf[encidx++] = *s;
511
2.21k
    }
512
13.6k
        else if ((*s & 0xfe) == 0xfc) /* 1111 110x */
513
2.80k
    {
514
2.80k
      val = *s & 0x01;
515
2.80k
      nleft = 5;
516
2.80k
      encidx = 0;
517
2.80k
      encbuf[encidx++] = *s;
518
2.80k
    }
519
10.8k
        else /* Invalid encoding: print as \xNN. */
520
10.8k
    {
521
10.8k
      if (p)
522
5.08k
        {
523
5.08k
          sprintf (p, "\\x%02x", *s);
524
5.08k
          p += 4;
525
5.08k
        }
526
10.8k
      n += 4;
527
10.8k
      resync = 1;
528
10.8k
    }
529
156k
      }
530
44.1k
    else if (*s < 0x80 || *s >= 0xc0) /* Invalid utf-8 */
531
21.4k
      {
532
21.4k
        if (p)
533
10.7k
    {
534
25.4k
      for (i = 0; i < encidx; i++)
535
14.7k
        {
536
14.7k
          sprintf (p, "\\x%02x", encbuf[i]);
537
14.7k
          p += 4;
538
14.7k
        }
539
10.7k
      sprintf (p, "\\x%02x", *s);
540
10.7k
      p += 4;
541
10.7k
    }
542
21.4k
        n += 4 + 4 * encidx;
543
21.4k
        nleft = 0;
544
21.4k
        encidx = 0;
545
21.4k
        resync = 1;
546
21.4k
      }
547
22.6k
    else
548
22.6k
      {
549
22.6k
        encbuf[encidx++] = *s;
550
22.6k
        val <<= 6;
551
22.6k
        val |= *s & 0x3f;
552
22.6k
        if (!--nleft)  /* Ready. */
553
7.23k
    {
554
7.23k
      if (no_translation)
555
0
        {
556
0
          if (p)
557
0
      {
558
0
        for (i = 0; i < encidx; i++)
559
0
          *p++ = encbuf[i];
560
0
      }
561
0
          n += encidx;
562
0
          encidx = 0;
563
0
        }
564
7.23k
                  else if (with_iconv)
565
0
                    {
566
                      /* Our strategy for using iconv is a bit strange
567
                         but it better keeps compatibility with
568
                         previous versions in regard to how invalid
569
                         encodings are displayed.  What we do is to
570
                         keep the utf-8 as is and have the real
571
                         translation step then at the end.  Yes, I
572
                         know that this is ugly.  However we are short
573
                         of the 1.4 release and for this branch we
574
                         should not mess too much around with iconv
575
                         things.  One reason for this is that we don't
576
                         know enough about non-GNU iconv
577
                         implementation and want to minimize the risk
578
                         of breaking the code on too many platforms.  */
579
0
                        if ( p )
580
0
                          {
581
0
                            for (i=0; i < encidx; i++ )
582
0
                              *p++ = encbuf[i];
583
0
                          }
584
0
                        n += encidx;
585
0
                        encidx = 0;
586
0
                    }
587
7.23k
      else  /* Latin-1 case. */
588
7.23k
                    {
589
7.23k
          if (val >= 0x80 && val < 256)
590
1.16k
      {
591
                          /* We can simply print this character */
592
1.16k
        n++;
593
1.16k
        if (p)
594
580
          *p++ = val;
595
1.16k
      }
596
6.07k
          else
597
6.07k
      {
598
                          /* We do not have a translation: print utf8. */
599
6.07k
        if (p)
600
3.03k
          {
601
12.8k
            for (i = 0; i < encidx; i++)
602
9.77k
        {
603
9.77k
          sprintf (p, "\\x%02x", encbuf[i]);
604
9.77k
          p += 4;
605
9.77k
        }
606
3.03k
          }
607
6.07k
        n += encidx * 4;
608
6.07k
        encidx = 0;
609
6.07k
      }
610
7.23k
        }
611
7.23k
    }
612
613
22.6k
      }
614
200k
  }
615
11.3k
      if (!buffer)
616
5.66k
  {
617
          /* Allocate the buffer after the first pass. */
618
5.66k
    buffer = p = xmalloc (n + 1);
619
5.66k
  }
620
5.66k
      else if (with_iconv)
621
0
        {
622
          /* Note: See above for comments.  */
623
0
          iconv_t cd;
624
0
          const char *inptr;
625
0
          char *outbuf, *outptr;
626
0
          size_t inbytes, outbytes;
627
628
0
          *p = 0;  /* Terminate the buffer. */
629
630
0
          cd = iconv_open (active_charset_name, "utf-8");
631
0
          if (cd == (iconv_t)-1)
632
0
            {
633
0
              handle_iconv_error (active_charset_name, "utf-8", 1);
634
0
              xfree (buffer);
635
0
              return utf8_to_native (string, length, delim);
636
0
            }
637
638
          /* Allocate a new buffer large enough to hold all possible
639
             encodings. */
640
0
          n = p - buffer + 1;
641
0
          inbytes = n - 1;;
642
0
          inptr = buffer;
643
0
          outbytes = n * MB_LEN_MAX;
644
0
          if (outbytes / MB_LEN_MAX != n)
645
0
            BUG (); /* Actually an overflow. */
646
0
          outbuf = outptr = xmalloc (outbytes);
647
0
          if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
648
0
                      &outptr, &outbytes) == (size_t)-1)
649
0
            {
650
0
              static int shown;
651
652
0
              if (!shown)
653
0
                log_info (_("conversion from '%s' to '%s' failed: %s\n"),
654
0
                          "utf-8", active_charset_name, strerror (errno));
655
0
              shown = 1;
656
              /* Didn't worked out.  Try again but without iconv.  */
657
0
              xfree (buffer);
658
0
              buffer = NULL;
659
0
              xfree (outbuf);
660
0
              outbuf = do_utf8_to_native (string, length, delim, 0);
661
0
            }
662
0
            else /* Success.  */
663
0
              {
664
0
                *outptr = 0; /* Make sure it is a string. */
665
                /* We could realloc the buffer now but I doubt that it
666
                   makes much sense given that it will get freed
667
                   anyway soon after.  */
668
0
                xfree (buffer);
669
0
              }
670
0
          iconv_close (cd);
671
0
          return outbuf;
672
0
        }
673
5.66k
      else /* Not using iconv. */
674
5.66k
  {
675
5.66k
    *p = 0; /* Make sure it is a string. */
676
5.66k
    return buffer;
677
5.66k
  }
678
11.3k
    }
679
5.66k
}
680
681
/* Convert string, which is in UTF-8 to native encoding.  Replace
682
   illegal encodings by some "\xnn" and quote all control
683
   characters. A character with value DELIM will always be quoted, it
684
   must be a vanilla ASCII character.  A DELIM value of -1 is special:
685
   it disables all quoting of control characters.  This function
686
   terminates the process on memory shortage.  */
687
char *
688
utf8_to_native (const char *string, size_t length, int delim)
689
5.66k
{
690
5.66k
  return do_utf8_to_native (string, length, delim, use_iconv);
691
5.66k
}
692
693
694
695
696
/* Wrapper function for iconv_open, required for W32 as we dlopen that
697
   library on that system.  */
698
jnlib_iconv_t
699
jnlib_iconv_open (const char *tocode, const char *fromcode)
700
0
{
701
0
  return (jnlib_iconv_t)iconv_open (tocode, fromcode);
702
0
}
703
704
705
/* Wrapper function for iconv, required for W32 as we dlopen that
706
   library on that system.  */
707
size_t
708
jnlib_iconv (jnlib_iconv_t cd,
709
             const char **inbuf, size_t *inbytesleft,
710
             char **outbuf, size_t *outbytesleft)
711
0
{
712
0
  return iconv ((iconv_t)cd, (ICONV_CONST char**)inbuf, inbytesleft,
713
0
                outbuf, outbytesleft);
714
0
}
715
716
/* Wrapper function for iconv_close, required for W32 as we dlopen that
717
   library on that system.  */
718
int
719
jnlib_iconv_close (jnlib_iconv_t cd)
720
0
{
721
0
  return iconv_close ((iconv_t)cd);
722
0
}
723
724
725
#ifdef HAVE_W32_SYSTEM
726
/* Return a malloced string encoded for CODEPAGE from the wide char input
   string STRING.  Caller must free this value.  Returns NULL and sets
   ERRNO on failure.  Calling this function with STRING set to NULL is
   not defined.  */
static char *
wchar_to_cp (const wchar_t *string, unsigned int codepage)
{
  int n;
  char *result;

  /* First ask for the required buffer size.  WideCharToMultiByte
     returns 0 on failure; because the terminating Nul is included in
     the conversion a successful call returns at least 1.  */
  n = WideCharToMultiByte (codepage, 0, string, -1, NULL, 0, NULL, NULL);
  if (n <= 0)
    {
      gpg_err_set_errno (EINVAL);
      return NULL;
    }

  result = xtrymalloc (n+1);
  if (!result)
    return NULL;

  /* Now do the actual conversion.  Again 0 indicates a failure.  */
  n = WideCharToMultiByte (codepage, 0, string, -1, result, n, NULL, NULL);
  if (n <= 0)
    {
      xfree (result);
      gpg_err_set_errno (EINVAL);
      result = NULL;
    }
  return result;
}
756
757
758
/* Return a malloced wide char string from a CODEPAGE encoded input
   string STRING.  Caller must free this value.  Returns NULL and sets
   ERRNO on failure.  Calling this function with STRING set to NULL is
   not defined.  */
static wchar_t *
cp_to_wchar (const char *string, unsigned int codepage)
{
  int n;
  size_t nbytes;
  wchar_t *result;

  /* First ask for the required number of wide characters.
     MultiByteToWideChar returns 0 on failure; because the terminating
     Nul is included in the conversion a successful call returns at
     least 1.  */
  n = MultiByteToWideChar (codepage, 0, string, -1, NULL, 0);
  if (n <= 0)
    {
      gpg_err_set_errno (EINVAL);
      return NULL;
    }

  /* Compute the allocation size and detect an overflow of the
     multiplication.  */
  nbytes = (size_t)(n+1) * sizeof(*result);
  if (nbytes / sizeof(*result) != (n+1))
    {
      gpg_err_set_errno (ENOMEM);
      return NULL;
    }
  result = xtrymalloc (nbytes);
  if (!result)
    return NULL;

  /* Now do the actual conversion.  Again 0 indicates a failure.  */
  n = MultiByteToWideChar (codepage, 0, string, -1, result, n);
  if (n <= 0)
    {
      xfree (result);
      gpg_err_set_errno (EINVAL);
      result = NULL;
    }
  return result;
}
795
796
797
/* Return the codepage used by wchar_to_native and native_to_wchar.
 * The console output codepage is preferred; if it is not known the
 * system codepage is used instead.  The value is determined on the
 * first call and remembered for all further calls.  Note that these
 * functions intentionally do not use the iconv based conversion
 * machinery.  */
static unsigned int
get_w32_codepage (void)
{
  static unsigned int cached;

  if (cached)
    return cached;

  cached = GetConsoleOutputCP ();
  if (!cached)
    cached = GetACP ();
  return cached;
}
813
814
/* Convert the wide char string STRING to the active code page and
 * return the result as a malloced string which the caller must free.
 * Returns NULL and sets ERRNO on failure.  Calling this function with
 * STRING set to NULL is not defined.  */
char *
wchar_to_native (const wchar_t *string)
{
  unsigned int codepage = get_w32_codepage ();

  return wchar_to_cp (string, codepage);
}
823
824
825
/* Convert the native encoded string STRING to a malloced wide char
 * string which the caller must free.  Returns NULL and sets ERRNO on
 * failure.  Calling this function with STRING set to NULL is not
 * defined.  */
wchar_t *
native_to_wchar (const char *string)
{
  unsigned int codepage = get_w32_codepage ();

  return cp_to_wchar (string, codepage);
}
834
835
836
/* Return a malloced string encoded in UTF-8 from the wide char input
837
 * string STRING.  Caller must free this value.  Returns NULL and sets
838
 * ERRNO on failure.  Calling this function with STRING set to NULL is
839
 * not defined.  */
840
char *
841
wchar_to_utf8 (const wchar_t *string)
842
{
843
  return wchar_to_cp (string, CP_UTF8);
844
}
845
846
847
/* Return a malloced wide char string from an UTF-8 encoded input
848
 * string STRING.  Caller must free this value.  Returns NULL and sets
849
 * ERRNO on failure.  Calling this function with STRING set to NULL is
850
 * not defined.  */
851
wchar_t *
852
utf8_to_wchar (const char *string)
853
{
854
  return cp_to_wchar (string, CP_UTF8);
855
}
856
857
#endif /*HAVE_W32_SYSTEM*/