Coverage Report

Created: 2026-03-03 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gnupg/common/utf8conv.c
Line
Count
Source
1
/* utf8conv.c -  UTF8 character set conversion
2
 * Copyright (C) 1994, 1998, 1999, 2000, 2001, 2003, 2006,
3
 *               2008, 2010  Free Software Foundation, Inc.
4
 *
5
 * This file is part of GnuPG.
6
 *
7
 * GnuPG is free software; you can redistribute and/or modify this
8
 * part of GnuPG under the terms of either
9
 *
10
 *   - the GNU Lesser General Public License as published by the Free
11
 *     Software Foundation; either version 3 of the License, or (at
12
 *     your option) any later version.
13
 *
14
 * or
15
 *
16
 *   - the GNU General Public License as published by the Free
17
 *     Software Foundation; either version 2 of the License, or (at
18
 *     your option) any later version.
19
 *
20
 * or both in parallel, as here.
21
 *
22
 * GnuPG is distributed in the hope that it will be useful, but
23
 * WITHOUT ANY WARRANTY; without even the implied warranty of
24
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
25
 * General Public License for more details.
26
 *
27
 * You should have received copies of the GNU General Public License
28
 * and the GNU Lesser General Public License along with this program;
29
 * if not, see <https://www.gnu.org/licenses/>.
30
 */
31
32
#include <config.h>
33
#include <stdlib.h>
34
#include <string.h>
35
#include <stdarg.h>
36
#include <ctype.h>
37
#ifdef HAVE_LANGINFO_CODESET
38
#include <langinfo.h>
39
#endif
40
#include <errno.h>
41
42
#if HAVE_W32_SYSTEM
43
# /* Tell libgpg-error to provide the iconv macros.  */
44
# define GPGRT_ENABLE_W32_ICONV_MACROS 1
45
#elif HAVE_ANDROID_SYSTEM
46
# /* No iconv support.  */
47
#else
48
# include <iconv.h>
49
#endif
50
51
52
#include "util.h"
53
#include "common-defs.h"
54
#include "i18n.h"
55
#include "stringhelp.h"
56
#include "utf8conv.h"
57
58
#ifdef HAVE_W32_SYSTEM
59
#include <windows.h>
60
#endif
61
62
#ifndef MB_LEN_MAX
63
0
#define MB_LEN_MAX 16
64
#endif
65
66
static const char *active_charset_name = "iso-8859-1";
67
static int no_translation;     /* Set to true if we let simply pass through. */
68
static int use_iconv;          /* iconv conversion functions required. */
69
70
71
#ifdef HAVE_ANDROID_SYSTEM
72
/* Minimal stand-ins for the iconv API.  They only exist so that this
   file builds when HAVE_ANDROID_SYSTEM is defined; none of them does
   any conversion.  */
typedef void *iconv_t;
#define ICONV_CONST

/* Stub for iconv_open: ignores TOCODE and FROMCODE and always returns
   the failure value (iconv_t)(-1).  */
static iconv_t
iconv_open (const char *tocode, const char *fromcode)
{
  (void)fromcode;
  (void)tocode;

  return (iconv_t)(-1);
}


/* Stub for iconv: ignores all arguments, does not touch any of the
   buffers or counters and always returns 0.  */
static size_t
iconv (iconv_t cd, char **inbuf, size_t *inbytesleft,
       char **outbuf, size_t *outbytesleft)
{
  (void)outbytesleft;
  (void)outbuf;
  (void)inbytesleft;
  (void)inbuf;
  (void)cd;

  return (size_t)(0);
}


/* Stub for iconv_close: ignores CD and always returns 0.  */
static int
iconv_close (iconv_t cd)
{
  (void)cd;

  return 0;
}
102
#endif /*HAVE_ANDROID_SYSTEM*/
103
104
105
/* Error handler for iconv failures. This is needed to not clutter the
106
   output with repeated diagnostics about a missing conversion. */
107
static void
108
handle_iconv_error (const char *to, const char *from, int use_fallback)
109
0
{
110
0
  if (errno == EINVAL)
111
0
    {
112
0
      static int shown1, shown2;
113
0
      int x;
114
115
0
      if (to && !strcmp (to, "utf-8"))
116
0
        {
117
0
          x = shown1;
118
0
          shown1 = 1;
119
0
        }
120
0
      else
121
0
        {
122
0
          x = shown2;
123
0
          shown2 = 1;
124
0
        }
125
126
0
      if (!x)
127
0
        log_info (_("conversion from '%s' to '%s' not available\n"),
128
0
                  from, to);
129
0
    }
130
0
  else
131
0
    {
132
0
      static int shown;
133
134
0
      if (!shown)
135
0
        log_info (_("iconv_open failed: %s\n"), strerror (errno));
136
0
      shown = 1;
137
0
    }
138
139
0
  if (use_fallback)
140
0
    {
141
      /* To avoid further error messages we fallback to UTF-8 for the
142
         native encoding.  Nowadays this seems to be the best bet in
143
         case of errors from iconv or nl_langinfo.  */
144
0
      active_charset_name = "utf-8";
145
0
      no_translation = 1;
146
0
      use_iconv = 0;
147
0
    }
148
0
}
149
150
151
152
int
153
set_native_charset (const char *newset)
154
0
{
155
0
  const char *full_newset;
156
157
0
  if (!newset)
158
0
    {
159
#ifdef HAVE_ANDROID_SYSTEM
160
      newset = "utf-8";
161
#elif defined HAVE_W32_SYSTEM
162
      static char codepage[30];
163
      unsigned int cpno;
164
      const char *aliases;
165
166
      /* We are a console program thus we need to use the
167
         GetConsoleOutputCP function and not the GetACP which
168
         would give the codepage for a GUI program.  Note this is not
169
         a bulletproof detection because GetConsoleCP might return a
170
         different one for console input.  Not sure how to cope with
171
         that.  If the console Code page is not known we fall back to
172
         the system code page.  */
173
      cpno = GetConsoleOutputCP ();
174
      if (!cpno)
175
        cpno = GetACP ();
176
      sprintf (codepage, "CP%u", cpno );
177
      /* Resolve alias.  We use a long string string and not the usual
178
         array to optimize if the code is taken to a DSO.  Taken from
179
         libiconv 1.9.2. */
180
      newset = codepage;
181
      for (aliases = ("CP936"   "\0" "GBK" "\0"
182
                      "CP1361"  "\0" "JOHAB" "\0"
183
                      "CP20127" "\0" "ASCII" "\0"
184
                      "CP20866" "\0" "KOI8-R" "\0"
185
                      "CP21866" "\0" "KOI8-RU" "\0"
186
                      "CP28591" "\0" "ISO-8859-1" "\0"
187
                      "CP28592" "\0" "ISO-8859-2" "\0"
188
                      "CP28593" "\0" "ISO-8859-3" "\0"
189
                      "CP28594" "\0" "ISO-8859-4" "\0"
190
                      "CP28595" "\0" "ISO-8859-5" "\0"
191
                      "CP28596" "\0" "ISO-8859-6" "\0"
192
                      "CP28597" "\0" "ISO-8859-7" "\0"
193
                      "CP28598" "\0" "ISO-8859-8" "\0"
194
                      "CP28599" "\0" "ISO-8859-9" "\0"
195
                      "CP28605" "\0" "ISO-8859-15" "\0"
196
                      "CP65001" "\0" "UTF-8" "\0");
197
           *aliases;
198
           aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
199
        {
200
          if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
201
            {
202
              newset = aliases + strlen (aliases) + 1;
203
              break;
204
            }
205
        }
206
207
#else /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
208
209
0
#ifdef HAVE_LANGINFO_CODESET
210
0
      newset = nl_langinfo (CODESET);
211
#else /*!HAVE_LANGINFO_CODESET*/
212
      /* Try to get the used charset from environment variables.  */
213
      static char codepage[30];
214
      const char *lc, *dot, *mod;
215
216
      strcpy (codepage, "iso-8859-1");
217
      lc = getenv ("LC_ALL");
218
      if (!lc || !*lc)
219
        {
220
          lc = getenv ("LC_CTYPE");
221
          if (!lc || !*lc)
222
            lc = getenv ("LANG");
223
        }
224
      if (lc && *lc)
225
        {
226
          dot = strchr (lc, '.');
227
          if (dot)
228
            {
229
              mod = strchr (++dot, '@');
230
              if (!mod)
231
                mod = dot + strlen (dot);
232
              if (mod - dot < sizeof codepage && dot != mod)
233
                {
234
                  memcpy (codepage, dot, mod - dot);
235
                  codepage [mod - dot] = 0;
236
                }
237
            }
238
        }
239
      newset = codepage;
240
#endif /*!HAVE_LANGINFO_CODESET*/
241
0
#endif /*!HAVE_W32_SYSTEM && !HAVE_ANDROID_SYSTEM*/
242
0
    }
243
244
0
  full_newset = newset;
245
0
  if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3))
246
0
    {
247
0
      newset += 3;
248
0
      if (*newset == '-' || *newset == '_')
249
0
        newset++;
250
0
    }
251
252
  /* Note that we silently assume that plain ASCII is actually meant
253
     as Latin-1.  This makes sense because many Unix system don't have
254
     their locale set up properly and thus would get annoying error
255
     messages and we have to handle all the "bug" reports. Latin-1 has
256
     traditionally been the character set used for 8 bit characters on
257
     Unix systems. */
258
0
  if ( !*newset
259
0
       || !ascii_strcasecmp (newset, "8859-1" )
260
0
       || !ascii_strcasecmp (newset, "646" )
261
0
       || !ascii_strcasecmp (newset, "ASCII" )
262
0
       || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
263
0
       )
264
0
    {
265
0
      active_charset_name = "iso-8859-1";
266
0
      no_translation = 0;
267
0
      use_iconv = 0;
268
0
    }
269
0
  else if ( !ascii_strcasecmp (newset, "utf8" )
270
0
            || !ascii_strcasecmp(newset, "utf-8") )
271
0
    {
272
0
      active_charset_name = "utf-8";
273
0
      no_translation = 1;
274
0
      use_iconv = 0;
275
0
    }
276
0
  else
277
0
    {
278
0
      iconv_t cd;
279
280
0
      cd = iconv_open (full_newset, "utf-8");
281
0
      if (cd == (iconv_t)-1)
282
0
        {
283
0
          handle_iconv_error (full_newset, "utf-8", 0);
284
0
          return -1;
285
0
        }
286
0
      iconv_close (cd);
287
0
      cd = iconv_open ("utf-8", full_newset);
288
0
      if (cd == (iconv_t)-1)
289
0
        {
290
0
          handle_iconv_error ("utf-8", full_newset, 0);
291
0
          return -1;
292
0
        }
293
0
      iconv_close (cd);
294
0
      active_charset_name = full_newset;
295
0
      no_translation = 0;
296
0
      use_iconv = 1;
297
0
    }
298
0
  return 0;
299
0
}
300
301
const char *
302
get_native_charset (void)
303
0
{
304
0
  return active_charset_name;
305
0
}
306
307
/* Return true if the native charset is utf-8.  */
308
int
309
is_native_utf8 (void)
310
0
{
311
0
  return no_translation;
312
0
}
313
314
315
/* Convert string, which is in native encoding to UTF8 and return a
316
   new allocated UTF-8 string.  This function terminates the process
317
   on memory shortage.  */
318
char *
319
native_to_utf8 (const char *orig_string)
320
0
{
321
0
  const unsigned char *string = (const unsigned char *)orig_string;
322
0
  const unsigned char *s;
323
0
  char *buffer;
324
0
  unsigned char *p;
325
0
  size_t length = 0;
326
327
0
  if (no_translation)
328
0
    {
329
      /* Already utf-8 encoded. */
330
0
      buffer = xstrdup (orig_string);
331
0
    }
332
0
  else if (!use_iconv)
333
0
    {
334
      /* For Latin-1 we can avoid the iconv overhead. */
335
0
      for (s = string; *s; s++)
336
0
  {
337
0
    length++;
338
0
    if (*s & 0x80)
339
0
      length++;
340
0
  }
341
0
      buffer = xmalloc (length + 1);
342
0
      for (p = (unsigned char *)buffer, s = string; *s; s++)
343
0
  {
344
0
    if ( (*s & 0x80 ))
345
0
      {
346
0
        *p++ = 0xc0 | ((*s >> 6) & 3);
347
0
        *p++ = 0x80 | (*s & 0x3f);
348
0
      }
349
0
    else
350
0
      *p++ = *s;
351
0
  }
352
0
      *p = 0;
353
0
    }
354
0
  else
355
0
    {
356
      /* Need to use iconv.  */
357
0
      iconv_t cd;
358
0
      const char *inptr;
359
0
      char *outptr;
360
0
      size_t inbytes, outbytes;
361
362
0
      cd = iconv_open ("utf-8", active_charset_name);
363
0
      if (cd == (iconv_t)-1)
364
0
        {
365
0
          handle_iconv_error ("utf-8", active_charset_name, 1);
366
0
          return native_to_utf8 (string);
367
0
        }
368
369
0
      for (s=string; *s; s++ )
370
0
        {
371
0
          length++;
372
0
          if ((*s & 0x80))
373
0
            length += 5; /* We may need up to 6 bytes for the utf8 output. */
374
0
        }
375
0
      buffer = xmalloc (length + 1);
376
377
0
      inptr = string;
378
0
      inbytes = strlen (string);
379
0
      outptr = buffer;
380
0
      outbytes = length;
381
0
      if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
382
0
                  &outptr, &outbytes) == (size_t)-1)
383
0
        {
384
0
          static int shown;
385
386
0
          if (!shown)
387
0
            log_info (_("conversion from '%s' to '%s' failed: %s\n"),
388
0
                      active_charset_name, "utf-8", strerror (errno));
389
0
          shown = 1;
390
          /* We don't do any conversion at all but use the strings as is. */
391
0
          strcpy (buffer, string);
392
0
        }
393
0
      else /* Success.  */
394
0
        {
395
0
          *outptr = 0;
396
          /* We could realloc the buffer now but I doubt that it makes
397
             much sense given that it will get freed anyway soon
398
             after.  */
399
0
        }
400
0
      iconv_close (cd);
401
0
    }
402
0
  return buffer;
403
0
}
404
405
406
407
static char *
408
do_utf8_to_native (const char *string, size_t length, int delim,
409
                   int with_iconv)
410
1.75k
{
411
1.75k
  int nleft;
412
1.75k
  int i;
413
1.75k
  unsigned char encbuf[8];
414
1.75k
  int encidx;
415
1.75k
  const unsigned char *s;
416
1.75k
  size_t n;
417
1.75k
  char *buffer = NULL;
418
1.75k
  char *p = NULL;
419
1.75k
  unsigned long val = 0;
420
1.75k
  size_t slen;
421
1.75k
  int resync = 0;
422
423
  /* First pass (p==NULL): count the extended utf-8 characters.  */
424
  /* Second pass (p!=NULL): create string.  */
425
1.75k
  for (;;)
426
3.51k
    {
427
3.51k
      for (slen = length, nleft = encidx = 0, n = 0,
428
3.51k
             s = (const unsigned char *)string;
429
46.6k
           slen;
430
43.1k
     s++, slen--)
431
43.1k
  {
432
43.1k
    if (resync)
433
11.5k
      {
434
11.5k
        if (!(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)))
435
4.28k
    {
436
      /* Still invalid. */
437
4.28k
      if (p)
438
2.42k
        {
439
2.42k
          sprintf (p, "\\x%02x", *s);
440
2.42k
          p += 4;
441
2.42k
        }
442
4.28k
      n += 4;
443
4.28k
      continue;
444
4.28k
    }
445
7.27k
        resync = 0;
446
7.27k
      }
447
38.8k
    if (!nleft)
448
26.1k
      {
449
26.1k
        if (!(*s & 0x80))
450
14.7k
    {
451
                  /* Plain ascii. */
452
14.7k
      if ( delim != -1
453
14.7k
                       && (*s < 0x20 || *s == 0x7f || *s == delim
454
4.09k
                           || (delim && *s == '\\')))
455
10.6k
        {
456
10.6k
          n++;
457
10.6k
          if (p)
458
5.33k
      *p++ = '\\';
459
10.6k
          switch (*s)
460
10.6k
      {
461
1.62k
                        case '\n': n++; if ( p ) *p++ = 'n'; break;
462
1.01k
                        case '\r': n++; if ( p ) *p++ = 'r'; break;
463
284
                        case '\f': n++; if ( p ) *p++ = 'f'; break;
464
2.58k
                        case '\v': n++; if ( p ) *p++ = 'v'; break;
465
456
                        case '\b': n++; if ( p ) *p++ = 'b'; break;
466
2.89k
                        case    0: n++; if ( p ) *p++ = '0'; break;
467
1.81k
      default:
468
1.81k
        n += 3;
469
1.81k
        if (p)
470
907
          {
471
907
            sprintf (p, "x%02x", *s);
472
907
            p += 3;
473
907
          }
474
1.81k
        break;
475
10.6k
      }
476
10.6k
        }
477
4.09k
      else
478
4.09k
        {
479
4.09k
          if (p)
480
2.04k
      *p++ = *s;
481
4.09k
          n++;
482
4.09k
        }
483
14.7k
    }
484
11.3k
        else if ((*s & 0xe0) == 0xc0) /* 110x xxxx */
485
956
    {
486
956
      val = *s & 0x1f;
487
956
      nleft = 1;
488
956
      encidx = 0;
489
956
      encbuf[encidx++] = *s;
490
956
    }
491
10.4k
        else if ((*s & 0xf0) == 0xe0) /* 1110 xxxx */
492
1.35k
    {
493
1.35k
      val = *s & 0x0f;
494
1.35k
      nleft = 2;
495
1.35k
      encidx = 0;
496
1.35k
      encbuf[encidx++] = *s;
497
1.35k
    }
498
9.05k
        else if ((*s & 0xf8) == 0xf0) /* 1111 0xxx */
499
862
    {
500
862
      val = *s & 0x07;
501
862
      nleft = 3;
502
862
      encidx = 0;
503
862
      encbuf[encidx++] = *s;
504
862
    }
505
8.19k
        else if ((*s & 0xfc) == 0xf8) /* 1111 10xx */
506
1.10k
    {
507
1.10k
      val = *s & 0x03;
508
1.10k
      nleft = 4;
509
1.10k
      encidx = 0;
510
1.10k
      encbuf[encidx++] = *s;
511
1.10k
    }
512
7.08k
        else if ((*s & 0xfe) == 0xfc) /* 1111 110x */
513
2.29k
    {
514
2.29k
      val = *s & 0x01;
515
2.29k
      nleft = 5;
516
2.29k
      encidx = 0;
517
2.29k
      encbuf[encidx++] = *s;
518
2.29k
    }
519
4.79k
        else /* Invalid encoding: print as \xNN. */
520
4.79k
    {
521
4.79k
      if (p)
522
2.11k
        {
523
2.11k
          sprintf (p, "\\x%02x", *s);
524
2.11k
          p += 4;
525
2.11k
        }
526
4.79k
      n += 4;
527
4.79k
      resync = 1;
528
4.79k
    }
529
26.1k
      }
530
12.6k
    else if (*s < 0x80 || *s >= 0xc0) /* Invalid utf-8 */
531
3.99k
      {
532
3.99k
        if (p)
533
1.99k
    {
534
5.62k
      for (i = 0; i < encidx; i++)
535
3.62k
        {
536
3.62k
          sprintf (p, "\\x%02x", encbuf[i]);
537
3.62k
          p += 4;
538
3.62k
        }
539
1.99k
      sprintf (p, "\\x%02x", *s);
540
1.99k
      p += 4;
541
1.99k
    }
542
3.99k
        n += 4 + 4 * encidx;
543
3.99k
        nleft = 0;
544
3.99k
        encidx = 0;
545
3.99k
        resync = 1;
546
3.99k
      }
547
8.70k
    else
548
8.70k
      {
549
8.70k
        encbuf[encidx++] = *s;
550
8.70k
        val <<= 6;
551
8.70k
        val |= *s & 0x3f;
552
8.70k
        if (!--nleft)  /* Ready. */
553
2.52k
    {
554
2.52k
      if (no_translation)
555
0
        {
556
0
          if (p)
557
0
      {
558
0
        for (i = 0; i < encidx; i++)
559
0
          *p++ = encbuf[i];
560
0
      }
561
0
          n += encidx;
562
0
          encidx = 0;
563
0
        }
564
2.52k
                  else if (with_iconv)
565
0
                    {
566
                      /* Our strategy for using iconv is a bit strange
567
                         but it better keeps compatibility with
568
                         previous versions in regard to how invalid
569
                         encodings are displayed.  What we do is to
570
                         keep the utf-8 as is and have the real
571
                         translation step then at the end.  Yes, I
572
                         know that this is ugly.  However we are short
573
                         of the 1.4 release and for this branch we
574
                         should not mess too much around with iconv
575
                         things.  One reason for this is that we don't
576
                         know enough about non-GNU iconv
577
                         implementation and want to minimize the risk
578
                         of breaking the code on too many platforms.  */
579
0
                        if ( p )
580
0
                          {
581
0
                            for (i=0; i < encidx; i++ )
582
0
                              *p++ = encbuf[i];
583
0
                          }
584
0
                        n += encidx;
585
0
                        encidx = 0;
586
0
                    }
587
2.52k
      else  /* Latin-1 case. */
588
2.52k
                    {
589
2.52k
          if (val >= 0x80 && val < 256)
590
752
      {
591
                          /* We can simply print this character */
592
752
        n++;
593
752
        if (p)
594
376
          *p++ = val;
595
752
      }
596
1.77k
          else
597
1.77k
      {
598
                          /* We do not have a translation: print utf8. */
599
1.77k
        if (p)
600
888
          {
601
4.09k
            for (i = 0; i < encidx; i++)
602
3.20k
        {
603
3.20k
          sprintf (p, "\\x%02x", encbuf[i]);
604
3.20k
          p += 4;
605
3.20k
        }
606
888
          }
607
1.77k
        n += encidx * 4;
608
1.77k
        encidx = 0;
609
1.77k
      }
610
2.52k
        }
611
2.52k
    }
612
613
8.70k
      }
614
38.8k
  }
615
3.51k
      if (!buffer)
616
1.75k
  {
617
          /* Allocate the buffer after the first pass. */
618
1.75k
    buffer = p = xmalloc (n + 1);
619
1.75k
  }
620
1.75k
      else if (with_iconv)
621
0
        {
622
          /* Note: See above for comments.  */
623
0
          iconv_t cd;
624
0
          const char *inptr;
625
0
          char *outbuf, *outptr;
626
0
          size_t inbytes, outbytes;
627
628
0
          *p = 0;  /* Terminate the buffer. */
629
630
0
          cd = iconv_open (active_charset_name, "utf-8");
631
0
          if (cd == (iconv_t)-1)
632
0
            {
633
0
              handle_iconv_error (active_charset_name, "utf-8", 1);
634
0
              xfree (buffer);
635
0
              return utf8_to_native (string, length, delim);
636
0
            }
637
638
          /* Allocate a new buffer large enough to hold all possible
639
             encodings. */
640
0
          n = p - buffer + 1;
641
0
          inbytes = n - 1;;
642
0
          inptr = buffer;
643
0
          outbytes = n * MB_LEN_MAX;
644
0
          if (outbytes / MB_LEN_MAX != n)
645
0
            BUG (); /* Actually an overflow. */
646
0
          outbuf = outptr = xmalloc (outbytes);
647
0
          if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
648
0
                      &outptr, &outbytes) == (size_t)-1)
649
0
            {
650
0
              static int shown;
651
652
0
              if (!shown)
653
0
                log_info (_("conversion from '%s' to '%s' failed: %s\n"),
654
0
                          "utf-8", active_charset_name, strerror (errno));
655
0
              shown = 1;
656
              /* Didn't worked out.  Try again but without iconv.  */
657
0
              xfree (buffer);
658
0
              buffer = NULL;
659
0
              xfree (outbuf);
660
0
              outbuf = do_utf8_to_native (string, length, delim, 0);
661
0
            }
662
0
            else /* Success.  */
663
0
              {
664
0
                *outptr = 0; /* Make sure it is a string. */
665
                /* We could realloc the buffer now but I doubt that it
666
                   makes much sense given that it will get freed
667
                   anyway soon after.  */
668
0
                xfree (buffer);
669
0
              }
670
0
          iconv_close (cd);
671
0
          return outbuf;
672
0
        }
673
1.75k
      else /* Not using iconv. */
674
1.75k
  {
675
1.75k
    *p = 0; /* Make sure it is a string. */
676
1.75k
    return buffer;
677
1.75k
  }
678
3.51k
    }
679
1.75k
}
680
681
/* Convert string, which is in UTF-8 to native encoding.  Replace
682
   illegal encodings by some "\xnn" and quote all control
683
   characters. A character with value DELIM will always be quoted, it
684
   must be a vanilla ASCII character.  A DELIM value of -1 is special:
685
   it disables all quoting of control characters.  This function
686
   terminates the process on memory shortage.  */
687
char *
688
utf8_to_native (const char *string, size_t length, int delim)
689
1.75k
{
690
1.75k
  return do_utf8_to_native (string, length, delim, use_iconv);
691
1.75k
}
692
693
694
695
696
/* Wrapper function for iconv_open, required for W32 as we dlopen that
697
   library on that system.  */
698
jnlib_iconv_t
699
jnlib_iconv_open (const char *tocode, const char *fromcode)
700
0
{
701
0
  return (jnlib_iconv_t)iconv_open (tocode, fromcode);
702
0
}
703
704
705
/* Wrapper function for iconv, required for W32 as we dlopen that
706
   library on that system.  */
707
size_t
708
jnlib_iconv (jnlib_iconv_t cd,
709
             const char **inbuf, size_t *inbytesleft,
710
             char **outbuf, size_t *outbytesleft)
711
0
{
712
0
  return iconv ((iconv_t)cd, (ICONV_CONST char**)inbuf, inbytesleft,
713
0
                outbuf, outbytesleft);
714
0
}
715
716
/* Wrapper function for iconv_close, required for W32 as we dlopen that
717
   library on that system.  */
718
int
719
jnlib_iconv_close (jnlib_iconv_t cd)
720
0
{
721
0
  return iconv_close ((iconv_t)cd);
722
0
}
723
724
725
#ifdef HAVE_W32_SYSTEM
726
/* Return a malloced string encoded for CODEPAGE from the wide char
   input string STRING.  Caller must free this value.  Returns NULL
   and sets ERRNO on failure: EINVAL if the conversion fails, or
   whatever xtrymalloc left in ERRNO if the allocation fails.  Calling
   this function with STRING set to NULL is not defined.  */
static char *
wchar_to_cp (const wchar_t *string, unsigned int codepage)
{
  int n;
  char *result;

  /* First call with a zero sized output buffer: ask for the number
     of bytes required.  Because the input length is given as -1 the
     count includes the terminating Nul, thus a successful call never
     returns 0.  WideCharToMultiByte signals failure by returning 0
     and not by a negative value.  */
  n = WideCharToMultiByte (codepage, 0, string, -1, NULL, 0, NULL, NULL);
  if (n <= 0)
    {
      gpg_err_set_errno (EINVAL);
      return NULL;
    }

  result = xtrymalloc (n+1);
  if (!result)
    return NULL;

  /* Second call: do the actual conversion into RESULT.  */
  n = WideCharToMultiByte (codepage, 0, string, -1, result, n, NULL, NULL);
  if (n <= 0)
    {
      xfree (result);
      gpg_err_set_errno (EINVAL);
      result = NULL;
    }
  return result;
}
756
757
758
/* Return a malloced wide char string from a CODEPAGE encoded input
   string STRING.  Caller must free this value.  Returns NULL and sets
   ERRNO on failure: EINVAL if the conversion fails, ENOMEM if the
   size computation overflows, or whatever xtrymalloc left in ERRNO if
   the allocation fails.  Calling this function with STRING set to
   NULL is not defined.  */
static wchar_t *
cp_to_wchar (const char *string, unsigned int codepage)
{
  int n;
  size_t nchars;
  size_t nbytes;
  wchar_t *result;

  /* First call with a zero sized output buffer: ask for the number
     of wide characters required.  Because the input length is given
     as -1 the count includes the terminating Nul, thus a successful
     call never returns 0.  MultiByteToWideChar signals failure by
     returning 0 and not by a negative value.  */
  n = MultiByteToWideChar (codepage, 0, string, -1, NULL, 0);
  if (n <= 0)
    {
      gpg_err_set_errno (EINVAL);
      return NULL;
    }

  /* Compute the size in size_t so that N+1 can't overflow an int.  */
  nchars = (size_t)n + 1;
  nbytes = nchars * sizeof(*result);
  if (nbytes / sizeof(*result) != nchars)
    {
      gpg_err_set_errno (ENOMEM);
      return NULL;
    }
  result = xtrymalloc (nbytes);
  if (!result)
    return NULL;

  /* Second call: do the actual conversion into RESULT.  */
  n = MultiByteToWideChar (codepage, 0, string, -1, result, n);
  if (n <= 0)
    {
      xfree (result);
      gpg_err_set_errno (EINVAL);
      result = NULL;
    }
  return result;
}
795
796
797
/* Get the current codepage as used by wchar_to_native and
 * native_to_wchar.  The console output codepage is preferred; if that
 * is not known the system codepage is used.  The value is determined
 * on the first call and cached in a static variable.  Note that these
 * functions intentionally do not use the iconv based conversion
 * machinery.  */
static unsigned int
get_w32_codepage (void)
{
  static unsigned int cp;

  if (cp)
    return cp;

  cp = GetConsoleOutputCP ();
  if (!cp)
    cp = GetACP ();
  return cp;
}
813
814
/* Convert the wide char string STRING to the active code page and
 * return the result as a malloced string.  Caller must free this
 * value.  Returns NULL and sets ERRNO on failure.  Calling this
 * function with STRING set to NULL is not defined.  */
char *
wchar_to_native (const wchar_t *string)
{
  unsigned int codepage = get_w32_codepage ();

  return wchar_to_cp (string, codepage);
}
823
824
825
/* Convert the string STRING, which is encoded in the active code
 * page, to a malloced wide char string.  Caller must free this value.
 * Returns NULL and sets ERRNO on failure.  Calling this function with
 * STRING set to NULL is not defined.  */
wchar_t *
native_to_wchar (const char *string)
{
  unsigned int codepage = get_w32_codepage ();

  return cp_to_wchar (string, codepage);
}
834
835
836
/* Return a malloced string encoded in UTF-8 from the wide char input
837
 * string STRING.  Caller must free this value.  Returns NULL and sets
838
 * ERRNO on failure.  Calling this function with STRING set to NULL is
839
 * not defined.  */
840
char *
841
wchar_to_utf8 (const wchar_t *string)
842
{
843
  return wchar_to_cp (string, CP_UTF8);
844
}
845
846
847
/* Return a malloced wide char string from an UTF-8 encoded input
848
 * string STRING.  Caller must free this value.  Returns NULL and sets
849
 * ERRNO on failure.  Calling this function with STRING set to NULL is
850
 * not defined.  */
851
wchar_t *
852
utf8_to_wchar (const char *string)
853
{
854
  return cp_to_wchar (string, CP_UTF8);
855
}
856
857
#endif /*HAVE_W32_SYSTEM*/