Coverage Report

Created: 2025-11-16 07:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/glib-2.80.0/glib/gconvert.c
Line
Count
Source
1
/* GLIB - Library of useful routines for C programming
2
 *
3
 * gconvert.c: Convert between character sets using iconv
4
 * Copyright Red Hat Inc., 2000
5
 * Authors: Havoc Pennington <hp@redhat.com>, Owen Taylor <otaylor@redhat.com>
6
 *
7
 * SPDX-License-Identifier: LGPL-2.1-or-later
8
 *
9
 * This library is free software; you can redistribute it and/or
10
 * modify it under the terms of the GNU Lesser General Public
11
 * License as published by the Free Software Foundation; either
12
 * version 2.1 of the License, or (at your option) any later version.
13
 *
14
 * This library is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
 * Lesser General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU Lesser General Public
20
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
#include "config.h"
24
#include "glibconfig.h"
25
26
#ifndef G_OS_WIN32
27
#include <iconv.h>
28
#endif
29
#include <errno.h>
30
#include <stdio.h>
31
#include <string.h>
32
#include <stdlib.h>
33
34
#ifdef G_OS_WIN32
35
#include "win_iconv.c"
36
#endif
37
38
#ifdef G_PLATFORM_WIN32
39
#define STRICT
40
#include <windows.h>
41
#undef STRICT
42
#endif
43
44
#include "gconvert.h"
45
#include "gconvertprivate.h"
46
47
#include "gcharsetprivate.h"
48
#include "gslist.h"
49
#include "gstrfuncs.h"
50
#include "gtestutils.h"
51
#include "gthread.h"
52
#include "gthreadprivate.h"
53
#include "gunicode.h"
54
#include "gfileutils.h"
55
#include "genviron.h"
56
57
#include "glibintl.h"
58
59
/* We try to terminate strings in unknown charsets with this many zero bytes
60
 * to ensure that multibyte strings really are nul-terminated when we return
61
 * them from g_convert() and friends.
62
 */
63
59.7k
#define NUL_TERMINATOR_LENGTH 4
64
65
G_DEFINE_QUARK (g_convert_error, g_convert_error)
66
67
static gboolean
68
try_conversion (const char *to_codeset,
69
    const char *from_codeset,
70
    iconv_t    *cd)
71
16.5k
{
72
16.5k
  *cd = iconv_open (to_codeset, from_codeset);
73
74
16.5k
  if (*cd == (iconv_t)-1 && errno == EINVAL)
75
0
    return FALSE;
76
77
#if defined(__FreeBSD__) && defined(ICONV_SET_ILSEQ_INVALID)
78
  /* On FreeBSD request GNU iconv compatible handling of characters that cannot
79
   * be repesented in the destination character set.
80
   * See https://cgit.freebsd.org/src/commit/?id=7c5b23111c5fd1992047922d4247c4a1ce1bb6c3
81
   */
82
  int value = 1;
83
  if (iconvctl (*cd, ICONV_SET_ILSEQ_INVALID, &value) != 0)
84
    return FALSE;
85
#endif
86
16.5k
  return TRUE;
87
16.5k
}
88
89
static gboolean
90
try_to_aliases (const char **to_aliases,
91
    const char  *from_codeset,
92
    iconv_t     *cd)
93
0
{
94
0
  if (to_aliases)
95
0
    {
96
0
      const char **p = to_aliases;
97
0
      while (*p)
98
0
  {
99
0
    if (try_conversion (*p, from_codeset, cd))
100
0
      return TRUE;
101
102
0
    p++;
103
0
  }
104
0
    }
105
106
0
  return FALSE;
107
0
}
108
109
/**
110
 * g_iconv_open: (skip)
111
 * @to_codeset: destination codeset
112
 * @from_codeset: source codeset
113
 * 
114
 * Same as the standard UNIX routine iconv_open(), but
115
 * may be implemented via libiconv on UNIX flavors that lack
116
 * a native implementation.
117
 * 
118
 * GLib provides g_convert() and g_locale_to_utf8() which are likely
119
 * more convenient than the raw iconv wrappers.
120
 * 
121
 * Returns: a "conversion descriptor", or (GIConv)-1 if
122
 *  opening the converter failed.
123
 **/
124
GIConv
125
g_iconv_open (const gchar  *to_codeset,
126
        const gchar  *from_codeset)
127
16.5k
{
128
16.5k
  iconv_t cd;
129
  
130
16.5k
  if (!try_conversion (to_codeset, from_codeset, &cd))
131
0
    {
132
0
      const char **to_aliases = _g_charset_get_aliases (to_codeset);
133
0
      const char **from_aliases = _g_charset_get_aliases (from_codeset);
134
135
0
      if (from_aliases)
136
0
  {
137
0
    const char **p = from_aliases;
138
0
    while (*p)
139
0
      {
140
0
        if (try_conversion (to_codeset, *p, &cd))
141
0
    goto out;
142
143
0
        if (try_to_aliases (to_aliases, *p, &cd))
144
0
    goto out;
145
146
0
        p++;
147
0
      }
148
0
  }
149
150
0
      if (try_to_aliases (to_aliases, from_codeset, &cd))
151
0
  goto out;
152
0
    }
153
154
16.5k
 out:
155
16.5k
  return (cd == (iconv_t)-1) ? (GIConv)-1 : (GIConv)cd;
156
16.5k
}
157
158
/**
159
 * g_iconv: (skip)
160
 * @converter: conversion descriptor from g_iconv_open()
161
 * @inbuf: bytes to convert
162
 * @inbytes_left: (inout): inout parameter, bytes remaining to convert in @inbuf
163
 * @outbuf: converted output bytes
164
 * @outbytes_left: (inout): inout parameter, bytes available to fill in @outbuf
165
 * 
166
 * Same as the standard UNIX routine iconv(), but
167
 * may be implemented via libiconv on UNIX flavors that lack
168
 * a native implementation.
169
 *
170
 * GLib provides g_convert() and g_locale_to_utf8() which are likely
171
 * more convenient than the raw iconv wrappers.
172
 * 
173
 * Note that the behaviour of iconv() for characters which are valid in the
174
 * input character set, but which have no representation in the output character
175
 * set, is implementation defined. This function may return success (with a
176
 * positive number of non-reversible conversions as replacement characters were
177
 * used), or it may return -1 and set an error such as %EILSEQ, in such a
178
 * situation.
179
 *
180
 * Returns: count of non-reversible conversions, or -1 on error
181
 **/
182
gsize 
183
g_iconv (GIConv   converter,
184
   gchar  **inbuf,
185
   gsize   *inbytes_left,
186
   gchar  **outbuf,
187
   gsize   *outbytes_left)
188
36.2k
{
189
36.2k
  iconv_t cd = (iconv_t)converter;
190
191
36.2k
  return iconv (cd, inbuf, inbytes_left, outbuf, outbytes_left);
192
36.2k
}
193
194
/**
195
 * g_iconv_close: (skip)
196
 * @converter: a conversion descriptor from g_iconv_open()
197
 *
198
 * Same as the standard UNIX routine iconv_close(), but
199
 * may be implemented via libiconv on UNIX flavors that lack
200
 * a native implementation. Should be called to clean up
201
 * the conversion descriptor from g_iconv_open() when
202
 * you are done converting things.
203
 *
204
 * GLib provides g_convert() and g_locale_to_utf8() which are likely
205
 * more convenient than the raw iconv wrappers.
206
 * 
207
 * Returns: -1 on error, 0 on success
208
 **/
209
gint
210
g_iconv_close (GIConv converter)
211
16.5k
{
212
16.5k
  iconv_t cd = (iconv_t)converter;
213
214
16.5k
  return iconv_close (cd);
215
16.5k
}
216
217
static GIConv
218
open_converter (const gchar *to_codeset,
219
    const gchar *from_codeset,
220
    GError     **error)
221
16.5k
{
222
16.5k
  GIConv cd;
223
224
16.5k
  cd = g_iconv_open (to_codeset, from_codeset);
225
226
16.5k
  if (cd == (GIConv) -1)
227
0
    {
228
      /* Something went wrong.  */
229
0
      if (error)
230
0
  {
231
0
    if (errno == EINVAL)
232
0
      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
233
0
       _("Conversion from character set “%s” to “%s” is not supported"),
234
0
       from_codeset, to_codeset);
235
0
    else
236
0
      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
237
0
       _("Could not open converter from “%s” to “%s”"),
238
0
       from_codeset, to_codeset);
239
0
  }
240
0
    }
241
  
242
16.5k
  return cd;
243
16.5k
}
244
245
static int
246
close_converter (GIConv cd)
247
16.5k
{
248
16.5k
  if (cd == (GIConv) -1)
249
0
    return 0;
250
  
251
16.5k
  return g_iconv_close (cd);  
252
16.5k
}
253
254
/**
255
 * g_convert_with_iconv: (skip)
256
 * @str:           (array length=len) (element-type guint8):
257
 *                 the string to convert.
258
 * @len:           the length of the string in bytes, or -1 if the string is
259
 *                 nul-terminated (Note that some encodings may allow nul
260
 *                 bytes to occur inside strings. In that case, using -1
261
 *                 for the @len parameter is unsafe)
262
 * @converter:     conversion descriptor from g_iconv_open()
263
 * @bytes_read:    (out) (optional): location to store the number of bytes in
264
 *                 the input string that were successfully converted, or %NULL.
265
 *                 Even if the conversion was successful, this may be 
266
 *                 less than @len if there were partial characters
267
 *                 at the end of the input. If the error
268
 *                 %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
269
 *                 stored will be the byte offset after the last valid
270
 *                 input sequence.
271
 * @bytes_written: (out) (optional): the number of bytes stored in
272
 *                 the output buffer (not including the terminating nul).
273
 * @error:         location to store the error occurring, or %NULL to ignore
274
 *                 errors. Any of the errors in #GConvertError may occur.
275
 *
276
 * Converts a string from one character set to another. 
277
 * 
278
 * Note that you should use g_iconv() for streaming conversions. 
279
 * Despite the fact that @bytes_read can return information about partial
280
 * characters, the g_convert_... functions are not generally suitable
281
 * for streaming. If the underlying converter maintains internal state,
282
 * then this won't be preserved across successive calls to g_convert(),
283
 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
284
 * this is the GNU C converter for CP1255 which does not emit a base
285
 * character until it knows that the next character is not a mark that
286
 * could combine with the base character.)
287
 *
288
 * Characters which are valid in the input character set, but which have no
289
 * representation in the output character set will result in a
290
 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE error. This is in contrast to the iconv()
291
 * specification, which leaves this behaviour implementation defined. Note that
292
 * this is the same error code as is returned for an invalid byte sequence in
293
 * the input character set. To get defined behaviour for conversion of
294
 * unrepresentable characters, use g_convert_with_fallback().
295
 *
296
 * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
297
 *               If the conversion was successful, a newly allocated buffer
298
 *               containing the converted string, which must be freed with
299
 *               g_free(). Otherwise %NULL and @error will be set.
300
 **/
301
gchar*
302
g_convert_with_iconv (const gchar *str,
303
          gssize       len,
304
          GIConv       converter,
305
          gsize       *bytes_read, 
306
          gsize       *bytes_written, 
307
          GError     **error)
308
16.5k
{
309
16.5k
  gchar *dest;
310
16.5k
  gchar *outp;
311
16.5k
  const gchar *p;
312
16.5k
  gsize inbytes_remaining;
313
16.5k
  gsize outbytes_remaining;
314
16.5k
  gsize err;
315
16.5k
  gsize outbuf_size;
316
16.5k
  gboolean have_error = FALSE;
317
16.5k
  gboolean done = FALSE;
318
16.5k
  gboolean reset = FALSE;
319
  
320
16.5k
  g_return_val_if_fail (converter != (GIConv) -1, NULL);
321
     
322
16.5k
  if (len < 0)
323
16.5k
    len = strlen (str);
324
325
16.5k
  p = str;
326
16.5k
  inbytes_remaining = len;
327
16.5k
  outbuf_size = len + NUL_TERMINATOR_LENGTH;
328
  
329
16.5k
  outbytes_remaining = outbuf_size - NUL_TERMINATOR_LENGTH;
330
16.5k
  outp = dest = g_malloc (outbuf_size);
331
332
52.7k
  while (!done && !have_error)
333
36.2k
    {
334
36.2k
      if (reset)
335
9.50k
        err = g_iconv (converter, NULL, &inbytes_remaining, &outp, &outbytes_remaining);
336
26.7k
      else
337
26.7k
        err = g_iconv (converter, (char **)&p, &inbytes_remaining, &outp, &outbytes_remaining);
338
339
36.2k
      if (err == (gsize) -1)
340
17.2k
  {
341
17.2k
    switch (errno)
342
17.2k
      {
343
138
      case EINVAL:
344
        /* Incomplete text, do not report an error */
345
138
        done = TRUE;
346
138
        break;
347
10.1k
      case E2BIG:
348
10.1k
        {
349
10.1k
    gsize used = outp - dest;
350
    
351
10.1k
    outbuf_size *= 2;
352
10.1k
    dest = g_realloc (dest, outbuf_size);
353
    
354
10.1k
    outp = dest + used;
355
10.1k
    outbytes_remaining = outbuf_size - used - NUL_TERMINATOR_LENGTH;
356
10.1k
        }
357
10.1k
        break;
358
6.88k
      case EILSEQ:
359
6.88k
              g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
360
6.88k
                                   _("Invalid byte sequence in conversion input"));
361
6.88k
        have_error = TRUE;
362
6.88k
        break;
363
0
      default:
364
0
              {
365
0
                int errsv = errno;
366
367
0
                g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
368
0
                             _("Error during conversion: %s"),
369
0
                             g_strerror (errsv));
370
0
              }
371
0
        have_error = TRUE;
372
0
        break;
373
17.2k
      }
374
17.2k
  }
375
19.0k
      else if (err > 0)
376
0
        {
377
          /* @err gives the number of replacement characters used. */
378
0
          g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
379
0
                               _("Unrepresentable character in conversion input"));
380
0
          have_error = TRUE;
381
0
        }
382
19.0k
      else 
383
19.0k
  {
384
19.0k
    if (!reset)
385
9.50k
      {
386
        /* call g_iconv with NULL inbuf to cleanup shift state */
387
9.50k
        reset = TRUE;
388
9.50k
        inbytes_remaining = 0;
389
9.50k
      }
390
9.50k
    else
391
9.50k
      done = TRUE;
392
19.0k
  }
393
36.2k
    }
394
395
16.5k
  memset (outp, 0, NUL_TERMINATOR_LENGTH);
396
  
397
16.5k
  if (bytes_read)
398
0
    *bytes_read = p - str;
399
16.5k
  else
400
16.5k
    {
401
16.5k
      if ((p - str) != len) 
402
7.02k
  {
403
7.02k
          if (!have_error)
404
138
            {
405
138
              g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
406
138
                                   _("Partial character sequence at end of input"));
407
138
              have_error = TRUE;
408
138
            }
409
7.02k
  }
410
16.5k
    }
411
412
16.5k
  if (bytes_written)
413
16.5k
    *bytes_written = outp - dest; /* Doesn't include '\0' */
414
415
16.5k
  if (have_error)
416
7.02k
    {
417
7.02k
      g_free (dest);
418
7.02k
      return NULL;
419
7.02k
    }
420
9.50k
  else
421
9.50k
    return dest;
422
16.5k
}
423
424
/**
425
 * g_convert:
426
 * @str:           (array length=len) (element-type guint8):
427
 *                 the string to convert.
428
 * @len:           the length of the string in bytes, or -1 if the string is
429
 *                 nul-terminated (Note that some encodings may allow nul
430
 *                 bytes to occur inside strings. In that case, using -1
431
 *                 for the @len parameter is unsafe)
432
 * @to_codeset:    name of character set into which to convert @str
433
 * @from_codeset:  character set of @str.
434
 * @bytes_read:    (out) (optional): location to store the number of bytes in
435
 *                 the input string that were successfully converted, or %NULL.
436
 *                 Even if the conversion was successful, this may be 
437
 *                 less than @len if there were partial characters
438
 *                 at the end of the input. If the error
439
 *                 %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
440
 *                 stored will be the byte offset after the last valid
441
 *                 input sequence.
442
 * @bytes_written: (out) (optional): the number of bytes stored in
443
 *                 the output buffer (not including the terminating nul).
444
 * @error:         location to store the error occurring, or %NULL to ignore
445
 *                 errors. Any of the errors in #GConvertError may occur.
446
 *
447
 * Converts a string from one character set to another.
448
 *
449
 * Note that you should use g_iconv() for streaming conversions. 
450
 * Despite the fact that @bytes_read can return information about partial
451
 * characters, the g_convert_... functions are not generally suitable
452
 * for streaming. If the underlying converter maintains internal state,
453
 * then this won't be preserved across successive calls to g_convert(),
454
 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
455
 * this is the GNU C converter for CP1255 which does not emit a base
456
 * character until it knows that the next character is not a mark that
457
 * could combine with the base character.)
458
 *
459
 * Using extensions such as "//TRANSLIT" may not work (or may not work
460
 * well) on many platforms.  Consider using g_str_to_ascii() instead.
461
 *
462
 * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
463
 *          If the conversion was successful, a newly allocated buffer
464
 *          containing the converted string, which must be freed with g_free().
465
 *          Otherwise %NULL and @error will be set.
466
 **/
467
gchar*
468
g_convert (const gchar *str,
469
           gssize       len,  
470
           const gchar *to_codeset,
471
           const gchar *from_codeset,
472
           gsize       *bytes_read, 
473
     gsize       *bytes_written, 
474
     GError     **error)
475
16.5k
{
476
16.5k
  gchar *res;
477
16.5k
  GIConv cd;
478
479
16.5k
  g_return_val_if_fail (str != NULL, NULL);
480
16.5k
  g_return_val_if_fail (to_codeset != NULL, NULL);
481
16.5k
  g_return_val_if_fail (from_codeset != NULL, NULL);
482
  
483
16.5k
  cd = open_converter (to_codeset, from_codeset, error);
484
485
16.5k
  if (cd == (GIConv) -1)
486
0
    {
487
0
      if (bytes_read)
488
0
        *bytes_read = 0;
489
      
490
0
      if (bytes_written)
491
0
        *bytes_written = 0;
492
      
493
0
      return NULL;
494
0
    }
495
496
16.5k
  res = g_convert_with_iconv (str, len, cd,
497
16.5k
            bytes_read, bytes_written,
498
16.5k
            error);
499
500
16.5k
  close_converter (cd);
501
502
16.5k
  return res;
503
16.5k
}
504
505
/**
506
 * g_convert_with_fallback:
507
 * @str:          (array length=len) (element-type guint8):
508
 *                the string to convert.
509
 * @len:          the length of the string in bytes, or -1 if the string is
510
 *                 nul-terminated (Note that some encodings may allow nul
511
 *                 bytes to occur inside strings. In that case, using -1
512
 *                 for the @len parameter is unsafe)
513
 * @to_codeset:   name of character set into which to convert @str
514
 * @from_codeset: character set of @str.
515
 * @fallback:     UTF-8 string to use in place of characters not
516
 *                present in the target encoding. (The string must be
517
 *                representable in the target encoding). 
518
 *                If %NULL, characters not in the target encoding will 
519
 *                be represented as Unicode escapes \uxxxx or \Uxxxxyyyy.
520
 * @bytes_read:   (out) (optional): location to store the number of bytes in
521
 *                the input string that were successfully converted, or %NULL.
522
 *                Even if the conversion was successful, this may be 
523
 *                less than @len if there were partial characters
524
 *                at the end of the input.
525
 * @bytes_written: (out) (optional): the number of bytes stored in
526
 *                 the output buffer (not including the terminating nul).
527
 * @error:        location to store the error occurring, or %NULL to ignore
528
 *                errors. Any of the errors in #GConvertError may occur.
529
 *
530
 * Converts a string from one character set to another, possibly
531
 * including fallback sequences for characters not representable
532
 * in the output. Note that it is not guaranteed that the specification
533
 * for the fallback sequences in @fallback will be honored. Some
534
 * systems may do an approximate conversion from @from_codeset
535
 * to @to_codeset in their iconv() functions, 
536
 * in which case GLib will simply return that approximate conversion.
537
 *
538
 * Note that you should use g_iconv() for streaming conversions. 
539
 * Despite the fact that @bytes_read can return information about partial
540
 * characters, the g_convert_... functions are not generally suitable
541
 * for streaming. If the underlying converter maintains internal state,
542
 * then this won't be preserved across successive calls to g_convert(),
543
 * g_convert_with_iconv() or g_convert_with_fallback(). (An example of
544
 * this is the GNU C converter for CP1255 which does not emit a base
545
 * character until it knows that the next character is not a mark that
546
 * could combine with the base character.)
547
 *
548
 * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
549
 *          If the conversion was successful, a newly allocated buffer
550
 *          containing the converted string, which must be freed with g_free().
551
 *          Otherwise %NULL and @error will be set.
552
 **/
553
gchar*
554
g_convert_with_fallback (const gchar *str,
555
       gssize       len,    
556
       const gchar *to_codeset,
557
       const gchar *from_codeset,
558
       const gchar *fallback,
559
       gsize       *bytes_read,
560
       gsize       *bytes_written,
561
       GError     **error)
562
28
{
563
28
  gchar *utf8;
564
28
  gchar *dest;
565
28
  gchar *outp;
566
28
  const gchar *insert_str = NULL;
567
28
  const gchar *p;
568
28
  gsize inbytes_remaining;   
569
28
  const gchar *save_p = NULL;
570
28
  gsize save_inbytes = 0;
571
28
  gsize outbytes_remaining; 
572
28
  gsize err;
573
28
  GIConv cd;
574
28
  gsize outbuf_size;
575
28
  gboolean have_error = FALSE;
576
28
  gboolean done = FALSE;
577
578
28
  GError *local_error = NULL;
579
  
580
28
  g_return_val_if_fail (str != NULL, NULL);
581
28
  g_return_val_if_fail (to_codeset != NULL, NULL);
582
28
  g_return_val_if_fail (from_codeset != NULL, NULL);
583
     
584
28
  if (len < 0)
585
28
    len = strlen (str);
586
  
587
  /* Try an exact conversion; we only proceed if this fails
588
   * due to an illegal sequence in the input string.
589
   */
590
28
  dest = g_convert (str, len, to_codeset, from_codeset, 
591
28
        bytes_read, bytes_written, &local_error);
592
28
  if (!local_error)
593
28
    return dest;
594
595
28
  g_assert (dest == NULL);
596
597
0
  if (!g_error_matches (local_error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE))
598
0
    {
599
0
      g_propagate_error (error, local_error);
600
0
      return NULL;
601
0
    }
602
0
  else
603
0
    g_error_free (local_error);
604
605
0
  local_error = NULL;
606
  
607
  /* No go; to proceed, we need a converter from "UTF-8" to
608
   * to_codeset, and the string as UTF-8.
609
   */
610
0
  cd = open_converter (to_codeset, "UTF-8", error);
611
0
  if (cd == (GIConv) -1)
612
0
    {
613
0
      if (bytes_read)
614
0
        *bytes_read = 0;
615
      
616
0
      if (bytes_written)
617
0
        *bytes_written = 0;
618
      
619
0
      return NULL;
620
0
    }
621
622
0
  utf8 = g_convert (str, len, "UTF-8", from_codeset, 
623
0
        bytes_read, &inbytes_remaining, error);
624
0
  if (!utf8)
625
0
    {
626
0
      close_converter (cd);
627
0
      if (bytes_written)
628
0
        *bytes_written = 0;
629
0
      return NULL;
630
0
    }
631
632
  /* Now the heart of the code. We loop through the UTF-8 string, and
633
   * whenever we hit an offending character, we form fallback, convert
634
   * the fallback to the target codeset, and then go back to
635
   * converting the original string after finishing with the fallback.
636
   *
637
   * The variables save_p and save_inbytes store the input state
638
   * for the original string while we are converting the fallback
639
   */
640
0
  p = utf8;
641
642
0
  outbuf_size = len + NUL_TERMINATOR_LENGTH;
643
0
  outbytes_remaining = outbuf_size - NUL_TERMINATOR_LENGTH;
644
0
  outp = dest = g_malloc (outbuf_size);
645
646
0
  while (!done && !have_error)
647
0
    {
648
0
      gsize inbytes_tmp = inbytes_remaining;
649
0
      err = g_iconv (cd, (char **)&p, &inbytes_tmp, &outp, &outbytes_remaining);
650
0
      inbytes_remaining = inbytes_tmp;
651
652
0
      if (err == (gsize) -1)
653
0
  {
654
0
    switch (errno)
655
0
      {
656
0
      case EINVAL:
657
0
        g_assert_not_reached();
658
0
        break;
659
0
      case E2BIG:
660
0
        {
661
0
    gsize used = outp - dest;
662
663
0
    outbuf_size *= 2;
664
0
    dest = g_realloc (dest, outbuf_size);
665
    
666
0
    outp = dest + used;
667
0
    outbytes_remaining = outbuf_size - used - NUL_TERMINATOR_LENGTH;
668
    
669
0
    break;
670
0
        }
671
0
      case EILSEQ:
672
0
        if (save_p)
673
0
    {
674
      /* Error converting fallback string - fatal
675
       */
676
0
      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
677
0
             _("Cannot convert fallback “%s” to codeset “%s”"),
678
0
             insert_str, to_codeset);
679
0
      have_error = TRUE;
680
0
      break;
681
0
    }
682
0
        else if (p)
683
0
    {
684
0
      if (!fallback)
685
0
        { 
686
0
          gunichar ch = g_utf8_get_char (p);
687
0
          insert_str = g_strdup_printf (ch < 0x10000 ? "\\u%04x" : "\\U%08x",
688
0
                ch);
689
0
        }
690
0
      else
691
0
        insert_str = fallback;
692
      
693
0
      save_p = g_utf8_next_char (p);
694
0
      save_inbytes = inbytes_remaining - (save_p - p);
695
0
      p = insert_str;
696
0
      inbytes_remaining = strlen (p);
697
0
      break;
698
0
    }
699
              /* if p is null */
700
0
              G_GNUC_FALLTHROUGH;
701
0
      default:
702
0
              {
703
0
                int errsv = errno;
704
705
0
                g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
706
0
                             _("Error during conversion: %s"),
707
0
                             g_strerror (errsv));
708
0
              }
709
710
0
        have_error = TRUE;
711
0
        break;
712
0
      }
713
0
  }
714
0
      else
715
0
  {
716
0
    if (save_p)
717
0
      {
718
0
        if (!fallback)
719
0
    g_free ((gchar *)insert_str);
720
0
        p = save_p;
721
0
        inbytes_remaining = save_inbytes;
722
0
        save_p = NULL;
723
0
      }
724
0
    else if (p)
725
0
      {
726
        /* call g_iconv with NULL inbuf to cleanup shift state */
727
0
        p = NULL;
728
0
        inbytes_remaining = 0;
729
0
      }
730
0
    else
731
0
      done = TRUE;
732
0
  }
733
0
    }
734
735
  /* Cleanup
736
   */
737
0
  memset (outp, 0, NUL_TERMINATOR_LENGTH);
738
  
739
0
  close_converter (cd);
740
741
0
  if (bytes_written)
742
0
    *bytes_written = outp - dest; /* Doesn't include '\0' */
743
744
0
  g_free (utf8);
745
746
0
  if (have_error)
747
0
    {
748
0
      if (save_p && !fallback)
749
0
  g_free ((gchar *)insert_str);
750
0
      g_free (dest);
751
0
      return NULL;
752
0
    }
753
0
  else
754
0
    return dest;
755
0
}
756
757
/*
758
 * g_locale_to_utf8
759
 *
760
 * 
761
 */
762
763
/*
764
 * Validate @string as UTF-8. @len can be negative if @string is
765
 * nul-terminated, or a non-negative value in bytes. If @string ends in an
766
 * incomplete sequence, or contains any illegal sequences or nul codepoints,
767
 * %NULL will be returned and the error set to
768
 * %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
769
 * On success, @bytes_read and @bytes_written, if provided, will be set to
770
 * the number of bytes in @string up to @len or the terminating nul byte.
771
 * On error, @bytes_read will be set to the byte offset after the last valid
772
 * and non-nul UTF-8 sequence in @string, and @bytes_written will be set to 0.
773
 */
774
static gchar *
775
strdup_len (const gchar *string,
776
      gssize       len,
777
      gsize       *bytes_read,
778
      gsize       *bytes_written,
779
      GError     **error)
780
0
{
781
0
  gsize real_len;
782
0
  const gchar *end_valid;
783
784
0
  if (!g_utf8_validate (string, len, &end_valid))
785
0
    {
786
0
      if (bytes_read)
787
0
  *bytes_read = end_valid - string;
788
0
      if (bytes_written)
789
0
  *bytes_written = 0;
790
791
0
      g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
792
0
                           _("Invalid byte sequence in conversion input"));
793
0
      return NULL;
794
0
    }
795
796
0
  real_len = end_valid - string;
797
798
0
  if (bytes_read)
799
0
    *bytes_read = real_len;
800
0
  if (bytes_written)
801
0
    *bytes_written = real_len;
802
803
0
  return g_strndup (string, real_len);
804
0
}
805
806
/* Bit flags selecting which embedded-nul checks a conversion performs. */
typedef enum
{
  /* Reject input that contains a nul byte. */
  CONVERT_CHECK_NO_NULS_IN_INPUT  = 0x1,
  /* Reject converted output that contains a nul byte. */
  CONVERT_CHECK_NO_NULS_IN_OUTPUT = 0x2
} ConvertCheckFlags;
811
812
/*
813
 * Convert from @string in the encoding identified by @from_codeset,
814
 * returning a string in the encoding identified by @to_codeset.
815
 * @len can be negative if @string is nul-terminated, or a non-negative
816
 * value in bytes. Flags defined in #ConvertCheckFlags can be set in @flags
817
 * to check the input, the output, or both, for embedded nul bytes.
818
 * On success, @bytes_read, if provided, will be set to the number of bytes
819
 * in @string up to @len or the terminating nul byte, and @bytes_written, if
820
 * provided, will be set to the number of output bytes written into the
821
 * returned buffer, excluding the terminating nul sequence.
822
 * On error, @bytes_read will be set to the byte offset after the last valid
823
 * sequence in @string, and @bytes_written will be set to 0.
824
 */
825
static gchar *
826
convert_checked (const gchar      *string,
827
                 gssize            len,
828
                 const gchar      *to_codeset,
829
                 const gchar      *from_codeset,
830
                 ConvertCheckFlags flags,
831
                 gsize            *bytes_read,
832
                 gsize            *bytes_written,
833
                 GError          **error)
834
0
{
835
0
  gchar *out;
836
0
  gsize outbytes;
837
838
0
  if ((flags & CONVERT_CHECK_NO_NULS_IN_INPUT) && len > 0)
839
0
    {
840
0
      const gchar *early_nul = memchr (string, '\0', len);
841
0
      if (early_nul != NULL)
842
0
        {
843
0
          if (bytes_read)
844
0
            *bytes_read = early_nul - string;
845
0
          if (bytes_written)
846
0
            *bytes_written = 0;
847
848
0
          g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
849
0
                               _("Embedded NUL byte in conversion input"));
850
0
          return NULL;
851
0
        }
852
0
    }
853
854
0
  out = g_convert (string, len, to_codeset, from_codeset,
855
0
                   bytes_read, &outbytes, error);
856
0
  if (out == NULL)
857
0
    {
858
0
      if (bytes_written)
859
0
        *bytes_written = 0;
860
0
      return NULL;
861
0
    }
862
863
0
  if ((flags & CONVERT_CHECK_NO_NULS_IN_OUTPUT)
864
0
      && memchr (out, '\0', outbytes) != NULL)
865
0
    {
866
0
      g_free (out);
867
0
      if (bytes_written)
868
0
        *bytes_written = 0;
869
0
      g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_EMBEDDED_NUL,
870
0
                           _("Embedded NUL byte in conversion output"));
871
0
      return NULL;
872
0
    }
873
874
0
  if (bytes_written)
875
0
    *bytes_written = outbytes;
876
0
  return out;
877
0
}
878
879
/**
880
 * g_locale_to_utf8:
881
 * @opsysstring:   (array length=len) (element-type guint8): a string in the
882
 *                 encoding of the current locale. On Windows
883
 *                 this means the system codepage.
884
 * @len:           the length of the string, or -1 if the string is
885
 *                 nul-terminated (Note that some encodings may allow nul
886
 *                 bytes to occur inside strings. In that case, using -1
887
 *                 for the @len parameter is unsafe)
888
 * @bytes_read: (out) (optional): location to store the number of bytes in the
889
 *                 input string that were successfully converted, or %NULL.
890
 *                 Even if the conversion was successful, this may be 
891
 *                 less than @len if there were partial characters
892
 *                 at the end of the input. If the error
893
 *                 %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
894
 *                 stored will be the byte offset after the last valid
895
 *                 input sequence.
896
 * @bytes_written: (out) (optional): the number of bytes stored in the output
897
 *                 buffer (not including the terminating nul).
898
 * @error:         location to store the error occurring, or %NULL to ignore
899
 *                 errors. Any of the errors in #GConvertError may occur.
900
 * 
901
 * Converts a string which is in the encoding used for strings by
902
 * the C runtime (usually the same as that used by the operating
903
 * system) in the [current locale][setlocale] into a UTF-8 string.
904
 *
905
 * If the source encoding is not UTF-8 and the conversion output contains a
906
 * nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
907
 * function returns %NULL.
908
 * If the source encoding is UTF-8, an embedded nul character is treated with
909
 * the %G_CONVERT_ERROR_ILLEGAL_SEQUENCE error for backward compatibility with
910
 * earlier versions of this library. Use g_convert() to produce output that
911
 * may contain embedded nul characters.
912
 * 
913
 * Returns: (type utf8): The converted string, or %NULL on an error.
914
 **/
915
gchar *
916
g_locale_to_utf8 (const gchar  *opsysstring,
917
      gssize        len,            
918
      gsize        *bytes_read,    
919
      gsize        *bytes_written,
920
      GError      **error)
921
0
{
922
0
  const char *charset;
923
924
0
  if (g_get_charset (&charset))
925
0
    return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
926
0
  else
927
0
    return convert_checked (opsysstring, len, "UTF-8", charset,
928
0
                            CONVERT_CHECK_NO_NULS_IN_OUTPUT,
929
0
                            bytes_read, bytes_written, error);
930
0
}
931
932
/*
933
 * Do the exact same as g_locale_to_utf8 except that the charset would
934
 * be retrieved from _g_get_time_charset (which uses LC_TIME)
935
 *
936
 * Returns: The converted string, or %NULL on an error.
937
 */
938
gchar *
939
_g_time_locale_to_utf8 (const gchar *opsysstring,
940
                        gssize       len,
941
                        gsize       *bytes_read,
942
                        gsize       *bytes_written,
943
                        GError     **error)
944
0
{
945
0
  const char *charset;
946
947
0
  if (_g_get_time_charset (&charset))
948
0
    return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
949
0
  else
950
0
    return convert_checked (opsysstring, len, "UTF-8", charset,
951
0
                            CONVERT_CHECK_NO_NULS_IN_OUTPUT,
952
0
                            bytes_read, bytes_written, error);
953
0
}
954
955
/*
956
 * Do the exact same as g_locale_to_utf8 except that the charset would
957
 * be retrieved from _g_get_ctype_charset (which uses LC_CTYPE)
958
 *
959
 * Returns: The converted string, or %NULL on an error.
960
 */
961
gchar *
962
_g_ctype_locale_to_utf8 (const gchar *opsysstring,
963
                         gssize       len,
964
                         gsize       *bytes_read,
965
                         gsize       *bytes_written,
966
                         GError     **error)
967
0
{
968
0
  const char *charset;
969
970
0
  if (_g_get_ctype_charset (&charset))
971
0
    return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
972
0
  else
973
0
    return convert_checked (opsysstring, len, "UTF-8", charset,
974
0
                            CONVERT_CHECK_NO_NULS_IN_OUTPUT,
975
0
                            bytes_read, bytes_written, error);
976
0
}
977
978
/**
979
 * g_locale_from_utf8:
980
 * @utf8string:    a UTF-8 encoded string 
981
 * @len:           the length of the string, or -1 if the string is
982
 *                 nul-terminated.
983
 * @bytes_read: (out) (optional): location to store the number of bytes in the
984
 *                 input string that were successfully converted, or %NULL.
985
 *                 Even if the conversion was successful, this may be 
986
 *                 less than @len if there were partial characters
987
 *                 at the end of the input. If the error
988
 *                 %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
989
 *                 stored will be the byte offset after the last valid
990
 *                 input sequence.
991
 * @bytes_written: (out) (optional): the number of bytes stored in the output
992
 *                 buffer (not including the terminating nul).
993
 * @error:         location to store the error occurring, or %NULL to ignore
994
 *                 errors. Any of the errors in #GConvertError may occur.
995
 * 
996
 * Converts a string from UTF-8 to the encoding used for strings by
997
 * the C runtime (usually the same as that used by the operating
998
 * system) in the [current locale][setlocale]. On Windows this means
999
 * the system codepage.
1000
 *
1001
 * The input string shall not contain nul characters even if the @len
1002
 * argument is positive. A nul character found inside the string will result
1003
 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. Use g_convert() to convert
1004
 * input that may contain embedded nul characters.
1005
 *
1006
 * Returns: (array length=bytes_written) (element-type guint8) (transfer full):
1007
 *          A newly-allocated buffer containing the converted string,
1008
 *          or %NULL on an error, and error will be set.
1009
 **/
1010
gchar *
1011
g_locale_from_utf8 (const gchar *utf8string,
1012
        gssize       len,            
1013
        gsize       *bytes_read,    
1014
        gsize       *bytes_written,
1015
        GError     **error)
1016
0
{
1017
0
  const gchar *charset;
1018
1019
0
  if (g_get_charset (&charset))
1020
0
    return strdup_len (utf8string, len, bytes_read, bytes_written, error);
1021
0
  else
1022
0
    return convert_checked (utf8string, len, charset, "UTF-8",
1023
0
                            CONVERT_CHECK_NO_NULS_IN_INPUT,
1024
0
                            bytes_read, bytes_written, error);
1025
0
}
1026
1027
#ifndef G_PLATFORM_WIN32
1028
1029
/* Cached result of g_get_filename_charsets(), kept in a GPrivate slot and
 * released by filename_charset_cache_free(). */
typedef struct _GFilenameCharsetCache GFilenameCharsetCache;
1030
1031
struct _GFilenameCharsetCache {
1032
  /* Value returned by g_get_filename_charsets() */
  gboolean is_utf8;
1033
  /* Copy of the locale charset the cache was built for; a mismatch with the
   * current one triggers a rebuild */
  gchar *charset;
1034
  /* NULL-terminated list of charset names handed out to callers */
  gchar **filename_charsets;
1035
};
1036
1037
static void
1038
filename_charset_cache_free (gpointer data)
1039
0
{
1040
0
  GFilenameCharsetCache *cache = data;
1041
0
  g_free (cache->charset);
1042
0
  g_strfreev (cache->filename_charsets);
1043
0
  g_free (cache);
1044
0
}
1045
1046
/**
1047
 * g_get_filename_charsets:
1048
 * @filename_charsets: (out) (transfer none) (array zero-terminated=1):
1049
 *    return location for the %NULL-terminated list of encoding names
1050
 *
1051
 * Determines the preferred character sets used for filenames.
1052
 * The first character set from the @filename_charsets is the filename encoding, the
1053
 * subsequent character sets are used when trying to generate a displayable
1054
 * representation of a filename, see g_filename_display_name().
1055
 *
1056
 * On Unix, the character sets are determined by consulting the
1057
 * environment variables `G_FILENAME_ENCODING` and `G_BROKEN_FILENAMES`.
1058
 * On Windows, the character set used in the GLib API is always UTF-8
1059
 * and said environment variables have no effect.
1060
 *
1061
 * `G_FILENAME_ENCODING` may be set to a comma-separated list of
1062
 * character set names. The special token "\@locale" is taken
1063
 * to  mean the character set for the [current locale][setlocale].
1064
 * If `G_FILENAME_ENCODING` is not set, but `G_BROKEN_FILENAMES` is,
1065
 * the character set of the current locale is taken as the filename
1066
 * encoding. If neither environment variable  is set, UTF-8 is taken
1067
 * as the filename encoding, but the character set of the current locale
1068
 * is also put in the list of encodings.
1069
 *
1070
 * The returned @filename_charsets belong to GLib and must not be freed.
1071
 *
1072
 * Note that on Unix, regardless of the locale character set or
1073
 * `G_FILENAME_ENCODING` value, the actual file names present 
1074
 * on a system might be in any random encoding or just gibberish.
1075
 *
1076
 * Returns: %TRUE if the filename encoding is UTF-8.
1077
 * 
1078
 * Since: 2.6
1079
 */
1080
gboolean
1081
g_get_filename_charsets (const gchar ***filename_charsets)
1082
0
{
1083
0
  static GPrivate cache_private = G_PRIVATE_INIT (filename_charset_cache_free);
1084
0
  GFilenameCharsetCache *cache = g_private_get (&cache_private);
1085
0
  const gchar *charset;
1086
1087
0
  if (!cache)
1088
0
    cache = g_private_set_alloc0 (&cache_private, sizeof (GFilenameCharsetCache));
1089
1090
0
  g_get_charset (&charset);
1091
1092
0
  if (!(cache->charset && strcmp (cache->charset, charset) == 0))
1093
0
    {
1094
0
      const gchar *new_charset;
1095
0
      const gchar *p;
1096
0
      gint i;
1097
1098
0
      g_free (cache->charset);
1099
0
      g_strfreev (cache->filename_charsets);
1100
0
      cache->charset = g_strdup (charset);
1101
      
1102
0
      p = g_getenv ("G_FILENAME_ENCODING");
1103
0
      if (p != NULL && p[0] != '\0') 
1104
0
  {
1105
0
    cache->filename_charsets = g_strsplit (p, ",", 0);
1106
0
    cache->is_utf8 = (strcmp (cache->filename_charsets[0], "UTF-8") == 0);
1107
1108
0
    for (i = 0; cache->filename_charsets[i]; i++)
1109
0
      {
1110
0
        if (strcmp ("@locale", cache->filename_charsets[i]) == 0)
1111
0
    {
1112
0
      g_get_charset (&new_charset);
1113
0
      g_free (cache->filename_charsets[i]);
1114
0
      cache->filename_charsets[i] = g_strdup (new_charset);
1115
0
    }
1116
0
      }
1117
0
  }
1118
0
      else if (g_getenv ("G_BROKEN_FILENAMES") != NULL)
1119
0
  {
1120
0
    cache->filename_charsets = g_new0 (gchar *, 2);
1121
0
    cache->is_utf8 = g_get_charset (&new_charset);
1122
0
    cache->filename_charsets[0] = g_strdup (new_charset);
1123
0
  }
1124
0
      else 
1125
0
  {
1126
0
    cache->filename_charsets = g_new0 (gchar *, 3);
1127
0
    cache->is_utf8 = TRUE;
1128
0
    cache->filename_charsets[0] = g_strdup ("UTF-8");
1129
0
    if (!g_get_charset (&new_charset))
1130
0
      cache->filename_charsets[1] = g_strdup (new_charset);
1131
0
  }
1132
0
    }
1133
1134
0
  if (filename_charsets)
1135
0
    *filename_charsets = (const gchar **)cache->filename_charsets;
1136
1137
0
  return cache->is_utf8;
1138
0
}
1139
1140
#else /* G_PLATFORM_WIN32 */
1141
1142
gboolean
1143
g_get_filename_charsets (const gchar ***filename_charsets) 
1144
{
1145
  static const gchar *charsets[] = {
1146
    "UTF-8",
1147
    NULL
1148
  };
1149
1150
#ifdef G_OS_WIN32
1151
  /* On Windows GLib pretends that the filename charset is UTF-8 */
1152
  if (filename_charsets)
1153
    *filename_charsets = charsets;
1154
1155
  return TRUE;
1156
#else
1157
  gboolean result;
1158
1159
  /* Cygwin works like before */
1160
  result = g_get_charset (&(charsets[0]));
1161
1162
  if (filename_charsets)
1163
    *filename_charsets = charsets;
1164
1165
  return result;
1166
#endif
1167
}
1168
1169
#endif /* G_PLATFORM_WIN32 */
1170
1171
static gboolean
1172
get_filename_charset (const gchar **filename_charset)
1173
0
{
1174
0
  const gchar **charsets;
1175
0
  gboolean is_utf8;
1176
  
1177
0
  is_utf8 = g_get_filename_charsets (&charsets);
1178
1179
0
  if (filename_charset)
1180
0
    *filename_charset = charsets[0];
1181
  
1182
0
  return is_utf8;
1183
0
}
1184
1185
/**
1186
 * g_filename_to_utf8:
1187
 * @opsysstring: (type filename): a string in the encoding for filenames
1188
 * @len:           the length of the string, or -1 if the string is
1189
 *                 nul-terminated (Note that some encodings may allow nul
1190
 *                 bytes to occur inside strings. In that case, using -1
1191
 *                 for the @len parameter is unsafe)
1192
 * @bytes_read: (out) (optional): location to store the number of bytes in the
1193
 *                 input string that were successfully converted, or %NULL.
1194
 *                 Even if the conversion was successful, this may be 
1195
 *                 less than @len if there were partial characters
1196
 *                 at the end of the input. If the error
1197
 *                 %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1198
 *                 stored will be the byte offset after the last valid
1199
 *                 input sequence.
1200
 * @bytes_written: (out) (optional): the number of bytes stored in the output
1201
 *                 buffer (not including the terminating nul).
1202
 * @error:         location to store the error occurring, or %NULL to ignore
1203
 *                 errors. Any of the errors in #GConvertError may occur.
1204
 * 
1205
 * Converts a string which is in the encoding used by GLib for
1206
 * filenames into a UTF-8 string. Note that on Windows GLib uses UTF-8
1207
 * for filenames; on other platforms, this function indirectly depends on 
1208
 * the [current locale][setlocale].
1209
 *
1210
 * The input string shall not contain nul characters even if the @len
1211
 * argument is positive. A nul character found inside the string will result
1212
 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE.
1213
 * If the source encoding is not UTF-8 and the conversion output contains a
1214
 * nul character, the error %G_CONVERT_ERROR_EMBEDDED_NUL is set and the
1215
 * function returns %NULL. Use g_convert() to produce output that
1216
 * may contain embedded nul characters.
1217
 * 
1218
 * Returns: (type utf8): The converted string, or %NULL on an error.
1219
 **/
1220
gchar*
1221
g_filename_to_utf8 (const gchar *opsysstring, 
1222
        gssize       len,           
1223
        gsize       *bytes_read,   
1224
        gsize       *bytes_written,
1225
        GError     **error)
1226
0
{
1227
0
  const gchar *charset;
1228
1229
0
  g_return_val_if_fail (opsysstring != NULL, NULL);
1230
1231
0
  if (get_filename_charset (&charset))
1232
0
    return strdup_len (opsysstring, len, bytes_read, bytes_written, error);
1233
0
  else
1234
0
    return convert_checked (opsysstring, len, "UTF-8", charset,
1235
0
                            CONVERT_CHECK_NO_NULS_IN_INPUT |
1236
0
                            CONVERT_CHECK_NO_NULS_IN_OUTPUT,
1237
0
                            bytes_read, bytes_written, error);
1238
0
}
1239
1240
/**
1241
 * g_filename_from_utf8:
1242
 * @utf8string:    (type utf8): a UTF-8 encoded string.
1243
 * @len:           the length of the string, or -1 if the string is
1244
 *                 nul-terminated.
1245
 * @bytes_read:    (out) (optional): location to store the number of bytes in
1246
 *                 the input string that were successfully converted, or %NULL.
1247
 *                 Even if the conversion was successful, this may be 
1248
 *                 less than @len if there were partial characters
1249
 *                 at the end of the input. If the error
1250
 *                 %G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
1251
 *                 stored will be the byte offset after the last valid
1252
 *                 input sequence.
1253
 * @bytes_written: (out) (optional): the number of bytes stored in
1254
 *                 the output buffer (not including the terminating nul).
1255
 * @error:         location to store the error occurring, or %NULL to ignore
1256
 *                 errors. Any of the errors in #GConvertError may occur.
1257
 * 
1258
 * Converts a string from UTF-8 to the encoding GLib uses for
1259
 * filenames. Note that on Windows GLib uses UTF-8 for filenames;
1260
 * on other platforms, this function indirectly depends on the 
1261
 * [current locale][setlocale].
1262
 *
1263
 * The input string shall not contain nul characters even if the @len
1264
 * argument is positive. A nul character found inside the string will result
1265
 * in error %G_CONVERT_ERROR_ILLEGAL_SEQUENCE. If the filename encoding is
1266
 * not UTF-8 and the conversion output contains a nul character, the error
1267
 * %G_CONVERT_ERROR_EMBEDDED_NUL is set and the function returns %NULL.
1268
 *
1269
 * Returns: (type filename):
1270
 *               The converted string, or %NULL on an error.
1271
 **/
1272
gchar*
1273
g_filename_from_utf8 (const gchar *utf8string,
1274
          gssize       len,            
1275
          gsize       *bytes_read,    
1276
          gsize       *bytes_written,
1277
          GError     **error)
1278
0
{
1279
0
  const gchar *charset;
1280
1281
0
  if (get_filename_charset (&charset))
1282
0
    return strdup_len (utf8string, len, bytes_read, bytes_written, error);
1283
0
  else
1284
0
    return convert_checked (utf8string, len, charset, "UTF-8",
1285
0
                            CONVERT_CHECK_NO_NULS_IN_INPUT |
1286
0
                            CONVERT_CHECK_NO_NULS_IN_OUTPUT,
1287
0
                            bytes_read, bytes_written, error);
1288
0
}
1289
1290
/* Test if haystack has the needle prefix, comparing case
1291
 * insensitive. haystack may be UTF-8, but needle must
1292
 * contain only ascii. */
1293
static gboolean
1294
has_case_prefix (const gchar *haystack, const gchar *needle)
1295
0
{
1296
0
  const gchar *h, *n;
1297
  
1298
  /* Eat one character at a time. */
1299
0
  h = haystack;
1300
0
  n = needle;
1301
1302
0
  while (*n && *h &&
1303
0
   g_ascii_tolower (*n) == g_ascii_tolower (*h))
1304
0
    {
1305
0
      n++;
1306
0
      h++;
1307
0
    }
1308
  
1309
0
  return *n == '\0';
1310
0
}
1311
1312
/* Escaping modes for g_escape_uri_string(). Each value is a bit that is
 * tested against the per-character bitmasks in the acceptable[] table. */
typedef enum {
1313
  UNSAFE_ALL        = 0x1,  /* Escape all unsafe characters   */
1314
  UNSAFE_ALLOW_PLUS = 0x2,  /* Allows '+'  */
1315
  UNSAFE_PATH       = 0x8,  /* Allows '/', '&', '=', ':', '@', '+', '$' and ',' */
1316
  UNSAFE_HOST       = 0x10, /* Allows '/' and ':' and '@' */
1317
  UNSAFE_SLASHES    = 0x20  /* Allows all characters except for '/' and '%' */
1318
} UnsafeCharacterSet;
1319
1320
/* Per-character escaping table, indexed by (character - 32). Each entry is
 * a bitmask of the UnsafeCharacterSet modes under which that character is
 * left unescaped by g_escape_uri_string(). */
static const guchar acceptable[96] = {
1321
  /* A table of the ASCII chars from space (32) to DEL (127) */
1322
  /*      !    "    #    $    %    &    '    (    )    *    +    ,    -    .    / */ 
1323
  0x00,0x3F,0x20,0x20,0x28,0x00,0x2C,0x3F,0x3F,0x3F,0x3F,0x2A,0x28,0x3F,0x3F,0x1C,
1324
  /* 0    1    2    3    4    5    6    7    8    9    :    ;    <    =    >    ? */
1325
  0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x38,0x20,0x20,0x2C,0x20,0x20,
1326
  /* @    A    B    C    D    E    F    G    H    I    J    K    L    M    N    O */
1327
  0x38,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
1328
  /* P    Q    R    S    T    U    V    W    X    Y    Z    [    \    ]    ^    _ */
1329
  0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x20,0x3F,
1330
  /* `    a    b    c    d    e    f    g    h    i    j    k    l    m    n    o */
1331
  0x20,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,
1332
  /* p    q    r    s    t    u    v    w    x    y    z    {    |    }    ~  DEL */
1333
  0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x3F,0x20,0x20,0x20,0x3F,0x20
1334
};
1335
1336
/* Upper-case hexadecimal digits, indexed by nibble value when
 * g_escape_uri_string() emits a %XX escape. */
static const gchar hex[] = "0123456789ABCDEF";
1337
1338
/* Note: This escape function works on file: URIs, but if you want to
1339
 * escape something else, please read RFC-2396 */
1340
static gchar *
1341
g_escape_uri_string (const gchar *string, 
1342
         UnsafeCharacterSet mask)
1343
0
{
1344
0
#define ACCEPTABLE(a) ((a)>=32 && (a)<128 && (acceptable[(a)-32] & use_mask))
1345
1346
0
  const gchar *p;
1347
0
  gchar *q;
1348
0
  gchar *result;
1349
0
  int c;
1350
0
  gint unacceptable;
1351
0
  UnsafeCharacterSet use_mask;
1352
  
1353
0
  g_return_val_if_fail (mask == UNSAFE_ALL
1354
0
      || mask == UNSAFE_ALLOW_PLUS
1355
0
      || mask == UNSAFE_PATH
1356
0
      || mask == UNSAFE_HOST
1357
0
      || mask == UNSAFE_SLASHES, NULL);
1358
  
1359
0
  unacceptable = 0;
1360
0
  use_mask = mask;
1361
0
  for (p = string; *p != '\0'; p++)
1362
0
    {
1363
0
      c = (guchar) *p;
1364
0
      if (!ACCEPTABLE (c)) 
1365
0
  unacceptable++;
1366
0
    }
1367
  
1368
0
  result = g_malloc (p - string + unacceptable * 2 + 1);
1369
  
1370
0
  use_mask = mask;
1371
0
  for (q = result, p = string; *p != '\0'; p++)
1372
0
    {
1373
0
      c = (guchar) *p;
1374
      
1375
0
      if (!ACCEPTABLE (c))
1376
0
  {
1377
0
    *q++ = '%'; /* means hex coming */
1378
0
    *q++ = hex[c >> 4];
1379
0
    *q++ = hex[c & 15];
1380
0
  }
1381
0
      else
1382
0
  *q++ = *p;
1383
0
    }
1384
  
1385
0
  *q = '\0';
1386
  
1387
0
  return result;
1388
0
}
1389
1390
1391
static gchar *
1392
g_escape_file_uri (const gchar *hostname,
1393
       const gchar *pathname)
1394
0
{
1395
0
  char *escaped_hostname = NULL;
1396
0
  char *escaped_path;
1397
0
  char *res;
1398
1399
#ifdef G_OS_WIN32
1400
  char *p, *backslash;
1401
1402
  /* Turn backslashes into forward slashes. That's what Netscape
1403
   * does, and they are actually more or less equivalent in Windows.
1404
   */
1405
  
1406
  pathname = g_strdup (pathname);
1407
  p = (char *) pathname;
1408
  
1409
  while ((backslash = strchr (p, '\\')) != NULL)
1410
    {
1411
      *backslash = '/';
1412
      p = backslash + 1;
1413
    }
1414
#endif
1415
1416
0
  if (hostname && *hostname != '\0')
1417
0
    {
1418
0
      escaped_hostname = g_escape_uri_string (hostname, UNSAFE_HOST);
1419
0
    }
1420
1421
0
  escaped_path = g_escape_uri_string (pathname, UNSAFE_PATH);
1422
1423
0
  res = g_strconcat ("file://",
1424
0
         (escaped_hostname) ? escaped_hostname : "",
1425
0
         (*escaped_path != '/') ? "/" : "",
1426
0
         escaped_path,
1427
0
         NULL);
1428
1429
#ifdef G_OS_WIN32
1430
  g_free ((char *) pathname);
1431
#endif
1432
1433
0
  g_free (escaped_hostname);
1434
0
  g_free (escaped_path);
1435
  
1436
0
  return res;
1437
0
}
1438
1439
/* Decode the two hexadecimal digits at scanner[0] and scanner[1] into a
 * byte value. Returns -1 if either is not a hex digit; the second
 * character is not examined when the first one is invalid. */
static int
unescape_character (const char *scanner)
{
  int high_nibble;
  int low_nibble;

  high_nibble = g_ascii_xdigit_value (scanner[0]);
  if (high_nibble < 0)
    return -1;

  low_nibble = g_ascii_xdigit_value (scanner[1]);
  if (low_nibble < 0)
    return -1;

  return high_nibble * 16 + low_nibble;
}
1455
1456
static gchar *
1457
g_unescape_uri_string (const char *escaped,
1458
           int         len,
1459
           const char *illegal_escaped_characters,
1460
           gboolean    ascii_must_not_be_escaped)
1461
0
{
1462
0
  const gchar *in, *in_end;
1463
0
  gchar *out, *result;
1464
0
  int c;
1465
  
1466
0
  if (escaped == NULL)
1467
0
    return NULL;
1468
1469
0
  if (len < 0)
1470
0
    len = strlen (escaped);
1471
1472
0
  result = g_malloc (len + 1);
1473
  
1474
0
  out = result;
1475
0
  for (in = escaped, in_end = escaped + len; in < in_end; in++)
1476
0
    {
1477
0
      c = *in;
1478
1479
0
      if (c == '%')
1480
0
  {
1481
    /* catch partial escape sequences past the end of the substring */
1482
0
    if (in + 3 > in_end)
1483
0
      break;
1484
1485
0
    c = unescape_character (in + 1);
1486
1487
    /* catch bad escape sequences and NUL characters */
1488
0
    if (c <= 0)
1489
0
      break;
1490
1491
    /* catch escaped ASCII */
1492
0
    if (ascii_must_not_be_escaped && c <= 0x7F)
1493
0
      break;
1494
1495
    /* catch other illegal escaped characters */
1496
0
    if (strchr (illegal_escaped_characters, c) != NULL)
1497
0
      break;
1498
1499
0
    in += 2;
1500
0
  }
1501
1502
0
      *out++ = c;
1503
0
    }
1504
  
1505
0
  g_assert (out - result <= len);
1506
0
  *out = '\0';
1507
1508
0
  if (in != in_end)
1509
0
    {
1510
0
      g_free (result);
1511
0
      return NULL;
1512
0
    }
1513
1514
0
  return result;
1515
0
}
1516
1517
static gboolean
1518
is_asciialphanum (gunichar c)
1519
0
{
1520
0
  return c <= 0x7F && g_ascii_isalnum (c);
1521
0
}
1522
1523
static gboolean
1524
is_asciialpha (gunichar c)
1525
0
{
1526
0
  return c <= 0x7F && g_ascii_isalpha (c);
1527
0
}
1528
1529
/* allows an empty string */
1530
static gboolean
1531
hostname_validate (const char *hostname)
1532
0
{
1533
0
  const char *p;
1534
0
  gunichar c, first_char, last_char;
1535
1536
0
  p = hostname;
1537
0
  if (*p == '\0')
1538
0
    return TRUE;
1539
0
  do
1540
0
    {
1541
      /* read in a label */
1542
0
      c = g_utf8_get_char (p);
1543
0
      p = g_utf8_next_char (p);
1544
0
      if (!is_asciialphanum (c))
1545
0
  return FALSE;
1546
0
      first_char = c;
1547
0
      do
1548
0
  {
1549
0
    last_char = c;
1550
0
    c = g_utf8_get_char (p);
1551
0
    p = g_utf8_next_char (p);
1552
0
  }
1553
0
      while (is_asciialphanum (c) || c == '-');
1554
0
      if (last_char == '-')
1555
0
  return FALSE;
1556
      
1557
      /* if that was the last label, check that it was a toplabel */
1558
0
      if (c == '\0' || (c == '.' && *p == '\0'))
1559
0
  return is_asciialpha (first_char);
1560
0
    }
1561
0
  while (c == '.');
1562
0
  return FALSE;
1563
0
}
1564
1565
/**
1566
 * g_filename_from_uri:
1567
 * @uri: a uri describing a filename (escaped, encoded in ASCII).
1568
 * @hostname: (out) (optional) (nullable): Location to store hostname for the URI.
1569
 *            If there is no hostname in the URI, %NULL will be
1570
 *            stored in this location.
1571
 * @error: location to store the error occurring, or %NULL to ignore
1572
 *         errors. Any of the errors in #GConvertError may occur.
1573
 * 
1574
 * Converts an escaped ASCII-encoded URI to a local filename in the
1575
 * encoding used for filenames.
1576
 *
1577
 * Since GLib 2.78, the query string and fragment can be present in the URI,
1578
 * but are not part of the resulting filename.
1579
 * We take inspiration from https://url.spec.whatwg.org/#file-state,
1580
 * but we don't support the entire standard.
1581
 * 
1582
 * Returns: (type filename): a newly-allocated string holding
1583
 *               the resulting filename, or %NULL on an error.
1584
 **/
1585
gchar *
1586
g_filename_from_uri (const gchar *uri,
1587
         gchar      **hostname,
1588
         GError     **error)
1589
0
{
1590
0
  const char *past_scheme;
1591
0
  const char *host_part;
1592
0
  char *unescaped_hostname;
1593
0
  char *result;
1594
0
  char *filename;
1595
0
  char *past_path;
1596
0
  char *temp_uri;
1597
0
  int offs;
1598
#ifdef G_OS_WIN32
1599
  char *p, *slash;
1600
#endif
1601
1602
0
  if (hostname)
1603
0
    *hostname = NULL;
1604
1605
0
  if (!has_case_prefix (uri, "file:/"))
1606
0
    {
1607
0
      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1608
0
       _("The URI “%s” is not an absolute URI using the “file” scheme"),
1609
0
       uri);
1610
0
      return NULL;
1611
0
    }
1612
1613
0
  temp_uri = g_strdup (uri);
1614
1615
0
  past_scheme = temp_uri + strlen ("file:");
1616
  
1617
0
  past_path = strchr (past_scheme, '?');
1618
0
  if (past_path != NULL)
1619
0
    *past_path = '\0';
1620
1621
0
  past_path = strchr (past_scheme, '#');
1622
0
  if (past_path != NULL)
1623
0
    *past_path = '\0';
1624
1625
0
  if (has_case_prefix (past_scheme, "///"))
1626
0
    past_scheme += 2;
1627
0
  else if (has_case_prefix (past_scheme, "//"))
1628
0
    {
1629
0
      past_scheme += 2;
1630
0
      host_part = past_scheme;
1631
1632
0
      past_scheme = strchr (past_scheme, '/');
1633
1634
0
      if (past_scheme == NULL)
1635
0
  {
1636
0
          g_free (temp_uri);
1637
0
    g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1638
0
           _("The URI “%s” is invalid"),
1639
0
           uri);
1640
0
    return NULL;
1641
0
  }
1642
1643
0
      unescaped_hostname = g_unescape_uri_string (host_part, past_scheme - host_part, "", TRUE);
1644
1645
0
      if (unescaped_hostname == NULL ||
1646
0
    !hostname_validate (unescaped_hostname))
1647
0
  {
1648
0
    g_free (unescaped_hostname);
1649
0
          g_free (temp_uri);
1650
0
    g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1651
0
           _("The hostname of the URI “%s” is invalid"),
1652
0
           uri);
1653
0
    return NULL;
1654
0
  }
1655
      
1656
0
      if (hostname)
1657
0
  *hostname = unescaped_hostname;
1658
0
      else
1659
0
  g_free (unescaped_hostname);
1660
0
    }
1661
1662
0
  filename = g_unescape_uri_string (past_scheme, -1, "/", FALSE);
1663
1664
0
  if (filename == NULL)
1665
0
    {
1666
0
      g_free (temp_uri);
1667
0
      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_BAD_URI,
1668
0
       _("The URI “%s” contains invalidly escaped characters"),
1669
0
       uri);
1670
0
      return NULL;
1671
0
    }
1672
1673
0
  offs = 0;
1674
#ifdef G_OS_WIN32
1675
  /* Drop localhost */
1676
  if (hostname && *hostname != NULL &&
1677
      g_ascii_strcasecmp (*hostname, "localhost") == 0)
1678
    {
1679
      g_free (*hostname);
1680
      *hostname = NULL;
1681
    }
1682
1683
  /* Turn slashes into backslashes, because that's the canonical spelling */
1684
  p = filename;
1685
  while ((slash = strchr (p, '/')) != NULL)
1686
    {
1687
      *slash = '\\';
1688
      p = slash + 1;
1689
    }
1690
1691
  /* Windows URIs with a drive letter can be like "file://host/c:/foo"
1692
   * or "file://host/c|/foo" (some Netscape versions). In those cases, start
1693
   * the filename from the drive letter.
1694
   */
1695
  if (g_ascii_isalpha (filename[1]))
1696
    {
1697
      if (filename[2] == ':')
1698
  offs = 1;
1699
      else if (filename[2] == '|')
1700
  {
1701
    filename[2] = ':';
1702
    offs = 1;
1703
  }
1704
    }
1705
#endif
1706
1707
0
  result = g_strdup (filename + offs);
1708
0
  g_free (filename);
1709
1710
0
  g_free (temp_uri);
1711
1712
0
  return result;
1713
0
}
1714
1715
/**
1716
 * g_filename_to_uri:
1717
 * @filename: (type filename): an absolute filename specified in the GLib file
1718
 *     name encoding, which is the on-disk file name bytes on Unix, and UTF-8
1719
 *     on Windows
1720
 * @hostname: (nullable): A UTF-8 encoded hostname, or %NULL for none.
1721
 * @error: location to store the error occurring, or %NULL to ignore
1722
 *         errors. Any of the errors in #GConvertError may occur.
1723
 * 
1724
 * Converts an absolute filename to an escaped ASCII-encoded URI, with the path
1725
 * component following Section 3.3. of RFC 2396.
1726
 * 
1727
 * Returns: a newly-allocated string holding the resulting
1728
 *               URI, or %NULL on an error.
1729
 **/
1730
gchar *
1731
g_filename_to_uri (const gchar *filename,
1732
       const gchar *hostname,
1733
       GError     **error)
1734
0
{
1735
0
  char *escaped_uri;
1736
1737
0
  g_return_val_if_fail (filename != NULL, NULL);
1738
1739
0
  if (!g_path_is_absolute (filename))
1740
0
    {
1741
0
      g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NOT_ABSOLUTE_PATH,
1742
0
       _("The pathname “%s” is not an absolute path"),
1743
0
       filename);
1744
0
      return NULL;
1745
0
    }
1746
1747
0
  if (hostname &&
1748
0
      !(g_utf8_validate (hostname, -1, NULL)
1749
0
  && hostname_validate (hostname)))
1750
0
    {
1751
0
      g_set_error_literal (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
1752
0
                           _("Invalid hostname"));
1753
0
      return NULL;
1754
0
    }
1755
  
1756
#ifdef G_OS_WIN32
1757
  /* Don't use localhost unnecessarily */
1758
  if (hostname && g_ascii_strcasecmp (hostname, "localhost") == 0)
1759
    hostname = NULL;
1760
#endif
1761
1762
0
  escaped_uri = g_escape_file_uri (hostname, filename);
1763
1764
0
  return escaped_uri;
1765
0
}
1766
1767
/**
1768
 * g_uri_list_extract_uris:
1769
 * @uri_list: an URI list 
1770
 *
1771
 * Splits an URI list conforming to the text/uri-list
1772
 * mime type defined in RFC 2483 into individual URIs,
1773
 * discarding any comments. The URIs are not validated.
1774
 *
1775
 * Returns: (transfer full): a newly allocated %NULL-terminated list
1776
 *   of strings holding the individual URIs. The array should be freed
1777
 *   with g_strfreev().
1778
 *
1779
 * Since: 2.6
1780
 */
1781
gchar **
1782
g_uri_list_extract_uris (const gchar *uri_list)
1783
0
{
1784
0
  GPtrArray *uris;
1785
0
  const gchar *p, *q;
1786
1787
0
  uris = g_ptr_array_new ();
1788
1789
0
  p = uri_list;
1790
1791
  /* We don't actually try to validate the URI according to RFC
1792
   * 2396, or even check for allowed characters - we just ignore
1793
   * comments and trim whitespace off the ends.  We also
1794
   * allow LF delimination as well as the specified CRLF.
1795
   *
1796
   * We do allow comments like specified in RFC 2483.
1797
   */
1798
0
  while (p)
1799
0
    {
1800
0
      if (*p != '#')
1801
0
  {
1802
0
    while (g_ascii_isspace (*p))
1803
0
      p++;
1804
1805
0
    q = p;
1806
0
    while (*q && (*q != '\n') && (*q != '\r'))
1807
0
      q++;
1808
1809
0
    if (q > p)
1810
0
      {
1811
0
        q--;
1812
0
        while (q > p && g_ascii_isspace (*q))
1813
0
    q--;
1814
1815
0
        if (q > p)
1816
0
                g_ptr_array_add (uris, g_strndup (p, q - p + 1));
1817
0
            }
1818
0
        }
1819
0
      p = strchr (p, '\n');
1820
0
      if (p)
1821
0
  p++;
1822
0
    }
1823
1824
0
  g_ptr_array_add (uris, NULL);
1825
1826
0
  return (gchar **) g_ptr_array_free (uris, FALSE);
1827
0
}
1828
1829
/**
1830
 * g_filename_display_basename:
1831
 * @filename: (type filename): an absolute pathname in the
1832
 *     GLib file name encoding
1833
 *
1834
 * Returns the display basename for the particular filename, guaranteed
1835
 * to be valid UTF-8. The display name might not be identical to the filename,
1836
 * for instance there might be problems converting it to UTF-8, and some files
1837
 * can be translated in the display.
1838
 *
1839
 * If GLib cannot make sense of the encoding of @filename, as a last resort it 
1840
 * replaces unknown characters with U+FFFD, the Unicode replacement character.
1841
 * You can search the result for the UTF-8 encoding of this character (which is
1842
 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
1843
 * encoding.
1844
 *
1845
 * You must pass the whole absolute pathname to this functions so that
1846
 * translation of well known locations can be done.
1847
 *
1848
 * This function is preferred over g_filename_display_name() if you know the
1849
 * whole path, as it allows translation.
1850
 *
1851
 * Returns: a newly allocated string containing
1852
 *   a rendition of the basename of the filename in valid UTF-8
1853
 *
1854
 * Since: 2.6
1855
 **/
1856
gchar *
1857
g_filename_display_basename (const gchar *filename)
1858
0
{
1859
0
  char *basename;
1860
0
  char *display_name;
1861
1862
0
  g_return_val_if_fail (filename != NULL, NULL);
1863
  
1864
0
  basename = g_path_get_basename (filename);
1865
0
  display_name = g_filename_display_name (basename);
1866
0
  g_free (basename);
1867
0
  return display_name;
1868
0
}
1869
1870
/**
1871
 * g_filename_display_name:
1872
 * @filename: (type filename): a pathname hopefully in the
1873
 *     GLib file name encoding
1874
 * 
1875
 * Converts a filename into a valid UTF-8 string. The conversion is 
1876
 * not necessarily reversible, so you should keep the original around 
1877
 * and use the return value of this function only for display purposes.
1878
 * Unlike g_filename_to_utf8(), the result is guaranteed to be non-%NULL 
1879
 * even if the filename actually isn't in the GLib file name encoding.
1880
 *
1881
 * If GLib cannot make sense of the encoding of @filename, as a last resort it 
1882
 * replaces unknown characters with U+FFFD, the Unicode replacement character.
1883
 * You can search the result for the UTF-8 encoding of this character (which is
1884
 * "\357\277\275" in octal notation) to find out if @filename was in an invalid
1885
 * encoding.
1886
 *
1887
 * If you know the whole pathname of the file you should use
1888
 * g_filename_display_basename(), since that allows location-based
1889
 * translation of filenames.
1890
 *
1891
 * Returns: a newly allocated string containing
1892
 *   a rendition of the filename in valid UTF-8
1893
 *
1894
 * Since: 2.6
1895
 **/
1896
gchar *
1897
g_filename_display_name (const gchar *filename)
1898
0
{
1899
0
  gint i;
1900
0
  const gchar **charsets;
1901
0
  gchar *display_name = NULL;
1902
0
  gboolean is_utf8;
1903
 
1904
0
  is_utf8 = g_get_filename_charsets (&charsets);
1905
1906
0
  if (is_utf8)
1907
0
    {
1908
0
      if (g_utf8_validate (filename, -1, NULL))
1909
0
  display_name = g_strdup (filename);
1910
0
    }
1911
  
1912
0
  if (!display_name)
1913
0
    {
1914
      /* Try to convert from the filename charsets to UTF-8.
1915
       * Skip the first charset if it is UTF-8.
1916
       */
1917
0
      for (i = is_utf8 ? 1 : 0; charsets[i]; i++)
1918
0
  {
1919
0
    display_name = g_convert (filename, -1, "UTF-8", charsets[i], 
1920
0
            NULL, NULL, NULL);
1921
1922
0
    if (display_name)
1923
0
      break;
1924
0
  }
1925
0
    }
1926
  
1927
  /* if all conversions failed, we replace invalid UTF-8
1928
   * by a question mark
1929
   */
1930
0
  if (!display_name) 
1931
0
    display_name = g_utf8_make_valid (filename, -1);
1932
1933
0
  return display_name;
1934
0
}
1935
1936
#ifdef G_OS_WIN32
1937
1938
/* Binary compatibility versions. Not for newly compiled code. */
1939
1940
_GLIB_EXTERN gchar *g_filename_to_utf8_utf8   (const gchar  *opsysstring,
1941
                                               gssize        len,
1942
                                               gsize        *bytes_read,
1943
                                               gsize        *bytes_written,
1944
                                               GError      **error) G_GNUC_MALLOC;
1945
_GLIB_EXTERN gchar *g_filename_from_utf8_utf8 (const gchar  *utf8string,
1946
                                               gssize        len,
1947
                                               gsize        *bytes_read,
1948
                                               gsize        *bytes_written,
1949
                                               GError      **error) G_GNUC_MALLOC;
1950
_GLIB_EXTERN gchar *g_filename_from_uri_utf8  (const gchar  *uri,
1951
                                               gchar       **hostname,
1952
                                               GError      **error) G_GNUC_MALLOC;
1953
_GLIB_EXTERN gchar *g_filename_to_uri_utf8    (const gchar  *filename,
1954
                                               const gchar  *hostname,
1955
                                               GError      **error) G_GNUC_MALLOC;
1956
1957
gchar *
1958
g_filename_to_utf8_utf8 (const gchar *opsysstring,
1959
                         gssize       len,
1960
                         gsize       *bytes_read,
1961
                         gsize       *bytes_written,
1962
                         GError     **error)
1963
{
1964
  return g_filename_to_utf8 (opsysstring, len, bytes_read, bytes_written, error);
1965
}
1966
1967
gchar *
1968
g_filename_from_utf8_utf8 (const gchar *utf8string,
1969
                           gssize       len,
1970
                           gsize       *bytes_read,
1971
                           gsize       *bytes_written,
1972
                           GError     **error)
1973
{
1974
  return g_filename_from_utf8 (utf8string, len, bytes_read, bytes_written, error);
1975
}
1976
1977
gchar *
1978
g_filename_from_uri_utf8 (const gchar *uri,
1979
                          gchar      **hostname,
1980
                          GError     **error)
1981
{
1982
  return g_filename_from_uri (uri, hostname, error);
1983
}
1984
1985
gchar *
1986
g_filename_to_uri_utf8 (const gchar *filename,
1987
                        const gchar *hostname,
1988
                        GError     **error)
1989
{
1990
  return g_filename_to_uri (filename, hostname, error);
1991
}
1992
1993
#endif