Coverage Report

Created: 2025-06-10 07:27

/src/ghostpdl/pdf/pdf_utf8.c
Line
Count
Source (jump to first uncovered line)
1
/* Copyright (C) 2020-2023 Artifex Software, Inc.
2
   All Rights Reserved.
3
4
   This software is provided AS-IS with no warranty, either express or
5
   implied.
6
7
   This software is distributed under license and may not be copied,
8
   modified or distributed except as expressly authorized under the terms
9
   of the license contained in the file LICENSE in this distribution.
10
11
   Refer to licensing information at http://www.artifex.com or contact
12
   Artifex Software, Inc.,  39 Mesa Street, Suite 108A, San Francisco,
13
   CA 94129, USA, for further information.
14
*/
15
16
/* platform-specific string to UTF8 conversion routines */
17
18
/* This code is in its own file because it needs to be compiled with
19
 * a different set of compiler flags to normal when built for MS Windows.
20
 * We can't use the /Za flag with a file which includes windows.h; disabling
21
 * the language extensions breaks the compilation. See pdf/pdf.mak for the
22
 * build-specifics.
23
 */
24
25
#include "ghostpdf.h"
26
#include "pdf_utf8.h"
27
#include "pdf_types.h"
28
#include "pdf_stack.h"
29
30
#ifdef HAVE_LIBIDN
31
#  include <stringprep.h>
32
#  include <errno.h>
33
/* Convert a string from the current locale's character set to UTF-8.
34
 * <string> .locale_to_utf8 <string> */
35
int
36
locale_to_utf8(pdf_context *ctx, pdf_string *input, pdf_string **output)
37
{
38
    char *out = NULL;
39
    int code;
40
41
    out = stringprep_locale_to_utf8((const char *)input->data);
42
    if (out == NULL) {
43
        /* This function is intended to be used on strings whose
44
         * character set is unknown, so it's not an error if the
45
         * input contains invalid characters.  Just return the input
46
         * string unchanged.
47
         *
48
         * Sadly, EINVAL from stringprep_locale_to_utf8 can mean
49
         * either an invalid character set conversion (which we care
50
         * about), or an incomplete input string (which we don't).
51
         * For now, we ignore EINVAL; the right solution is probably
52
         * to not use stringprep_locale_to_utf8, and just call iconv
53
         * by hand. */
54
        if (errno == EILSEQ || errno == EINVAL)
55
            return 0;
56
57
        /* Other errors (like ENFILE) are real errors, which we
58
         * want to return to the user. */
59
        return_error(gs_error_ioerror);
60
    }
61
62
    code = pdfi_object_alloc(ctx, PDF_STRING, strlen(out), (pdf_obj **)output);
63
    if (code < 0)
64
        return code;
65
    pdfi_countup(*output);
66
    memcpy((*output)->data, out, strlen(out));
67
68
    free(out);
69
    return 0;
70
}
71
#else
72
#ifdef _MSC_VER
73
#include "windows_.h"
74
/* Convert a string from the current locale's character set to UTF-8.
75
 * Unfortunately, "current locale" can mean a few different things on
76
 * Windows -- we use the default ANSI code page, which does the right
77
 * thing for command-line arguments (like "-sPDFPassword=foo") and
78
 * for strings typed as input to gswin32.exe.  It doesn't work for
79
 * strings typed as input to gswin32c.exe, which are normally in the
80
 * default OEM code page instead.
81
 * <string> .locale_to_utf8 <string> */
82
int
83
locale_to_utf8(pdf_context *ctx, pdf_string *input, pdf_string **output)
84
{
85
#define LOCALE_TO_UTF8_BUFFER_SIZE 1024
86
    WCHAR wide_buffer[LOCALE_TO_UTF8_BUFFER_SIZE];
87
    char utf8_buffer[LOCALE_TO_UTF8_BUFFER_SIZE];
88
    int code, BytesWritten;
89
90
    *output = NULL;
91
92
    BytesWritten = MultiByteToWideChar(CP_ACP, 0, input->data, input->length,
93
        wide_buffer, LOCALE_TO_UTF8_BUFFER_SIZE);
94
    if (BytesWritten == 0)
95
        return_error(gs_error_ioerror);
96
97
    BytesWritten = WideCharToMultiByte(CP_UTF8, 0, wide_buffer, BytesWritten,
98
        utf8_buffer, LOCALE_TO_UTF8_BUFFER_SIZE, NULL, NULL);
99
    if (BytesWritten == 0)
100
        return_error(gs_error_ioerror);
101
102
    code = pdfi_object_alloc(ctx, PDF_STRING, BytesWritten, (pdf_obj **)output);
103
    if (code < 0)
104
        return code;
105
    pdfi_countup(*output);
106
    memcpy((*output)->data, utf8_buffer, BytesWritten);
107
108
    return 0;
109
#undef LOCALE_TO_UTF8_BUFFER_SIZE
110
}
111
#else
112
/* We have no known method to create a UTF-8 string. Just copy the input and pretend.
113
 */
114
int
115
locale_to_utf8(pdf_context *ctx, pdf_string *input, pdf_string **output)
116
0
{
117
0
    int code = 0;
118
119
0
    code = pdfi_object_alloc(ctx, PDF_STRING, input->length, (pdf_obj **)output);
120
0
    if (code < 0)
121
0
        return code;
122
0
    pdfi_countup(*output);
123
0
    memcpy((*output)->data, input->data, input->length);
124
125
0
    return 0;
126
0
}
127
#endif /* _MSC_VER */
128
#endif /* HAVE_LIBIDN */