/src/ghostpdl/pdf/pdf_utf8.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Copyright (C) 2020-2023 Artifex Software, Inc. |
2 | | All Rights Reserved. |
3 | | |
4 | | This software is provided AS-IS with no warranty, either express or |
5 | | implied. |
6 | | |
7 | | This software is distributed under license and may not be copied, |
8 | | modified or distributed except as expressly authorized under the terms |
9 | | of the license contained in the file LICENSE in this distribution. |
10 | | |
11 | | Refer to licensing information at http://www.artifex.com or contact |
12 | | Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco, |
13 | | CA 94129, USA, for further information. |
14 | | */ |
15 | | |
16 | | /* platform-specific string to UTF8 conversion routines */ |
17 | | |
18 | | /* This code is in its own file because it needs to be compiled with |
19 | | * a different set of compiler flags to normal when built for MS Windows. |
20 | | * We can't use the /Za flag with a file which includes windows.h; disabling |
21 | | * the language extensions breaks the compilation. See pdf/pdf.mak for the |
22 | | * build-specifics. |
23 | | */ |
24 | | |
25 | | #include "ghostpdf.h" |
26 | | #include "pdf_utf8.h" |
27 | | #include "pdf_types.h" |
28 | | #include "pdf_stack.h" |
29 | | |
30 | | #ifdef HAVE_LIBIDN |
31 | | # include <stringprep.h> |
32 | | # include <errno.h> |
33 | | /* Convert a string from the current locale's character set to UTF-8. |
34 | | * <string> .locale_to_utf8 <string> */ |
35 | | int |
36 | | locale_to_utf8(pdf_context *ctx, pdf_string *input, pdf_string **output) |
37 | | { |
38 | | char *out = NULL; |
39 | | int code; |
40 | | |
41 | | out = stringprep_locale_to_utf8((const char *)input->data); |
42 | | if (out == NULL) { |
43 | | /* This function is intended to be used on strings whose |
44 | | * character set is unknown, so it's not an error if the |
45 | | * input contains invalid characters. Just return the input |
46 | | * string unchanged. |
47 | | * |
48 | | * Sadly, EINVAL from stringprep_locale_to_utf8 can mean |
49 | | * either an invalid character set conversion (which we care |
50 | | * about), or an incomplete input string (which we don't). |
51 | | * For now, we ignore EINVAL; the right solution is probably |
52 | | * to not use stringprep_locale_to_utf8, and just call iconv |
53 | | * by hand. */ |
54 | | if (errno == EILSEQ || errno == EINVAL) |
55 | | return 0; |
56 | | |
57 | | /* Other errors (like ENFILE) are real errors, which we |
58 | | * want to return to the user. */ |
59 | | return_error(gs_error_ioerror); |
60 | | } |
61 | | |
62 | | code = pdfi_object_alloc(ctx, PDF_STRING, strlen(out), (pdf_obj **)output); |
63 | | if (code < 0) |
64 | | return code; |
65 | | pdfi_countup(*output); |
66 | | memcpy((*output)->data, out, strlen(out)); |
67 | | |
68 | | free(out); |
69 | | return 0; |
70 | | } |
71 | | #else |
72 | | #ifdef _MSC_VER |
73 | | #include "windows_.h" |
74 | | /* Convert a string from the current locale's character set to UTF-8. |
75 | | * Unfortunately, "current locale" can mean a few different things on |
76 | | * Windows -- we use the default ANSI code page, which does the right |
77 | | * thing for command-line arguments (like "-sPDFPassword=foo") and |
78 | | * for strings typed as input to gswin32.exe. It doesn't work for |
79 | | * strings typed as input to gswin32c.exe, which are normally in the |
80 | | * default OEM code page instead. |
81 | | * <string> .locale_to_utf8 <string> */ |
82 | | int |
83 | | locale_to_utf8(pdf_context *ctx, pdf_string *input, pdf_string **output) |
84 | | { |
85 | | #define LOCALE_TO_UTF8_BUFFER_SIZE 1024 |
86 | | WCHAR wide_buffer[LOCALE_TO_UTF8_BUFFER_SIZE]; |
87 | | char utf8_buffer[LOCALE_TO_UTF8_BUFFER_SIZE]; |
88 | | int code, BytesWritten; |
89 | | |
90 | | *output = NULL; |
91 | | |
92 | | BytesWritten = MultiByteToWideChar(CP_ACP, 0, input->data, input->length, |
93 | | wide_buffer, LOCALE_TO_UTF8_BUFFER_SIZE); |
94 | | if (BytesWritten == 0) |
95 | | return_error(gs_error_ioerror); |
96 | | |
97 | | BytesWritten = WideCharToMultiByte(CP_UTF8, 0, wide_buffer, BytesWritten, |
98 | | utf8_buffer, LOCALE_TO_UTF8_BUFFER_SIZE, NULL, NULL); |
99 | | if (BytesWritten == 0) |
100 | | return_error(gs_error_ioerror); |
101 | | |
102 | | code = pdfi_object_alloc(ctx, PDF_STRING, BytesWritten, (pdf_obj **)output); |
103 | | if (code < 0) |
104 | | return code; |
105 | | pdfi_countup(*output); |
106 | | memcpy((*output)->data, utf8_buffer, BytesWritten); |
107 | | |
108 | | return 0; |
109 | | #undef LOCALE_TO_UTF8_BUFFER_SIZE |
110 | | } |
111 | | #else |
112 | | /* We have no known method to create a UTF-8 string. Just copy the input and pretend. |
113 | | */ |
114 | | int |
115 | | locale_to_utf8(pdf_context *ctx, pdf_string *input, pdf_string **output) |
116 | 0 | { |
117 | 0 | int code = 0; |
118 | |
|
119 | 0 | code = pdfi_object_alloc(ctx, PDF_STRING, input->length, (pdf_obj **)output); |
120 | 0 | if (code < 0) |
121 | 0 | return code; |
122 | 0 | pdfi_countup(*output); |
123 | 0 | memcpy((*output)->data, input->data, input->length); |
124 | |
|
125 | 0 | return 0; |
126 | 0 | } |
127 | | #endif /* _MSC_VER */ |
128 | | #endif /* HAVE_LIBIDN */ |