/src/binutils-gdb/binutils/winduni.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* winduni.c -- unicode support for the windres program. |
2 | | Copyright (C) 1997-2023 Free Software Foundation, Inc. |
3 | | Written by Ian Lance Taylor, Cygnus Support. |
4 | | Rewritten by Kai Tietz, Onevision. |
5 | | |
6 | | This file is part of GNU Binutils. |
7 | | |
8 | | This program is free software; you can redistribute it and/or modify |
9 | | it under the terms of the GNU General Public License as published by |
10 | | the Free Software Foundation; either version 3 of the License, or |
11 | | (at your option) any later version. |
12 | | |
13 | | This program is distributed in the hope that it will be useful, |
14 | | but WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
16 | | GNU General Public License for more details. |
17 | | |
18 | | You should have received a copy of the GNU General Public License |
19 | | along with this program; if not, write to the Free Software |
20 | | Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA |
21 | | 02110-1301, USA. */ |
22 | | |
23 | | |
24 | | /* This file contains unicode support routines for the windres |
25 | | program. Ideally, we would have generic unicode support which |
26 | | would work on all systems. However, we don't. Instead, on a |
27 | | Windows host, we are prepared to call some Windows routines. This |
28 | | means that we will generate different output on Windows and Unix |
29 | | hosts, but that seems better than not really supporting unicode at |
30 | | all. */ |
31 | | |
32 | | #include "sysdep.h" |
33 | | #include "bfd.h" |
34 | | #include "libiberty.h" /* for xstrdup */ |
35 | | #include "bucomm.h" |
36 | | /* Must be include before windows.h and winnls.h. */ |
37 | | #if defined (_WIN32) || defined (__CYGWIN__) |
38 | | #include <windows.h> |
39 | | #include <winnls.h> |
40 | | #endif |
41 | | #include "winduni.h" |
42 | | #include "safe-ctype.h" |
43 | | |
44 | | #if HAVE_ICONV |
45 | | #include <iconv.h> |
46 | | #endif |
47 | | |
48 | | static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type); |
49 | | static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type); |
50 | | static int unichar_isascii (const unichar *, rc_uint_type); |
51 | | |
52 | | /* Convert an ASCII string to a unicode string. We just copy it, |
53 | | expanding chars to shorts, rather than doing something intelligent. */ |
54 | | |
55 | | #if !defined (_WIN32) && !defined (__CYGWIN__) |
56 | | |
/* Table mapping Windows codepage numbers to the encoding names handed to
   iconv (see wind_iconv_cp and wind_find_codepage_info, which scan this
   table).  Only compiled on non-Windows hosts.  The list is terminated by
   an entry whose codepage is (rc_uint_type) -1 and whose name is NULL.  */
static local_iconv_map codepages[] =
{
  { 0, "cp1252" },
  { 1, "WINDOWS-1252" },
  { 437, "MS-ANSI" },
  { 737, "MS-GREEK" },
  { 775, "WINBALTRIM" },
  { 850, "MS-ANSI" },
  { 852, "MS-EE" },
  { 857, "MS-TURK" },
  { 862, "CP862" },
  { 864, "CP864" },
  { 866, "MS-CYRL" },
  { 874, "WINDOWS-874" },
  { 932, "CP932" },
  { 936, "CP936" },
  { 949, "CP949" },
  { 950, "CP950" },
  { 1250, "WINDOWS-1250" },
  { 1251, "WINDOWS-1251" },
  { 1252, "WINDOWS-1252" },
  { 1253, "WINDOWS-1253" },
  { 1254, "WINDOWS-1254" },
  { 1255, "WINDOWS-1255" },
  { 1256, "WINDOWS-1256" },
  { 1257, "WINDOWS-1257" },
  { 1258, "WINDOWS-1258" },
  { CP_UTF7, "UTF-7" },
  { CP_UTF8, "UTF-8" },
  { CP_UTF16, "UTF-16LE" },
  { (rc_uint_type) -1, NULL }
};
90 | | |
/* Languages supported.  Each entry is { id, doscp, wincp, name, country }
   and is looked up by id in wind_find_language_by_id.  Only compiled on
   non-Windows hosts.  The list is terminated by an entry whose id is
   (unsigned) -1 and whose strings are NULL.
   NOTE(review): entry 0x081A lists wincp 1252 while the other entries
   with doscp 852 use 1250 -- confirm whether that is intended.  */
static const wind_language_t languages[] =
{
  { 0x0000, 437, 1252, "Neutral", "Neutral" },
  { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" }, { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
  { 0x0403, 850, 1252, "Catalan", "Spain" }, { 0x0404, 950, 950, "Chinese", "Taiwan" },
  { 0x0405, 852, 1250, "Czech", "Czech Republic" }, { 0x0406, 850, 1252, "Danish", "Denmark" },
  { 0x0407, 850, 1252, "German", "Germany" }, { 0x0408, 737, 1253, "Greek", "Greece" },
  { 0x0409, 437, 1252, "English", "United States" }, { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
  { 0x040B, 850, 1252, "Finnish", "Finland" }, { 0x040C, 850, 1252, "French", "France" },
  { 0x040D, 862, 1255, "Hebrew", "Israel" }, { 0x040E, 852, 1250, "Hungarian", "Hungary" },
  { 0x040F, 850, 1252, "Icelandic", "Iceland" }, { 0x0410, 850, 1252, "Italian", "Italy" },
  { 0x0411, 932, 932, "Japanese", "Japan" }, { 0x0412, 949, 949, "Korean", "Korea (south)" },
  { 0x0413, 850, 1252, "Dutch", "Netherlands" }, { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
  { 0x0415, 852, 1250, "Polish", "Poland" }, { 0x0416, 850, 1252, "Portuguese", "Brazil" },
  { 0x0418, 852, 1250, "Romanian", "Romania" }, { 0x0419, 866, 1251, "Russian", "Russia" },
  { 0x041A, 852, 1250, "Croatian", "Croatia" }, { 0x041B, 852, 1250, "Slovak", "Slovakia" },
  { 0x041C, 852, 1250, "Albanian", "Albania" }, { 0x041D, 850, 1252, "Swedish", "Sweden" },
  { 0x041E, 874, 874, "Thai", "Thailand" }, { 0x041F, 857, 1254, "Turkish", "Turkey" },
  { 0x0421, 850, 1252, "Indonesian", "Indonesia" }, { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
  { 0x0423, 866, 1251, "Belarusian", "Belarus" }, { 0x0424, 852, 1250, "Slovene", "Slovenia" },
  { 0x0425, 775, 1257, "Estonian", "Estonia" }, { 0x0426, 775, 1257, "Latvian", "Latvia" },
  { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
  { 0x0429, 864, 1256, "Arabic", "Farsi" }, { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
  { 0x042D, 850, 1252, "Basque", "Spain" },
  { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
  { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
  { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
  { 0x043C, 437, 1252, "Irish", "Ireland" },
  { 0x043E, 850, 1252, "Malay", "Malaysia" },
  { 0x0801, 864, 1256, "Arabic", "Iraq" },
  { 0x0804, 936, 936, "Chinese (People's republic of China)", "People's republic of China" },
  { 0x0807, 850, 1252, "German", "Switzerland" },
  { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
  { 0x080C, 850, 1252, "French", "Belgium" },
  { 0x0810, 850, 1252, "Italian", "Switzerland" },
  { 0x0813, 850, 1252, "Dutch", "Belgium" }, { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
  { 0x0816, 850, 1252, "Portuguese", "Portugal" },
  { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
  { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
  { 0x0C01, 864, 1256, "Arabic", "Egypt" },
  { 0x0C04, 950, 950, "Chinese", "Hong Kong" },
  { 0x0C07, 850, 1252, "German", "Austria" },
  { 0x0C09, 850, 1252, "English", "Australia" }, { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
  { 0x0C0C, 850, 1252, "French", "Canada"},
  { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
  { 0x1001, 864, 1256, "Arabic", "Libya" },
  { 0x1004, 936, 936, "Chinese", "Singapore" },
  { 0x1007, 850, 1252, "German", "Luxembourg" },
  { 0x1009, 850, 1252, "English", "Canada" },
  { 0x100A, 850, 1252, "Spanish", "Guatemala" },
  { 0x100C, 850, 1252, "French", "Switzerland" },
  { 0x1401, 864, 1256, "Arabic", "Algeria" },
  { 0x1407, 850, 1252, "German", "Liechtenstein" },
  { 0x1409, 850, 1252, "English", "New Zealand" }, { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
  { 0x140C, 850, 1252, "French", "Luxembourg" },
  { 0x1801, 864, 1256, "Arabic", "Morocco" },
  { 0x1809, 850, 1252, "English", "Ireland" }, { 0x180A, 850, 1252, "Spanish", "Panama" },
  { 0x180C, 850, 1252, "French", "Monaco" },
  { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
  { 0x1C09, 437, 1252, "English", "South Africa" }, { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
  { 0x2001, 864, 1256, "Arabic", "Oman" },
  { 0x2009, 850, 1252, "English", "Jamaica" }, { 0x200A, 850, 1252, "Spanish", "Venezuela" },
  { 0x2401, 864, 1256, "Arabic", "Yemen" },
  { 0x2409, 850, 1252, "English", "Caribbean" }, { 0x240A, 850, 1252, "Spanish", "Colombia" },
  { 0x2801, 864, 1256, "Arabic", "Syria" },
  { 0x2809, 850, 1252, "English", "Belize" }, { 0x280A, 850, 1252, "Spanish", "Peru" },
  { 0x2C01, 864, 1256, "Arabic", "Jordan" },
  { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
  { 0x3001, 864, 1256, "Arabic", "Lebanon" },
  { 0x3009, 437, 1252, "English", "Zimbabwe" }, { 0x300A, 850, 1252, "Spanish", "Ecuador" },
  { 0x3401, 864, 1256, "Arabic", "Kuwait" },
  { 0x3409, 437, 1252, "English", "Philippines" }, { 0x340A, 850, 1252, "Spanish", "Chile" },
  { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
  { 0x380A, 850, 1252, "Spanish", "Uruguay" },
  { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
  { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
  { 0x4001, 864, 1256, "Arabic", "Qatar" },
  { 0x400A, 850, 1252, "Spanish", "Bolivia" },
  { 0x440A, 850, 1252, "Spanish", "El Salvador" },
  { 0x480A, 850, 1252, "Spanish", "Honduras" },
  { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
  { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
  { (unsigned) -1, 0, 0, NULL, NULL }
};
176 | | |
177 | | #endif |
178 | | |
/* Specifies the default codepage to be used for unicode
   transformations.  By default this is CP_ACP.  It is only defined
   here; nothing else in this file reads it.  */
rc_uint_type wind_default_codepage = CP_ACP;

/* Specifies the currently used codepage for unicode
   transformations.  By default this is CP_ACP.  This is the codepage
   used by unicode_from_ascii, unicode_from_ascii_len and
   ascii_from_unicode.  */
rc_uint_type wind_current_codepage = CP_ACP;
186 | | |
187 | | /* Convert an ASCII string to a unicode string. We just copy it, |
188 | | expanding chars to shorts, rather than doing something intelligent. */ |
189 | | |
190 | | void |
191 | | unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii) |
192 | 0 | { |
193 | 0 | unicode_from_codepage (length, unicode, ascii, wind_current_codepage); |
194 | 0 | } |
195 | | |
196 | | /* Convert an ASCII string with length A_LENGTH to a unicode string. We just |
197 | | copy it, expanding chars to shorts, rather than doing something intelligent. |
198 | | This routine converts also \0 within a string. */ |
199 | | |
200 | | void |
201 | | unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length) |
202 | 0 | { |
203 | 0 | char *tmp, *p; |
204 | 0 | rc_uint_type tlen, elen, idx = 0; |
205 | |
|
206 | 0 | *unicode = NULL; |
207 | |
|
208 | 0 | if (!a_length) |
209 | 0 | { |
210 | 0 | if (length) |
211 | 0 | *length = 0; |
212 | 0 | return; |
213 | 0 | } |
214 | | |
215 | | /* Make sure we have zero terminated string. */ |
216 | 0 | p = tmp = (char *) xmalloc (a_length + 1); |
217 | 0 | memcpy (tmp, ascii, a_length); |
218 | 0 | tmp[a_length] = 0; |
219 | |
|
220 | 0 | while (a_length > 0) |
221 | 0 | { |
222 | 0 | unichar *utmp, *up; |
223 | |
|
224 | 0 | tlen = strlen (p); |
225 | |
|
226 | 0 | if (tlen > a_length) |
227 | 0 | tlen = a_length; |
228 | 0 | if (*p == 0) |
229 | 0 | { |
230 | | /* Make room for one more character. */ |
231 | 0 | utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1)); |
232 | 0 | if (idx > 0) |
233 | 0 | { |
234 | 0 | memcpy (utmp, *unicode, idx * sizeof (unichar)); |
235 | 0 | } |
236 | 0 | *unicode = utmp; |
237 | 0 | utmp[idx++] = 0; |
238 | 0 | --a_length; |
239 | 0 | p++; |
240 | 0 | continue; |
241 | 0 | } |
242 | 0 | utmp = NULL; |
243 | 0 | elen = 0; |
244 | 0 | elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0); |
245 | 0 | if (elen) |
246 | 0 | { |
247 | 0 | utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2)); |
248 | 0 | wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen); |
249 | 0 | elen /= sizeof (unichar); |
250 | 0 | elen --; |
251 | 0 | } |
252 | 0 | else |
253 | 0 | { |
254 | | /* Make room for one more character. */ |
255 | 0 | utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1)); |
256 | 0 | if (idx > 0) |
257 | 0 | { |
258 | 0 | memcpy (utmp, *unicode, idx * sizeof (unichar)); |
259 | 0 | } |
260 | 0 | *unicode = utmp; |
261 | 0 | utmp[idx++] = ((unichar) *p) & 0xff; |
262 | 0 | --a_length; |
263 | 0 | p++; |
264 | 0 | continue; |
265 | 0 | } |
266 | 0 | p += tlen; |
267 | 0 | a_length -= tlen; |
268 | |
|
269 | 0 | up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen)); |
270 | 0 | if (idx > 0) |
271 | 0 | memcpy (up, *unicode, idx * sizeof (unichar)); |
272 | |
|
273 | 0 | *unicode = up; |
274 | 0 | if (elen) |
275 | 0 | memcpy (&up[idx], utmp, sizeof (unichar) * elen); |
276 | |
|
277 | 0 | idx += elen; |
278 | 0 | } |
279 | |
|
280 | 0 | if (length) |
281 | 0 | *length = idx; |
282 | |
|
283 | 0 | free (tmp); |
284 | 0 | } |
285 | | |
286 | | /* Convert an unicode string to an ASCII string. We just copy it, |
287 | | shrink shorts to chars, rather than doing something intelligent. |
288 | | Shorts with not within the char range are replaced by '_'. */ |
289 | | |
290 | | void |
291 | | ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii) |
292 | 0 | { |
293 | 0 | codepage_from_unicode (length, unicode, ascii, wind_current_codepage); |
294 | 0 | } |
295 | | |
296 | | /* Print the unicode string UNICODE to the file E. LENGTH is the |
297 | | number of characters to print, or -1 if we should print until the |
298 | | end of the string. FIXME: On a Windows host, we should be calling |
299 | | some Windows function, probably WideCharToMultiByte. */ |
300 | | |
301 | | void |
302 | | unicode_print (FILE *e, const unichar *unicode, rc_uint_type length) |
303 | 0 | { |
304 | 0 | while (1) |
305 | 0 | { |
306 | 0 | unichar ch; |
307 | |
|
308 | 0 | if (length == 0) |
309 | 0 | return; |
310 | 0 | if ((bfd_signed_vma) length > 0) |
311 | 0 | --length; |
312 | |
|
313 | 0 | ch = *unicode; |
314 | |
|
315 | 0 | if (ch == 0 && (bfd_signed_vma) length < 0) |
316 | 0 | return; |
317 | | |
318 | 0 | ++unicode; |
319 | |
|
320 | 0 | if ((ch & 0x7f) == ch) |
321 | 0 | { |
322 | 0 | if (ch == '\\') |
323 | 0 | fputs ("\\\\", e); |
324 | 0 | else if (ch == '"') |
325 | 0 | fputs ("\"\"", e); |
326 | 0 | else if (ISPRINT (ch)) |
327 | 0 | putc (ch, e); |
328 | 0 | else |
329 | 0 | { |
330 | 0 | switch (ch) |
331 | 0 | { |
332 | 0 | case ESCAPE_A: |
333 | 0 | fputs ("\\a", e); |
334 | 0 | break; |
335 | | |
336 | 0 | case ESCAPE_B: |
337 | 0 | fputs ("\\b", e); |
338 | 0 | break; |
339 | | |
340 | 0 | case ESCAPE_F: |
341 | 0 | fputs ("\\f", e); |
342 | 0 | break; |
343 | | |
344 | 0 | case ESCAPE_N: |
345 | 0 | fputs ("\\n", e); |
346 | 0 | break; |
347 | | |
348 | 0 | case ESCAPE_R: |
349 | 0 | fputs ("\\r", e); |
350 | 0 | break; |
351 | | |
352 | 0 | case ESCAPE_T: |
353 | 0 | fputs ("\\t", e); |
354 | 0 | break; |
355 | | |
356 | 0 | case ESCAPE_V: |
357 | 0 | fputs ("\\v", e); |
358 | 0 | break; |
359 | | |
360 | 0 | default: |
361 | 0 | fprintf (e, "\\%03o", (unsigned int) ch); |
362 | 0 | break; |
363 | 0 | } |
364 | 0 | } |
365 | 0 | } |
366 | 0 | else if ((ch & 0xff) == ch) |
367 | 0 | fprintf (e, "\\%03o", (unsigned int) ch); |
368 | 0 | else |
369 | 0 | fprintf (e, "\\x%04x", (unsigned int) ch); |
370 | 0 | } |
371 | 0 | } |
372 | | |
373 | | /* Print a unicode string to a file. */ |
374 | | |
375 | | void |
376 | | ascii_print (FILE *e, const char *s, rc_uint_type length) |
377 | 0 | { |
378 | 0 | while (1) |
379 | 0 | { |
380 | 0 | char ch; |
381 | |
|
382 | 0 | if (length == 0) |
383 | 0 | return; |
384 | 0 | if ((bfd_signed_vma) length > 0) |
385 | 0 | --length; |
386 | |
|
387 | 0 | ch = *s; |
388 | |
|
389 | 0 | if (ch == 0 && (bfd_signed_vma) length < 0) |
390 | 0 | return; |
391 | | |
392 | 0 | ++s; |
393 | |
|
394 | 0 | if ((ch & 0x7f) == ch) |
395 | 0 | { |
396 | 0 | if (ch == '\\') |
397 | 0 | fputs ("\\\\", e); |
398 | 0 | else if (ch == '"') |
399 | 0 | fputs ("\"\"", e); |
400 | 0 | else if (ISPRINT (ch)) |
401 | 0 | putc (ch, e); |
402 | 0 | else |
403 | 0 | { |
404 | 0 | switch (ch) |
405 | 0 | { |
406 | 0 | case ESCAPE_A: |
407 | 0 | fputs ("\\a", e); |
408 | 0 | break; |
409 | | |
410 | 0 | case ESCAPE_B: |
411 | 0 | fputs ("\\b", e); |
412 | 0 | break; |
413 | | |
414 | 0 | case ESCAPE_F: |
415 | 0 | fputs ("\\f", e); |
416 | 0 | break; |
417 | | |
418 | 0 | case ESCAPE_N: |
419 | 0 | fputs ("\\n", e); |
420 | 0 | break; |
421 | | |
422 | 0 | case ESCAPE_R: |
423 | 0 | fputs ("\\r", e); |
424 | 0 | break; |
425 | | |
426 | 0 | case ESCAPE_T: |
427 | 0 | fputs ("\\t", e); |
428 | 0 | break; |
429 | | |
430 | 0 | case ESCAPE_V: |
431 | 0 | fputs ("\\v", e); |
432 | 0 | break; |
433 | | |
434 | 0 | default: |
435 | 0 | fprintf (e, "\\%03o", (unsigned int) ch); |
436 | 0 | break; |
437 | 0 | } |
438 | 0 | } |
439 | 0 | } |
440 | 0 | else |
441 | 0 | fprintf (e, "\\%03o", (unsigned int) ch & 0xff); |
442 | 0 | } |
443 | 0 | } |
444 | | |
445 | | rc_uint_type |
446 | | unichar_len (const unichar *unicode) |
447 | 0 | { |
448 | 0 | rc_uint_type r = 0; |
449 | |
|
450 | 0 | if (unicode) |
451 | 0 | while (unicode[r] != 0) |
452 | 0 | r++; |
453 | 0 | else |
454 | 0 | --r; |
455 | 0 | return r; |
456 | 0 | } |
457 | | |
458 | | unichar * |
459 | | unichar_dup (const unichar *unicode) |
460 | 0 | { |
461 | 0 | unichar *r; |
462 | 0 | int len; |
463 | |
|
464 | 0 | if (! unicode) |
465 | 0 | return NULL; |
466 | 0 | for (len = 0; unicode[len] != 0; ++len) |
467 | 0 | ; |
468 | 0 | ++len; |
469 | 0 | r = ((unichar *) res_alloc (len * sizeof (unichar))); |
470 | 0 | memcpy (r, unicode, len * sizeof (unichar)); |
471 | 0 | return r; |
472 | 0 | } |
473 | | |
474 | | unichar * |
475 | | unichar_dup_uppercase (const unichar *u) |
476 | 0 | { |
477 | 0 | unichar *r = unichar_dup (u); |
478 | 0 | int i; |
479 | |
|
480 | 0 | if (! r) |
481 | 0 | return NULL; |
482 | | |
483 | 0 | for (i = 0; r[i] != 0; ++i) |
484 | 0 | { |
485 | 0 | if (r[i] >= 'a' && r[i] <= 'z') |
486 | 0 | r[i] &= 0xdf; |
487 | 0 | } |
488 | 0 | return r; |
489 | 0 | } |
490 | | |
491 | | static int |
492 | | unichar_isascii (const unichar *u, rc_uint_type len) |
493 | 0 | { |
494 | 0 | rc_uint_type i; |
495 | |
|
496 | 0 | if ((bfd_signed_vma) len < 0) |
497 | 0 | { |
498 | 0 | if (u) |
499 | 0 | len = (rc_uint_type) unichar_len (u); |
500 | 0 | else |
501 | 0 | len = 0; |
502 | 0 | } |
503 | |
|
504 | 0 | for (i = 0; i < len; i++) |
505 | 0 | if ((u[i] & 0xff80) != 0) |
506 | 0 | return 0; |
507 | 0 | return 1; |
508 | 0 | } |
509 | | |
510 | | void |
511 | | unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len) |
512 | 0 | { |
513 | 0 | if (! unichar_isascii (u, len)) |
514 | 0 | fputc ('L', e); |
515 | 0 | fputc ('"', e); |
516 | 0 | unicode_print (e, u, len); |
517 | 0 | fputc ('"', e); |
518 | 0 | } |
519 | | |
520 | | int |
521 | | unicode_is_valid_codepage (rc_uint_type cp) |
522 | 0 | { |
523 | 0 | if ((cp & 0xffff) != cp) |
524 | 0 | return 0; |
525 | 0 | if (cp == CP_UTF16 || cp == CP_ACP) |
526 | 0 | return 1; |
527 | | |
528 | 0 | #if !defined (_WIN32) && !defined (__CYGWIN__) |
529 | 0 | if (! wind_find_codepage_info (cp)) |
530 | 0 | return 0; |
531 | 0 | return 1; |
532 | | #else |
533 | | return !! IsValidCodePage ((UINT) cp); |
534 | | #endif |
535 | 0 | } |
536 | | |
537 | | #if defined (_WIN32) || defined (__CYGWIN__) |
538 | | |
539 | | #define max_cp_string_len 6 |
540 | | |
541 | | static unsigned int |
542 | | codepage_from_langid (unsigned short langid) |
543 | | { |
544 | | char cp_string [max_cp_string_len]; |
545 | | int c; |
546 | | |
547 | | memset (cp_string, 0, max_cp_string_len); |
548 | | /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion, |
549 | | but is unavailable on Win95. */ |
550 | | c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), |
551 | | LOCALE_IDEFAULTANSICODEPAGE, |
552 | | cp_string, max_cp_string_len); |
553 | | /* If codepage data for an LCID is not installed on users's system, |
554 | | GetLocaleInfo returns an empty string. Fall back to system ANSI |
555 | | default. */ |
556 | | if (c == 0) |
557 | | return CP_ACP; |
558 | | return strtoul (cp_string, 0, 10); |
559 | | } |
560 | | |
561 | | static unsigned int |
562 | | wincodepage_from_langid (unsigned short langid) |
563 | | { |
564 | | char cp_string [max_cp_string_len]; |
565 | | int c; |
566 | | |
567 | | memset (cp_string, 0, max_cp_string_len); |
568 | | /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion, |
569 | | but is unavailable on Win95. */ |
570 | | c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), |
571 | | LOCALE_IDEFAULTCODEPAGE, |
572 | | cp_string, max_cp_string_len); |
573 | | /* If codepage data for an LCID is not installed on users's system, |
574 | | GetLocaleInfo returns an empty string. Fall back to system ANSI |
575 | | default. */ |
576 | | if (c == 0) |
577 | | return CP_OEM; |
578 | | return strtoul (cp_string, 0, 10); |
579 | | } |
580 | | |
581 | | static char * |
582 | | lang_from_langid (unsigned short langid) |
583 | | { |
584 | | char cp_string[261]; |
585 | | int c; |
586 | | |
587 | | memset (cp_string, 0, 261); |
588 | | c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), |
589 | | LOCALE_SENGLANGUAGE, |
590 | | cp_string, 260); |
591 | | /* If codepage data for an LCID is not installed on users's system, |
592 | | GetLocaleInfo returns an empty string. Fall back to system ANSI |
593 | | default. */ |
594 | | if (c == 0) |
595 | | strcpy (cp_string, "Neutral"); |
596 | | return xstrdup (cp_string); |
597 | | } |
598 | | |
599 | | static char * |
600 | | country_from_langid (unsigned short langid) |
601 | | { |
602 | | char cp_string[261]; |
603 | | int c; |
604 | | |
605 | | memset (cp_string, 0, 261); |
606 | | c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT), |
607 | | LOCALE_SENGCOUNTRY, |
608 | | cp_string, 260); |
609 | | /* If codepage data for an LCID is not installed on users's system, |
610 | | GetLocaleInfo returns an empty string. Fall back to system ANSI |
611 | | default. */ |
612 | | if (c == 0) |
613 | | strcpy (cp_string, "Neutral"); |
614 | | return xstrdup (cp_string); |
615 | | } |
616 | | |
617 | | #endif |
618 | | |
619 | | const wind_language_t * |
620 | | wind_find_language_by_id (unsigned id) |
621 | 0 | { |
622 | 0 | #if !defined (_WIN32) && !defined (__CYGWIN__) |
623 | 0 | int i; |
624 | |
|
625 | 0 | if (! id) |
626 | 0 | return NULL; |
627 | 0 | for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++) |
628 | 0 | ; |
629 | 0 | if (languages[i].id == id) |
630 | 0 | return &languages[i]; |
631 | 0 | return NULL; |
632 | | #else |
633 | | static wind_language_t wl; |
634 | | |
635 | | wl.id = id; |
636 | | wl.doscp = codepage_from_langid ((unsigned short) id); |
637 | | wl.wincp = wincodepage_from_langid ((unsigned short) id); |
638 | | wl.name = lang_from_langid ((unsigned short) id); |
639 | | wl.country = country_from_langid ((unsigned short) id); |
640 | | |
641 | | return & wl; |
642 | | #endif |
643 | 0 | } |
644 | | |
645 | | const local_iconv_map * |
646 | | wind_find_codepage_info (unsigned cp) |
647 | 0 | { |
648 | 0 | #if !defined (_WIN32) && !defined (__CYGWIN__) |
649 | 0 | int i; |
650 | |
|
651 | 0 | for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++) |
652 | 0 | ; |
653 | 0 | if (codepages[i].codepage == (rc_uint_type) -1) |
654 | 0 | return NULL; |
655 | 0 | return &codepages[i]; |
656 | | #else |
657 | | static local_iconv_map lim; |
658 | | if (!unicode_is_valid_codepage (cp)) |
659 | | return NULL; |
660 | | lim.codepage = cp; |
661 | | lim.iconv_name = ""; |
662 | | return & lim; |
663 | | #endif |
664 | 0 | } |
665 | | |
666 | | /* Convert an Codepage string to a unicode string. */ |
667 | | |
668 | | void |
669 | | unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp) |
670 | 0 | { |
671 | 0 | rc_uint_type len; |
672 | |
|
673 | 0 | len = wind_MultiByteToWideChar (cp, src, NULL, 0); |
674 | 0 | if (len) |
675 | 0 | { |
676 | 0 | *u = ((unichar *) res_alloc (len)); |
677 | 0 | wind_MultiByteToWideChar (cp, src, *u, len); |
678 | 0 | } |
679 | | /* Discount the trailing '/0'. If MultiByteToWideChar failed, |
680 | | this will set *length to -1. */ |
681 | 0 | len -= sizeof (unichar); |
682 | |
|
683 | 0 | if (length != NULL) |
684 | 0 | *length = len / sizeof (unichar); |
685 | 0 | } |
686 | | |
687 | | /* Convert an unicode string to an codepage string. */ |
688 | | |
689 | | void |
690 | | codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp) |
691 | 0 | { |
692 | 0 | rc_uint_type len; |
693 | |
|
694 | 0 | len = wind_WideCharToMultiByte (cp, unicode, NULL, 0); |
695 | 0 | if (len) |
696 | 0 | { |
697 | 0 | *ascii = (char *) res_alloc (len * sizeof (char)); |
698 | 0 | wind_WideCharToMultiByte (cp, unicode, *ascii, len); |
699 | 0 | } |
700 | | /* Discount the trailing '/0'. If MultiByteToWideChar failed, |
701 | | this will set *length to -1. */ |
702 | 0 | len--; |
703 | |
|
704 | 0 | if (length != NULL) |
705 | 0 | *length = len; |
706 | 0 | } |
707 | | |
708 | | #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__) |
709 | | static int |
710 | | iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d) |
711 | 0 | { |
712 | 0 | int i; |
713 | |
|
714 | 0 | for (i = 1; i <= 32; i++) |
715 | 0 | { |
716 | 0 | char *tmp_d = d; |
717 | 0 | ICONV_CONST char *tmp_s = s; |
718 | 0 | size_t ret; |
719 | 0 | size_t s_left = (size_t) i; |
720 | 0 | size_t d_left = (size_t) d_len; |
721 | |
|
722 | 0 | ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left); |
723 | |
|
724 | 0 | if (ret != (size_t) -1) |
725 | 0 | { |
726 | 0 | *n_s = tmp_s; |
727 | 0 | *n_d = tmp_d; |
728 | 0 | return 0; |
729 | 0 | } |
730 | 0 | } |
731 | | |
732 | 0 | return 1; |
733 | 0 | } |
734 | | |
735 | | static const char * |
736 | | wind_iconv_cp (rc_uint_type cp) |
737 | 0 | { |
738 | 0 | const local_iconv_map *lim = wind_find_codepage_info (cp); |
739 | |
|
740 | 0 | if (!lim) |
741 | 0 | return NULL; |
742 | 0 | return lim->iconv_name; |
743 | 0 | } |
744 | | #endif /* HAVE_ICONV */ |
745 | | |
746 | | static rc_uint_type |
747 | | wind_MultiByteToWideChar (rc_uint_type cp, const char *mb, |
748 | | unichar *u, rc_uint_type u_len) |
749 | 0 | { |
750 | 0 | rc_uint_type ret = 0; |
751 | |
|
752 | | #if defined (_WIN32) || defined (__CYGWIN__) |
753 | | rc_uint_type conv_flags = MB_PRECOMPOSED; |
754 | | |
755 | | /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8. |
756 | | MultiByteToWideChar will set the last error to |
757 | | ERROR_INVALID_FLAGS if we do. */ |
758 | | if (cp == CP_UTF8 || cp == CP_UTF7) |
759 | | conv_flags = 0; |
760 | | |
761 | | ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags, |
762 | | mb, -1, u, u_len); |
763 | | /* Convert to bytes. */ |
764 | | ret *= sizeof (unichar); |
765 | | |
766 | | #elif defined (HAVE_ICONV) |
767 | 0 | int first = 1; |
768 | 0 | char tmp[32]; |
769 | 0 | char *p_tmp; |
770 | 0 | const char *iconv_name = wind_iconv_cp (cp); |
771 | |
|
772 | 0 | if (!mb || !iconv_name) |
773 | 0 | return 0; |
774 | 0 | iconv_t cd = iconv_open ("UTF-16LE", iconv_name); |
775 | |
|
776 | 0 | while (1) |
777 | 0 | { |
778 | 0 | int iret; |
779 | 0 | const char *n_mb = ""; |
780 | 0 | char *n_tmp = ""; |
781 | |
|
782 | 0 | p_tmp = tmp; |
783 | 0 | iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp); |
784 | 0 | if (first) |
785 | 0 | { |
786 | 0 | first = 0; |
787 | 0 | continue; |
788 | 0 | } |
789 | 0 | if (!iret) |
790 | 0 | { |
791 | 0 | size_t l_tmp = (size_t) (n_tmp - p_tmp); |
792 | |
|
793 | 0 | if (u) |
794 | 0 | { |
795 | 0 | if ((size_t) u_len < l_tmp) |
796 | 0 | break; |
797 | 0 | memcpy (u, tmp, l_tmp); |
798 | 0 | u += l_tmp/2; |
799 | 0 | u_len -= l_tmp; |
800 | 0 | } |
801 | 0 | ret += l_tmp; |
802 | 0 | } |
803 | 0 | else |
804 | 0 | break; |
805 | 0 | if (tmp[0] == 0 && tmp[1] == 0) |
806 | 0 | break; |
807 | 0 | mb = n_mb; |
808 | 0 | } |
809 | 0 | iconv_close (cd); |
810 | | #else |
811 | | if (cp) |
812 | | ret = 0; |
813 | | ret = strlen (mb) + 1; |
814 | | ret *= sizeof (unichar); |
815 | | if (u != NULL && u_len != 0) |
816 | | { |
817 | | do |
818 | | { |
819 | | *u++ = ((unichar) *mb) & 0xff; |
820 | | --u_len; mb++; |
821 | | } |
822 | | while (u_len != 0 && mb[-1] != 0); |
823 | | } |
824 | | if (u != NULL && u_len != 0) |
825 | | *u = 0; |
826 | | #endif |
827 | 0 | return ret; |
828 | 0 | } |
829 | | |
830 | | static rc_uint_type |
831 | | wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len) |
832 | 0 | { |
833 | 0 | rc_uint_type ret = 0; |
834 | | #if defined (_WIN32) || defined (__CYGWIN__) |
835 | | WINBOOL used_def = false; |
836 | | |
837 | | ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len, |
838 | | NULL, & used_def); |
839 | | #elif defined (HAVE_ICONV) |
840 | 0 | int first = 1; |
841 | 0 | char tmp[32]; |
842 | 0 | char *p_tmp; |
843 | 0 | const char *iconv_name = wind_iconv_cp (cp); |
844 | |
|
845 | 0 | if (!u || !iconv_name) |
846 | 0 | return 0; |
847 | 0 | iconv_t cd = iconv_open (iconv_name, "UTF-16LE"); |
848 | |
|
849 | 0 | while (1) |
850 | 0 | { |
851 | 0 | int iret; |
852 | 0 | const char *n_u = ""; |
853 | 0 | char *n_tmp = ""; |
854 | |
|
855 | 0 | p_tmp = tmp; |
856 | 0 | iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp); |
857 | 0 | if (first) |
858 | 0 | { |
859 | 0 | first = 0; |
860 | 0 | continue; |
861 | 0 | } |
862 | 0 | if (!iret) |
863 | 0 | { |
864 | 0 | size_t l_tmp = (size_t) (n_tmp - p_tmp); |
865 | |
|
866 | 0 | if (mb) |
867 | 0 | { |
868 | 0 | if ((size_t) mb_len < l_tmp) |
869 | 0 | break; |
870 | 0 | memcpy (mb, tmp, l_tmp); |
871 | 0 | mb += l_tmp; |
872 | 0 | mb_len -= l_tmp; |
873 | 0 | } |
874 | 0 | ret += l_tmp; |
875 | 0 | } |
876 | 0 | else |
877 | 0 | break; |
878 | 0 | if (u[0] == 0) |
879 | 0 | break; |
880 | 0 | u = (const unichar *) n_u; |
881 | 0 | } |
882 | 0 | iconv_close (cd); |
883 | | #else |
884 | | if (cp) |
885 | | ret = 0; |
886 | | |
887 | | while (u[ret] != 0) |
888 | | ++ret; |
889 | | |
890 | | ++ret; |
891 | | |
892 | | if (mb) |
893 | | { |
894 | | while (*u != 0 && mb_len != 0) |
895 | | { |
896 | | if (u[0] == (u[0] & 0x7f)) |
897 | | *mb++ = (char) u[0]; |
898 | | else |
899 | | *mb++ = '_'; |
900 | | ++u; --mb_len; |
901 | | } |
902 | | if (mb_len != 0) |
903 | | *mb = 0; |
904 | | } |
905 | | #endif |
906 | 0 | return ret; |
907 | 0 | } |