/src/libredwg/src/codepages.c
Line | Count | Source (jump to first uncovered line) |
1 | | /*****************************************************************************/ |
2 | | /* LibreDWG - free implementation of the DWG file format */ |
3 | | /* */ |
4 | | /* Copyright (C) 2023-2025 Free Software Foundation, Inc. */ |
5 | | /* */ |
6 | | /* This library is free software, licensed under the terms of the GNU */ |
7 | | /* General Public License as published by the Free Software Foundation, */ |
8 | | /* either version 3 of the License, or (at your option) any later version. */ |
9 | | /* You should have received a copy of the GNU General Public License */ |
10 | | /* along with this program. If not, see <http://www.gnu.org/licenses/>. */ |
11 | | /*****************************************************************************/ |
12 | | |
13 | | /* |
14 | | * codepages.c: preR2007 codepages support via iconv |
15 | | * written by Reini Urban |
16 | | * |
17 | | * See also the src mappings from https://www.unicode.org/Public/MAPPINGS/ |
18 | | * or the libdxfrw/src/intern/drw_textcodec.cpp mappings. |
19 | | */ |
20 | | |
21 | | #include "config.h" |
22 | | #include <string.h> |
23 | | #include <stdint.h> |
24 | | #include <stdlib.h> |
25 | | #include <errno.h> |
26 | | #include <ctype.h> |
27 | | #include <assert.h> |
28 | | #if defined HAVE_ICONV && defined HAVE_ICONV_H |
29 | | # include <iconv.h> |
30 | | #endif |
31 | | #if defined HAVE_WCTYPE_H |
32 | | # include <wctype.h> |
33 | | #endif |
34 | | // #define CODEPAGES_C |
35 | | #include "common.h" |
36 | | #include "codepages.h" |
37 | | |
38 | | #include "codepages/ISO-8859-2.h" |
39 | | #include "codepages/ISO-8859-3.h" |
40 | | #include "codepages/ISO-8859-4.h" |
41 | | #include "codepages/ISO-8859-5.h" |
42 | | #include "codepages/ISO-8859-6.h" |
43 | | #include "codepages/ISO-8859-7.h" |
44 | | #include "codepages/ISO-8859-8.h" |
45 | | #include "codepages/ISO-8859-9.h" |
46 | | #include "codepages/CP437.h" |
47 | | #include "codepages/CP850.h" |
48 | | #include "codepages/CP852.h" |
49 | | #include "codepages/CP855.h" |
50 | | #include "codepages/CP857.h" |
51 | | #include "codepages/CP860.h" |
52 | | #include "codepages/CP861.h" |
53 | | #include "codepages/CP863.h" |
54 | | #include "codepages/CP864.h" |
55 | | #include "codepages/CP865.h" |
56 | | #include "codepages/CP869.h" |
57 | | #include "codepages/CP932.h" |
58 | | #include "codepages/MACINTOSH.h" |
59 | | #include "codepages/BIG5.h" |
60 | | #include "codepages/CP949.h" |
61 | | #include "codepages/JOHAB.h" |
62 | | #include "codepages/CP866.h" |
63 | | #include "codepages/WINDOWS-1250.h" |
64 | | #include "codepages/WINDOWS-1251.h" |
65 | | #include "codepages/WINDOWS-1252.h" |
66 | | #include "codepages/GB2312.h" |
67 | | #include "codepages/WINDOWS-1253.h" |
68 | | #include "codepages/WINDOWS-1254.h" |
69 | | #include "codepages/WINDOWS-1255.h" |
70 | | #include "codepages/WINDOWS-1256.h" |
71 | | #include "codepages/WINDOWS-1257.h" |
72 | | #include "codepages/WINDOWS-874.h" |
73 | | #include "codepages/WINDOWS-932.h" |
74 | | #include "codepages/WINDOWS-936.h" |
75 | | #include "codepages/WINDOWS-949.h" |
76 | | #include "codepages/WINDOWS-950.h" |
77 | | #include "codepages/WINDOWS-1361.h" |
78 | | #include "codepages/WINDOWS-1258.h" |
79 | | |
80 | | static const uint16_t *cp_fntbl[] = { NULL, // UTF8 |
81 | | NULL, // US-ASCII |
82 | | NULL, // ISO-8859-1 |
83 | | cptbl_iso_8859_2, |
84 | | cptbl_iso_8859_3, |
85 | | cptbl_iso_8859_4, |
86 | | cptbl_iso_8859_5, |
87 | | cptbl_iso_8859_6, |
88 | | cptbl_iso_8859_7, |
89 | | cptbl_iso_8859_8, |
90 | | cptbl_iso_8859_9, |
91 | | cptbl_cp437, |
92 | | cptbl_cp850, |
93 | | cptbl_cp852, |
94 | | cptbl_cp855, |
95 | | cptbl_cp857, |
96 | | cptbl_cp860, |
97 | | cptbl_cp861, |
98 | | cptbl_cp863, |
99 | | cptbl_cp864, |
100 | | cptbl_cp865, |
101 | | cptbl_cp869, |
102 | | cptbl_cp932, /* original shiftjis */ |
103 | | cptbl_macintosh, |
104 | | cptbl_big5, |
105 | | cptbl_cp949, /* 25 */ |
106 | | cptbl_johab, /* 26 */ |
107 | | cptbl_cp866, |
108 | | cptbl_windows_1250, |
109 | | cptbl_windows_1251, /* 29 */ |
110 | | cptbl_windows_1252, /* 30 */ |
111 | | cptbl_gb2312, |
112 | | cptbl_windows_1253, |
113 | | cptbl_windows_1254, |
114 | | cptbl_windows_1255, |
115 | | cptbl_windows_1256, |
116 | | cptbl_windows_1257, |
117 | | cptbl_windows_874, |
118 | | cptbl_windows_932, /* windows-31j */ |
119 | | cptbl_windows_936, |
120 | | cptbl_windows_949, |
121 | | cptbl_windows_950, |
122 | | cptbl_windows_1361, /* 42 */ |
123 | | NULL, /* 43 UTF16 */ |
124 | | cptbl_windows_1258, |
125 | | NULL }; |
126 | | |
127 | | static const uint8_t *cp_alnumtbl[] |
128 | | = { NULL, // UTF8 |
129 | | NULL, // US-ASCII |
130 | | NULL, // ISO-8859-1 |
131 | | cptbl_alnum_iso_8859_2, |
132 | | cptbl_alnum_iso_8859_3, |
133 | | cptbl_alnum_iso_8859_4, |
134 | | cptbl_alnum_iso_8859_5, |
135 | | cptbl_alnum_iso_8859_6, |
136 | | cptbl_alnum_iso_8859_7, |
137 | | cptbl_alnum_iso_8859_8, |
138 | | cptbl_alnum_iso_8859_9, |
139 | | cptbl_alnum_cp437, |
140 | | cptbl_alnum_cp850, |
141 | | cptbl_alnum_cp852, |
142 | | cptbl_alnum_cp855, |
143 | | cptbl_alnum_cp857, |
144 | | cptbl_alnum_cp860, |
145 | | cptbl_alnum_cp861, |
146 | | cptbl_alnum_cp863, |
147 | | cptbl_alnum_cp864, |
148 | | cptbl_alnum_cp865, |
149 | | cptbl_alnum_cp869, |
150 | | NULL, // cptbl_alnum_cp932, /* original shiftjis */ |
151 | | cptbl_alnum_macintosh, |
152 | | NULL, // cptbl_alnum_big5, |
153 | | NULL, // cptbl_alnum_cp949, /* 25 */ |
154 | | NULL, // cptbl_alnum_johab, /* 26 */ |
155 | | cptbl_alnum_cp866, |
156 | | cptbl_alnum_windows_1250, |
157 | | cptbl_alnum_windows_1251, /* 29 */ |
158 | | cptbl_alnum_windows_1252, /* 30 */ |
159 | | NULL, // cptbl_alnum_gb2312, |
160 | | cptbl_alnum_windows_1253, |
161 | | cptbl_alnum_windows_1254, |
162 | | cptbl_alnum_windows_1255, |
163 | | cptbl_alnum_windows_1256, |
164 | | cptbl_alnum_windows_1257, |
165 | | cptbl_alnum_windows_874, |
166 | | NULL, // cptbl_alnum_windows_932, /* windows-31j */ |
167 | | NULL, // cptbl_alnum_windows_936, |
168 | | NULL, // cptbl_alnum_windows_949, |
169 | | NULL, // cptbl_alnum_windows_950, |
170 | | NULL, // cptbl_alnum_windows_1361, /* 42 */ |
171 | | NULL, /* 43 UTF16 */ |
172 | | cptbl_alnum_windows_1258, |
173 | | NULL }; |
174 | | |
175 | | static const uint16_t *cp_alnum16tbl[] |
176 | | = { NULL, // UTF8 |
177 | | NULL, // US-ASCII |
178 | | NULL, // ISO-8859-1 |
179 | | NULL, // cptbl_alnum_iso_8859_2, |
180 | | NULL, // cptbl_alnum_iso_8859_3, |
181 | | NULL, // cptbl_alnum_iso_8859_4, |
182 | | NULL, // cptbl_alnum_iso_8859_5, |
183 | | NULL, // cptbl_alnum_iso_8859_6, |
184 | | NULL, // cptbl_alnum_iso_8859_7, |
185 | | NULL, // cptbl_alnum_iso_8859_8, |
186 | | NULL, // cptbl_alnum_iso_8859_9, |
187 | | NULL, // cptbl_alnum_cp437, |
188 | | NULL, // cptbl_alnum_cp850, |
189 | | NULL, // cptbl_alnum_cp852, |
190 | | NULL, // cptbl_alnum_cp855, |
191 | | NULL, // cptbl_alnum_cp857, |
192 | | NULL, // cptbl_alnum_cp860, |
193 | | NULL, // cptbl_alnum_cp861, |
194 | | NULL, // cptbl_alnum_cp863, |
195 | | NULL, // cptbl_alnum_cp864, |
196 | | NULL, // cptbl_alnum_cp865, |
197 | | NULL, // cptbl_alnum_cp869, |
198 | | cptbl_alnum_cp932, /* original shiftjis */ |
199 | | NULL, // cptbl_alnum_macintosh, |
200 | | cptbl_alnum_big5, |
201 | | cptbl_alnum_cp949, /* 25 */ |
202 | | cptbl_alnum_johab, /* 26 */ |
203 | | NULL, // cptbl_alnum_cp866, |
204 | | NULL, // cptbl_alnum_windows_1250, |
205 | | NULL, // cptbl_alnum_windows_1251, /* 29 */ |
206 | | NULL, // cptbl_alnum_windows_1252, /* 30 */ |
207 | | cptbl_alnum_gb2312, |
208 | | NULL, // cptbl_alnum_windows_1253, |
209 | | NULL, // cptbl_alnum_windows_1254, |
210 | | NULL, // cptbl_alnum_windows_1255, |
211 | | NULL, // cptbl_alnum_windows_1256, |
212 | | NULL, // cptbl_alnum_windows_1257, |
213 | | NULL, // cptbl_alnum_windows_874, |
214 | | cptbl_alnum_windows_932, /* windows-31j */ |
215 | | cptbl_alnum_windows_936, |
216 | | cptbl_alnum_windows_949, |
217 | | cptbl_alnum_windows_950, |
218 | | cptbl_alnum_windows_1361, /* 42 */ |
219 | | NULL, /* 43 UTF16 */ |
220 | | NULL, // cptbl_alnum_windows_1258, |
221 | | NULL }; |
222 | | |
223 | | // synced with typedef enum _dwg_codepage in codepages.h |
224 | | #ifdef HAVE_ICONV |
225 | | |
226 | | const char * |
227 | | dwg_codepage_iconvstr (Dwg_Codepage cp) |
228 | 65.8k | { |
229 | | // for iconv |
230 | 65.8k | const char *_codepage_iconvstr[] = { "UTF8", "US-ASCII", |
231 | 65.8k | "ISO-8859-1", "ISO-8859-2", |
232 | 65.8k | "ISO-8859-3", "ISO-8859-4", |
233 | 65.8k | "ISO-8859-5", "ISO-8859-6", |
234 | 65.8k | "ISO-8859-7", "ISO-8859-8", |
235 | 65.8k | "ISO-8859-9", "CP437", |
236 | 65.8k | "CP850", "CP852", |
237 | 65.8k | "CP855", "CP857", |
238 | 65.8k | "CP860", "CP861", |
239 | 65.8k | "CP863", "CP864", |
240 | 65.8k | "CP865", "CP869", |
241 | 65.8k | "CP932", "MACINTOSH", |
242 | 65.8k | "BIG5", "CP949", /* 25 */ |
243 | 65.8k | "JOHAB", "CP866", |
244 | 65.8k | "WINDOWS-1250", "WINDOWS-1251", /* 29 */ |
245 | 65.8k | "WINDOWS-1252", /* 30 */ |
246 | 65.8k | "GB2312", "WINDOWS-1253", |
247 | 65.8k | "WINDOWS-1254", "WINDOWS-1255", |
248 | 65.8k | "WINDOWS-1256", "WINDOWS-1257", |
249 | 65.8k | "WINDOWS-874", "WINDOWS-932", |
250 | 65.8k | "WINDOWS-936", "WINDOWS-949", |
251 | 65.8k | "WINDOWS-950", "WINDOWS-1361", |
252 | 65.8k | "UTF16", /* 43 */ |
253 | 65.8k | "WINDOWS-1258", NULL }; |
254 | 65.8k | if (cp <= CP_ANSI_1258) |
255 | 65.8k | return _codepage_iconvstr[cp]; |
256 | 0 | else |
257 | 0 | return NULL; |
258 | 65.8k | } |
259 | | #endif |
260 | | |
/* Codepage names in the DXF spelling, one per Dwg_Codepage value in enum
   order, NULL-terminated. Used by dwg_codepage_dxfstr() and
   dwg_codepage_int(). */
const char *_codepage_dxfstr[] = {
  "UTF8",       /*  0 */
  "US_ASCII",   /*  1 */
  "ISO-8859-1", /*  2 */
  "ISO-8859-2", /*  3 */
  "ISO-8859-3", /*  4 */
  "ISO-8859-4", /*  5 */
  "ISO-8859-5", /*  6 */
  "ISO-8859-6", /*  7 */
  "ISO-8859-7", /*  8 */
  "ISO-8859-8", /*  9 */
  "ISO-8859-9", /* 10 */
  "CP437",      /* 11 */
  "CP850",      /* 12 */
  "CP852",      /* 13 */
  "CP855",      /* 14 */
  "CP857",      /* 15 */
  "CP860",      /* 16 */
  "CP861",      /* 17 */
  "CP863",      /* 18 */
  "CP864",      /* 19 */
  "CP865",      /* 20 */
  "CP869",      /* 21 */
  "CP932",      /* 22 */
  "MACINTOSH",  /* 23 */
  "BIG5",       /* 24 */
  "CP949",      /* 25 */
  "JOHAB",      /* 26 */
  "CP866",      /* 27 */
  "ANSI_1250",  /* 28 */
  "ANSI_1251",  /* 29 */
  "ANSI_1252",  /* 30 WesternEurope Windows */
  "GB2312",     /* 31 */
  "ANSI_1253",  /* 32 */
  "ANSI_1254",  /* 33 */
  "ANSI_1255",  /* 34 */
  "ANSI_1256",  /* 35 */
  "ANSI_1257",  /* 36 */
  "ANSI_874",   /* 37 */
  "ANSI_932",   /* 38 */
  "ANSI_936",   /* 39 */
  "ANSI_949",   /* 40 */
  "ANSI_950",   /* 41 */
  "ANSI_1361",  /* 42 */
  "UTF16",      /* 43 */
  "ANSI_1258",  /* 44 */
  NULL          /* 45 end marker */
};
274 | | |
275 | | const char * |
276 | | dwg_codepage_dxfstr (Dwg_Codepage cp) |
277 | 0 | { |
278 | 0 | if (cp <= CP_ANSI_1258) |
279 | 0 | return _codepage_dxfstr[cp]; |
280 | 0 | else if (cp == CP_UNDEFINED) |
281 | 0 | return "undefined"; |
282 | 0 | else |
283 | 0 | return NULL; |
284 | 0 | } |
285 | | |
286 | | Dwg_Codepage |
287 | | dwg_codepage_int (const char *s) |
288 | 259 | { |
289 | 8.02k | for (int i = 0; i <= (int)CP_ANSI_1258; i++) |
290 | 8.02k | { |
291 | 8.02k | if (strEQ (s, _codepage_dxfstr[i])) |
292 | 259 | return (Dwg_Codepage)i; |
293 | 7.77k | if (islower (*s) && 0 == strcasecmp (s, _codepage_dxfstr[i])) |
294 | 0 | return (Dwg_Codepage)i; |
295 | 7.77k | } |
296 | 0 | return CP_UNDEFINED; |
297 | 259 | } |
298 | | |
299 | | /* helper to check if a codepoint exists in the codepage, |
300 | | and convert it to/from unicode. |
301 | | dir = 1: from unicode wc to charset |
302 | | asian = 1: 2-byte CJK charset, else 1-byte (0-255) |
303 | | */ |
304 | | static wchar_t |
305 | | codepage_helper (const Dwg_Codepage codepage, const wchar_t wc, const int dir, |
306 | | const int asian) |
307 | 106k | { |
308 | 106k | const uint16_t *fntbl; |
309 | 106k | uint16_t maxc; |
310 | 106k | assert (codepage != CP_UTF8 && codepage != CP_UTF16 |
311 | 106k | && codepage != CP_US_ASCII && codepage != CP_ISO_8859_1); |
312 | 106k | fntbl = cp_fntbl[codepage]; |
313 | 106k | maxc = fntbl[0]; |
314 | 106k | assert (maxc); |
315 | 106k | if (dir) // from unicode to charset. |
316 | 0 | { // reverse lookup. unsorted rhs values so we cannot bsearch. |
317 | 0 | for (uint16_t i = 0x80; i < maxc; i++) |
318 | 0 | { |
319 | 0 | if (wc == fntbl[i]) |
320 | 0 | return i; |
321 | 0 | } |
322 | 0 | return 0; |
323 | 0 | } |
324 | 106k | else |
325 | 106k | { |
326 | 106k | if (wc < maxc) |
327 | 84.3k | return fntbl[wc]; |
328 | 21.9k | else |
329 | 21.9k | return 0; |
330 | 106k | } |
331 | 106k | } |
332 | | |
333 | | // returns the matching unicode codepoint, |
334 | | // or 0 if the codepage does not contain the character |
335 | | wchar_t |
336 | | dwg_codepage_uc (Dwg_Codepage cp, unsigned char c) |
337 | 0 | { |
338 | 0 | if (c < 128) |
339 | 0 | return (wchar_t)c; |
340 | 0 | else if (cp == CP_US_ASCII) |
341 | 0 | return 0; |
342 | 0 | if (cp == CP_ISO_8859_1 || cp == CP_UTF8 || cp == CP_UTF16) |
343 | 0 | return (wchar_t)c; |
344 | 0 | return codepage_helper (cp, (wchar_t)c, 0, 0); |
345 | 0 | } |
346 | | // for wide asian chars |
347 | | wchar_t |
348 | | dwg_codepage_uwc (Dwg_Codepage cp, uint16_t c) |
349 | 414k | { |
350 | 414k | if (cp == CP_CP864 && c == 0x25) |
351 | 0 | return 0x066a; |
352 | 414k | else if (cp == CP_CP932 && c == 0x5c) |
353 | 0 | return 0x00A5; |
354 | 414k | else if (cp == CP_CP932 && c == 0x7e) |
355 | 0 | return 0x203E; |
356 | 414k | else if (cp == CP_JOHAB && c == 0x5c) |
357 | 0 | return 0x20A9; |
358 | 414k | else if (c < 128 || cp == CP_UTF8 || cp == CP_UTF16) |
359 | 308k | return (wchar_t)c; |
360 | 106k | return codepage_helper (cp, (wchar_t)c, 0, 1); |
361 | 414k | } |
362 | | // returns the matching codepoint, |
363 | | // or 0 if the codepage does not contain the wide character |
364 | | unsigned char |
365 | | dwg_codepage_c (Dwg_Codepage cp, wchar_t wc) |
366 | 0 | { |
367 | 0 | if (wc < 128) |
368 | 0 | { |
369 | 0 | if (cp == CP_US_ASCII || cp == CP_UTF8 || cp == CP_UTF16) |
370 | 0 | return wc & 0xff; |
371 | 0 | } |
372 | 0 | else if (cp == CP_US_ASCII) |
373 | 0 | return 0; |
374 | 0 | if (cp == CP_ISO_8859_1 || cp == CP_UTF8) |
375 | 0 | return wc < 256 ? wc : 0; |
376 | 0 | return (unsigned char)codepage_helper (cp, wc, 1, 0); |
377 | 0 | } |
378 | | // for wide asian chars |
379 | | uint16_t |
380 | | dwg_codepage_wc (Dwg_Codepage cp, wchar_t wc) |
381 | 0 | { |
382 | 0 | if (wc < 128 || cp == CP_UTF8 || cp == CP_UTF16) |
383 | 0 | return wc & 0xffff; |
384 | 0 | return (uint16_t)codepage_helper (cp, wc, 1, 1); |
385 | 0 | } |
386 | | |
387 | | /* for possible wide asian chars: |
388 | | 932 is single-byte for most chars, but 0x8*, 0x9*, 0xE* and 0xF* lead bytes |
389 | | CP949, JOHAB, ANSI_949, 936, 950 for all > 0x8* lead bytes |
390 | | 1361 for all but 0x8[0123], 0xD[4567F], 0xF[A-F] lead bytes |
391 | | BIG5, GB2312 are two-byte only. |
392 | | |
393 | | none have valid 0x00 bytes, so strlen works as before in the TV case. |
394 | | */ |
395 | | bool |
396 | | dwg_codepage_isasian (const Dwg_Codepage cp) |
397 | 856k | { |
398 | 856k | if (cp >= CP_BIG5 && cp <= CP_JOHAB) |
399 | 348k | return true; |
400 | 508k | else if (cp >= CP_ANSI_932 && cp <= CP_ANSI_1258) |
401 | 203k | return true; |
402 | 305k | else if (cp == CP_GB2312) |
403 | 0 | return true; |
404 | 305k | else |
405 | 305k | return false; |
406 | 856k | } |
407 | | |
/* bsearch comparator for uint8_t keys: returns -1, 0 or 1 for
   *a less than, equal to or greater than *b. */
static int
b8_cmp (const void *a, const void *b)
{
  const uint8_t lhs = *(const uint8_t *)a;
  const uint8_t rhs = *(const uint8_t *)b;

  return (lhs > rhs) - (lhs < rhs);
}
415 | | |
/* bsearch comparator for uint16_t keys: returns -1, 0 or 1 for
   *a less than, equal to or greater than *b. */
static int
b16_cmp (const void *a, const void *b)
{
  const uint16_t lhs = *(const uint16_t *)a;
  const uint16_t rhs = *(const uint16_t *)b;

  return (lhs > rhs) - (lhs < rhs);
}
423 | | |
424 | | bool |
425 | | dwg_codepage_isalnum (const Dwg_Codepage cp, const wchar_t c) |
426 | 0 | { |
427 | 0 | if (c < 128) |
428 | 0 | return isalnum ((int)c); |
429 | 0 | switch (cp) |
430 | 0 | { |
431 | 0 | case CP_US_ASCII: |
432 | 0 | return false; |
433 | 0 | case CP_ISO_8859_1: |
434 | 0 | return (c >= 0xC0 && c <= 0xD6) || (c >= 0xD8 && c <= 0xFF); |
435 | 0 | case CP_UTF8: |
436 | 0 | case CP_UTF16: // fallthru |
437 | 0 | #if defined HAVE_WCTYPE_H |
438 | 0 | return iswalnum ((int)c); |
439 | | #else |
440 | | // TODO panic? |
441 | | return false; |
442 | | #endif |
443 | 0 | default: |
444 | 0 | { |
445 | 0 | const uint8_t *fntbl = cp_alnumtbl[cp]; |
446 | 0 | assert (cp != CP_UTF8 && cp != CP_UTF16 && cp != CP_US_ASCII |
447 | 0 | && cp != CP_ISO_8859_1); |
448 | | // 8 or 16bit? |
449 | 0 | if (fntbl) |
450 | 0 | { |
451 | 0 | const uint8_t key = c & 0xff; |
452 | 0 | const uint8_t sz8 = fntbl[0]; |
453 | 0 | const size_t sz = (size_t)sz8; |
454 | 0 | uint8_t *found |
455 | 0 | = (uint8_t *)bsearch (&key, &fntbl[1], sz, 1, b8_cmp); |
456 | 0 | if (!found || found == &fntbl[0]) |
457 | 0 | return false; |
458 | 0 | else |
459 | 0 | return true; |
460 | 0 | } |
461 | 0 | else |
462 | 0 | { |
463 | 0 | const uint16_t key = c & 0xffff; |
464 | 0 | const uint16_t *fntbl16 = cp_alnum16tbl[cp]; |
465 | 0 | const uint16_t sz16 = fntbl16[0]; |
466 | 0 | const size_t sz = (size_t)sz16; |
467 | 0 | uint16_t *found |
468 | 0 | = (uint16_t *)bsearch (&key, &fntbl16[1], sz, 2, b16_cmp); |
469 | 0 | if (!found || found == &fntbl16[0]) |
470 | 0 | return false; |
471 | 0 | else |
472 | 0 | return true; |
473 | 0 | } |
474 | 0 | return false; |
475 | 0 | } |
476 | 0 | } |
477 | 0 | } |
478 | | |
479 | | bool |
480 | | dwg_codepage_is_twobyte (const Dwg_Codepage cp, const unsigned char c) |
481 | 414k | { |
482 | 414k | if (cp == CP_CP932 || cp == CP_ANSI_932) |
483 | 0 | return (c >= 0x80 && c <= 0x9F) || (c >= 0xE0); |
484 | 414k | else if (cp == CP_CP949 || cp == CP_ANSI_949 || cp == CP_ANSI_936 |
485 | 414k | || cp == CP_ANSI_950) |
486 | 106k | return c & 0x80; |
487 | 308k | else if (cp == CP_ANSI_1361) |
488 | 308k | return (c >= 0x80 && c <= 0x83) || (c >= 0xD4 && c <= 0xD7) || (c == 0xDF) |
489 | 308k | || (c >= 0xFA); |
490 | 0 | else if (cp == CP_GB2312 || cp == CP_BIG5) |
491 | 0 | return true; |
492 | 0 | else |
493 | 0 | return false; |
494 | 414k | } |