Line | Count | Source |
1 | | |
2 | | #include "wc.h" |
3 | | #include "iso2022.h" |
4 | | #include "sjis.h" |
5 | | #include "big5.h" |
6 | | #include "hz.h" |
7 | | #include "viet.h" |
8 | | #ifdef USE_UNICODE |
9 | | #include "utf8.h" |
10 | | #include "utf7.h" |
11 | | #endif |
12 | | |
13 | | wc_uint8 WC_DETECT_MAP[ 0x100 ] = { |
14 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
15 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
16 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
17 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
18 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
19 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
20 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
22 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
23 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
24 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
25 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
26 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
27 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
28 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
29 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
30 | | }; |
31 | | |
/*
 * Per-encoding detector status.  DETECT_NORMAL is the neutral starting value;
 * the others are single-bit flags that may be OR-ed together, except that
 * DETECT_ERROR is always stored on its own and means "candidate ruled out".
 */
#define DETECT_NORMAL   0
#define DETECT_POSSIBLE (1 << 0)
#define DETECT_OK       (1 << 1)
#define DETECT_BROKEN   (1 << 2)
#define DETECT_ERROR    (1 << 3)

/* OR the flag(s) `y` into the status lvalue `x`. */
#define SET_DETECT(x,y) ((x) |= (y))

/*
 * Record one malformed sequence in the status lvalue `x`: the first offence
 * only adds DETECT_BROKEN, a second one replaces the status with DETECT_ERROR.
 * Note that `x` is evaluated more than once.
 */
#define SET_BROKEN_ERROR(x) \
    ((x) = (((x) & DETECT_BROKEN) ? DETECT_ERROR : ((x) | DETECT_BROKEN)))
39 | | |
40 | | void |
41 | | wc_create_detect_map(wc_ces ces, wc_bool esc) |
42 | 12.4k | { |
43 | 12.4k | static wc_ces detect_ces = WC_CES_US_ASCII; |
44 | 12.4k | int i; |
45 | | |
46 | 12.4k | if (ces != detect_ces) { |
47 | 10.0k | if (ces & WC_CES_T_VIET) { |
48 | 391 | wc_uint8 *map = NULL; |
49 | 391 | switch (ces) { |
50 | 122 | case WC_CES_TCVN_5712: |
51 | 122 | map = wc_c0_tcvn57122_map; |
52 | 122 | break; |
53 | 261 | case WC_CES_VISCII_11: |
54 | 261 | map = wc_c0_viscii112_map; |
55 | 261 | break; |
56 | 8 | case WC_CES_VPS: |
57 | 8 | map = wc_c0_vps2_map; |
58 | 8 | break; |
59 | 391 | } |
60 | 12.9k | for (i = 0; i < 0x20; i++) |
61 | 12.5k | WC_DETECT_MAP[i] = map[i] ? 1 : 0; |
62 | 9.69k | } else { |
63 | 319k | for (i = 0; i < 0x20; i++) |
64 | 310k | WC_DETECT_MAP[i] = 0; |
65 | 9.69k | WC_DETECT_MAP[WC_C_HZ_TILDA] = (ces == WC_CES_HZ_GB_2312) ? 1 : 0; |
66 | 9.69k | #ifdef USE_UNICODE |
67 | 9.69k | WC_DETECT_MAP[WC_C_UTF7_PLUS] = (ces == WC_CES_UTF_7) ? 1 : 0; |
68 | 9.69k | #endif |
69 | 9.69k | } |
70 | 10.0k | detect_ces = ces; |
71 | 10.0k | } |
72 | 12.4k | WC_DETECT_MAP[WC_C_ESC] = (esc || (ces & WC_CES_T_ISO_2022)) ? 1 : 0; |
73 | 12.4k | return; |
74 | 12.4k | } |
75 | | |
76 | | wc_ces |
77 | | wc_auto_detect(char *is, size_t len, wc_ces hint) |
78 | 12.0k | { |
79 | 12.0k | wc_uchar *p = (wc_uchar *)is; |
80 | 12.0k | wc_uchar *ep = p + len; |
81 | 12.0k | wc_uchar *q; |
82 | 12.0k | wc_ces euc = 0, priv = 0; |
83 | 12.0k | wc_status st; |
84 | 12.0k | int euc_state = 0, sjis_state = 0, big5_state = 0, hz_state = 0; |
85 | 12.0k | int iso_detect = DETECT_ERROR, euc_detect = DETECT_ERROR, |
86 | 12.0k | sjis_detect = DETECT_ERROR, big5_detect = DETECT_ERROR, |
87 | 12.0k | hz_detect = DETECT_ERROR, latin_detect = DETECT_ERROR, |
88 | 12.0k | priv_detect = DETECT_ERROR; |
89 | 12.0k | int possible = 0; |
90 | 12.0k | wc_bool iso2022jp2 = WC_FALSE, iso2022jp3 = WC_FALSE, |
91 | 12.0k | iso2022cn = WC_FALSE, iso2022kr = WC_FALSE, ok = WC_FALSE; |
92 | 12.0k | #ifdef USE_UNICODE |
93 | 12.0k | int utf8_state = 0; |
94 | 12.0k | int utf8_detect = DETECT_ERROR; |
95 | 12.0k | int utf8_next = 0; |
96 | 12.0k | #endif |
97 | | |
98 | 12.0k | wc_create_detect_map(hint, WC_TRUE); |
99 | 25.4k | for (; p < ep && ! WC_DETECT_MAP[*p]; p++) |
100 | 13.3k | ; |
101 | 12.0k | if (p == ep) |
102 | 1.71k | return hint; |
103 | | |
104 | 10.3k | switch (hint) { |
105 | 360 | case WC_CES_ISO_2022_JP: |
106 | 361 | case WC_CES_ISO_2022_JP_2: |
107 | 362 | case WC_CES_ISO_2022_JP_3: |
108 | 1.20k | case WC_CES_EUC_JP: |
109 | 2.40k | case WC_CES_SHIFT_JIS: |
110 | 2.57k | case WC_CES_SHIFT_JISX0213: |
111 | 2.57k | euc = WC_CES_EUC_JP; |
112 | 2.57k | euc_state = WC_EUC_NOSTATE; |
113 | 2.57k | sjis_state = WC_SJIS_NOSTATE; |
114 | 2.57k | iso_detect = euc_detect = sjis_detect = DETECT_NORMAL; |
115 | 2.57k | possible = 3; |
116 | 2.57k | break; |
117 | 42 | case WC_CES_ISO_2022_CN: |
118 | 269 | case WC_CES_EUC_CN: |
119 | 269 | euc = WC_CES_EUC_CN; |
120 | 269 | euc_state = WC_EUC_NOSTATE; |
121 | 269 | big5_state = WC_BIG5_NOSTATE; |
122 | 269 | iso_detect = euc_detect = big5_detect = DETECT_NORMAL; |
123 | 269 | possible = 3; |
124 | 269 | break; |
125 | 79 | case WC_CES_EUC_TW: |
126 | 322 | case WC_CES_BIG5: |
127 | 322 | euc = WC_CES_EUC_TW; |
128 | 322 | euc_state = WC_EUC_NOSTATE; |
129 | 322 | big5_state = WC_BIG5_NOSTATE; |
130 | 322 | iso_detect = euc_detect = big5_detect = DETECT_NORMAL; |
131 | 322 | possible = 3; |
132 | 322 | break; |
133 | 359 | case WC_CES_HZ_GB_2312: |
134 | 359 | euc = WC_CES_EUC_CN; |
135 | 359 | euc_state = WC_EUC_NOSTATE; |
136 | 359 | hz_state = WC_HZ_NOSTATE; |
137 | 359 | iso_detect = euc_detect = big5_detect = hz_detect = DETECT_NORMAL; |
138 | 359 | possible = 4; |
139 | 359 | break; |
140 | 180 | case WC_CES_ISO_2022_KR: |
141 | 181 | case WC_CES_EUC_KR: |
142 | 181 | euc = WC_CES_EUC_KR; |
143 | 181 | euc_state = WC_EUC_NOSTATE; |
144 | 181 | iso_detect = euc_detect = DETECT_NORMAL; |
145 | 181 | possible = 3; |
146 | 181 | break; |
147 | 0 | #ifdef USE_UNICODE |
148 | 1.17k | case WC_CES_UTF_8: |
149 | 1.17k | iso_detect = DETECT_NORMAL; |
150 | 1.17k | possible = 1; |
151 | 1.17k | break; |
152 | 0 | #endif |
153 | 75 | case WC_CES_US_ASCII: |
154 | 75 | iso_detect = latin_detect = DETECT_NORMAL; |
155 | 75 | possible = 2; |
156 | 75 | break; |
157 | 5.37k | default: |
158 | 5.37k | if (hint & WC_CES_T_ISO_8859) { |
159 | 601 | iso_detect = latin_detect = DETECT_NORMAL; |
160 | 601 | possible = 2; |
161 | 4.77k | } else { |
162 | 4.77k | iso_detect = priv_detect = DETECT_NORMAL; |
163 | 4.77k | priv = hint; /* for TVCN, VISCII, VPS */ |
164 | 4.77k | possible = 2; |
165 | 4.77k | } |
166 | 5.37k | break; |
167 | 10.3k | } |
168 | 10.3k | #ifdef USE_UNICODE |
169 | 10.3k | if (priv_detect == DETECT_ERROR) { |
170 | 5.55k | utf8_detect = DETECT_NORMAL; |
171 | 5.55k | possible++; |
172 | 5.55k | } |
173 | 10.3k | #endif |
174 | | |
175 | 10.3k | wc_input_init(WC_CES_US_ASCII, &st); |
176 | | |
177 | 546k | for (; p < ep; p++) { |
178 | 542k | if (possible == 0 || (possible == 1 && ok)) |
179 | 6.83k | break; |
180 | 535k | if (iso_detect != DETECT_ERROR) { |
181 | 63.4k | switch (*p) { |
182 | 10.4k | case WC_C_ESC: |
183 | 10.4k | if (*(p+1) == WC_C_MBCS) { |
184 | 3.85k | q = p; |
185 | 3.85k | if (! wc_parse_iso2022_esc(&q, &st)) |
186 | 1.06k | break; |
187 | 2.78k | if (st.design[0] == WC_CCS_JIS_C_6226 || |
188 | 1.73k | st.design[0] == WC_CCS_JIS_X_0208) |
189 | 1.34k | ; |
190 | 1.44k | else if (st.design[0] == WC_CCS_JIS_X_0213_1 || |
191 | 1.23k | st.design[0] == WC_CCS_JIS_X_0213_2) |
192 | 418 | iso2022jp3 = WC_TRUE; |
193 | 1.02k | else if (WC_CCS_TYPE(st.design[0]) == WC_CCS_A_CS94W) |
194 | 582 | iso2022jp2 = WC_TRUE; |
195 | 2.78k | if (st.design[1] == WC_CCS_KS_X_1001) |
196 | 195 | iso2022kr = WC_TRUE; |
197 | 2.59k | else if (st.design[1] == WC_CCS_GB_2312 || |
198 | 2.59k | st.design[1] == WC_CCS_ISO_IR_165 || |
199 | 2.20k | st.design[1] == WC_CCS_CNS_11643_1) |
200 | 586 | iso2022cn = WC_TRUE; |
201 | 2.78k | if (WC_CCS_TYPE(st.design[2]) == WC_CCS_A_CS94W || |
202 | 2.57k | WC_CCS_TYPE(st.design[3]) == WC_CCS_A_CS94W) |
203 | 438 | iso2022cn = WC_TRUE; |
204 | 6.62k | } else if (*(p+1) == WC_C_G2_CS96) { |
205 | 1.01k | q = p; |
206 | 1.01k | if (! wc_parse_iso2022_esc(&q, &st)) |
207 | 748 | break; |
208 | 271 | if (WC_CCS_TYPE(st.design[2]) == WC_CCS_A_CS96) |
209 | 271 | iso2022jp2 = WC_TRUE; |
210 | 5.60k | } else if (*(p+1) == WC_C_CSWSR) { |
211 | 482 | q = p; |
212 | 482 | if (! wc_parse_iso2022_esc(&q, &st)) |
213 | 400 | break; |
214 | 82 | possible = 0; |
215 | 82 | iso_detect = DETECT_BROKEN; |
216 | 82 | continue; |
217 | 482 | } |
218 | 8.18k | iso_detect = DETECT_OK; |
219 | 8.18k | ok = WC_TRUE; |
220 | 8.18k | break; |
221 | 216 | case WC_C_SI: |
222 | 431 | case WC_C_SO: |
223 | 431 | iso_detect = DETECT_OK; |
224 | 431 | ok = WC_TRUE; |
225 | 431 | iso2022cn = WC_TRUE; |
226 | 431 | iso2022kr = WC_TRUE; |
227 | 431 | break; |
228 | 52.5k | default: |
229 | 52.5k | if (*p & 0x80) { |
230 | 9.15k | iso_detect = DETECT_ERROR; |
231 | 9.15k | possible--; |
232 | 9.15k | } |
233 | 52.5k | break; |
234 | 63.4k | } |
235 | 63.4k | } |
236 | 535k | if (euc_detect != DETECT_ERROR) { |
237 | 171k | switch (euc_state) { |
238 | 94.9k | case WC_EUC_NOSTATE: |
239 | 94.9k | switch (WC_ISO_MAP[*p]) { |
240 | 76.5k | case WC_ISO_MAP_GR: |
241 | 76.5k | euc_state = WC_EUC_MBYTE1; |
242 | 76.5k | break; |
243 | 765 | case WC_ISO_MAP_SS2: |
244 | 765 | if (euc == WC_CES_EUC_JP) |
245 | 470 | euc_state = WC_EUC_MBYTE1; |
246 | 295 | else if (euc == WC_CES_EUC_TW) |
247 | 284 | euc_state = WC_EUC_TW_SS2; |
248 | 11 | else |
249 | 11 | euc_detect = DETECT_ERROR; |
250 | 765 | break; |
251 | 259 | case WC_ISO_MAP_SS3: |
252 | 259 | if (euc == WC_CES_EUC_JP && |
253 | 247 | WC_ISO_MAP[*(p+1)] == WC_ISO_MAP_GR) |
254 | 218 | ; |
255 | 41 | else |
256 | 41 | euc_detect = DETECT_ERROR; |
257 | 259 | break; |
258 | 907 | case WC_ISO_MAP_C1: |
259 | 1.23k | case WC_ISO_MAP_GR96: |
260 | 1.23k | euc_detect = DETECT_ERROR; |
261 | 1.23k | break; |
262 | 94.9k | } |
263 | 94.9k | break; |
264 | 94.9k | case WC_EUC_MBYTE1: |
265 | 76.6k | if (WC_ISO_MAP[*p] == WC_ISO_MAP_GR) { |
266 | 74.2k | SET_DETECT(euc_detect, DETECT_OK); |
267 | 74.2k | ok = WC_TRUE; |
268 | 74.2k | } else |
269 | 2.35k | SET_BROKEN_ERROR(euc_detect); |
270 | 76.6k | euc_state = WC_EUC_NOSTATE; |
271 | 76.6k | break; |
272 | 275 | case WC_EUC_TW_SS2: |
273 | 275 | if (!( 0xa0 <= *p && *p <= 0xb0) || |
274 | 260 | WC_ISO_MAP[*(p+1)] != WC_ISO_MAP_GR) |
275 | 18 | euc_detect = DETECT_ERROR; |
276 | 275 | euc_state = WC_EUC_NOSTATE; |
277 | 275 | break; |
278 | 171k | } |
279 | 171k | if (euc_detect == DETECT_ERROR) |
280 | 2.04k | possible--; |
281 | 171k | } |
282 | 535k | if (sjis_detect != DETECT_ERROR) { |
283 | 340k | switch (sjis_state) { |
284 | 335k | case WC_SJIS_NOSTATE: |
285 | 335k | switch (WC_SJIS_MAP[*p]) { |
286 | 1.20k | case WC_SJIS_MAP_SL: |
287 | 5.05k | case WC_SJIS_MAP_SH: |
288 | 5.05k | sjis_state = WC_SJIS_SHIFT_L; |
289 | 5.05k | break; |
290 | 283k | case WC_SJIS_MAP_SK: |
291 | 283k | SET_DETECT(sjis_detect, DETECT_POSSIBLE); |
292 | 283k | break; |
293 | 511 | case WC_SJIS_MAP_SX: |
294 | 511 | if (WcOption.use_jisx0213) { |
295 | 0 | sjis_state = WC_SJIS_SHIFT_X; |
296 | 0 | break; |
297 | 0 | } |
298 | 613 | case WC_SJIS_MAP_80: |
299 | 649 | case WC_SJIS_MAP_A0: |
300 | 1.08k | case WC_SJIS_MAP_C1: |
301 | 1.08k | sjis_detect = DETECT_ERROR; |
302 | 1.08k | break; |
303 | 335k | } |
304 | 335k | break; |
305 | 335k | case WC_SJIS_SHIFT_L: |
306 | 4.91k | if (WC_SJIS_MAP[*p] & WC_SJIS_MAP_LB) { |
307 | 4.70k | SET_DETECT(sjis_detect, DETECT_OK); |
308 | 4.70k | ok = WC_TRUE; |
309 | 4.70k | } else |
310 | 213 | SET_BROKEN_ERROR(sjis_detect); |
311 | 4.91k | sjis_state = WC_SJIS_NOSTATE; |
312 | 4.91k | break; |
313 | 0 | case WC_SJIS_SHIFT_X: |
314 | 0 | if (WC_SJIS_MAP[*p] & WC_SJIS_MAP_LB) |
315 | 0 | SET_DETECT(sjis_detect, DETECT_POSSIBLE); |
316 | 0 | else |
317 | 0 | sjis_detect = DETECT_ERROR; |
318 | 0 | sjis_state = WC_SJIS_NOSTATE; |
319 | 0 | break; |
320 | 340k | } |
321 | 340k | if (sjis_detect == DETECT_ERROR) |
322 | 1.12k | possible--; |
323 | 340k | } |
324 | 535k | if (big5_detect != DETECT_ERROR) { |
325 | 17.0k | switch (big5_state) { |
326 | 12.5k | case WC_BIG5_NOSTATE: |
327 | 12.5k | switch (WC_BIG5_MAP[*p]) { |
328 | 4.59k | case WC_BIG5_MAP_UB: |
329 | 4.59k | big5_state = WC_BIG5_MBYTE1; |
330 | 4.59k | break; |
331 | 457 | case WC_BIG5_MAP_C1: |
332 | 457 | big5_detect = DETECT_ERROR; |
333 | 457 | break; |
334 | 12.5k | } |
335 | 12.5k | break; |
336 | 12.5k | case WC_BIG5_MBYTE1: |
337 | 4.51k | if (WC_BIG5_MAP[*p] & WC_BIG5_MAP_LB) { |
338 | 4.23k | SET_DETECT(big5_detect, DETECT_OK); |
339 | 4.23k | ok = WC_TRUE; |
340 | 4.23k | } else |
341 | 281 | SET_BROKEN_ERROR(big5_detect); |
342 | 4.51k | big5_state = WC_BIG5_NOSTATE; |
343 | 4.51k | break; |
344 | 17.0k | } |
345 | 17.0k | if (big5_detect == DETECT_ERROR) |
346 | 503 | possible--; |
347 | 17.0k | } |
348 | 535k | if (hz_detect != DETECT_ERROR) { |
349 | 5.84k | if (*p & 0x80) { |
350 | 257 | hz_detect = DETECT_ERROR; |
351 | 257 | possible--; |
352 | 5.58k | } else { |
353 | 5.58k | switch (hz_state) { |
354 | 1.86k | case WC_HZ_NOSTATE: |
355 | 1.86k | if (*p == WC_C_HZ_TILDA) |
356 | 1.11k | hz_state = WC_HZ_TILDA; |
357 | 1.86k | break; |
358 | 1.09k | case WC_HZ_TILDA: |
359 | 1.09k | if (*p == WC_C_HZ_SI) |
360 | 640 | hz_state = WC_HZ_MBYTE; |
361 | 451 | else |
362 | 451 | hz_state = WC_HZ_NOSTATE; |
363 | 1.09k | break; |
364 | 1.03k | case WC_HZ_TILDA_MB: |
365 | 1.03k | if (*p == WC_C_HZ_SO) |
366 | 300 | hz_state = WC_HZ_NOSTATE; |
367 | 735 | else |
368 | 735 | hz_state = WC_HZ_MBYTE; |
369 | 1.03k | break; |
370 | 1.33k | case WC_HZ_MBYTE: |
371 | 1.33k | if (*p == WC_C_HZ_TILDA) |
372 | 1.05k | hz_state = WC_HZ_TILDA_MB; |
373 | 273 | else |
374 | 273 | hz_state = WC_HZ_MBYTE1; |
375 | 1.33k | break; |
376 | 259 | case WC_HZ_MBYTE1: |
377 | 259 | hz_detect = DETECT_OK; |
378 | 259 | ok = WC_TRUE; |
379 | 259 | hz_state = WC_HZ_NOSTATE; |
380 | 259 | break; |
381 | 5.58k | } |
382 | 5.58k | } |
383 | 5.84k | } |
384 | 535k | if (latin_detect != DETECT_ERROR) { |
385 | 4.38k | switch (WC_ISO_MAP[*p] & WC_ISO_MAP_CG) { |
386 | 1.55k | case WC_ISO_MAP_GR: |
387 | 1.97k | case WC_ISO_MAP_GR96: |
388 | 1.97k | SET_DETECT(latin_detect, DETECT_OK); |
389 | 1.97k | ok = WC_TRUE; |
390 | 1.97k | break; |
391 | 121 | case WC_ISO_MAP_C1: |
392 | 121 | latin_detect = DETECT_ERROR; |
393 | 121 | break; |
394 | 4.38k | } |
395 | 4.38k | if (latin_detect == DETECT_ERROR) |
396 | 121 | possible--; |
397 | 4.38k | } |
398 | 535k | if (priv_detect != DETECT_ERROR) { |
399 | 44.3k | if (*p != WC_C_ESC && WC_DETECT_MAP[*p]) { |
400 | 5.18k | SET_DETECT(priv_detect, DETECT_OK); |
401 | 5.18k | ok = WC_TRUE; |
402 | 5.18k | } |
403 | | /* |
404 | | if (priv_detect == DETECT_ERROR) |
405 | | possible--; |
406 | | */ |
407 | 44.3k | } |
408 | 535k | #ifdef USE_UNICODE |
409 | 535k | if (utf8_detect != DETECT_ERROR) { |
410 | 27.7k | switch (utf8_state) { |
411 | 20.7k | case WC_UTF8_NOSTATE: |
412 | 20.7k | switch (utf8_next = WC_UTF8_MAP[*p]) { |
413 | 11.9k | case 1: |
414 | 14.2k | case 8: |
415 | 14.2k | break; |
416 | 2.29k | case 0: |
417 | 2.65k | case 7: |
418 | 2.65k | utf8_detect = DETECT_ERROR; |
419 | 2.65k | break; |
420 | 3.83k | default: |
421 | 3.83k | utf8_next--; |
422 | 3.83k | utf8_state = WC_UTF8_NEXT; |
423 | 3.83k | break; |
424 | 20.7k | } |
425 | 20.7k | break; |
426 | 20.7k | case WC_UTF8_NEXT: |
427 | 6.99k | if (WC_UTF8_MAP[*p]) { |
428 | 1.28k | utf8_detect = DETECT_ERROR; |
429 | 1.28k | utf8_state = WC_UTF8_NOSTATE; |
430 | 1.28k | break; |
431 | 1.28k | } |
432 | 5.71k | utf8_next--; |
433 | 5.71k | if (! utf8_next) { |
434 | 2.31k | SET_DETECT(utf8_detect, DETECT_OK); |
435 | 2.31k | ok = WC_TRUE; |
436 | 2.31k | utf8_state = WC_UTF8_NOSTATE; |
437 | 2.31k | } |
438 | 5.71k | break; |
439 | 27.7k | } |
440 | 27.7k | if (utf8_detect == DETECT_ERROR) |
441 | 3.93k | possible--; |
442 | 27.7k | } |
443 | 535k | #endif |
444 | 535k | } |
445 | | |
446 | 10.3k | if (iso_detect != DETECT_ERROR) { |
447 | 1.17k | if (iso_detect == DETECT_NORMAL) { |
448 | 331 | if (hz_detect == DETECT_OK) |
449 | 46 | return WC_CES_HZ_GB_2312; |
450 | 285 | if (priv_detect == DETECT_OK) |
451 | 200 | return priv; |
452 | 85 | return WC_CES_US_ASCII; |
453 | 285 | } |
454 | 839 | switch (euc) { |
455 | 13 | case WC_CES_EUC_CN: |
456 | 14 | case WC_CES_EUC_TW: |
457 | 14 | if (iso2022cn) |
458 | 4 | return WC_CES_ISO_2022_CN; |
459 | 10 | break; |
460 | 10 | case WC_CES_EUC_KR: |
461 | 3 | if (iso2022kr) |
462 | 1 | return WC_CES_ISO_2022_KR; |
463 | 2 | break; |
464 | 839 | } |
465 | 834 | if (iso2022jp3) |
466 | 35 | return WC_CES_ISO_2022_JP_3; |
467 | 799 | if (iso2022jp2) |
468 | 131 | return WC_CES_ISO_2022_JP_2; |
469 | 668 | if (iso2022cn) |
470 | 59 | return WC_CES_ISO_2022_CN; |
471 | 609 | if (iso2022kr) |
472 | 8 | return WC_CES_ISO_2022_KR; |
473 | 601 | return WC_CES_ISO_2022_JP; |
474 | 609 | } |
475 | 9.15k | switch (hint) { |
476 | 357 | case WC_CES_ISO_2022_JP: |
477 | 358 | case WC_CES_ISO_2022_JP_2: |
478 | 359 | case WC_CES_ISO_2022_JP_3: |
479 | 536 | case WC_CES_ISO_2022_KR: |
480 | 578 | case WC_CES_ISO_2022_CN: |
481 | 578 | break; |
482 | 838 | case WC_CES_EUC_JP: |
483 | 1.06k | case WC_CES_EUC_CN: |
484 | 1.14k | case WC_CES_EUC_TW: |
485 | 1.14k | case WC_CES_EUC_KR: |
486 | 1.14k | if (euc_detect != DETECT_ERROR) |
487 | 337 | return hint; |
488 | 806 | break; |
489 | 1.19k | case WC_CES_SHIFT_JIS: |
490 | 1.36k | case WC_CES_SHIFT_JISX0213: |
491 | 1.36k | if (sjis_detect != DETECT_ERROR) |
492 | 859 | return hint; |
493 | 504 | break; |
494 | 504 | case WC_CES_BIG5: |
495 | 242 | if (big5_detect != DETECT_ERROR) |
496 | 109 | return hint; |
497 | 133 | break; |
498 | 133 | #ifdef USE_UNICODE |
499 | 1.17k | case WC_CES_UTF_8: |
500 | 1.17k | return hint; |
501 | 0 | #endif |
502 | 73 | case WC_CES_US_ASCII: |
503 | 73 | #ifdef USE_UNICODE |
504 | 73 | if (utf8_detect != DETECT_ERROR) |
505 | 5 | return hint; |
506 | 68 | #endif |
507 | 68 | if (latin_detect != DETECT_ERROR) |
508 | 66 | return WC_CES_ISO_8859_1; |
509 | 2 | return hint; |
510 | 4.58k | default: |
511 | 4.58k | if (latin_detect != DETECT_ERROR) |
512 | 474 | return hint; |
513 | 4.10k | if (priv_detect != DETECT_ERROR) |
514 | 3.73k | return hint; |
515 | 375 | #ifdef USE_UNICODE |
516 | 375 | if (utf8_detect != DETECT_ERROR) |
517 | 122 | return WC_CES_UTF_8; |
518 | 253 | #endif |
519 | 253 | return hint; |
520 | 9.15k | } |
521 | 2.02k | if (euc_detect == DETECT_OK) |
522 | 319 | return euc; |
523 | 1.70k | if (sjis_detect == DETECT_OK) |
524 | 58 | return WC_CES_SHIFT_JIS; |
525 | 1.64k | if (big5_detect == DETECT_OK) |
526 | 19 | return WC_CES_BIG5; |
527 | 1.62k | #ifdef USE_UNICODE |
528 | 1.62k | if (utf8_detect == DETECT_OK) |
529 | 374 | return WC_CES_UTF_8; |
530 | 1.25k | if (sjis_detect & DETECT_POSSIBLE) |
531 | 344 | return WC_CES_SHIFT_JIS; |
532 | 907 | #endif |
533 | 907 | if (euc_detect != DETECT_ERROR) |
534 | 109 | return euc; |
535 | 798 | if (sjis_detect != DETECT_ERROR) |
536 | 8 | return WC_CES_SHIFT_JIS; |
537 | 790 | if (big5_detect != DETECT_ERROR) |
538 | 5 | return WC_CES_BIG5; |
539 | 785 | #ifdef USE_UNICODE |
540 | 785 | if (utf8_detect != DETECT_ERROR) |
541 | 12 | return WC_CES_UTF_8; |
542 | 773 | #endif |
543 | 773 | return hint; |
544 | 785 | } |