Line | Count | Source |
1 | | |
2 | | #include "wc.h" |
3 | | #include "iso2022.h" |
4 | | #include "sjis.h" |
5 | | #include "big5.h" |
6 | | #include "hz.h" |
7 | | #include "viet.h" |
8 | | #ifdef USE_UNICODE |
9 | | #include "utf8.h" |
10 | | #include "utf7.h" |
11 | | #endif |
12 | | |
13 | | wc_uint8 WC_DETECT_MAP[ 0x100 ] = { |
14 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
15 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
16 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
17 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
18 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
19 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
20 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
22 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
23 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
24 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
25 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
26 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
27 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
28 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
29 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
30 | | }; |
31 | | |
/*
 * Status value kept per candidate encoding while scanning.
 *
 *   DETECT_NORMAL    candidate is enabled, nothing seen for or against it
 *   DETECT_POSSIBLE  flag: weak evidence seen
 *   DETECT_OK        flag: at least one well-formed sequence seen
 *   DETECT_BROKEN    flag: one malformed sequence seen (tolerated once)
 *   DETECT_ERROR     candidate is ruled out / disabled
 *
 * POSSIBLE, OK and BROKEN are distinct bits and may be combined.
 */
#define DETECT_NORMAL   0x00
#define DETECT_POSSIBLE 0x01
#define DETECT_OK       0x02
#define DETECT_BROKEN   0x04
#define DETECT_ERROR    0x08

/* OR the flag(s) y into the status lvalue x. */
#define SET_DETECT(x,y) ((x) |= (y))

/*
 * Record a malformed sequence in the status lvalue x: the first one only
 * adds DETECT_BROKEN, a second one replaces the status with DETECT_ERROR.
 * Note that x is evaluated more than once.
 */
#define SET_BROKEN_ERROR(x) \
    ((x) = (((x) & DETECT_BROKEN) ? DETECT_ERROR : ((x) | DETECT_BROKEN)))
39 | | |
40 | | void |
41 | | wc_create_detect_map(wc_ces ces, wc_bool esc) |
42 | 11.7k | { |
43 | 11.7k | static wc_ces detect_ces = WC_CES_US_ASCII; |
44 | 11.7k | int i; |
45 | | |
46 | 11.7k | if (ces != detect_ces) { |
47 | 9.74k | if (ces & WC_CES_T_VIET) { |
48 | 372 | wc_uint8 *map = NULL; |
49 | 372 | switch (ces) { |
50 | 106 | case WC_CES_TCVN_5712: |
51 | 106 | map = wc_c0_tcvn57122_map; |
52 | 106 | break; |
53 | 254 | case WC_CES_VISCII_11: |
54 | 254 | map = wc_c0_viscii112_map; |
55 | 254 | break; |
56 | 12 | case WC_CES_VPS: |
57 | 12 | map = wc_c0_vps2_map; |
58 | 12 | break; |
59 | 372 | } |
60 | 12.2k | for (i = 0; i < 0x20; i++) |
61 | 11.9k | WC_DETECT_MAP[i] = map[i] ? 1 : 0; |
62 | 9.37k | } else { |
63 | 309k | for (i = 0; i < 0x20; i++) |
64 | 300k | WC_DETECT_MAP[i] = 0; |
65 | 9.37k | WC_DETECT_MAP[WC_C_HZ_TILDA] = (ces == WC_CES_HZ_GB_2312) ? 1 : 0; |
66 | 9.37k | #ifdef USE_UNICODE |
67 | 9.37k | WC_DETECT_MAP[WC_C_UTF7_PLUS] = (ces == WC_CES_UTF_7) ? 1 : 0; |
68 | 9.37k | #endif |
69 | 9.37k | } |
70 | 9.74k | detect_ces = ces; |
71 | 9.74k | } |
72 | 11.7k | WC_DETECT_MAP[WC_C_ESC] = (esc || (ces & WC_CES_T_ISO_2022)) ? 1 : 0; |
73 | 11.7k | return; |
74 | 11.7k | } |
75 | | |
76 | | wc_ces |
77 | | wc_auto_detect(char *is, size_t len, wc_ces hint) |
78 | 11.3k | { |
79 | 11.3k | wc_uchar *p = (wc_uchar *)is; |
80 | 11.3k | wc_uchar *ep = p + len; |
81 | 11.3k | wc_uchar *q; |
82 | 11.3k | wc_ces euc = 0, priv = 0; |
83 | 11.3k | wc_status st; |
84 | 11.3k | int euc_state = 0, sjis_state = 0, big5_state = 0, hz_state = 0; |
85 | 11.3k | int iso_detect = DETECT_ERROR, euc_detect = DETECT_ERROR, |
86 | 11.3k | sjis_detect = DETECT_ERROR, big5_detect = DETECT_ERROR, |
87 | 11.3k | hz_detect = DETECT_ERROR, latin_detect = DETECT_ERROR, |
88 | 11.3k | priv_detect = DETECT_ERROR; |
89 | 11.3k | int possible = 0; |
90 | 11.3k | wc_bool iso2022jp2 = WC_FALSE, iso2022jp3 = WC_FALSE, |
91 | 11.3k | iso2022cn = WC_FALSE, iso2022kr = WC_FALSE, ok = WC_FALSE; |
92 | 11.3k | #ifdef USE_UNICODE |
93 | 11.3k | int utf8_state = 0; |
94 | 11.3k | int utf8_detect = DETECT_ERROR; |
95 | 11.3k | int utf8_next = 0; |
96 | 11.3k | #endif |
97 | | |
98 | 11.3k | wc_create_detect_map(hint, WC_TRUE); |
99 | 1.37M | for (; p < ep && ! WC_DETECT_MAP[*p]; p++) |
100 | 1.35M | ; |
101 | 11.3k | if (p == ep) |
102 | 1.51k | return hint; |
103 | | |
104 | 9.82k | switch (hint) { |
105 | 297 | case WC_CES_ISO_2022_JP: |
106 | 298 | case WC_CES_ISO_2022_JP_2: |
107 | 300 | case WC_CES_ISO_2022_JP_3: |
108 | 1.17k | case WC_CES_EUC_JP: |
109 | 2.27k | case WC_CES_SHIFT_JIS: |
110 | 2.41k | case WC_CES_SHIFT_JISX0213: |
111 | 2.41k | euc = WC_CES_EUC_JP; |
112 | 2.41k | euc_state = WC_EUC_NOSTATE; |
113 | 2.41k | sjis_state = WC_SJIS_NOSTATE; |
114 | 2.41k | iso_detect = euc_detect = sjis_detect = DETECT_NORMAL; |
115 | 2.41k | possible = 3; |
116 | 2.41k | break; |
117 | 36 | case WC_CES_ISO_2022_CN: |
118 | 273 | case WC_CES_EUC_CN: |
119 | 273 | euc = WC_CES_EUC_CN; |
120 | 273 | euc_state = WC_EUC_NOSTATE; |
121 | 273 | big5_state = WC_BIG5_NOSTATE; |
122 | 273 | iso_detect = euc_detect = big5_detect = DETECT_NORMAL; |
123 | 273 | possible = 3; |
124 | 273 | break; |
125 | 86 | case WC_CES_EUC_TW: |
126 | 334 | case WC_CES_BIG5: |
127 | 334 | euc = WC_CES_EUC_TW; |
128 | 334 | euc_state = WC_EUC_NOSTATE; |
129 | 334 | big5_state = WC_BIG5_NOSTATE; |
130 | 334 | iso_detect = euc_detect = big5_detect = DETECT_NORMAL; |
131 | 334 | possible = 3; |
132 | 334 | break; |
133 | 335 | case WC_CES_HZ_GB_2312: |
134 | 335 | euc = WC_CES_EUC_CN; |
135 | 335 | euc_state = WC_EUC_NOSTATE; |
136 | 335 | hz_state = WC_HZ_NOSTATE; |
137 | 335 | iso_detect = euc_detect = big5_detect = hz_detect = DETECT_NORMAL; |
138 | 335 | possible = 4; |
139 | 335 | break; |
140 | 196 | case WC_CES_ISO_2022_KR: |
141 | 197 | case WC_CES_EUC_KR: |
142 | 197 | euc = WC_CES_EUC_KR; |
143 | 197 | euc_state = WC_EUC_NOSTATE; |
144 | 197 | iso_detect = euc_detect = DETECT_NORMAL; |
145 | 197 | possible = 3; |
146 | 197 | break; |
147 | 0 | #ifdef USE_UNICODE |
148 | 1.02k | case WC_CES_UTF_8: |
149 | 1.02k | iso_detect = DETECT_NORMAL; |
150 | 1.02k | possible = 1; |
151 | 1.02k | break; |
152 | 0 | #endif |
153 | 60 | case WC_CES_US_ASCII: |
154 | 60 | iso_detect = latin_detect = DETECT_NORMAL; |
155 | 60 | possible = 2; |
156 | 60 | break; |
157 | 5.19k | default: |
158 | 5.19k | if (hint & WC_CES_T_ISO_8859) { |
159 | 654 | iso_detect = latin_detect = DETECT_NORMAL; |
160 | 654 | possible = 2; |
161 | 4.53k | } else { |
162 | 4.53k | iso_detect = priv_detect = DETECT_NORMAL; |
163 | 4.53k | priv = hint; /* for TVCN, VISCII, VPS */ |
164 | 4.53k | possible = 2; |
165 | 4.53k | } |
166 | 5.19k | break; |
167 | 9.82k | } |
168 | 9.82k | #ifdef USE_UNICODE |
169 | 9.82k | if (priv_detect == DETECT_ERROR) { |
170 | 5.29k | utf8_detect = DETECT_NORMAL; |
171 | 5.29k | possible++; |
172 | 5.29k | } |
173 | 9.82k | #endif |
174 | | |
175 | 9.82k | wc_input_init(WC_CES_US_ASCII, &st); |
176 | | |
177 | 1.13M | for (; p < ep; p++) { |
178 | 1.13M | if (possible == 0 || (possible == 1 && ok)) |
179 | 6.64k | break; |
180 | 1.12M | if (iso_detect != DETECT_ERROR) { |
181 | 83.1k | switch (*p) { |
182 | 11.0k | case WC_C_ESC: |
183 | 11.0k | if (*(p+1) == WC_C_MBCS) { |
184 | 5.37k | q = p; |
185 | 5.37k | if (! wc_parse_iso2022_esc(&q, &st)) |
186 | 1.14k | break; |
187 | 4.22k | if (st.design[0] == WC_CCS_JIS_C_6226 || |
188 | 2.62k | st.design[0] == WC_CCS_JIS_X_0208) |
189 | 2.99k | ; |
190 | 1.23k | else if (st.design[0] == WC_CCS_JIS_X_0213_1 || |
191 | 964 | st.design[0] == WC_CCS_JIS_X_0213_2) |
192 | 382 | iso2022jp3 = WC_TRUE; |
193 | 848 | else if (WC_CCS_TYPE(st.design[0]) == WC_CCS_A_CS94W) |
194 | 500 | iso2022jp2 = WC_TRUE; |
195 | 4.22k | if (st.design[1] == WC_CCS_KS_X_1001) |
196 | 1.26k | iso2022kr = WC_TRUE; |
197 | 2.96k | else if (st.design[1] == WC_CCS_GB_2312 || |
198 | 2.96k | st.design[1] == WC_CCS_ISO_IR_165 || |
199 | 2.51k | st.design[1] == WC_CCS_CNS_11643_1) |
200 | 646 | iso2022cn = WC_TRUE; |
201 | 4.22k | if (WC_CCS_TYPE(st.design[2]) == WC_CCS_A_CS94W || |
202 | 3.98k | WC_CCS_TYPE(st.design[3]) == WC_CCS_A_CS94W) |
203 | 702 | iso2022cn = WC_TRUE; |
204 | 5.71k | } else if (*(p+1) == WC_C_G2_CS96) { |
205 | 695 | q = p; |
206 | 695 | if (! wc_parse_iso2022_esc(&q, &st)) |
207 | 355 | break; |
208 | 340 | if (WC_CCS_TYPE(st.design[2]) == WC_CCS_A_CS96) |
209 | 340 | iso2022jp2 = WC_TRUE; |
210 | 5.02k | } else if (*(p+1) == WC_C_CSWSR) { |
211 | 366 | q = p; |
212 | 366 | if (! wc_parse_iso2022_esc(&q, &st)) |
213 | 288 | break; |
214 | 78 | possible = 0; |
215 | 78 | iso_detect = DETECT_BROKEN; |
216 | 78 | continue; |
217 | 366 | } |
218 | 9.22k | iso_detect = DETECT_OK; |
219 | 9.22k | ok = WC_TRUE; |
220 | 9.22k | break; |
221 | 366 | case WC_C_SI: |
222 | 485 | case WC_C_SO: |
223 | 485 | iso_detect = DETECT_OK; |
224 | 485 | ok = WC_TRUE; |
225 | 485 | iso2022cn = WC_TRUE; |
226 | 485 | iso2022kr = WC_TRUE; |
227 | 485 | break; |
228 | 71.5k | default: |
229 | 71.5k | if (*p & 0x80) { |
230 | 8.73k | iso_detect = DETECT_ERROR; |
231 | 8.73k | possible--; |
232 | 8.73k | } |
233 | 71.5k | break; |
234 | 83.1k | } |
235 | 83.1k | } |
236 | 1.12M | if (euc_detect != DETECT_ERROR) { |
237 | 851k | switch (euc_state) { |
238 | 444k | case WC_EUC_NOSTATE: |
239 | 444k | switch (WC_ISO_MAP[*p]) { |
240 | 406k | case WC_ISO_MAP_GR: |
241 | 406k | euc_state = WC_EUC_MBYTE1; |
242 | 406k | break; |
243 | 300 | case WC_ISO_MAP_SS2: |
244 | 300 | if (euc == WC_CES_EUC_JP) |
245 | 115 | euc_state = WC_EUC_MBYTE1; |
246 | 185 | else if (euc == WC_CES_EUC_TW) |
247 | 178 | euc_state = WC_EUC_TW_SS2; |
248 | 7 | else |
249 | 7 | euc_detect = DETECT_ERROR; |
250 | 300 | break; |
251 | 261 | case WC_ISO_MAP_SS3: |
252 | 261 | if (euc == WC_CES_EUC_JP && |
253 | 244 | WC_ISO_MAP[*(p+1)] == WC_ISO_MAP_GR) |
254 | 217 | ; |
255 | 44 | else |
256 | 44 | euc_detect = DETECT_ERROR; |
257 | 261 | break; |
258 | 930 | case WC_ISO_MAP_C1: |
259 | 1.23k | case WC_ISO_MAP_GR96: |
260 | 1.23k | euc_detect = DETECT_ERROR; |
261 | 1.23k | break; |
262 | 444k | } |
263 | 444k | break; |
264 | 444k | case WC_EUC_MBYTE1: |
265 | 406k | if (WC_ISO_MAP[*p] == WC_ISO_MAP_GR) { |
266 | 403k | SET_DETECT(euc_detect, DETECT_OK); |
267 | 403k | ok = WC_TRUE; |
268 | 403k | } else |
269 | 2.35k | SET_BROKEN_ERROR(euc_detect); |
270 | 406k | euc_state = WC_EUC_NOSTATE; |
271 | 406k | break; |
272 | 168 | case WC_EUC_TW_SS2: |
273 | 168 | if (!( 0xa0 <= *p && *p <= 0xb0) || |
274 | 151 | WC_ISO_MAP[*(p+1)] != WC_ISO_MAP_GR) |
275 | 24 | euc_detect = DETECT_ERROR; |
276 | 168 | euc_state = WC_EUC_NOSTATE; |
277 | 168 | break; |
278 | 851k | } |
279 | 851k | if (euc_detect == DETECT_ERROR) |
280 | 2.03k | possible--; |
281 | 851k | } |
282 | 1.12M | if (sjis_detect != DETECT_ERROR) { |
283 | 619k | switch (sjis_state) { |
284 | 616k | case WC_SJIS_NOSTATE: |
285 | 616k | switch (WC_SJIS_MAP[*p]) { |
286 | 813 | case WC_SJIS_MAP_SL: |
287 | 2.88k | case WC_SJIS_MAP_SH: |
288 | 2.88k | sjis_state = WC_SJIS_SHIFT_L; |
289 | 2.88k | break; |
290 | 568k | case WC_SJIS_MAP_SK: |
291 | 568k | SET_DETECT(sjis_detect, DETECT_POSSIBLE); |
292 | 568k | break; |
293 | 522 | case WC_SJIS_MAP_SX: |
294 | 522 | if (WcOption.use_jisx0213) { |
295 | 0 | sjis_state = WC_SJIS_SHIFT_X; |
296 | 0 | break; |
297 | 0 | } |
298 | 623 | case WC_SJIS_MAP_80: |
299 | 657 | case WC_SJIS_MAP_A0: |
300 | 1.05k | case WC_SJIS_MAP_C1: |
301 | 1.05k | sjis_detect = DETECT_ERROR; |
302 | 1.05k | break; |
303 | 616k | } |
304 | 616k | break; |
305 | 616k | case WC_SJIS_SHIFT_L: |
306 | 2.75k | if (WC_SJIS_MAP[*p] & WC_SJIS_MAP_LB) { |
307 | 2.58k | SET_DETECT(sjis_detect, DETECT_OK); |
308 | 2.58k | ok = WC_TRUE; |
309 | 2.58k | } else |
310 | 176 | SET_BROKEN_ERROR(sjis_detect); |
311 | 2.75k | sjis_state = WC_SJIS_NOSTATE; |
312 | 2.75k | break; |
313 | 0 | case WC_SJIS_SHIFT_X: |
314 | 0 | if (WC_SJIS_MAP[*p] & WC_SJIS_MAP_LB) |
315 | 0 | SET_DETECT(sjis_detect, DETECT_POSSIBLE); |
316 | 0 | else |
317 | 0 | sjis_detect = DETECT_ERROR; |
318 | 0 | sjis_state = WC_SJIS_NOSTATE; |
319 | 0 | break; |
320 | 619k | } |
321 | 619k | if (sjis_detect == DETECT_ERROR) |
322 | 1.08k | possible--; |
323 | 619k | } |
324 | 1.12M | if (big5_detect != DETECT_ERROR) { |
325 | 419k | switch (big5_state) { |
326 | 222k | case WC_BIG5_NOSTATE: |
327 | 222k | switch (WC_BIG5_MAP[*p]) { |
328 | 197k | case WC_BIG5_MAP_UB: |
329 | 197k | big5_state = WC_BIG5_MBYTE1; |
330 | 197k | break; |
331 | 439 | case WC_BIG5_MAP_C1: |
332 | 439 | big5_detect = DETECT_ERROR; |
333 | 439 | break; |
334 | 222k | } |
335 | 222k | break; |
336 | 222k | case WC_BIG5_MBYTE1: |
337 | 197k | if (WC_BIG5_MAP[*p] & WC_BIG5_MAP_LB) { |
338 | 196k | SET_DETECT(big5_detect, DETECT_OK); |
339 | 196k | ok = WC_TRUE; |
340 | 196k | } else |
341 | 314 | SET_BROKEN_ERROR(big5_detect); |
342 | 197k | big5_state = WC_BIG5_NOSTATE; |
343 | 197k | break; |
344 | 419k | } |
345 | 419k | if (big5_detect == DETECT_ERROR) |
346 | 490 | possible--; |
347 | 419k | } |
348 | 1.12M | if (hz_detect != DETECT_ERROR) { |
349 | 7.10k | if (*p & 0x80) { |
350 | 232 | hz_detect = DETECT_ERROR; |
351 | 232 | possible--; |
352 | 6.87k | } else { |
353 | 6.87k | switch (hz_state) { |
354 | 2.79k | case WC_HZ_NOSTATE: |
355 | 2.79k | if (*p == WC_C_HZ_TILDA) |
356 | 1.24k | hz_state = WC_HZ_TILDA; |
357 | 2.79k | break; |
358 | 1.21k | case WC_HZ_TILDA: |
359 | 1.21k | if (*p == WC_C_HZ_SI) |
360 | 671 | hz_state = WC_HZ_MBYTE; |
361 | 548 | else |
362 | 548 | hz_state = WC_HZ_NOSTATE; |
363 | 1.21k | break; |
364 | 1.13k | case WC_HZ_TILDA_MB: |
365 | 1.13k | if (*p == WC_C_HZ_SO) |
366 | 312 | hz_state = WC_HZ_NOSTATE; |
367 | 821 | else |
368 | 821 | hz_state = WC_HZ_MBYTE; |
369 | 1.13k | break; |
370 | 1.45k | case WC_HZ_MBYTE: |
371 | 1.45k | if (*p == WC_C_HZ_TILDA) |
372 | 1.15k | hz_state = WC_HZ_TILDA_MB; |
373 | 296 | else |
374 | 296 | hz_state = WC_HZ_MBYTE1; |
375 | 1.45k | break; |
376 | 280 | case WC_HZ_MBYTE1: |
377 | 280 | hz_detect = DETECT_OK; |
378 | 280 | ok = WC_TRUE; |
379 | 280 | hz_state = WC_HZ_NOSTATE; |
380 | 280 | break; |
381 | 6.87k | } |
382 | 6.87k | } |
383 | 7.10k | } |
384 | 1.12M | if (latin_detect != DETECT_ERROR) { |
385 | 21.0k | switch (WC_ISO_MAP[*p] & WC_ISO_MAP_CG) { |
386 | 1.63k | case WC_ISO_MAP_GR: |
387 | 2.21k | case WC_ISO_MAP_GR96: |
388 | 2.21k | SET_DETECT(latin_detect, DETECT_OK); |
389 | 2.21k | ok = WC_TRUE; |
390 | 2.21k | break; |
391 | 126 | case WC_ISO_MAP_C1: |
392 | 126 | latin_detect = DETECT_ERROR; |
393 | 126 | break; |
394 | 21.0k | } |
395 | 21.0k | if (latin_detect == DETECT_ERROR) |
396 | 126 | possible--; |
397 | 21.0k | } |
398 | 1.12M | if (priv_detect != DETECT_ERROR) { |
399 | 47.4k | if (*p != WC_C_ESC && WC_DETECT_MAP[*p]) { |
400 | 5.48k | SET_DETECT(priv_detect, DETECT_OK); |
401 | 5.48k | ok = WC_TRUE; |
402 | 5.48k | } |
403 | | /* |
404 | | if (priv_detect == DETECT_ERROR) |
405 | | possible--; |
406 | | */ |
407 | 47.4k | } |
408 | 1.12M | #ifdef USE_UNICODE |
409 | 1.12M | if (utf8_detect != DETECT_ERROR) { |
410 | 43.1k | switch (utf8_state) { |
411 | 36.5k | case WC_UTF8_NOSTATE: |
412 | 36.5k | switch (utf8_next = WC_UTF8_MAP[*p]) { |
413 | 26.4k | case 1: |
414 | 30.4k | case 8: |
415 | 30.4k | break; |
416 | 2.16k | case 0: |
417 | 2.53k | case 7: |
418 | 2.53k | utf8_detect = DETECT_ERROR; |
419 | 2.53k | break; |
420 | 3.56k | default: |
421 | 3.56k | utf8_next--; |
422 | 3.56k | utf8_state = WC_UTF8_NEXT; |
423 | 3.56k | break; |
424 | 36.5k | } |
425 | 36.5k | break; |
426 | 36.5k | case WC_UTF8_NEXT: |
427 | 6.60k | if (WC_UTF8_MAP[*p]) { |
428 | 1.21k | utf8_detect = DETECT_ERROR; |
429 | 1.21k | utf8_state = WC_UTF8_NOSTATE; |
430 | 1.21k | break; |
431 | 1.21k | } |
432 | 5.38k | utf8_next--; |
433 | 5.38k | if (! utf8_next) { |
434 | 2.11k | SET_DETECT(utf8_detect, DETECT_OK); |
435 | 2.11k | ok = WC_TRUE; |
436 | 2.11k | utf8_state = WC_UTF8_NOSTATE; |
437 | 2.11k | } |
438 | 5.38k | break; |
439 | 43.1k | } |
440 | 43.1k | if (utf8_detect == DETECT_ERROR) |
441 | 3.75k | possible--; |
442 | 43.1k | } |
443 | 1.12M | #endif |
444 | 1.12M | } |
445 | | |
446 | 9.82k | if (iso_detect != DETECT_ERROR) { |
447 | 1.09k | if (iso_detect == DETECT_NORMAL) { |
448 | 308 | if (hz_detect == DETECT_OK) |
449 | 48 | return WC_CES_HZ_GB_2312; |
450 | 260 | if (priv_detect == DETECT_OK) |
451 | 180 | return priv; |
452 | 80 | return WC_CES_US_ASCII; |
453 | 260 | } |
454 | 789 | switch (euc) { |
455 | 16 | case WC_CES_EUC_CN: |
456 | 19 | case WC_CES_EUC_TW: |
457 | 19 | if (iso2022cn) |
458 | 3 | return WC_CES_ISO_2022_CN; |
459 | 16 | break; |
460 | 16 | case WC_CES_EUC_KR: |
461 | 4 | if (iso2022kr) |
462 | 1 | return WC_CES_ISO_2022_KR; |
463 | 3 | break; |
464 | 789 | } |
465 | 785 | if (iso2022jp3) |
466 | 35 | return WC_CES_ISO_2022_JP_3; |
467 | 750 | if (iso2022jp2) |
468 | 120 | return WC_CES_ISO_2022_JP_2; |
469 | 630 | if (iso2022cn) |
470 | 62 | return WC_CES_ISO_2022_CN; |
471 | 568 | if (iso2022kr) |
472 | 9 | return WC_CES_ISO_2022_KR; |
473 | 559 | return WC_CES_ISO_2022_JP; |
474 | 568 | } |
475 | 8.73k | switch (hint) { |
476 | 292 | case WC_CES_ISO_2022_JP: |
477 | 293 | case WC_CES_ISO_2022_JP_2: |
478 | 295 | case WC_CES_ISO_2022_JP_3: |
479 | 487 | case WC_CES_ISO_2022_KR: |
480 | 523 | case WC_CES_ISO_2022_CN: |
481 | 523 | break; |
482 | 868 | case WC_CES_EUC_JP: |
483 | 1.10k | case WC_CES_EUC_CN: |
484 | 1.18k | case WC_CES_EUC_TW: |
485 | 1.18k | case WC_CES_EUC_KR: |
486 | 1.18k | if (euc_detect != DETECT_ERROR) |
487 | 311 | return hint; |
488 | 877 | break; |
489 | 1.09k | case WC_CES_SHIFT_JIS: |
490 | 1.24k | case WC_CES_SHIFT_JISX0213: |
491 | 1.24k | if (sjis_detect != DETECT_ERROR) |
492 | 785 | return hint; |
493 | 456 | break; |
494 | 456 | case WC_CES_BIG5: |
495 | 246 | if (big5_detect != DETECT_ERROR) |
496 | 109 | return hint; |
497 | 137 | break; |
498 | 137 | #ifdef USE_UNICODE |
499 | 1.02k | case WC_CES_UTF_8: |
500 | 1.02k | return hint; |
501 | 0 | #endif |
502 | 58 | case WC_CES_US_ASCII: |
503 | 58 | #ifdef USE_UNICODE |
504 | 58 | if (utf8_detect != DETECT_ERROR) |
505 | 7 | return hint; |
506 | 51 | #endif |
507 | 51 | if (latin_detect != DETECT_ERROR) |
508 | 49 | return WC_CES_ISO_8859_1; |
509 | 2 | return hint; |
510 | 4.45k | default: |
511 | 4.45k | if (latin_detect != DETECT_ERROR) |
512 | 517 | return hint; |
513 | 3.93k | if (priv_detect != DETECT_ERROR) |
514 | 3.58k | return hint; |
515 | 355 | #ifdef USE_UNICODE |
516 | 355 | if (utf8_detect != DETECT_ERROR) |
517 | 110 | return WC_CES_UTF_8; |
518 | 245 | #endif |
519 | 245 | return hint; |
520 | 8.73k | } |
521 | 1.99k | if (euc_detect == DETECT_OK) |
522 | 278 | return euc; |
523 | 1.71k | if (sjis_detect == DETECT_OK) |
524 | 64 | return WC_CES_SHIFT_JIS; |
525 | 1.65k | if (big5_detect == DETECT_OK) |
526 | 19 | return WC_CES_BIG5; |
527 | 1.63k | #ifdef USE_UNICODE |
528 | 1.63k | if (utf8_detect == DETECT_OK) |
529 | 450 | return WC_CES_UTF_8; |
530 | 1.18k | if (sjis_detect & DETECT_POSSIBLE) |
531 | 296 | return WC_CES_SHIFT_JIS; |
532 | 886 | #endif |
533 | 886 | if (euc_detect != DETECT_ERROR) |
534 | 101 | return euc; |
535 | 785 | if (sjis_detect != DETECT_ERROR) |
536 | 6 | return WC_CES_SHIFT_JIS; |
537 | 779 | if (big5_detect != DETECT_ERROR) |
538 | 5 | return WC_CES_BIG5; |
539 | 774 | #ifdef USE_UNICODE |
540 | 774 | if (utf8_detect != DETECT_ERROR) |
541 | 35 | return WC_CES_UTF_8; |
542 | 739 | #endif |
543 | 739 | return hint; |
544 | 774 | } |