Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | #include "wc.h" |
3 | | #include "iso2022.h" |
4 | | #include "sjis.h" |
5 | | #include "big5.h" |
6 | | #include "hz.h" |
7 | | #include "viet.h" |
8 | | #ifdef USE_UNICODE |
9 | | #include "utf8.h" |
10 | | #include "utf7.h" |
11 | | #endif |
12 | | |
13 | | wc_uint8 WC_DETECT_MAP[ 0x100 ] = { |
14 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
15 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
16 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
17 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
18 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
19 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
20 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
22 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
23 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
24 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
25 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
26 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
27 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
28 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
29 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
30 | | }; |
31 | | |
/*
 * Status of one candidate encoding inside wc_auto_detect().
 *
 * DETECT_POSSIBLE, DETECT_OK and DETECT_BROKEN are bit flags that are ORed
 * into the status as evidence accumulates; DETECT_ERROR replaces the whole
 * status once the candidate has been ruled out.
 */
enum {
    DETECT_NORMAL   = 0,	/* candidate enabled, no evidence yet */
    DETECT_POSSIBLE = 1,	/* weak evidence seen */
    DETECT_OK       = 2,	/* at least one well-formed sequence seen */
    DETECT_BROKEN   = 4,	/* one malformed sequence already tolerated */
    DETECT_ERROR    = 8		/* candidate ruled out */
};

/* OR the flag(s) y into the status lvalue x; yields the new status. */
#define SET_DETECT(x,y) ((x) = ((x) | (y)))

/*
 * Record a malformed sequence in the status lvalue x: the first one only
 * sets DETECT_BROKEN (keeping the other flags), a second one turns the
 * status into DETECT_ERROR.  x is evaluated more than once, so it must be
 * a side-effect free lvalue.
 */
#define SET_BROKEN_ERROR(x) \
    ((x) = ((((x) & DETECT_BROKEN) != 0) ? DETECT_ERROR : ((x) | DETECT_BROKEN)))
39 | | |
40 | | void |
41 | | wc_create_detect_map(wc_ces ces, wc_bool esc) |
42 | 13.1k | { |
43 | 13.1k | static wc_ces detect_ces = WC_CES_US_ASCII; |
44 | 13.1k | int i; |
45 | | |
46 | 13.1k | if (ces != detect_ces) { |
47 | 10.5k | if (ces & WC_CES_T_VIET) { |
48 | 369 | wc_uint8 *map = NULL; |
49 | 369 | switch (ces) { |
50 | 104 | case WC_CES_TCVN_5712: |
51 | 104 | map = wc_c0_tcvn57122_map; |
52 | 104 | break; |
53 | 253 | case WC_CES_VISCII_11: |
54 | 253 | map = wc_c0_viscii112_map; |
55 | 253 | break; |
56 | 12 | case WC_CES_VPS: |
57 | 12 | map = wc_c0_vps2_map; |
58 | 12 | break; |
59 | 369 | } |
60 | 12.1k | for (i = 0; i < 0x20; i++) |
61 | 11.8k | WC_DETECT_MAP[i] = map[i] ? 1 : 0; |
62 | 10.1k | } else { |
63 | 335k | for (i = 0; i < 0x20; i++) |
64 | 325k | WC_DETECT_MAP[i] = 0; |
65 | 10.1k | WC_DETECT_MAP[WC_C_HZ_TILDA] = (ces == WC_CES_HZ_GB_2312) ? 1 : 0; |
66 | 10.1k | #ifdef USE_UNICODE |
67 | 10.1k | WC_DETECT_MAP[WC_C_UTF7_PLUS] = (ces == WC_CES_UTF_7) ? 1 : 0; |
68 | 10.1k | #endif |
69 | 10.1k | } |
70 | 10.5k | detect_ces = ces; |
71 | 10.5k | } |
72 | 13.1k | WC_DETECT_MAP[WC_C_ESC] = (esc || (ces & WC_CES_T_ISO_2022)) ? 1 : 0; |
73 | 13.1k | return; |
74 | 13.1k | } |
75 | | |
76 | | wc_ces |
77 | | wc_auto_detect(char *is, size_t len, wc_ces hint) |
78 | 12.7k | { |
79 | 12.7k | wc_uchar *p = (wc_uchar *)is; |
80 | 12.7k | wc_uchar *ep = p + len; |
81 | 12.7k | wc_uchar *q; |
82 | 12.7k | wc_ces euc = 0, priv = 0; |
83 | 12.7k | wc_status st; |
84 | 12.7k | int euc_state = 0, sjis_state = 0, big5_state = 0, hz_state = 0; |
85 | 12.7k | int iso_detect = DETECT_ERROR, euc_detect = DETECT_ERROR, |
86 | 12.7k | sjis_detect = DETECT_ERROR, big5_detect = DETECT_ERROR, |
87 | 12.7k | hz_detect = DETECT_ERROR, latin_detect = DETECT_ERROR, |
88 | 12.7k | priv_detect = DETECT_ERROR; |
89 | 12.7k | int possible = 0; |
90 | 12.7k | wc_bool iso2022jp2 = WC_FALSE, iso2022jp3 = WC_FALSE, |
91 | 12.7k | iso2022cn = WC_FALSE, iso2022kr = WC_FALSE, ok = WC_FALSE; |
92 | 12.7k | #ifdef USE_UNICODE |
93 | 12.7k | int utf8_state = 0; |
94 | 12.7k | int utf8_detect = DETECT_ERROR; |
95 | 12.7k | int utf8_next = 0; |
96 | 12.7k | #endif |
97 | | |
98 | 12.7k | wc_create_detect_map(hint, WC_TRUE); |
99 | 886k | for (; p < ep && ! WC_DETECT_MAP[*p]; p++) |
100 | 873k | ; |
101 | 12.7k | if (p == ep) |
102 | 1.84k | return hint; |
103 | | |
104 | 10.9k | switch (hint) { |
105 | 455 | case WC_CES_ISO_2022_JP: |
106 | 456 | case WC_CES_ISO_2022_JP_2: |
107 | 458 | case WC_CES_ISO_2022_JP_3: |
108 | 1.13k | case WC_CES_EUC_JP: |
109 | 2.41k | case WC_CES_SHIFT_JIS: |
110 | 2.59k | case WC_CES_SHIFT_JISX0213: |
111 | 2.59k | euc = WC_CES_EUC_JP; |
112 | 2.59k | euc_state = WC_EUC_NOSTATE; |
113 | 2.59k | sjis_state = WC_SJIS_NOSTATE; |
114 | 2.59k | iso_detect = euc_detect = sjis_detect = DETECT_NORMAL; |
115 | 2.59k | possible = 3; |
116 | 2.59k | break; |
117 | 63 | case WC_CES_ISO_2022_CN: |
118 | 303 | case WC_CES_EUC_CN: |
119 | 303 | euc = WC_CES_EUC_CN; |
120 | 303 | euc_state = WC_EUC_NOSTATE; |
121 | 303 | big5_state = WC_BIG5_NOSTATE; |
122 | 303 | iso_detect = euc_detect = big5_detect = DETECT_NORMAL; |
123 | 303 | possible = 3; |
124 | 303 | break; |
125 | 95 | case WC_CES_EUC_TW: |
126 | 389 | case WC_CES_BIG5: |
127 | 389 | euc = WC_CES_EUC_TW; |
128 | 389 | euc_state = WC_EUC_NOSTATE; |
129 | 389 | big5_state = WC_BIG5_NOSTATE; |
130 | 389 | iso_detect = euc_detect = big5_detect = DETECT_NORMAL; |
131 | 389 | possible = 3; |
132 | 389 | break; |
133 | 379 | case WC_CES_HZ_GB_2312: |
134 | 379 | euc = WC_CES_EUC_CN; |
135 | 379 | euc_state = WC_EUC_NOSTATE; |
136 | 379 | hz_state = WC_HZ_NOSTATE; |
137 | 379 | iso_detect = euc_detect = big5_detect = hz_detect = DETECT_NORMAL; |
138 | 379 | possible = 4; |
139 | 379 | break; |
140 | 196 | case WC_CES_ISO_2022_KR: |
141 | 197 | case WC_CES_EUC_KR: |
142 | 197 | euc = WC_CES_EUC_KR; |
143 | 197 | euc_state = WC_EUC_NOSTATE; |
144 | 197 | iso_detect = euc_detect = DETECT_NORMAL; |
145 | 197 | possible = 3; |
146 | 197 | break; |
147 | 0 | #ifdef USE_UNICODE |
148 | 1.43k | case WC_CES_UTF_8: |
149 | 1.43k | iso_detect = DETECT_NORMAL; |
150 | 1.43k | possible = 1; |
151 | 1.43k | break; |
152 | 0 | #endif |
153 | 57 | case WC_CES_US_ASCII: |
154 | 57 | iso_detect = latin_detect = DETECT_NORMAL; |
155 | 57 | possible = 2; |
156 | 57 | break; |
157 | 5.54k | default: |
158 | 5.54k | if (hint & WC_CES_T_ISO_8859) { |
159 | 783 | iso_detect = latin_detect = DETECT_NORMAL; |
160 | 783 | possible = 2; |
161 | 4.76k | } else { |
162 | 4.76k | iso_detect = priv_detect = DETECT_NORMAL; |
163 | 4.76k | priv = hint; /* for TVCN, VISCII, VPS */ |
164 | 4.76k | possible = 2; |
165 | 4.76k | } |
166 | 5.54k | break; |
167 | 10.9k | } |
168 | 10.9k | #ifdef USE_UNICODE |
169 | 10.9k | if (priv_detect == DETECT_ERROR) { |
170 | 6.13k | utf8_detect = DETECT_NORMAL; |
171 | 6.13k | possible++; |
172 | 6.13k | } |
173 | 10.9k | #endif |
174 | | |
175 | 10.9k | wc_input_init(WC_CES_US_ASCII, &st); |
176 | | |
177 | 666k | for (; p < ep; p++) { |
178 | 662k | if (possible == 0 || (possible == 1 && ok)) |
179 | 7.04k | break; |
180 | 655k | if (iso_detect != DETECT_ERROR) { |
181 | 88.0k | switch (*p) { |
182 | 12.4k | case WC_C_ESC: |
183 | 12.4k | if (*(p+1) == WC_C_MBCS) { |
184 | 5.08k | q = p; |
185 | 5.08k | if (! wc_parse_iso2022_esc(&q, &st)) |
186 | 1.74k | break; |
187 | 3.34k | if (st.design[0] == WC_CCS_JIS_C_6226 || |
188 | 3.34k | st.design[0] == WC_CCS_JIS_X_0208) |
189 | 1.63k | ; |
190 | 1.71k | else if (st.design[0] == WC_CCS_JIS_X_0213_1 || |
191 | 1.71k | st.design[0] == WC_CCS_JIS_X_0213_2) |
192 | 442 | iso2022jp3 = WC_TRUE; |
193 | 1.26k | else if (WC_CCS_TYPE(st.design[0]) == WC_CCS_A_CS94W) |
194 | 439 | iso2022jp2 = WC_TRUE; |
195 | 3.34k | if (st.design[1] == WC_CCS_KS_X_1001) |
196 | 228 | iso2022kr = WC_TRUE; |
197 | 3.11k | else if (st.design[1] == WC_CCS_GB_2312 || |
198 | 3.11k | st.design[1] == WC_CCS_ISO_IR_165 || |
199 | 3.11k | st.design[1] == WC_CCS_CNS_11643_1) |
200 | 663 | iso2022cn = WC_TRUE; |
201 | 3.34k | if (WC_CCS_TYPE(st.design[2]) == WC_CCS_A_CS94W || |
202 | 3.34k | WC_CCS_TYPE(st.design[3]) == WC_CCS_A_CS94W) |
203 | 535 | iso2022cn = WC_TRUE; |
204 | 7.33k | } else if (*(p+1) == WC_C_G2_CS96) { |
205 | 895 | q = p; |
206 | 895 | if (! wc_parse_iso2022_esc(&q, &st)) |
207 | 567 | break; |
208 | 328 | if (WC_CCS_TYPE(st.design[2]) == WC_CCS_A_CS96) |
209 | 328 | iso2022jp2 = WC_TRUE; |
210 | 6.44k | } else if (*(p+1) == WC_C_CSWSR) { |
211 | 485 | q = p; |
212 | 485 | if (! wc_parse_iso2022_esc(&q, &st)) |
213 | 382 | break; |
214 | 103 | possible = 0; |
215 | 103 | iso_detect = DETECT_BROKEN; |
216 | 103 | continue; |
217 | 485 | } |
218 | 9.62k | iso_detect = DETECT_OK; |
219 | 9.62k | ok = WC_TRUE; |
220 | 9.62k | break; |
221 | 452 | case WC_C_SI: |
222 | 692 | case WC_C_SO: |
223 | 692 | iso_detect = DETECT_OK; |
224 | 692 | ok = WC_TRUE; |
225 | 692 | iso2022cn = WC_TRUE; |
226 | 692 | iso2022kr = WC_TRUE; |
227 | 692 | break; |
228 | 74.9k | default: |
229 | 74.9k | if (*p & 0x80) { |
230 | 9.61k | iso_detect = DETECT_ERROR; |
231 | 9.61k | possible--; |
232 | 9.61k | } |
233 | 74.9k | break; |
234 | 88.0k | } |
235 | 88.0k | } |
236 | 655k | if (euc_detect != DETECT_ERROR) { |
237 | 307k | switch (euc_state) { |
238 | 162k | case WC_EUC_NOSTATE: |
239 | 162k | switch (WC_ISO_MAP[*p]) { |
240 | 144k | case WC_ISO_MAP_GR: |
241 | 144k | euc_state = WC_EUC_MBYTE1; |
242 | 144k | break; |
243 | 705 | case WC_ISO_MAP_SS2: |
244 | 705 | if (euc == WC_CES_EUC_JP) |
245 | 383 | euc_state = WC_EUC_MBYTE1; |
246 | 322 | else if (euc == WC_CES_EUC_TW) |
247 | 311 | euc_state = WC_EUC_TW_SS2; |
248 | 11 | else |
249 | 11 | euc_detect = DETECT_ERROR; |
250 | 705 | break; |
251 | 122 | case WC_ISO_MAP_SS3: |
252 | 122 | if (euc == WC_CES_EUC_JP && |
253 | 122 | WC_ISO_MAP[*(p+1)] == WC_ISO_MAP_GR) |
254 | 90 | ; |
255 | 32 | else |
256 | 32 | euc_detect = DETECT_ERROR; |
257 | 122 | break; |
258 | 813 | case WC_ISO_MAP_C1: |
259 | 1.14k | case WC_ISO_MAP_GR96: |
260 | 1.14k | euc_detect = DETECT_ERROR; |
261 | 1.14k | break; |
262 | 162k | } |
263 | 162k | break; |
264 | 162k | case WC_EUC_MBYTE1: |
265 | 144k | if (WC_ISO_MAP[*p] == WC_ISO_MAP_GR) { |
266 | 142k | SET_DETECT(euc_detect, DETECT_OK); |
267 | 142k | ok = WC_TRUE; |
268 | 142k | } else |
269 | 2.32k | SET_BROKEN_ERROR(euc_detect); |
270 | 144k | euc_state = WC_EUC_NOSTATE; |
271 | 144k | break; |
272 | 288 | case WC_EUC_TW_SS2: |
273 | 288 | if (!( 0xa0 <= *p && *p <= 0xb0) || |
274 | 288 | WC_ISO_MAP[*(p+1)] != WC_ISO_MAP_GR) |
275 | 23 | euc_detect = DETECT_ERROR; |
276 | 288 | euc_state = WC_EUC_NOSTATE; |
277 | 288 | break; |
278 | 307k | } |
279 | 307k | if (euc_detect == DETECT_ERROR) |
280 | 1.93k | possible--; |
281 | 307k | } |
282 | 655k | if (sjis_detect != DETECT_ERROR) { |
283 | 369k | switch (sjis_state) { |
284 | 364k | case WC_SJIS_NOSTATE: |
285 | 364k | switch (WC_SJIS_MAP[*p]) { |
286 | 1.02k | case WC_SJIS_MAP_SL: |
287 | 4.88k | case WC_SJIS_MAP_SH: |
288 | 4.88k | sjis_state = WC_SJIS_SHIFT_L; |
289 | 4.88k | break; |
290 | 337k | case WC_SJIS_MAP_SK: |
291 | 337k | SET_DETECT(sjis_detect, DETECT_POSSIBLE); |
292 | 337k | break; |
293 | 427 | case WC_SJIS_MAP_SX: |
294 | 427 | if (WcOption.use_jisx0213) { |
295 | 0 | sjis_state = WC_SJIS_SHIFT_X; |
296 | 0 | break; |
297 | 0 | } |
298 | 536 | case WC_SJIS_MAP_80: |
299 | 574 | case WC_SJIS_MAP_A0: |
300 | 1.03k | case WC_SJIS_MAP_C1: |
301 | 1.03k | sjis_detect = DETECT_ERROR; |
302 | 1.03k | break; |
303 | 364k | } |
304 | 364k | break; |
305 | 364k | case WC_SJIS_SHIFT_L: |
306 | 4.74k | if (WC_SJIS_MAP[*p] & WC_SJIS_MAP_LB) { |
307 | 4.57k | SET_DETECT(sjis_detect, DETECT_OK); |
308 | 4.57k | ok = WC_TRUE; |
309 | 4.57k | } else |
310 | 168 | SET_BROKEN_ERROR(sjis_detect); |
311 | 4.74k | sjis_state = WC_SJIS_NOSTATE; |
312 | 4.74k | break; |
313 | 0 | case WC_SJIS_SHIFT_X: |
314 | 0 | if (WC_SJIS_MAP[*p] & WC_SJIS_MAP_LB) |
315 | 0 | SET_DETECT(sjis_detect, DETECT_POSSIBLE); |
316 | 0 | else |
317 | 0 | sjis_detect = DETECT_ERROR; |
318 | 0 | sjis_state = WC_SJIS_NOSTATE; |
319 | 0 | break; |
320 | 369k | } |
321 | 369k | if (sjis_detect == DETECT_ERROR) |
322 | 1.07k | possible--; |
323 | 369k | } |
324 | 655k | if (big5_detect != DETECT_ERROR) { |
325 | 19.1k | switch (big5_state) { |
326 | 14.4k | case WC_BIG5_NOSTATE: |
327 | 14.4k | switch (WC_BIG5_MAP[*p]) { |
328 | 4.78k | case WC_BIG5_MAP_UB: |
329 | 4.78k | big5_state = WC_BIG5_MBYTE1; |
330 | 4.78k | break; |
331 | 496 | case WC_BIG5_MAP_C1: |
332 | 496 | big5_detect = DETECT_ERROR; |
333 | 496 | break; |
334 | 14.4k | } |
335 | 14.4k | break; |
336 | 14.4k | case WC_BIG5_MBYTE1: |
337 | 4.69k | if (WC_BIG5_MAP[*p] & WC_BIG5_MAP_LB) { |
338 | 4.36k | SET_DETECT(big5_detect, DETECT_OK); |
339 | 4.36k | ok = WC_TRUE; |
340 | 4.36k | } else |
341 | 330 | SET_BROKEN_ERROR(big5_detect); |
342 | 4.69k | big5_state = WC_BIG5_NOSTATE; |
343 | 4.69k | break; |
344 | 19.1k | } |
345 | 19.1k | if (big5_detect == DETECT_ERROR) |
346 | 557 | possible--; |
347 | 19.1k | } |
348 | 655k | if (hz_detect != DETECT_ERROR) { |
349 | 6.39k | if (*p & 0x80) { |
350 | 273 | hz_detect = DETECT_ERROR; |
351 | 273 | possible--; |
352 | 6.11k | } else { |
353 | 6.11k | switch (hz_state) { |
354 | 2.04k | case WC_HZ_NOSTATE: |
355 | 2.04k | if (*p == WC_C_HZ_TILDA) |
356 | 1.32k | hz_state = WC_HZ_TILDA; |
357 | 2.04k | break; |
358 | 1.28k | case WC_HZ_TILDA: |
359 | 1.28k | if (*p == WC_C_HZ_SI) |
360 | 650 | hz_state = WC_HZ_MBYTE; |
361 | 636 | else |
362 | 636 | hz_state = WC_HZ_NOSTATE; |
363 | 1.28k | break; |
364 | 1.08k | case WC_HZ_TILDA_MB: |
365 | 1.08k | if (*p == WC_C_HZ_SO) |
366 | 272 | hz_state = WC_HZ_NOSTATE; |
367 | 810 | else |
368 | 810 | hz_state = WC_HZ_MBYTE; |
369 | 1.08k | break; |
370 | 1.41k | case WC_HZ_MBYTE: |
371 | 1.41k | if (*p == WC_C_HZ_TILDA) |
372 | 1.11k | hz_state = WC_HZ_TILDA_MB; |
373 | 303 | else |
374 | 303 | hz_state = WC_HZ_MBYTE1; |
375 | 1.41k | break; |
376 | 285 | case WC_HZ_MBYTE1: |
377 | 285 | hz_detect = DETECT_OK; |
378 | 285 | ok = WC_TRUE; |
379 | 285 | hz_state = WC_HZ_NOSTATE; |
380 | 285 | break; |
381 | 6.11k | } |
382 | 6.11k | } |
383 | 6.39k | } |
384 | 655k | if (latin_detect != DETECT_ERROR) { |
385 | 4.87k | switch (WC_ISO_MAP[*p] & WC_ISO_MAP_CG) { |
386 | 1.48k | case WC_ISO_MAP_GR: |
387 | 2.29k | case WC_ISO_MAP_GR96: |
388 | 2.29k | SET_DETECT(latin_detect, DETECT_OK); |
389 | 2.29k | ok = WC_TRUE; |
390 | 2.29k | break; |
391 | 154 | case WC_ISO_MAP_C1: |
392 | 154 | latin_detect = DETECT_ERROR; |
393 | 154 | break; |
394 | 4.87k | } |
395 | 4.87k | if (latin_detect == DETECT_ERROR) |
396 | 154 | possible--; |
397 | 4.87k | } |
398 | 655k | if (priv_detect != DETECT_ERROR) { |
399 | 68.4k | if (*p != WC_C_ESC && WC_DETECT_MAP[*p]) { |
400 | 5.36k | SET_DETECT(priv_detect, DETECT_OK); |
401 | 5.36k | ok = WC_TRUE; |
402 | 5.36k | } |
403 | | /* |
404 | | if (priv_detect == DETECT_ERROR) |
405 | | possible--; |
406 | | */ |
407 | 68.4k | } |
408 | 655k | #ifdef USE_UNICODE |
409 | 655k | if (utf8_detect != DETECT_ERROR) { |
410 | 28.3k | switch (utf8_state) { |
411 | 20.7k | case WC_UTF8_NOSTATE: |
412 | 20.7k | switch (utf8_next = WC_UTF8_MAP[*p]) { |
413 | 10.1k | case 1: |
414 | 13.5k | case 8: |
415 | 13.5k | break; |
416 | 2.61k | case 0: |
417 | 3.02k | case 7: |
418 | 3.02k | utf8_detect = DETECT_ERROR; |
419 | 3.02k | break; |
420 | 4.19k | default: |
421 | 4.19k | utf8_next--; |
422 | 4.19k | utf8_state = WC_UTF8_NEXT; |
423 | 4.19k | break; |
424 | 20.7k | } |
425 | 20.7k | break; |
426 | 20.7k | case WC_UTF8_NEXT: |
427 | 7.60k | if (WC_UTF8_MAP[*p]) { |
428 | 1.29k | utf8_detect = DETECT_ERROR; |
429 | 1.29k | utf8_state = WC_UTF8_NOSTATE; |
430 | 1.29k | break; |
431 | 1.29k | } |
432 | 6.30k | utf8_next--; |
433 | 6.30k | if (! utf8_next) { |
434 | 2.61k | SET_DETECT(utf8_detect, DETECT_OK); |
435 | 2.61k | ok = WC_TRUE; |
436 | 2.61k | utf8_state = WC_UTF8_NOSTATE; |
437 | 2.61k | } |
438 | 6.30k | break; |
439 | 28.3k | } |
440 | 28.3k | if (utf8_detect == DETECT_ERROR) |
441 | 4.32k | possible--; |
442 | 28.3k | } |
443 | 655k | #endif |
444 | 655k | } |
445 | | |
446 | 10.9k | if (iso_detect != DETECT_ERROR) { |
447 | 1.28k | if (iso_detect == DETECT_NORMAL) { |
448 | 327 | if (hz_detect == DETECT_OK) |
449 | 30 | return WC_CES_HZ_GB_2312; |
450 | 297 | if (priv_detect == DETECT_OK) |
451 | 206 | return priv; |
452 | 91 | return WC_CES_US_ASCII; |
453 | 297 | } |
454 | 956 | switch (euc) { |
455 | 28 | case WC_CES_EUC_CN: |
456 | 31 | case WC_CES_EUC_TW: |
457 | 31 | if (iso2022cn) |
458 | 3 | return WC_CES_ISO_2022_CN; |
459 | 28 | break; |
460 | 28 | case WC_CES_EUC_KR: |
461 | 3 | if (iso2022kr) |
462 | 1 | return WC_CES_ISO_2022_KR; |
463 | 2 | break; |
464 | 956 | } |
465 | 952 | if (iso2022jp3) |
466 | 36 | return WC_CES_ISO_2022_JP_3; |
467 | 916 | if (iso2022jp2) |
468 | 147 | return WC_CES_ISO_2022_JP_2; |
469 | 769 | if (iso2022cn) |
470 | 59 | return WC_CES_ISO_2022_CN; |
471 | 710 | if (iso2022kr) |
472 | 9 | return WC_CES_ISO_2022_KR; |
473 | 701 | return WC_CES_ISO_2022_JP; |
474 | 710 | } |
475 | 9.61k | switch (hint) { |
476 | 454 | case WC_CES_ISO_2022_JP: |
477 | 455 | case WC_CES_ISO_2022_JP_2: |
478 | 457 | case WC_CES_ISO_2022_JP_3: |
479 | 650 | case WC_CES_ISO_2022_KR: |
480 | 713 | case WC_CES_ISO_2022_CN: |
481 | 713 | break; |
482 | 672 | case WC_CES_EUC_JP: |
483 | 905 | case WC_CES_EUC_CN: |
484 | 998 | case WC_CES_EUC_TW: |
485 | 999 | case WC_CES_EUC_KR: |
486 | 999 | if (euc_detect != DETECT_ERROR) |
487 | 368 | return hint; |
488 | 631 | break; |
489 | 1.27k | case WC_CES_SHIFT_JIS: |
490 | 1.45k | case WC_CES_SHIFT_JISX0213: |
491 | 1.45k | if (sjis_detect != DETECT_ERROR) |
492 | 927 | return hint; |
493 | 526 | break; |
494 | 526 | case WC_CES_BIG5: |
495 | 293 | if (big5_detect != DETECT_ERROR) |
496 | 136 | return hint; |
497 | 157 | break; |
498 | 157 | #ifdef USE_UNICODE |
499 | 1.43k | case WC_CES_UTF_8: |
500 | 1.43k | return hint; |
501 | 0 | #endif |
502 | 54 | case WC_CES_US_ASCII: |
503 | 54 | #ifdef USE_UNICODE |
504 | 54 | if (utf8_detect != DETECT_ERROR) |
505 | 6 | return hint; |
506 | 48 | #endif |
507 | 48 | if (latin_detect != DETECT_ERROR) |
508 | 47 | return WC_CES_ISO_8859_1; |
509 | 1 | return hint; |
510 | 4.67k | default: |
511 | 4.67k | if (latin_detect != DETECT_ERROR) |
512 | 621 | return hint; |
513 | 4.05k | if (priv_detect != DETECT_ERROR) |
514 | 3.63k | return hint; |
515 | 424 | #ifdef USE_UNICODE |
516 | 424 | if (utf8_detect != DETECT_ERROR) |
517 | 145 | return WC_CES_UTF_8; |
518 | 279 | #endif |
519 | 279 | return hint; |
520 | 9.61k | } |
521 | 2.02k | if (euc_detect == DETECT_OK) |
522 | 388 | return euc; |
523 | 1.63k | if (sjis_detect == DETECT_OK) |
524 | 56 | return WC_CES_SHIFT_JIS; |
525 | 1.58k | if (big5_detect == DETECT_OK) |
526 | 32 | return WC_CES_BIG5; |
527 | 1.55k | #ifdef USE_UNICODE |
528 | 1.55k | if (utf8_detect == DETECT_OK) |
529 | 291 | return WC_CES_UTF_8; |
530 | 1.26k | if (sjis_detect & DETECT_POSSIBLE) |
531 | 338 | return WC_CES_SHIFT_JIS; |
532 | 922 | #endif |
533 | 922 | if (euc_detect != DETECT_ERROR) |
534 | 152 | return euc; |
535 | 770 | if (sjis_detect != DETECT_ERROR) |
536 | 11 | return WC_CES_SHIFT_JIS; |
537 | 759 | if (big5_detect != DETECT_ERROR) |
538 | 6 | return WC_CES_BIG5; |
539 | 753 | #ifdef USE_UNICODE |
540 | 753 | if (utf8_detect != DETECT_ERROR) |
541 | 28 | return WC_CES_UTF_8; |
542 | 725 | #endif |
543 | 725 | return hint; |
544 | 753 | } |