Line | Count | Source |
1 | | |
2 | | #include "wc.h" |
3 | | #include "iso2022.h" |
4 | | #include "sjis.h" |
5 | | #include "big5.h" |
6 | | #include "hz.h" |
7 | | #include "viet.h" |
8 | | #ifdef USE_UNICODE |
9 | | #include "utf8.h" |
10 | | #include "utf7.h" |
11 | | #endif |
12 | | |
13 | | wc_uint8 WC_DETECT_MAP[ 0x100 ] = { |
14 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
15 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
16 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
17 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
18 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
19 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
20 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
22 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
23 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
24 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
25 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
26 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
27 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
28 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
29 | | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
30 | | }; |
31 | | |
/*
 * Status of one candidate-encoding detector.  DETECT_NORMAL means "still a
 * candidate, nothing decisive seen"; the non-zero values are bit flags that
 * may be combined, except that DETECT_ERROR is always stored on its own and
 * rules the candidate out.
 */
#define DETECT_NORMAL   0x00
#define DETECT_POSSIBLE 0x01
#define DETECT_OK       0x02
#define DETECT_BROKEN   0x04
#define DETECT_ERROR    0x08

/* Add the flag(s) y to the detector status x. */
#define SET_DETECT(x,y) ((x) |= (y))

/*
 * Record a malformed sequence in detector status x: the first occurrence only
 * adds DETECT_BROKEN, a second one replaces the whole status by DETECT_ERROR.
 */
#define SET_BROKEN_ERROR(x) \
    ((x) = ((((x) & DETECT_BROKEN) != 0) ? DETECT_ERROR : ((x) | DETECT_BROKEN)))
39 | | |
40 | | void |
41 | | wc_create_detect_map(wc_ces ces, wc_bool esc) |
42 | 12.0k | { |
43 | 12.0k | static wc_ces detect_ces = WC_CES_US_ASCII; |
44 | 12.0k | int i; |
45 | | |
46 | 12.0k | if (ces != detect_ces) { |
47 | 9.89k | if (ces & WC_CES_T_VIET) { |
48 | 365 | wc_uint8 *map = NULL; |
49 | 365 | switch (ces) { |
50 | 105 | case WC_CES_TCVN_5712: |
51 | 105 | map = wc_c0_tcvn57122_map; |
52 | 105 | break; |
53 | 250 | case WC_CES_VISCII_11: |
54 | 250 | map = wc_c0_viscii112_map; |
55 | 250 | break; |
56 | 10 | case WC_CES_VPS: |
57 | 10 | map = wc_c0_vps2_map; |
58 | 10 | break; |
59 | 365 | } |
60 | 12.0k | for (i = 0; i < 0x20; i++) |
61 | 11.6k | WC_DETECT_MAP[i] = map[i] ? 1 : 0; |
62 | 9.52k | } else { |
63 | 314k | for (i = 0; i < 0x20; i++) |
64 | 304k | WC_DETECT_MAP[i] = 0; |
65 | 9.52k | WC_DETECT_MAP[WC_C_HZ_TILDA] = (ces == WC_CES_HZ_GB_2312) ? 1 : 0; |
66 | 9.52k | #ifdef USE_UNICODE |
67 | 9.52k | WC_DETECT_MAP[WC_C_UTF7_PLUS] = (ces == WC_CES_UTF_7) ? 1 : 0; |
68 | 9.52k | #endif |
69 | 9.52k | } |
70 | 9.89k | detect_ces = ces; |
71 | 9.89k | } |
72 | 12.0k | WC_DETECT_MAP[WC_C_ESC] = (esc || (ces & WC_CES_T_ISO_2022)) ? 1 : 0; |
73 | 12.0k | return; |
74 | 12.0k | } |
75 | | |
76 | | wc_ces |
77 | | wc_auto_detect(char *is, size_t len, wc_ces hint) |
78 | 11.6k | { |
79 | 11.6k | wc_uchar *p = (wc_uchar *)is; |
80 | 11.6k | wc_uchar *ep = p + len; |
81 | 11.6k | wc_uchar *q; |
82 | 11.6k | wc_ces euc = 0, priv = 0; |
83 | 11.6k | wc_status st; |
84 | 11.6k | int euc_state = 0, sjis_state = 0, big5_state = 0, hz_state = 0; |
85 | 11.6k | int iso_detect = DETECT_ERROR, euc_detect = DETECT_ERROR, |
86 | 11.6k | sjis_detect = DETECT_ERROR, big5_detect = DETECT_ERROR, |
87 | 11.6k | hz_detect = DETECT_ERROR, latin_detect = DETECT_ERROR, |
88 | 11.6k | priv_detect = DETECT_ERROR; |
89 | 11.6k | int possible = 0; |
90 | 11.6k | wc_bool iso2022jp2 = WC_FALSE, iso2022jp3 = WC_FALSE, |
91 | 11.6k | iso2022cn = WC_FALSE, iso2022kr = WC_FALSE, ok = WC_FALSE; |
92 | 11.6k | #ifdef USE_UNICODE |
93 | 11.6k | int utf8_state = 0; |
94 | 11.6k | int utf8_detect = DETECT_ERROR; |
95 | 11.6k | int utf8_next = 0; |
96 | 11.6k | #endif |
97 | | |
98 | 11.6k | wc_create_detect_map(hint, WC_TRUE); |
99 | 2.25M | for (; p < ep && ! WC_DETECT_MAP[*p]; p++) |
100 | 2.24M | ; |
101 | 11.6k | if (p == ep) |
102 | 1.61k | return hint; |
103 | | |
104 | 10.0k | switch (hint) { |
105 | 272 | case WC_CES_ISO_2022_JP: |
106 | 273 | case WC_CES_ISO_2022_JP_2: |
107 | 275 | case WC_CES_ISO_2022_JP_3: |
108 | 1.13k | case WC_CES_EUC_JP: |
109 | 2.26k | case WC_CES_SHIFT_JIS: |
110 | 2.42k | case WC_CES_SHIFT_JISX0213: |
111 | 2.42k | euc = WC_CES_EUC_JP; |
112 | 2.42k | euc_state = WC_EUC_NOSTATE; |
113 | 2.42k | sjis_state = WC_SJIS_NOSTATE; |
114 | 2.42k | iso_detect = euc_detect = sjis_detect = DETECT_NORMAL; |
115 | 2.42k | possible = 3; |
116 | 2.42k | break; |
117 | 38 | case WC_CES_ISO_2022_CN: |
118 | 286 | case WC_CES_EUC_CN: |
119 | 286 | euc = WC_CES_EUC_CN; |
120 | 286 | euc_state = WC_EUC_NOSTATE; |
121 | 286 | big5_state = WC_BIG5_NOSTATE; |
122 | 286 | iso_detect = euc_detect = big5_detect = DETECT_NORMAL; |
123 | 286 | possible = 3; |
124 | 286 | break; |
125 | 71 | case WC_CES_EUC_TW: |
126 | 329 | case WC_CES_BIG5: |
127 | 329 | euc = WC_CES_EUC_TW; |
128 | 329 | euc_state = WC_EUC_NOSTATE; |
129 | 329 | big5_state = WC_BIG5_NOSTATE; |
130 | 329 | iso_detect = euc_detect = big5_detect = DETECT_NORMAL; |
131 | 329 | possible = 3; |
132 | 329 | break; |
133 | 340 | case WC_CES_HZ_GB_2312: |
134 | 340 | euc = WC_CES_EUC_CN; |
135 | 340 | euc_state = WC_EUC_NOSTATE; |
136 | 340 | hz_state = WC_HZ_NOSTATE; |
137 | 340 | iso_detect = euc_detect = big5_detect = hz_detect = DETECT_NORMAL; |
138 | 340 | possible = 4; |
139 | 340 | break; |
140 | 199 | case WC_CES_ISO_2022_KR: |
141 | 200 | case WC_CES_EUC_KR: |
142 | 200 | euc = WC_CES_EUC_KR; |
143 | 200 | euc_state = WC_EUC_NOSTATE; |
144 | 200 | iso_detect = euc_detect = DETECT_NORMAL; |
145 | 200 | possible = 3; |
146 | 200 | break; |
147 | 0 | #ifdef USE_UNICODE |
148 | 1.07k | case WC_CES_UTF_8: |
149 | 1.07k | iso_detect = DETECT_NORMAL; |
150 | 1.07k | possible = 1; |
151 | 1.07k | break; |
152 | 0 | #endif |
153 | 62 | case WC_CES_US_ASCII: |
154 | 62 | iso_detect = latin_detect = DETECT_NORMAL; |
155 | 62 | possible = 2; |
156 | 62 | break; |
157 | 5.36k | default: |
158 | 5.36k | if (hint & WC_CES_T_ISO_8859) { |
159 | 645 | iso_detect = latin_detect = DETECT_NORMAL; |
160 | 645 | possible = 2; |
161 | 4.71k | } else { |
162 | 4.71k | iso_detect = priv_detect = DETECT_NORMAL; |
163 | 4.71k | priv = hint; /* for TVCN, VISCII, VPS */ |
164 | 4.71k | possible = 2; |
165 | 4.71k | } |
166 | 5.36k | break; |
167 | 10.0k | } |
168 | 10.0k | #ifdef USE_UNICODE |
169 | 10.0k | if (priv_detect == DETECT_ERROR) { |
170 | 5.36k | utf8_detect = DETECT_NORMAL; |
171 | 5.36k | possible++; |
172 | 5.36k | } |
173 | 10.0k | #endif |
174 | | |
175 | 10.0k | wc_input_init(WC_CES_US_ASCII, &st); |
176 | | |
177 | 1.52M | for (; p < ep; p++) { |
178 | 1.52M | if (possible == 0 || (possible == 1 && ok)) |
179 | 6.79k | break; |
180 | 1.51M | if (iso_detect != DETECT_ERROR) { |
181 | 69.6k | switch (*p) { |
182 | 10.2k | case WC_C_ESC: |
183 | 10.2k | if (*(p+1) == WC_C_MBCS) { |
184 | 4.05k | q = p; |
185 | 4.05k | if (! wc_parse_iso2022_esc(&q, &st)) |
186 | 1.22k | break; |
187 | 2.82k | if (st.design[0] == WC_CCS_JIS_C_6226 || |
188 | 2.10k | st.design[0] == WC_CCS_JIS_X_0208) |
189 | 1.50k | ; |
190 | 1.32k | else if (st.design[0] == WC_CCS_JIS_X_0213_1 || |
191 | 1.06k | st.design[0] == WC_CCS_JIS_X_0213_2) |
192 | 487 | iso2022jp3 = WC_TRUE; |
193 | 839 | else if (WC_CCS_TYPE(st.design[0]) == WC_CCS_A_CS94W) |
194 | 396 | iso2022jp2 = WC_TRUE; |
195 | 2.82k | if (st.design[1] == WC_CCS_KS_X_1001) |
196 | 158 | iso2022kr = WC_TRUE; |
197 | 2.66k | else if (st.design[1] == WC_CCS_GB_2312 || |
198 | 2.66k | st.design[1] == WC_CCS_ISO_IR_165 || |
199 | 2.23k | st.design[1] == WC_CCS_CNS_11643_1) |
200 | 670 | iso2022cn = WC_TRUE; |
201 | 2.82k | if (WC_CCS_TYPE(st.design[2]) == WC_CCS_A_CS94W || |
202 | 2.56k | WC_CCS_TYPE(st.design[3]) == WC_CCS_A_CS94W) |
203 | 748 | iso2022cn = WC_TRUE; |
204 | 6.24k | } else if (*(p+1) == WC_C_G2_CS96) { |
205 | 713 | q = p; |
206 | 713 | if (! wc_parse_iso2022_esc(&q, &st)) |
207 | 443 | break; |
208 | 270 | if (WC_CCS_TYPE(st.design[2]) == WC_CCS_A_CS96) |
209 | 270 | iso2022jp2 = WC_TRUE; |
210 | 5.53k | } else if (*(p+1) == WC_C_CSWSR) { |
211 | 429 | q = p; |
212 | 429 | if (! wc_parse_iso2022_esc(&q, &st)) |
213 | 345 | break; |
214 | 84 | possible = 0; |
215 | 84 | iso_detect = DETECT_BROKEN; |
216 | 84 | continue; |
217 | 429 | } |
218 | 8.20k | iso_detect = DETECT_OK; |
219 | 8.20k | ok = WC_TRUE; |
220 | 8.20k | break; |
221 | 348 | case WC_C_SI: |
222 | 559 | case WC_C_SO: |
223 | 559 | iso_detect = DETECT_OK; |
224 | 559 | ok = WC_TRUE; |
225 | 559 | iso2022cn = WC_TRUE; |
226 | 559 | iso2022kr = WC_TRUE; |
227 | 559 | break; |
228 | 58.7k | default: |
229 | 58.7k | if (*p & 0x80) { |
230 | 8.91k | iso_detect = DETECT_ERROR; |
231 | 8.91k | possible--; |
232 | 8.91k | } |
233 | 58.7k | break; |
234 | 69.6k | } |
235 | 69.6k | } |
236 | 1.51M | if (euc_detect != DETECT_ERROR) { |
237 | 881k | switch (euc_state) { |
238 | 460k | case WC_EUC_NOSTATE: |
239 | 460k | switch (WC_ISO_MAP[*p]) { |
240 | 420k | case WC_ISO_MAP_GR: |
241 | 420k | euc_state = WC_EUC_MBYTE1; |
242 | 420k | break; |
243 | 602 | case WC_ISO_MAP_SS2: |
244 | 602 | if (euc == WC_CES_EUC_JP) |
245 | 315 | euc_state = WC_EUC_MBYTE1; |
246 | 287 | else if (euc == WC_CES_EUC_TW) |
247 | 273 | euc_state = WC_EUC_TW_SS2; |
248 | 14 | else |
249 | 14 | euc_detect = DETECT_ERROR; |
250 | 602 | break; |
251 | 248 | case WC_ISO_MAP_SS3: |
252 | 248 | if (euc == WC_CES_EUC_JP && |
253 | 233 | WC_ISO_MAP[*(p+1)] == WC_ISO_MAP_GR) |
254 | 207 | ; |
255 | 41 | else |
256 | 41 | euc_detect = DETECT_ERROR; |
257 | 248 | break; |
258 | 975 | case WC_ISO_MAP_C1: |
259 | 1.29k | case WC_ISO_MAP_GR96: |
260 | 1.29k | euc_detect = DETECT_ERROR; |
261 | 1.29k | break; |
262 | 460k | } |
263 | 460k | break; |
264 | 460k | case WC_EUC_MBYTE1: |
265 | 420k | if (WC_ISO_MAP[*p] == WC_ISO_MAP_GR) { |
266 | 417k | SET_DETECT(euc_detect, DETECT_OK); |
267 | 417k | ok = WC_TRUE; |
268 | 417k | } else |
269 | 2.34k | SET_BROKEN_ERROR(euc_detect); |
270 | 420k | euc_state = WC_EUC_NOSTATE; |
271 | 420k | break; |
272 | 263 | case WC_EUC_TW_SS2: |
273 | 263 | if (!( 0xa0 <= *p && *p <= 0xb0) || |
274 | 253 | WC_ISO_MAP[*(p+1)] != WC_ISO_MAP_GR) |
275 | 17 | euc_detect = DETECT_ERROR; |
276 | 263 | euc_state = WC_EUC_NOSTATE; |
277 | 263 | break; |
278 | 881k | } |
279 | 881k | if (euc_detect == DETECT_ERROR) |
280 | 2.08k | possible--; |
281 | 881k | } |
282 | 1.51M | if (sjis_detect != DETECT_ERROR) { |
283 | 661k | switch (sjis_state) { |
284 | 658k | case WC_SJIS_NOSTATE: |
285 | 658k | switch (WC_SJIS_MAP[*p]) { |
286 | 988 | case WC_SJIS_MAP_SL: |
287 | 3.15k | case WC_SJIS_MAP_SH: |
288 | 3.15k | sjis_state = WC_SJIS_SHIFT_L; |
289 | 3.15k | break; |
290 | 607k | case WC_SJIS_MAP_SK: |
291 | 607k | SET_DETECT(sjis_detect, DETECT_POSSIBLE); |
292 | 607k | break; |
293 | 539 | case WC_SJIS_MAP_SX: |
294 | 539 | if (WcOption.use_jisx0213) { |
295 | 0 | sjis_state = WC_SJIS_SHIFT_X; |
296 | 0 | break; |
297 | 0 | } |
298 | 637 | case WC_SJIS_MAP_80: |
299 | 669 | case WC_SJIS_MAP_A0: |
300 | 1.06k | case WC_SJIS_MAP_C1: |
301 | 1.06k | sjis_detect = DETECT_ERROR; |
302 | 1.06k | break; |
303 | 658k | } |
304 | 658k | break; |
305 | 658k | case WC_SJIS_SHIFT_L: |
306 | 3.02k | if (WC_SJIS_MAP[*p] & WC_SJIS_MAP_LB) { |
307 | 2.85k | SET_DETECT(sjis_detect, DETECT_OK); |
308 | 2.85k | ok = WC_TRUE; |
309 | 2.85k | } else |
310 | 169 | SET_BROKEN_ERROR(sjis_detect); |
311 | 3.02k | sjis_state = WC_SJIS_NOSTATE; |
312 | 3.02k | break; |
313 | 0 | case WC_SJIS_SHIFT_X: |
314 | 0 | if (WC_SJIS_MAP[*p] & WC_SJIS_MAP_LB) |
315 | 0 | SET_DETECT(sjis_detect, DETECT_POSSIBLE); |
316 | 0 | else |
317 | 0 | sjis_detect = DETECT_ERROR; |
318 | 0 | sjis_state = WC_SJIS_NOSTATE; |
319 | 0 | break; |
320 | 661k | } |
321 | 661k | if (sjis_detect == DETECT_ERROR) |
322 | 1.10k | possible--; |
323 | 661k | } |
324 | 1.51M | if (big5_detect != DETECT_ERROR) { |
325 | 424k | switch (big5_state) { |
326 | 228k | case WC_BIG5_NOSTATE: |
327 | 228k | switch (WC_BIG5_MAP[*p]) { |
328 | 196k | case WC_BIG5_MAP_UB: |
329 | 196k | big5_state = WC_BIG5_MBYTE1; |
330 | 196k | break; |
331 | 467 | case WC_BIG5_MAP_C1: |
332 | 467 | big5_detect = DETECT_ERROR; |
333 | 467 | break; |
334 | 228k | } |
335 | 228k | break; |
336 | 228k | case WC_BIG5_MBYTE1: |
337 | 196k | if (WC_BIG5_MAP[*p] & WC_BIG5_MAP_LB) { |
338 | 196k | SET_DETECT(big5_detect, DETECT_OK); |
339 | 196k | ok = WC_TRUE; |
340 | 196k | } else |
341 | 329 | SET_BROKEN_ERROR(big5_detect); |
342 | 196k | big5_state = WC_BIG5_NOSTATE; |
343 | 196k | break; |
344 | 424k | } |
345 | 424k | if (big5_detect == DETECT_ERROR) |
346 | 524 | possible--; |
347 | 424k | } |
348 | 1.51M | if (hz_detect != DETECT_ERROR) { |
349 | 6.65k | if (*p & 0x80) { |
350 | 237 | hz_detect = DETECT_ERROR; |
351 | 237 | possible--; |
352 | 6.41k | } else { |
353 | 6.41k | switch (hz_state) { |
354 | 2.53k | case WC_HZ_NOSTATE: |
355 | 2.53k | if (*p == WC_C_HZ_TILDA) |
356 | 1.52k | hz_state = WC_HZ_TILDA; |
357 | 2.53k | break; |
358 | 1.49k | case WC_HZ_TILDA: |
359 | 1.49k | if (*p == WC_C_HZ_SI) |
360 | 702 | hz_state = WC_HZ_MBYTE; |
361 | 796 | else |
362 | 796 | hz_state = WC_HZ_NOSTATE; |
363 | 1.49k | break; |
364 | 935 | case WC_HZ_TILDA_MB: |
365 | 935 | if (*p == WC_C_HZ_SO) |
366 | 394 | hz_state = WC_HZ_NOSTATE; |
367 | 541 | else |
368 | 541 | hz_state = WC_HZ_MBYTE; |
369 | 935 | break; |
370 | 1.20k | case WC_HZ_MBYTE: |
371 | 1.20k | if (*p == WC_C_HZ_TILDA) |
372 | 955 | hz_state = WC_HZ_TILDA_MB; |
373 | 254 | else |
374 | 254 | hz_state = WC_HZ_MBYTE1; |
375 | 1.20k | break; |
376 | 240 | case WC_HZ_MBYTE1: |
377 | 240 | hz_detect = DETECT_OK; |
378 | 240 | ok = WC_TRUE; |
379 | 240 | hz_state = WC_HZ_NOSTATE; |
380 | 240 | break; |
381 | 6.41k | } |
382 | 6.41k | } |
383 | 6.65k | } |
384 | 1.51M | if (latin_detect != DETECT_ERROR) { |
385 | 10.1k | switch (WC_ISO_MAP[*p] & WC_ISO_MAP_CG) { |
386 | 1.60k | case WC_ISO_MAP_GR: |
387 | 2.04k | case WC_ISO_MAP_GR96: |
388 | 2.04k | SET_DETECT(latin_detect, DETECT_OK); |
389 | 2.04k | ok = WC_TRUE; |
390 | 2.04k | break; |
391 | 112 | case WC_ISO_MAP_C1: |
392 | 112 | latin_detect = DETECT_ERROR; |
393 | 112 | break; |
394 | 10.1k | } |
395 | 10.1k | if (latin_detect == DETECT_ERROR) |
396 | 112 | possible--; |
397 | 10.1k | } |
398 | 1.51M | if (priv_detect != DETECT_ERROR) { |
399 | 45.5k | if (*p != WC_C_ESC && WC_DETECT_MAP[*p]) { |
400 | 5.31k | SET_DETECT(priv_detect, DETECT_OK); |
401 | 5.31k | ok = WC_TRUE; |
402 | 5.31k | } |
403 | | /* |
404 | | if (priv_detect == DETECT_ERROR) |
405 | | possible--; |
406 | | */ |
407 | 45.5k | } |
408 | 1.51M | #ifdef USE_UNICODE |
409 | 1.51M | if (utf8_detect != DETECT_ERROR) { |
410 | 32.4k | switch (utf8_state) { |
411 | 25.2k | case WC_UTF8_NOSTATE: |
412 | 25.2k | switch (utf8_next = WC_UTF8_MAP[*p]) { |
413 | 15.5k | case 1: |
414 | 18.9k | case 8: |
415 | 18.9k | break; |
416 | 2.18k | case 0: |
417 | 2.56k | case 7: |
418 | 2.56k | utf8_detect = DETECT_ERROR; |
419 | 2.56k | break; |
420 | 3.71k | default: |
421 | 3.71k | utf8_next--; |
422 | 3.71k | utf8_state = WC_UTF8_NEXT; |
423 | 3.71k | break; |
424 | 25.2k | } |
425 | 25.2k | break; |
426 | 25.2k | case WC_UTF8_NEXT: |
427 | 7.22k | if (WC_UTF8_MAP[*p]) { |
428 | 1.17k | utf8_detect = DETECT_ERROR; |
429 | 1.17k | utf8_state = WC_UTF8_NOSTATE; |
430 | 1.17k | break; |
431 | 1.17k | } |
432 | 6.05k | utf8_next--; |
433 | 6.05k | if (! utf8_next) { |
434 | 2.29k | SET_DETECT(utf8_detect, DETECT_OK); |
435 | 2.29k | ok = WC_TRUE; |
436 | 2.29k | utf8_state = WC_UTF8_NOSTATE; |
437 | 2.29k | } |
438 | 6.05k | break; |
439 | 32.4k | } |
440 | 32.4k | if (utf8_detect == DETECT_ERROR) |
441 | 3.73k | possible--; |
442 | 32.4k | } |
443 | 1.51M | #endif |
444 | 1.51M | } |
445 | | |
446 | 10.0k | if (iso_detect != DETECT_ERROR) { |
447 | 1.16k | if (iso_detect == DETECT_NORMAL) { |
448 | 327 | if (hz_detect == DETECT_OK) |
449 | 47 | return WC_CES_HZ_GB_2312; |
450 | 280 | if (priv_detect == DETECT_OK) |
451 | 197 | return priv; |
452 | 83 | return WC_CES_US_ASCII; |
453 | 280 | } |
454 | 834 | switch (euc) { |
455 | 15 | case WC_CES_EUC_CN: |
456 | 16 | case WC_CES_EUC_TW: |
457 | 16 | if (iso2022cn) |
458 | 2 | return WC_CES_ISO_2022_CN; |
459 | 14 | break; |
460 | 14 | case WC_CES_EUC_KR: |
461 | 3 | if (iso2022kr) |
462 | 2 | return WC_CES_ISO_2022_KR; |
463 | 1 | break; |
464 | 834 | } |
465 | 830 | if (iso2022jp3) |
466 | 35 | return WC_CES_ISO_2022_JP_3; |
467 | 795 | if (iso2022jp2) |
468 | 125 | return WC_CES_ISO_2022_JP_2; |
469 | 670 | if (iso2022cn) |
470 | 59 | return WC_CES_ISO_2022_CN; |
471 | 611 | if (iso2022kr) |
472 | 8 | return WC_CES_ISO_2022_KR; |
473 | 603 | return WC_CES_ISO_2022_JP; |
474 | 611 | } |
475 | 8.91k | switch (hint) { |
476 | 270 | case WC_CES_ISO_2022_JP: |
477 | 271 | case WC_CES_ISO_2022_JP_2: |
478 | 273 | case WC_CES_ISO_2022_JP_3: |
479 | 469 | case WC_CES_ISO_2022_KR: |
480 | 504 | case WC_CES_ISO_2022_CN: |
481 | 504 | break; |
482 | 852 | case WC_CES_EUC_JP: |
483 | 1.09k | case WC_CES_EUC_CN: |
484 | 1.16k | case WC_CES_EUC_TW: |
485 | 1.17k | case WC_CES_EUC_KR: |
486 | 1.17k | if (euc_detect != DETECT_ERROR) |
487 | 290 | return hint; |
488 | 880 | break; |
489 | 1.13k | case WC_CES_SHIFT_JIS: |
490 | 1.28k | case WC_CES_SHIFT_JISX0213: |
491 | 1.28k | if (sjis_detect != DETECT_ERROR) |
492 | 826 | return hint; |
493 | 461 | break; |
494 | 461 | case WC_CES_BIG5: |
495 | 257 | if (big5_detect != DETECT_ERROR) |
496 | 117 | return hint; |
497 | 140 | break; |
498 | 140 | #ifdef USE_UNICODE |
499 | 1.07k | case WC_CES_UTF_8: |
500 | 1.07k | return hint; |
501 | 0 | #endif |
502 | 62 | case WC_CES_US_ASCII: |
503 | 62 | #ifdef USE_UNICODE |
504 | 62 | if (utf8_detect != DETECT_ERROR) |
505 | 4 | return hint; |
506 | 58 | #endif |
507 | 58 | if (latin_detect != DETECT_ERROR) |
508 | 57 | return WC_CES_ISO_8859_1; |
509 | 1 | return hint; |
510 | 4.56k | default: |
511 | 4.56k | if (latin_detect != DETECT_ERROR) |
512 | 524 | return hint; |
513 | 4.04k | if (priv_detect != DETECT_ERROR) |
514 | 3.69k | return hint; |
515 | 346 | #ifdef USE_UNICODE |
516 | 346 | if (utf8_detect != DETECT_ERROR) |
517 | 108 | return WC_CES_UTF_8; |
518 | 238 | #endif |
519 | 238 | return hint; |
520 | 8.91k | } |
521 | 1.98k | if (euc_detect == DETECT_OK) |
522 | 290 | return euc; |
523 | 1.69k | if (sjis_detect == DETECT_OK) |
524 | 57 | return WC_CES_SHIFT_JIS; |
525 | 1.63k | if (big5_detect == DETECT_OK) |
526 | 18 | return WC_CES_BIG5; |
527 | 1.62k | #ifdef USE_UNICODE |
528 | 1.62k | if (utf8_detect == DETECT_OK) |
529 | 460 | return WC_CES_UTF_8; |
530 | 1.16k | if (sjis_detect & DETECT_POSSIBLE) |
531 | 275 | return WC_CES_SHIFT_JIS; |
532 | 885 | #endif |
533 | 885 | if (euc_detect != DETECT_ERROR) |
534 | 87 | return euc; |
535 | 798 | if (sjis_detect != DETECT_ERROR) |
536 | 7 | return WC_CES_SHIFT_JIS; |
537 | 791 | if (big5_detect != DETECT_ERROR) |
538 | 6 | return WC_CES_BIG5; |
539 | 785 | #ifdef USE_UNICODE |
540 | 785 | if (utf8_detect != DETECT_ERROR) |
541 | 35 | return WC_CES_UTF_8; |
542 | 750 | #endif |
543 | 750 | return hint; |
544 | 785 | } |