Line | Count | Source |
1 | | |
2 | | #include "wc.h" |
3 | | #include "wtf.h" |
4 | | #include "sjis.h" |
5 | | #include "big5.h" |
6 | | #include "hkscs.h" |
7 | | #include "johab.h" |
8 | | #include "jis.h" |
9 | | #include "viet.h" |
10 | | #include "gbk.h" |
11 | | #include "gb18030.h" |
12 | | #include "uhc.h" |
13 | | #ifdef USE_UNICODE |
14 | | #include "ucs.h" |
15 | | #include "utf8.h" |
16 | | #endif |
17 | | |
18 | | wc_uint8 WTF_WIDTH_MAP[ 0x100 ] = { |
19 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
20 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
21 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
22 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
23 | | |
24 | | 1,2,1,2,1,1,1,2, 1,2,1,2,1,1,1,1, 0,0,0,0,0,0,0,0, 0,0,0,0,1,1,1,1, |
25 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
26 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
27 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
28 | | }; |
29 | | |
30 | | wc_uint8 WTF_LEN_MAP[ 0x100 ] = { |
31 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
32 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
33 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
34 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
35 | | |
36 | | 3,4,3,4,3,3,3,4, 4,4,6,6,1,1,1,1, 3,4,3,4,3,3,3,4, 4,4,6,6,1,1,1,1, |
37 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
38 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
39 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
40 | | }; |
41 | | |
42 | | wc_uint8 WTF_TYPE_MAP[ 0x100 ] = { |
43 | | 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, |
44 | | 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, |
45 | | 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, |
46 | | 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,1, |
47 | | |
48 | | 2, 0xA,2, 0xA, 2, 0x12,2, 0xA, 2, 0xA,2, 0xA, 0x20,0x20,0x20,0x20, |
49 | | 4, 0xC,4, 0xC, 4, 0x20,4, 0xC, 4, 0xC,4, 0xC, 0x20,0x20,0x20,0x20, |
50 | | 0x20,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, |
51 | | 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, |
52 | | 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, |
53 | | }; |
54 | | |
55 | | static wc_uint16 CCS_MAP[ 33 ] = { |
56 | | WC_CCS_A_CS94 >> 8, WC_CCS_A_CS94W >> 8, |
57 | | WC_CCS_A_CS96 >> 8, WC_CCS_A_CS96W >> 8, |
58 | | WC_CCS_A_CS942 >> 8, WC_CCS_A_UNKNOWN >> 8, |
59 | | WC_CCS_A_PCS >> 8, WC_CCS_A_PCSW >> 8, |
60 | | WC_CCS_A_WCS16 >> 8, WC_CCS_A_WCS16W >> 8, |
61 | | WC_CCS_A_WCS32 >> 8, WC_CCS_A_WCS32W >> 8, |
62 | | 0, 0, |
63 | | 0, 0, |
64 | | WC_CCS_A_CS94_C >> 8, WC_CCS_A_CS94W_C >> 8, |
65 | | WC_CCS_A_CS96_C >> 8, WC_CCS_A_CS96W_C >> 8, |
66 | | WC_CCS_A_CS942_C >> 8, 0, |
67 | | WC_CCS_A_PCS_C >> 8, WC_CCS_A_PCSW_C >> 8, |
68 | | WC_CCS_A_WCS16_C >> 8, WC_CCS_A_WCS16W_C >> 8, |
69 | | WC_CCS_A_WCS32_C >> 8, WC_CCS_A_WCS32W_C >> 8, |
70 | | 0, 0, |
71 | | 0, 0, |
72 | | 0, |
73 | | }; |
74 | | |
75 | | wc_ccs wtf_gr_ccs = 0; |
76 | | static wc_ces wtf_major_ces = WC_CES_US_ASCII; |
77 | | static wc_status wtf_major_st; |
78 | | |
79 | | void |
80 | | wtf_init(wc_ces ces1, wc_ces ces2) |
81 | 1 | { |
82 | 1 | int i; |
83 | 1 | wc_gset *gset; |
84 | | |
85 | 1 | if (wc_check_ces(ces2)) |
86 | 1 | wtf_major_ces = ces2; |
87 | | |
88 | 1 | if (! wc_check_ces(ces1)) |
89 | 0 | return; |
90 | 1 | gset = WcCesInfo[WC_CES_INDEX(ces1)].gset; |
91 | 1 | if (gset == NULL || gset[1].ccs == 0 || |
92 | 1 | gset[1].ccs & (WC_CCS_A_WCS16|WC_CCS_A_WCS32)) |
93 | 1 | return; |
94 | 0 | wtf_gr_ccs = gset[1].ccs; |
95 | |
|
96 | 0 | if (WC_CCS_IS_WIDE(wtf_gr_ccs)) { |
97 | 0 | for (i = 0xa1; i <= 0xff; i++) { |
98 | 0 | WTF_WIDTH_MAP[i] = 2; |
99 | 0 | WTF_LEN_MAP[i] = 2; |
100 | 0 | WTF_TYPE_MAP[i] = WTF_TYPE_WCHAR1W; |
101 | 0 | } |
102 | 0 | } else { |
103 | 0 | for (i = 0xa1; i <= 0xff; i++) { |
104 | 0 | WTF_WIDTH_MAP[i] = 1; |
105 | 0 | WTF_LEN_MAP[i] = 1; |
106 | 0 | WTF_TYPE_MAP[i] = WTF_TYPE_WCHAR1; |
107 | 0 | } |
108 | 0 | } |
109 | 0 | } |
110 | | |
111 | | /* |
112 | | int |
113 | | wtf_width(wc_uchar *p) |
114 | | { |
115 | | return (int)WTF_WIDTH_MAP[*p]; |
116 | | } |
117 | | */ |
118 | | |
119 | | int |
120 | | wtf_strwidth(wc_uchar *p) |
121 | 0 | { |
122 | 0 | int w = 0; |
123 | 0 | wc_uchar *q = p + strlen((char *)p); |
124 | |
|
125 | 0 | while (p < q) { |
126 | 0 | w += wtf_width(p); |
127 | 0 | p += WTF_LEN_MAP[*p]; |
128 | 0 | } |
129 | 0 | return w; |
130 | 0 | } |
131 | | |
132 | | size_t |
133 | | wtf_len1(wc_uchar *p) |
134 | 0 | { |
135 | 0 | size_t len, len_max = WTF_LEN_MAP[*p]; |
136 | |
|
137 | 0 | for (len = 0; *(p + len); len++) |
138 | 0 | if (len == len_max) |
139 | 0 | break; |
140 | 0 | if (len == 0) |
141 | 0 | len = 1; |
142 | 0 | return len; |
143 | 0 | } |
144 | | |
145 | | size_t |
146 | | wtf_len(wc_uchar *p) |
147 | 0 | { |
148 | 0 | wc_uchar *q = p; |
149 | 0 | wc_uchar *strz = p + strlen((char *)p); |
150 | |
|
151 | 0 | q += WTF_LEN_MAP[*q]; |
152 | 0 | while (q < strz && ! WTF_WIDTH_MAP[*q]) |
153 | 0 | q += WTF_LEN_MAP[*q]; |
154 | 0 | return q - p; |
155 | 0 | } |
156 | | |
157 | | /* |
158 | | int |
159 | | wtf_type(wc_uchar *p) |
160 | | { |
161 | | return (int)WTF_TYPE_MAP[*p]; |
162 | | } |
163 | | */ |
164 | | |
/*
 * Pack/unpack helpers for the WCS16 and WCS32 payloads.
 *
 * Every payload byte carries 7 bits (2 bits in the first WCS16 byte, 4 in
 * the first WCS32 byte) and has bit 7 set.  The decoders yield 0 if any
 * payload byte is NUL.  Arguments are evaluated more than once, so pass
 * expressions without side effects.
 */
#define wcs16_to_wtf(c, p) \
    ((p)[0] = 0x80 | (((c) >> 14) & 0x03)), \
    ((p)[1] = 0x80 | (((c) >>  7) & 0x7f)), \
    ((p)[2] = 0x80 | ( (c)        & 0x7f))
#define wcs32_to_wtf(c, p) \
    ((p)[0] = 0x80 | (((c) >> 28) & 0x0f)), \
    ((p)[1] = 0x80 | (((c) >> 21) & 0x7f)), \
    ((p)[2] = 0x80 | (((c) >> 14) & 0x7f)), \
    ((p)[3] = 0x80 | (((c) >>  7) & 0x7f)), \
    ((p)[4] = 0x80 | ( (c)        & 0x7f))
#define wtf_to_wcs16(p) \
    ((p)[0] != 0 && (p)[1] != 0 && (p)[2] != 0 ? \
       (((wc_uint32)((p)[0] & 0x03) << 14) \
      | ((wc_uint32)((p)[1] & 0x7f) <<  7) \
      | ((wc_uint32)((p)[2] & 0x7f)      )) : 0)
#define wtf_to_wcs32(p) \
    ((p)[0] != 0 && (p)[1] != 0 && (p)[2] != 0 && (p)[3] != 0 && (p)[4] != 0 ? \
       (((wc_uint32)((p)[0] & 0x0f) << 28) \
      | ((wc_uint32)((p)[1] & 0x7f) << 21) \
      | ((wc_uint32)((p)[2] & 0x7f) << 14) \
      | ((wc_uint32)((p)[3] & 0x7f) <<  7) \
      | ((wc_uint32)((p)[4] & 0x7f)      )) : 0)
187 | | |
188 | | void |
189 | | wtf_push(Str os, wc_ccs ccs, wc_uint32 code) |
190 | 270M | { |
191 | 270M | wc_uchar s[8]; |
192 | 270M | wc_wchar_t cc, cc2; |
193 | 270M | size_t n; |
194 | | |
195 | 270M | if (ccs == WC_CCS_US_ASCII) { |
196 | 999k | Strcat_char(os, (char)(code & 0x7f)); |
197 | 999k | return; |
198 | 999k | } |
199 | 269M | cc.ccs = ccs; |
200 | 269M | cc.code = code; |
201 | 269M | if (WcOption.pre_conv && !(cc.ccs & WC_CCS_A_UNKNOWN)) { |
202 | 0 | if ((ccs == WC_CCS_JOHAB || ccs == WC_CCS_JOHAB_1 || |
203 | 0 | ccs == WC_CCS_JOHAB_2 || ccs == WC_CCS_JOHAB_3) && |
204 | 0 | (wtf_major_ces == WC_CES_EUC_KR || |
205 | 0 | wtf_major_ces == WC_CES_ISO_2022_KR)) { |
206 | 0 | cc2 = wc_johab_to_ksx1001(cc); |
207 | 0 | if (!WC_CCS_IS_UNKNOWN(cc2.ccs)) |
208 | 0 | cc = cc2; |
209 | 0 | } else if (ccs == WC_CCS_KS_X_1001 && |
210 | 0 | wtf_major_ces == WC_CES_JOHAB) { |
211 | 0 | cc2 = wc_ksx1001_to_johab(cc); |
212 | 0 | if (!WC_CCS_IS_UNKNOWN(cc2.ccs)) |
213 | 0 | cc = cc2; |
214 | 0 | } |
215 | 0 | #ifdef USE_UNICODE |
216 | 0 | else if (WcOption.ucs_conv) { |
217 | 0 | wc_bool fix_width_conv = WcOption.fix_width_conv; |
218 | 0 | WcOption.fix_width_conv = WC_FALSE; |
219 | 0 | wc_output_init(wtf_major_ces, &wtf_major_st); |
220 | 0 | if (! wc_ces_has_ccs(WC_CCS_SET(ccs), &wtf_major_st)) { |
221 | 0 | cc2 = wc_any_to_any_ces(cc, &wtf_major_st); |
222 | 0 | if (cc2.ccs == WC_CCS_US_ASCII) { |
223 | 0 | Strcat_char(os, (char)(cc2.code & 0x7f)); |
224 | 0 | return; |
225 | 0 | } |
226 | 0 | if (!WC_CCS_IS_UNKNOWN(cc2.ccs) && |
227 | 0 | cc2.ccs != WC_CCS_CP1258_2 && |
228 | 0 | cc2.ccs != WC_CCS_TCVN_5712_3) |
229 | 0 | cc = cc2; |
230 | 0 | } |
231 | 0 | WcOption.fix_width_conv = fix_width_conv; |
232 | 0 | } |
233 | 0 | #endif |
234 | 0 | } |
235 | | |
236 | 269M | switch (WC_CCS_TYPE(cc.ccs)) { |
237 | 77.6M | case WC_CCS_A_CS94: |
238 | 77.6M | if (cc.ccs == wtf_gr_ccs) { |
239 | 0 | s[0] = (cc.code & 0x7f) | 0x80; |
240 | 0 | n = 1; |
241 | 0 | break; |
242 | 0 | } |
243 | 77.6M | if (cc.ccs == WC_CCS_JIS_X_0201K && !WcOption.use_jisx0201k) { |
244 | 77.5M | cc2 = wc_jisx0201k_to_jisx0208(cc); |
245 | 77.5M | if (!WC_CCS_IS_UNKNOWN(cc2.ccs)) { |
246 | 77.1M | wtf_push(os, cc2.ccs, cc2.code); |
247 | 77.1M | return; |
248 | 77.1M | } |
249 | 77.5M | } |
250 | 489k | s[0] = WTF_C_CS94; |
251 | 489k | s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; |
252 | 489k | s[2] = (cc.code & 0x7f) | 0x80; |
253 | 489k | n = 3; |
254 | 489k | break; |
255 | 82.1M | case WC_CCS_A_CS94W: |
256 | 82.1M | if (cc.ccs == wtf_gr_ccs) { |
257 | 0 | s[0] = ((cc.code >> 8) & 0x7f) | 0x80; |
258 | 0 | s[1] = ( cc.code & 0x7f) | 0x80; |
259 | 0 | n = 2; |
260 | 0 | break; |
261 | 0 | } |
262 | 82.1M | s[0] = WTF_C_CS94W; |
263 | 82.1M | s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; |
264 | 82.1M | s[2] = ((cc.code >> 8) & 0x7f) | 0x80; |
265 | 82.1M | s[3] = ( cc.code & 0x7f) | 0x80; |
266 | 82.1M | n = 4; |
267 | 82.1M | break; |
268 | 4.40M | case WC_CCS_A_CS96: |
269 | 4.40M | if (WcOption.use_combining && wc_is_combining(cc)) |
270 | 8.90k | s[0] = WTF_C_CS96_C; |
271 | 4.39M | else if (cc.ccs == wtf_gr_ccs && (cc.code & 0x7f) > 0x20) { |
272 | 0 | s[0] = (cc.code & 0x7f) | 0x80; |
273 | 0 | n = 1; |
274 | 0 | break; |
275 | 0 | } else |
276 | 4.39M | s[0] = WTF_C_CS96; |
277 | 4.40M | s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; |
278 | 4.40M | s[2] = (cc.code & 0x7f) | 0x80; |
279 | 4.40M | n = 3; |
280 | 4.40M | break; |
281 | 4.25k | case WC_CCS_A_CS96W: |
282 | 4.25k | if (cc.ccs == wtf_gr_ccs && ((cc.code >> 8) & 0x7f) > 0x20) { |
283 | 0 | s[0] = ((cc.code >> 8) & 0x7f) | 0x80; |
284 | 0 | s[1] = ( cc.code & 0x7f) | 0x80; |
285 | 0 | n = 2; |
286 | 0 | break; |
287 | 0 | } |
288 | 4.25k | s[0] = WTF_C_CS96W; |
289 | 4.25k | s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; |
290 | 4.25k | s[2] = ((cc.code >> 8) & 0x7f) | 0x80; |
291 | 4.25k | s[3] = ( cc.code & 0x7f) | 0x80; |
292 | 4.25k | n = 4; |
293 | 4.25k | break; |
294 | 23.3k | case WC_CCS_A_CS942: |
295 | 23.3k | if (cc.ccs == wtf_gr_ccs) { |
296 | 0 | s[0] = (cc.code & 0x7f) | 0x80; |
297 | 0 | n = 1; |
298 | 0 | break; |
299 | 0 | } |
300 | 23.3k | s[0] = WTF_C_CS942; |
301 | 23.3k | s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; |
302 | 23.3k | s[2] = (cc.code & 0x7f) | 0x80; |
303 | 23.3k | n = 3; |
304 | 23.3k | break; |
305 | 18.3M | case WC_CCS_A_PCS: |
306 | 18.3M | if (WcOption.use_combining && wc_is_combining(cc)) |
307 | 51.7k | s[0] = WTF_C_PCS_C; |
308 | 18.3M | else if (cc.ccs == wtf_gr_ccs && (cc.code & 0x7f) > 0x20) { |
309 | 0 | s[0] = (cc.code & 0x7f) | 0x80; |
310 | 0 | n = 1; |
311 | 0 | break; |
312 | 0 | } else |
313 | 18.3M | s[0] = WTF_C_PCS; |
314 | 18.3M | s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; |
315 | 18.3M | s[2] = (cc.code & 0x7f) | 0x80; |
316 | 18.3M | n = 3; |
317 | 18.3M | break; |
318 | 7.77M | case WC_CCS_A_PCSW: |
319 | 7.77M | switch (cc.ccs) { |
320 | 105k | case WC_CCS_SJIS_EXT: |
321 | 105k | cc = wc_sjis_ext_to_cs94w(cc); |
322 | 105k | break; |
323 | 3.82M | case WC_CCS_GBK: |
324 | 3.82M | cc = wc_gbk_to_cs128w(cc); |
325 | 3.82M | break; |
326 | 457k | case WC_CCS_GBK_EXT: |
327 | 457k | cc = wc_gbk_ext_to_cs128w(cc); |
328 | 457k | break; |
329 | 666k | case WC_CCS_BIG5: |
330 | 666k | cc = wc_big5_to_cs94w(cc); |
331 | 666k | break; |
332 | 25.5k | case WC_CCS_HKSCS: |
333 | 25.5k | cc = wc_hkscs_to_cs128w(cc); |
334 | 25.5k | break; |
335 | 1.52M | case WC_CCS_JOHAB: |
336 | 1.52M | cc = wc_johab_to_cs128w(cc); |
337 | 1.52M | break; |
338 | 1.14M | case WC_CCS_UHC: |
339 | 1.14M | cc = wc_uhc_to_cs128w(cc); |
340 | 1.14M | break; |
341 | 7.77M | } |
342 | 7.77M | if (cc.ccs == wtf_gr_ccs && ((cc.code >> 8) & 0x7f) > 0x20) { |
343 | 0 | s[0] = ((cc.code >> 8) & 0x7f) | 0x80; |
344 | 0 | s[1] = ( cc.code & 0x7f) | 0x80; |
345 | 0 | n = 2; |
346 | 0 | break; |
347 | 0 | } |
348 | 7.77M | s[0] = WTF_C_PCSW; |
349 | 7.77M | s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; |
350 | 7.77M | s[2] = ((cc.code >> 8) & 0x7f) | 0x80; |
351 | 7.77M | s[3] = ( cc.code & 0x7f) | 0x80; |
352 | 7.77M | n = 4; |
353 | 7.77M | break; |
354 | 56.3k | case WC_CCS_A_WCS16: |
355 | 56.3k | s[0] = (WC_CCS_IS_WIDE(cc.ccs) ? WTF_C_WCS16W : WTF_C_WCS16) |
356 | 56.3k | | (WC_CCS_IS_COMB(cc.ccs) ? WTF_C_COMB : 0); |
357 | 56.3k | wcs16_to_wtf(cc.code, s + 1); |
358 | 56.3k | s[1] |= (WC_CCS_INDEX(cc.ccs) << 2); |
359 | 56.3k | n = 4; |
360 | 56.3k | break; |
361 | 41.6M | case WC_CCS_A_WCS32: |
362 | 41.6M | s[0] = (WC_CCS_IS_WIDE(cc.ccs) ? WTF_C_WCS32W : WTF_C_WCS32) |
363 | 41.6M | | (WC_CCS_IS_COMB(cc.ccs) ? WTF_C_COMB : 0); |
364 | 41.6M | wcs32_to_wtf(cc.code, s + 1); |
365 | 41.6M | s[1] |= (WC_CCS_INDEX(cc.ccs) << 4); |
366 | 41.6M | n = 6; |
367 | 41.6M | break; |
368 | 37.0M | default: |
369 | 37.0M | s[0] = WTF_C_UNKNOWN; |
370 | 37.0M | s[1] = WC_CCS_INDEX(cc.ccs) | 0x80; |
371 | 37.0M | s[2] = (cc.code & 0x7f) | 0x80; |
372 | 37.0M | n = 3; |
373 | 37.0M | break; |
374 | 269M | } |
375 | 192M | Strcat_charp_n(os, (char *)s, n); |
376 | 192M | } |
377 | | |
378 | | void |
379 | | wtf_push_unknown(Str os, wc_uchar *p, size_t len) |
380 | 28.0M | { |
381 | 59.4M | for (; len--; p++) { |
382 | 31.4M | if (*p & 0x80) |
383 | 29.3M | wtf_push(os, WC_CCS_UNKNOWN, *p); |
384 | 2.09M | else |
385 | 2.09M | Strcat_char(os, (char)*p); |
386 | 31.4M | } |
387 | 28.0M | } |
388 | | |
389 | | wc_wchar_t |
390 | | wtf_parse1(wc_uchar **p) |
391 | 192M | { |
392 | 192M | wc_uchar *q = *p; |
393 | 192M | wc_wchar_t cc; |
394 | | |
395 | 192M | if (*q < 0x80) { |
396 | 494 | cc.ccs = WC_CCS_US_ASCII; |
397 | 494 | cc.code = *(q++); |
398 | 192M | } else if (*q > 0xa0) { |
399 | 4.77k | cc.ccs = wtf_gr_ccs; |
400 | 4.77k | if (WC_CCS_IS_WIDE(cc.ccs) && *(q+1)) { |
401 | 0 | cc.code = ((wc_uint32)*q << 8) | *(q+1); |
402 | 0 | q += 2; |
403 | 0 | } else |
404 | 4.77k | cc.code = *(q++); |
405 | 192M | } else { |
406 | 192M | cc.ccs = (wc_uint32)CCS_MAP[*(q++) - 0x80] << 8; |
407 | 192M | switch (WC_CCS_TYPE(cc.ccs)) { |
408 | 505k | case WC_CCS_A_CS94: |
409 | 4.92M | case WC_CCS_A_CS96: |
410 | 4.94M | case WC_CCS_A_CS942: |
411 | 23.3M | case WC_CCS_A_PCS: |
412 | 60.4M | case WC_CCS_A_UNKNOWN: |
413 | 60.4M | if (*q && *(q+1)) { |
414 | 60.4M | cc.ccs |= *(q++) & 0x7f; |
415 | 60.4M | cc.code = *(q++); |
416 | 60.4M | } else { |
417 | 2.38k | cc.ccs = WC_CCS_US_ASCII; |
418 | 2.38k | cc.code = (wc_uint32)' '; |
419 | 2.38k | } |
420 | 60.4M | break; |
421 | 82.1M | case WC_CCS_A_CS94W: |
422 | 82.1M | case WC_CCS_A_CS96W: |
423 | 89.9M | case WC_CCS_A_PCSW: |
424 | 89.9M | if (*q && *(q+1) && *(q+2)) { |
425 | 89.9M | cc.ccs |= *(q++) & 0x7f; |
426 | 89.9M | cc.code = ((wc_uint32)*q << 8) | *(q+1); |
427 | 89.9M | q += 2; |
428 | 89.9M | } else { |
429 | 1.55k | cc.ccs = WC_CCS_US_ASCII; |
430 | 1.55k | cc.code = (wc_uint32)' '; |
431 | 1.55k | } |
432 | 89.9M | break; |
433 | 61.1k | case WC_CCS_A_WCS16: |
434 | 61.1k | case WC_CCS_A_WCS16W: |
435 | 61.1k | if (*q && *(q+1) && *(q+2)) { |
436 | 59.4k | cc.ccs |= (*q & 0x7c) >> 2; |
437 | 59.4k | cc.code = wtf_to_wcs16(q); |
438 | 59.4k | q += 3; |
439 | 59.4k | } else { |
440 | 1.67k | cc.ccs = WC_CCS_US_ASCII; |
441 | 1.67k | cc.code = (wc_uint32)' '; |
442 | 1.67k | } |
443 | 61.1k | break; |
444 | 41.7M | case WC_CCS_A_WCS32: |
445 | 41.7M | case WC_CCS_A_WCS32W: |
446 | 41.7M | if (*q && *(q+1) && *(q+2) && *(q+3) && *(q+4)) { |
447 | 41.7M | cc.ccs |= (*q & 0x70) >> 4; |
448 | 41.7M | cc.code = wtf_to_wcs32(q); |
449 | 41.7M | q += 5; |
450 | 41.7M | } else { |
451 | 2.69k | cc.ccs = WC_CCS_US_ASCII; |
452 | 2.69k | cc.code = (wc_uint32)' '; |
453 | 2.69k | } |
454 | 41.7M | break; |
455 | 988 | default: |
456 | | /* case 0: */ |
457 | 988 | cc.ccs = WC_CCS_US_ASCII; |
458 | 988 | cc.code = (wc_uint32)' '; |
459 | 988 | break; |
460 | 192M | } |
461 | 192M | } |
462 | | |
463 | 192M | *p = q; |
464 | 192M | switch (cc.ccs) { |
465 | 74.3k | case WC_CCS_SJIS_EXT_1: |
466 | 105k | case WC_CCS_SJIS_EXT_2: |
467 | 105k | return wc_cs94w_to_sjis_ext(cc); |
468 | 3.78M | case WC_CCS_GBK_1: |
469 | 3.81M | case WC_CCS_GBK_2: |
470 | 3.81M | return wc_cs128w_to_gbk(cc); |
471 | 436k | case WC_CCS_GBK_EXT_1: |
472 | 447k | case WC_CCS_GBK_EXT_2: |
473 | 447k | return wc_cs128w_to_gbk_ext(cc); |
474 | 402k | case WC_CCS_BIG5_1: |
475 | 666k | case WC_CCS_BIG5_2: |
476 | 666k | return wc_cs94w_to_big5(cc); |
477 | 16.4k | case WC_CCS_HKSCS_1: |
478 | 25.5k | case WC_CCS_HKSCS_2: |
479 | 25.5k | return wc_cs128w_to_hkscs(cc); |
480 | 1.38M | case WC_CCS_JOHAB_1: |
481 | 1.45M | case WC_CCS_JOHAB_2: |
482 | 1.52M | case WC_CCS_JOHAB_3: |
483 | 1.52M | return wc_cs128w_to_johab(cc); |
484 | 1.14M | case WC_CCS_UHC_1: |
485 | 1.14M | case WC_CCS_UHC_2: |
486 | 1.14M | return wc_cs128w_to_uhc(cc); |
487 | 192M | } |
488 | 184M | return cc; |
489 | 192M | } |
490 | | |
491 | | wc_wchar_t |
492 | | wtf_parse(wc_uchar **p) |
493 | 203M | { |
494 | 203M | wc_uchar *q; |
495 | 203M | wc_wchar_t cc, cc2; |
496 | 203M | wc_uint32 ucs, ucs2; |
497 | | |
498 | 203M | if (**p < 0x80) { |
499 | 11.0M | cc.ccs = WC_CCS_US_ASCII; |
500 | 11.0M | cc.code = *((*p)++); |
501 | 11.0M | } else |
502 | 192M | cc = wtf_parse1(p); |
503 | 203M | if ((! WcOption.use_combining) || WTF_WIDTH_MAP[**p]) |
504 | 203M | return cc; |
505 | | |
506 | 98.4k | q = *p; |
507 | 98.4k | cc2 = wtf_parse1(&q); |
508 | 98.4k | if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_CP1258_1) && |
509 | 41.2k | WC_CCS_SET(cc2.ccs) == WC_CCS_CP1258_1) { |
510 | 18.5k | cc2.code = wc_cp1258_precompose(cc.code, cc2.code); |
511 | 18.5k | if (cc2.code) { |
512 | 1.93k | cc2.ccs = WC_CCS_CP1258_2; |
513 | 1.93k | *p = q; |
514 | 1.93k | return cc2; |
515 | 1.93k | } |
516 | 79.8k | } else if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_TCVN_5712_1) && |
517 | 37.6k | WC_CCS_SET(cc2.ccs) == WC_CCS_TCVN_5712_1) { |
518 | 28.9k | cc2.code = wc_tcvn5712_precompose(cc.code, cc2.code); |
519 | 28.9k | if (cc2.code) { |
520 | 3.05k | cc2.ccs = WC_CCS_TCVN_5712_3; |
521 | 3.05k | *p = q; |
522 | 3.05k | return cc2; |
523 | 3.05k | } |
524 | 28.9k | } |
525 | 50.8k | #ifdef USE_UNICODE |
526 | 50.8k | else if ((cc.ccs == WC_CCS_US_ASCII || cc.ccs == WC_CCS_ISO_8859_1 || |
527 | 42.0k | WC_CCS_IS_UNICODE(cc.ccs)) && WC_CCS_IS_UNICODE(cc2.ccs)) { |
528 | 20.3k | while (1) { |
529 | 20.3k | ucs = (WC_CCS_SET(cc.ccs) == WC_CCS_UCS_TAG) |
530 | 20.3k | ? wc_ucs_tag_to_ucs(cc.code) : cc.code; |
531 | 20.3k | ucs2 = (WC_CCS_SET(cc2.ccs) == WC_CCS_UCS_TAG) |
532 | 20.3k | ? wc_ucs_tag_to_ucs(cc2.code) : cc2.code; |
533 | 20.3k | ucs = wc_ucs_precompose(ucs, ucs2); |
534 | 20.3k | if (ucs == WC_C_UCS4_ERROR) |
535 | 18.3k | break; |
536 | 2.06k | if (WC_CCS_SET(cc.ccs) == WC_CCS_UCS_TAG) |
537 | 819 | cc.code = wc_ucs_to_ucs_tag(ucs, wc_ucs_tag_to_tag(cc.code)); |
538 | 1.24k | else { |
539 | 1.24k | cc.ccs = wc_ucs_to_ccs(ucs); |
540 | 1.24k | cc.code = ucs; |
541 | 1.24k | } |
542 | 2.06k | *p = q; |
543 | 2.06k | if (! WTF_WIDTH_MAP[*q]) |
544 | 273 | break; |
545 | 1.79k | cc2 = wtf_parse1(&q); |
546 | 1.79k | if (! WC_CCS_IS_UNICODE(cc2.ccs)) |
547 | 609 | break; |
548 | 1.79k | } |
549 | 19.1k | } |
550 | 93.4k | #endif |
551 | 93.4k | return cc; |
552 | 98.4k | } |
553 | | |
554 | | wc_ccs |
555 | | wtf_get_ccs(wc_uchar *p) |
556 | 0 | { |
557 | 0 | return wtf_parse1(&p).ccs; |
558 | 0 | } |
559 | | |
560 | | wc_uint32 |
561 | | wtf_get_code(wc_uchar *p) |
562 | 0 | { |
563 | 0 | return wtf_parse1(&p).code; |
564 | 0 | } |
565 | | |
566 | | wc_bool |
567 | | wtf_is_hangul(wc_uchar *p) |
568 | 0 | { |
569 | 0 | if (*p > 0xa0) |
570 | 0 | return (wtf_gr_ccs == WC_CCS_KS_X_1001 || wtf_gr_ccs == WC_CCS_JOHAB_1); |
571 | 0 | else if (*p == WTF_C_CS94W) |
572 | 0 | return ((*(p + 1) & 0x7f) == WC_F_KS_X_1001); |
573 | 0 | else if (*p == WTF_C_PCSW) { |
574 | 0 | wc_uchar f = *(p + 1) & 0x7f; |
575 | 0 | return (f == WC_F_JOHAB_1 || f == WC_F_JOHAB_2 || f == WC_F_JOHAB_3 || |
576 | 0 | f == WC_F_UHC_1 || f == WC_F_UHC_2); |
577 | 0 | } |
578 | 0 | #ifdef USE_UNICODE |
579 | 0 | else if (*p == WTF_C_WCS16W) { |
580 | 0 | wc_uchar f = (*(++p) & 0x7f) >> 2; |
581 | 0 | if (f == WC_F_UCS2) |
582 | 0 | return wc_is_ucs_hangul(wtf_to_wcs16(p)); |
583 | 0 | } else if (*p == WTF_C_WCS32W) { |
584 | 0 | wc_uchar f = (*(++p) & 0x7f) >> 4; |
585 | 0 | if (f == WC_F_UCS_TAG) |
586 | 0 | return wc_is_ucs_hangul(wc_ucs_tag_to_ucs(wtf_to_wcs32(p))); |
587 | 0 | } |
588 | 0 | #endif |
589 | 0 | return WC_FALSE; |
590 | 0 | } |
591 | | |
592 | | char * |
593 | | wtf_conv_fit(char *s, wc_ces ces) |
594 | 0 | { |
595 | 0 | wc_uchar *p; |
596 | 0 | Str os; |
597 | 0 | wc_wchar_t cc; |
598 | 0 | wc_ces major_ces; |
599 | 0 | wc_bool pre_conv, ucs_conv; |
600 | |
|
601 | 0 | if (ces == WC_CES_WTF || ces == WC_CES_US_ASCII) |
602 | 0 | return s; |
603 | | |
604 | 0 | for (p = (wc_uchar *)s; *p && *p < 0x80; p++) |
605 | 0 | ; |
606 | 0 | if (! *p) |
607 | 0 | return s; |
608 | | |
609 | 0 | os = Strnew_size(strlen(s)); |
610 | 0 | if (p > (wc_uchar *)s) |
611 | 0 | Strcopy_charp_n(os, s, (int)(p - (wc_uchar *)s)); |
612 | |
|
613 | 0 | major_ces = wtf_major_ces; |
614 | 0 | pre_conv = WcOption.pre_conv; |
615 | 0 | ucs_conv = WcOption.ucs_conv; |
616 | 0 | wtf_major_ces = ces; |
617 | 0 | WcOption.pre_conv = WC_TRUE; |
618 | 0 | WcOption.ucs_conv = WC_TRUE; |
619 | 0 | while (*p) { |
620 | 0 | cc = wtf_parse1(&p); |
621 | 0 | wtf_push(os, cc.ccs, cc.code); |
622 | 0 | } |
623 | 0 | wtf_major_ces = major_ces; |
624 | 0 | WcOption.pre_conv = pre_conv; |
625 | 0 | WcOption.ucs_conv = ucs_conv; |
626 | 0 | return os->ptr; |
627 | 0 | } |