Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | #ifdef USE_UNICODE |
3 | | |
4 | | #include <stdlib.h> |
5 | | #include "wc.h" |
6 | | #include "ucs.h" |
7 | | #include "search.h" |
8 | | #include "big5.h" |
9 | | #include "hkscs.h" |
10 | | #include "sjis.h" |
11 | | #include "johab.h" |
12 | | #include "gbk.h" |
13 | | #include "gb18030.h" |
14 | | #include "uhc.h" |
15 | | #include "viet.h" |
16 | | #include "wtf.h" |
17 | | |
18 | | #include "ucs.map" |
19 | | |
20 | | #include "map/ucs_ambwidth.map" |
21 | | #include "map/ucs_wide.map" |
22 | | #include "map/ucs_combining.map" |
23 | | #include "map/ucs_precompose.map" |
24 | | #include "map/ucs_hangul.map" |
25 | | #include "map/ucs_fullwidth.map" |
26 | | #include "map/ucs_isalpha.map" |
27 | | #include "map/ucs_isdigit.map" |
28 | | #include "map/ucs_islower.map" |
29 | | #include "map/ucs_isupper.map" |
30 | | #include "map/ucs_case.map" |
31 | | |
/*
 * Size used when allocating the language-tag accumulation buffer and the
 * length limit applied while appending tag characters (see wtf_push_ucs).
 * NOTE(review): the 8 + 1 + 8 breakdown presumably means two 8-character
 * subtags joined by one separator -- confirm.
 */
#define MAX_TAG_LEN (8 + 1 + 8)
/*
 * Number of slots in tag_map[].  Slot 0 is never assigned, and
 * wc_ucs_put_tag() refuses to grow past MAX_TAG_MAP - 1 entries.
 */
#define MAX_TAG_MAP 0x100
/* Count of registered tags; valid tag numbers are 1..n_tag_map. */
static int n_tag_map = 0;
/* Registered tag strings, indexed by tag number. */
static char *tag_map[ MAX_TAG_MAP ];
36 | | |
37 | | wc_table * |
38 | | wc_get_ucs_table(wc_ccs ccs) |
39 | 2.34M | { |
40 | 2.34M | int f = WC_CCS_INDEX(ccs); |
41 | | |
42 | 2.34M | switch (WC_CCS_TYPE(ccs)) { |
43 | 8.92k | case WC_CCS_A_CS94: |
44 | 8.92k | if (f < WC_F_ISO_BASE || f > WC_F_CS94_END) |
45 | 0 | return NULL; |
46 | 8.92k | return &ucs_cs94_table[f - WC_F_ISO_BASE]; |
47 | 5.83k | case WC_CCS_A_CS94W: |
48 | 5.83k | if (f < WC_F_ISO_BASE || f > WC_F_CS94W_END) |
49 | 0 | return NULL; |
50 | 5.83k | return &ucs_cs94w_table[f - WC_F_ISO_BASE]; |
51 | 231 | case WC_CCS_A_CS96: |
52 | 231 | if (f < WC_F_ISO_BASE || f > WC_F_CS96_END) |
53 | 0 | return NULL; |
54 | 231 | return &ucs_cs96_table[f - WC_F_ISO_BASE]; |
55 | 0 | case WC_CCS_A_CS96W: |
56 | 0 | if (f < WC_F_ISO_BASE || f > WC_F_CS96W_END) |
57 | 0 | return NULL; |
58 | 0 | return &ucs_cs96w_table[f - WC_F_ISO_BASE]; |
59 | 0 | case WC_CCS_A_CS942: |
60 | 0 | if (f < WC_F_ISO_BASE || f > WC_F_CS942_END) |
61 | 0 | return NULL; |
62 | 0 | return &ucs_cs942_table[f - WC_F_ISO_BASE]; |
63 | 935 | case WC_CCS_A_PCS: |
64 | 935 | if (f < WC_F_PCS_BASE || f > WC_F_PCS_END) |
65 | 0 | return NULL; |
66 | 935 | return &ucs_pcs_table[f - WC_F_PCS_BASE]; |
67 | 2.32M | case WC_CCS_A_PCSW: |
68 | 2.32M | if (f < WC_F_PCS_BASE || f > WC_F_PCSW_END) |
69 | 0 | return NULL; |
70 | 2.32M | return &ucs_pcsw_table[f - WC_F_PCS_BASE]; |
71 | 3.38k | default: |
72 | 3.38k | return NULL; |
73 | 2.34M | } |
74 | 2.34M | } |
75 | | |
76 | | wc_wchar_t |
77 | | wc_ucs_to_any(wc_uint32 ucs, wc_table *t) |
78 | 134M | { |
79 | 134M | wc_wchar_t cc; |
80 | 134M | wc_map *map; |
81 | | |
82 | 134M | if (t && t->map && ucs && ucs <= WC_C_UCS2_END) { |
83 | 134M | map = wc_map_search((wc_uint16)ucs, t->map, t->n); |
84 | 134M | if (map) |
85 | 41.2M | return t->conv(t->ccs, map->code2); |
86 | 134M | } |
87 | 93.0M | if (t && (ucs & ~0xFFFF) == WC_C_UCS4_PLANE2) { |
88 | 9.36k | if (t->ccs == WC_CCS_JIS_X_0213_1) |
89 | 0 | map = wc_map_search((wc_uint16)(ucs & 0xffff), |
90 | 0 | ucs_p2_jisx02131_map, N_ucs_p2_jisx02131_map); |
91 | 9.36k | else if (t->ccs == WC_CCS_JIS_X_0213_2) |
92 | 0 | map = wc_map_search((wc_uint16)(ucs & 0xffff), |
93 | 0 | ucs_p2_jisx02132_map, N_ucs_p2_jisx02132_map); |
94 | 9.36k | else if (t->ccs == WC_CCS_HKSCS || |
95 | 9.36k | t->ccs == WC_CCS_HKSCS_1 || t->ccs == WC_CCS_HKSCS_2) |
96 | 1.71k | map = wc_map_search((wc_uint16)(ucs & 0xffff), |
97 | 1.71k | ucs_p2_hkscs_map, N_ucs_p2_hkscs_map); |
98 | 7.64k | else |
99 | 7.64k | map = NULL; |
100 | 9.36k | if (map) |
101 | 214 | return t->conv(t->ccs, map->code2); |
102 | 9.36k | } |
103 | 93.0M | cc.ccs = WC_CCS_UNKNOWN; |
104 | 93.0M | cc.code = 0; |
105 | 93.0M | return cc; |
106 | 93.0M | } |
107 | | |
108 | | wc_uint32 |
109 | | wc_any_to_ucs(wc_wchar_t cc) |
110 | 98.0M | { |
111 | 98.0M | int f; |
112 | 98.0M | wc_uint16 *map = NULL; |
113 | 98.0M | wc_uint32 map_size = 0x80; |
114 | 98.0M | wc_map *map2; |
115 | | |
116 | 98.0M | f = WC_CCS_INDEX(cc.ccs); |
117 | 98.0M | switch (WC_CCS_TYPE(cc.ccs)) { |
118 | 437k | case WC_CCS_A_CS94: |
119 | 437k | if (cc.ccs == WC_CCS_US_ASCII) |
120 | 0 | return cc.code; |
121 | 437k | if (f < WC_F_ISO_BASE || f > WC_F_CS94_END) |
122 | 65.7k | return WC_C_UCS4_ERROR; |
123 | 371k | map = cs94_ucs_map[f - WC_F_ISO_BASE]; |
124 | 371k | cc.code &= 0x7f; |
125 | 371k | break; |
126 | 68.9M | case WC_CCS_A_CS94W: |
127 | 68.9M | if (cc.ccs == WC_CCS_GB_2312 && WcOption.use_gb12345_map) { |
128 | 0 | cc.ccs = WC_CCS_GB_12345; |
129 | 0 | return wc_any_to_ucs(cc); |
130 | 68.9M | } else if (cc.ccs == WC_CCS_JIS_X_0213_1) { |
131 | 75.5k | map2 = wc_map_search((wc_uint16)(cc.code & 0x7f7f), |
132 | 75.5k | jisx02131_ucs_p2_map, N_jisx02131_ucs_p2_map); |
133 | 75.5k | if (map2) |
134 | 711 | return map2->code2 | WC_C_UCS4_PLANE2; |
135 | 68.9M | } else if (cc.ccs == WC_CCS_JIS_X_0213_2) { |
136 | 252k | map2 = wc_map_search((wc_uint16)(cc.code & 0x7f7f), |
137 | 252k | jisx02132_ucs_p2_map, N_jisx02132_ucs_p2_map); |
138 | 252k | if (map2) |
139 | 2.73k | return map2->code2 | WC_C_UCS4_PLANE2; |
140 | 252k | } |
141 | 68.9M | if (f < WC_F_ISO_BASE || f > WC_F_CS94W_END) |
142 | 514k | return 0; |
143 | 68.4M | map = cs94w_ucs_map[f - WC_F_ISO_BASE]; |
144 | 68.4M | map_size = cs94w_ucs_map_size[f - WC_F_ISO_BASE]; |
145 | 68.4M | cc.code = WC_CS94W_N(cc.code); |
146 | 68.4M | break; |
147 | 4.70M | case WC_CCS_A_CS96: |
148 | 4.70M | if (f < WC_F_ISO_BASE || f > WC_F_CS96_END) |
149 | 18.2k | return WC_C_UCS4_ERROR; |
150 | 4.68M | map = cs96_ucs_map[f - WC_F_ISO_BASE]; |
151 | 4.68M | cc.code &= 0x7f; |
152 | 4.68M | break; |
153 | 21.6k | case WC_CCS_A_CS96W: |
154 | 21.6k | if (f < WC_F_ISO_BASE || f > WC_F_CS96W_END) |
155 | 21.6k | return WC_C_UCS4_ERROR; |
156 | 0 | map = cs96w_ucs_map[f - WC_F_ISO_BASE]; |
157 | 0 | map_size = cs96w_ucs_map_size[f - WC_F_ISO_BASE]; |
158 | 0 | cc.code = WC_CS96W_N(cc.code); |
159 | 0 | break; |
160 | 21.9k | case WC_CCS_A_CS942: |
161 | 21.9k | if (f < WC_F_ISO_BASE || f > WC_F_CS942_END) |
162 | 21.9k | return WC_C_UCS4_ERROR; |
163 | 0 | map = cs942_ucs_map[f - WC_F_ISO_BASE]; |
164 | 0 | cc.code &= 0x7f; |
165 | 0 | break; |
166 | 5.91M | case WC_CCS_A_PCS: |
167 | 5.91M | if (f < WC_F_PCS_BASE || f > WC_F_PCS_END) |
168 | 410 | return WC_C_UCS4_ERROR; |
169 | 5.91M | switch (cc.ccs) { |
170 | 449 | case WC_CCS_CP1258_2: |
171 | 449 | map2 = wc_map_search((wc_uint16)cc.code, |
172 | 449 | cp12582_ucs_map, N_cp12582_ucs_map); |
173 | 449 | if (map2) |
174 | 252 | return map2->code2; |
175 | 197 | return WC_C_UCS4_ERROR; |
176 | 300 | case WC_CCS_TCVN_5712_3: |
177 | 300 | return wc_any_to_ucs(wc_tcvn57123_to_tcvn5712(cc)); |
178 | 21.6k | case WC_CCS_GBK_80: |
179 | 21.6k | return WC_C_UCS2_EURO; |
180 | 5.91M | } |
181 | 5.89M | map = pcs_ucs_map[f - WC_F_PCS_BASE]; |
182 | 5.89M | map_size = pcs_ucs_map_size[f - WC_F_PCS_BASE]; |
183 | 5.89M | cc.code &= 0x7f; |
184 | 5.89M | break; |
185 | 1.39M | case WC_CCS_A_PCSW: |
186 | 1.39M | if (f < WC_F_PCS_BASE || f > WC_F_PCSW_END) |
187 | 596 | return WC_C_UCS4_ERROR; |
188 | 1.39M | map = pcsw_ucs_map[f - WC_F_PCS_BASE]; |
189 | 1.39M | map_size = pcsw_ucs_map_size[f - WC_F_PCS_BASE]; |
190 | 1.39M | switch (cc.ccs) { |
191 | 107k | case WC_CCS_BIG5: |
192 | 107k | cc.code = WC_BIG5_N(cc.code); |
193 | 107k | break; |
194 | 0 | case WC_CCS_BIG5_2: |
195 | 0 | cc.code = WC_CS94W_N(cc.code) + WC_C_BIG5_2_BASE; |
196 | 0 | break; |
197 | 0 | case WC_CCS_HKSCS_1: |
198 | 0 | case WC_CCS_HKSCS_2: |
199 | 0 | cc = wc_cs128w_to_hkscs(cc); |
200 | 1.18k | case WC_CCS_HKSCS: |
201 | 1.18k | map2 = wc_map_search((wc_uint16)cc.code, |
202 | 1.18k | hkscs_ucs_p2_map, N_hkscs_ucs_p2_map); |
203 | 1.18k | if (map2) |
204 | 398 | return map2->code2 | WC_C_UCS4_PLANE2; |
205 | 790 | cc.code = wc_hkscs_to_N(cc.code); |
206 | 790 | break; |
207 | 487k | case WC_CCS_JOHAB: |
208 | 487k | return wc_any_to_ucs(wc_johab_to_cs128w(cc)); |
209 | 447k | case WC_CCS_JOHAB_1: |
210 | 447k | return WC_CS94x128_N(cc.code) + WC_C_UCS2_HANGUL; |
211 | 37.6k | case WC_CCS_JOHAB_2: |
212 | 37.6k | cc.code = WC_CS128W_N(cc.code); |
213 | 37.6k | cc.code = WC_N_JOHAB2(cc.code); |
214 | 37.6k | map2 = wc_map_search((wc_uint16)cc.code, |
215 | 37.6k | johab2_ucs_map, N_johab2_ucs_map); |
216 | 37.6k | if (map2) |
217 | 578 | return map2->code2; |
218 | 37.0k | return WC_C_UCS4_ERROR; |
219 | 40.9k | case WC_CCS_JOHAB_3: |
220 | 40.9k | if ((cc.code & 0x7f7f) < 0x2121) |
221 | 1.63k | return WC_C_UCS4_ERROR; |
222 | 88.9k | case WC_CCS_SJIS_EXT: |
223 | 88.9k | return wc_any_to_ucs(wc_sjis_ext_to_cs94w(cc)); |
224 | 36.6k | case WC_CCS_SJIS_EXT_1: |
225 | 36.6k | cc.code = wc_sjis_ext1_to_N(cc.code); |
226 | 36.6k | if (cc.code == WC_C_SJIS_ERROR) |
227 | 28.2k | return WC_C_UCS4_ERROR; |
228 | 8.45k | break; |
229 | 52.3k | case WC_CCS_SJIS_EXT_2: |
230 | 52.3k | cc.code = wc_sjis_ext2_to_N(cc.code); |
231 | 52.3k | if (cc.code == WC_C_SJIS_ERROR) |
232 | 41.9k | return WC_C_UCS4_ERROR; |
233 | 10.4k | break; |
234 | 10.4k | case WC_CCS_GBK_1: |
235 | 0 | case WC_CCS_GBK_2: |
236 | 0 | cc = wc_cs128w_to_gbk(cc); |
237 | 90.1k | case WC_CCS_GBK: |
238 | 90.1k | cc.code = wc_gbk_to_N(cc.code); |
239 | 90.1k | break; |
240 | 26.4k | case WC_CCS_GBK_EXT: |
241 | 26.4k | case WC_CCS_GBK_EXT_1: |
242 | 26.4k | case WC_CCS_GBK_EXT_2: |
243 | 26.4k | return wc_gb18030_to_ucs(cc); |
244 | 0 | case WC_CCS_UHC_1: |
245 | 0 | case WC_CCS_UHC_2: |
246 | 0 | cc = wc_cs128w_to_uhc(cc); |
247 | 15.6k | case WC_CCS_UHC: |
248 | 15.6k | if (cc.code > WC_C_UHC_END) |
249 | 5.44k | return WC_C_UCS4_ERROR; |
250 | 10.2k | cc.code = wc_uhc_to_N(cc.code); |
251 | 10.2k | break; |
252 | 546 | default: |
253 | 546 | cc.code = WC_CS94W_N(cc.code); |
254 | 546 | break; |
255 | 1.39M | } |
256 | 228k | break; |
257 | 228k | case WC_CCS_A_WCS16: |
258 | 7.39k | switch (WC_CCS_SET(cc.ccs)) { |
259 | 7.11k | case WC_CCS_UCS2: |
260 | 7.11k | return cc.code; |
261 | 7.39k | } |
262 | 284 | return WC_C_UCS4_ERROR; |
263 | 15.8M | case WC_CCS_A_WCS32: |
264 | 15.8M | switch (WC_CCS_SET(cc.ccs)) { |
265 | 4.59k | case WC_CCS_UCS4: |
266 | 4.59k | return cc.code; |
267 | 15.8M | case WC_CCS_UCS_TAG: |
268 | 15.8M | return wc_ucs_tag_to_ucs(cc.code); |
269 | 3.57k | case WC_CCS_GB18030: |
270 | 3.57k | return wc_gb18030_to_ucs(cc); |
271 | 15.8M | } |
272 | 335 | return WC_C_UCS4_ERROR; |
273 | 725k | case WC_CCS_A_UNKNOWN: |
274 | 725k | if (cc.ccs == WC_CCS_C1) |
275 | 725k | return (cc.code | 0x80); |
276 | 2.23k | default: |
277 | 2.23k | return WC_C_UCS4_ERROR; |
278 | 98.0M | } |
279 | 79.6M | if (map == NULL) |
280 | 308k | return WC_C_UCS4_ERROR; |
281 | 79.3M | if (map_size == 0 || cc.code > map_size - 1) |
282 | 1.85k | return WC_C_UCS4_ERROR; |
283 | 79.3M | cc.code = map[cc.code]; |
284 | 79.3M | return cc.code ? cc.code : WC_C_UCS4_ERROR; |
285 | 79.3M | } |
286 | | |
287 | | wc_wchar_t |
288 | | wc_any_to_any(wc_wchar_t cc, wc_table *t) |
289 | 38.8k | { |
290 | 38.8k | wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs); |
291 | 38.8k | wc_uint32 ucs = wc_any_to_ucs(cc); |
292 | | |
293 | 38.8k | if (ucs != WC_C_UCS4_ERROR) { |
294 | 35.8k | cc = wc_ucs_to_any(ucs, t); |
295 | 35.8k | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
296 | 16.9k | return cc; |
297 | | |
298 | 18.9k | ucs = wc_ucs_to_fullwidth(ucs); |
299 | 18.9k | if (ucs != WC_C_UCS4_ERROR) { |
300 | 0 | cc = wc_ucs_to_any(ucs, t); |
301 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
302 | 0 | return cc; |
303 | 0 | } |
304 | 18.9k | } |
305 | 21.9k | cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; |
306 | 21.9k | return cc; |
307 | 38.8k | } |
308 | | |
309 | | wc_wchar_t |
310 | | wc_ucs_to_any_list(wc_uint32 ucs, wc_table **tlist) |
311 | 78.2M | { |
312 | 78.2M | wc_wchar_t cc; |
313 | 78.2M | wc_table **t; |
314 | | |
315 | 78.2M | if (tlist != NULL) { |
316 | 223M | for (t = tlist; *t != NULL; t++) { |
317 | 184M | if ((*t)->map == NULL) |
318 | 52.9M | continue; |
319 | 131M | cc = wc_ucs_to_any(ucs, *t); |
320 | 131M | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
321 | 39.9M | return cc; |
322 | 131M | } |
323 | 78.2M | } |
324 | 38.2M | cc.ccs = WC_CCS_UNKNOWN; |
325 | 38.2M | return cc; |
326 | 78.2M | } |
327 | | |
328 | | wc_wchar_t |
329 | | wc_any_to_any_ces(wc_wchar_t cc, wc_status *st) |
330 | 94.3M | { |
331 | 94.3M | wc_uint32 ucs = wc_any_to_ucs(cc); |
332 | 94.3M | wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs); |
333 | | |
334 | 94.3M | if (ucs < 0x80) { |
335 | 16.1M | cc.ccs = WC_CCS_US_ASCII; |
336 | 16.1M | cc.code = ucs; |
337 | 16.1M | return cc; |
338 | 16.1M | } |
339 | 78.1M | if (ucs != WC_C_UCS4_ERROR) { |
340 | 77.3M | if (st->ces_info->id & WC_CES_T_UTF) { |
341 | 0 | cc.ccs = wc_ucs_to_ccs(ucs); |
342 | 0 | cc.code = ucs; |
343 | 0 | return cc; |
344 | 77.3M | } else if (st->ces_info->id == WC_CES_JOHAB) { |
345 | 2.26M | cc = wc_ucs_to_johab(ucs); |
346 | 2.26M | if (WC_CCS_IS_UNKNOWN(cc.ccs)) |
347 | 1.03M | cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; |
348 | 2.26M | return cc; |
349 | 2.26M | } |
350 | 75.1M | cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist); |
351 | 75.1M | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
352 | 37.5M | return cc; |
353 | 37.6M | if (! WcOption.fix_width_conv) { |
354 | 0 | cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw); |
355 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
356 | 0 | return cc; |
357 | 0 | } |
358 | 37.6M | if (st->ces_info->id == WC_CES_GB18030) { |
359 | 15.7M | cc = wc_ucs_to_gb18030(ucs); |
360 | 15.7M | if (WC_CCS_IS_UNKNOWN(cc.ccs)) |
361 | 10.7k | cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; |
362 | 15.7M | return cc; |
363 | 15.7M | } |
364 | 21.8M | if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */ |
365 | 12.2k | cc.ccs = WC_CCS_US_ASCII; |
366 | 12.2k | cc.code = 0x20; |
367 | 12.2k | return cc; |
368 | 12.2k | } |
369 | 21.8M | if (st->ces_info->id & (WC_CES_T_ISO_8859|WC_CES_T_EUC) && |
370 | 21.8M | 0x80 <= ucs && ucs <= 0x9F) { |
371 | 1.25k | cc.ccs = WC_CCS_C1; |
372 | 1.25k | cc.code = ucs; |
373 | 1.25k | return cc; |
374 | 1.25k | } |
375 | | |
376 | 21.8M | ucs = wc_ucs_to_fullwidth(ucs); |
377 | 21.8M | if (ucs != WC_C_UCS4_ERROR) { |
378 | 537k | cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist); |
379 | 537k | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
380 | 455 | return cc; |
381 | 536k | if (! WcOption.fix_width_conv) { |
382 | 0 | cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw); |
383 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
384 | 0 | return cc; |
385 | 0 | } |
386 | 536k | } |
387 | 21.8M | } |
388 | 22.6M | cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; |
389 | 22.6M | return cc; |
390 | 78.1M | } |
391 | | |
392 | | wc_wchar_t |
393 | | wc_any_to_iso2022(wc_wchar_t cc, wc_status *st) |
394 | 2.55M | { |
395 | 2.55M | wc_uint32 ucs = wc_any_to_ucs(cc); |
396 | 2.55M | wc_ccs is_wide = WC_CCS_IS_WIDE(cc.ccs); |
397 | | |
398 | 2.55M | if (ucs < 0x80) { |
399 | 368 | cc.ccs = WC_CCS_US_ASCII; |
400 | 368 | cc.code = ucs; |
401 | 368 | return cc; |
402 | 368 | } |
403 | 2.55M | if (ucs != WC_C_UCS4_ERROR) { |
404 | 2.53M | cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist); |
405 | 2.53M | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
406 | 2.43M | return cc; |
407 | 102k | if (! WcOption.strict_iso2022) { |
408 | 0 | cc = (is_wide) ? wc_ucs_to_iso2022w(ucs) : wc_ucs_to_iso2022(ucs); |
409 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
410 | 0 | return cc; |
411 | 0 | } |
412 | 102k | if (! WcOption.fix_width_conv) { |
413 | 0 | cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw); |
414 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
415 | 0 | return cc; |
416 | 0 | if (! WcOption.strict_iso2022) { |
417 | 0 | cc = (is_wide) ? wc_ucs_to_iso2022(ucs) : wc_ucs_to_iso2022w(ucs); |
418 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
419 | 0 | return cc; |
420 | 0 | } |
421 | 0 | } |
422 | 102k | if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */ |
423 | 195 | cc.ccs = WC_CCS_US_ASCII; |
424 | 195 | cc.code = 0x20; |
425 | 195 | return cc; |
426 | 195 | } |
427 | | |
428 | 102k | ucs = wc_ucs_to_fullwidth(ucs); |
429 | 102k | if (ucs != WC_C_UCS4_ERROR) { |
430 | 536 | cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlistw : st->tlist); |
431 | 536 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
432 | 211 | return cc; |
433 | 325 | if (! WcOption.strict_iso2022) { |
434 | 0 | cc = (is_wide) ? wc_ucs_to_iso2022w(ucs) : wc_ucs_to_iso2022(ucs); |
435 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
436 | 0 | return cc; |
437 | 0 | } |
438 | 325 | if (! WcOption.fix_width_conv) { |
439 | 0 | cc = wc_ucs_to_any_list(ucs, is_wide ? st->tlist : st->tlistw); |
440 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
441 | 0 | return cc; |
442 | 0 | if (! WcOption.strict_iso2022) { |
443 | 0 | cc = (is_wide) ? wc_ucs_to_iso2022(ucs) : wc_ucs_to_iso2022w(ucs); |
444 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
445 | 0 | return cc; |
446 | 0 | } |
447 | 0 | } |
448 | 325 | } |
449 | 102k | if (ucs == WC_C_UCS2_NBSP) { /* NBSP -> SP */ |
450 | 0 | cc.ccs = WC_CCS_US_ASCII; |
451 | 0 | cc.code = 0x20; |
452 | 0 | return cc; |
453 | 0 | } |
454 | 102k | } |
455 | 113k | cc.ccs = is_wide ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; |
456 | 113k | return cc; |
457 | 2.55M | } |
458 | | |
459 | | wc_wchar_t |
460 | | wc_ucs_to_iso2022(wc_uint32 ucs) |
461 | 0 | { |
462 | 0 | wc_table *t; |
463 | 0 | wc_wchar_t cc; |
464 | 0 | int f; |
465 | |
|
466 | 0 | if (ucs <= WC_C_UCS2_END) { |
467 | 0 | for (f = 0; f <= WC_F_CS96_END - WC_F_ISO_BASE; f++) { |
468 | 0 | t = &ucs_cs96_table[f]; |
469 | 0 | if (t->map == NULL) |
470 | 0 | continue; |
471 | 0 | cc = wc_ucs_to_any((wc_uint16)ucs, t); |
472 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
473 | 0 | return cc; |
474 | 0 | } |
475 | 0 | for (f = 0; f <= WC_F_CS94_END - WC_F_ISO_BASE; f++) { |
476 | 0 | t = &ucs_cs94_table[f]; |
477 | 0 | if (t->map == NULL) |
478 | 0 | continue; |
479 | 0 | cc = wc_ucs_to_any((wc_uint16)ucs, t); |
480 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
481 | 0 | return cc; |
482 | 0 | } |
483 | 0 | for (f = 0; f <= WC_F_CS942_END - WC_F_ISO_BASE; f++) { |
484 | 0 | t = &ucs_cs942_table[f]; |
485 | 0 | if (t->map == NULL) |
486 | 0 | continue; |
487 | 0 | cc = wc_ucs_to_any((wc_uint16)ucs, t); |
488 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
489 | 0 | return cc; |
490 | 0 | } |
491 | 0 | } |
492 | 0 | cc.ccs = WC_CCS_UNKNOWN; |
493 | 0 | return cc; |
494 | 0 | } |
495 | | |
496 | | wc_wchar_t |
497 | | wc_ucs_to_iso2022w(wc_uint32 ucs) |
498 | 0 | { |
499 | 0 | wc_table *t; |
500 | 0 | wc_wchar_t cc; |
501 | 0 | int f; |
502 | |
|
503 | 0 | if (ucs <= WC_C_UCS2_END) { |
504 | 0 | for (f = 0; f <= WC_F_CS94W_END - WC_F_ISO_BASE; f++) { |
505 | 0 | t = &ucs_cs94w_table[f]; |
506 | 0 | if (t->map == NULL) |
507 | 0 | continue; |
508 | 0 | cc = wc_ucs_to_any((wc_uint16)ucs, t); |
509 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
510 | 0 | return cc; |
511 | 0 | } |
512 | 0 | for (f = 0; f <= WC_F_CS96W_END - WC_F_ISO_BASE; f++) { |
513 | 0 | t = &ucs_cs96w_table[f]; |
514 | 0 | if (t->map == NULL) |
515 | 0 | continue; |
516 | 0 | cc = wc_ucs_to_any((wc_uint16)ucs, t); |
517 | 0 | if (!WC_CCS_IS_UNKNOWN(cc.ccs)) |
518 | 0 | return cc; |
519 | 0 | } |
520 | 0 | } |
521 | 0 | cc.ccs = WC_CCS_UNKNOWN_W; |
522 | 0 | return cc; |
523 | 0 | } |
524 | | |
525 | | wc_ccs |
526 | | wc_ucs_to_ccs(wc_uint32 ucs) |
527 | 212k | { |
528 | 212k | if (0x80 <= ucs && ucs <= 0x9F) |
529 | 2.75k | return WC_CCS_C1; |
530 | 209k | return ((ucs <= WC_C_UCS2_END) ? WC_CCS_UCS2 : WC_CCS_UCS4) |
531 | 209k | | ((WcOption.east_asian_width && wc_is_ucs_ambiguous_width(ucs)) |
532 | 209k | ? WC_CCS_A_WIDE : 0) |
533 | 209k | | (wc_is_ucs_wide(ucs) ? WC_CCS_A_WIDE : 0) |
534 | 209k | | (wc_is_ucs_combining(ucs) ? WC_CCS_A_COMB : 0); |
535 | 212k | } |
536 | | |
537 | | wc_bool |
538 | | wc_is_ucs_ambiguous_width(wc_uint32 ucs) |
539 | 0 | { |
540 | 0 | if (0xa1 <= ucs && ucs <= 0xfe && WcOption.use_jisx0213) |
541 | 0 | return 1; |
542 | 0 | else if (ucs <= WC_C_UCS2_END) |
543 | 0 | return (wc_map_range_search((wc_uint16)ucs, |
544 | 0 | ucs_ambwidth_map, N_ucs_ambwidth_map) != NULL); |
545 | 0 | else |
546 | 0 | return ((0xF0000 <= ucs && ucs <= 0xFFFFD) |
547 | 0 | || (0x100000 <= ucs && ucs <= 0x10FFFD)); |
548 | 0 | } |
549 | | |
550 | | wc_bool |
551 | | wc_is_ucs_wide(wc_uint32 ucs) |
552 | 209k | { |
553 | 209k | if (ucs <= WC_C_UCS2_END) |
554 | 203k | return (wc_map_range_search((wc_uint16)ucs, |
555 | 203k | ucs_wide_map, N_ucs_wide_map) != NULL); |
556 | 6.25k | else |
557 | 6.25k | return ((ucs & ~0xFFFF) == WC_C_UCS4_PLANE2 || |
558 | 6.25k | (ucs & ~0xFFFF) == WC_C_UCS4_PLANE3); |
559 | 209k | } |
560 | | |
561 | | wc_bool |
562 | | wc_is_ucs_combining(wc_uint32 ucs) |
563 | 209k | { |
564 | 209k | return (WcOption.use_combining && ucs <= WC_C_UCS2_END && |
565 | 209k | wc_map_range_search((wc_uint16)ucs, |
566 | 203k | ucs_combining_map, N_ucs_combining_map) != NULL); |
567 | 209k | } |
568 | | |
569 | | wc_bool |
570 | | wc_is_ucs_hangul(wc_uint32 ucs) |
571 | 0 | { |
572 | 0 | return (ucs <= WC_C_UCS2_END && |
573 | 0 | wc_map_range_search((wc_uint16)ucs, |
574 | 0 | ucs_hangul_map, N_ucs_hangul_map) != NULL); |
575 | 0 | } |
576 | | |
577 | | wc_bool |
578 | | wc_is_ucs_alpha(wc_uint32 ucs) |
579 | 0 | { |
580 | 0 | return (ucs <= WC_C_UCS2_END && |
581 | 0 | wc_map_range_search((wc_uint16)ucs, |
582 | 0 | ucs_isalpha_map, N_ucs_isalpha_map) != NULL); |
583 | 0 | } |
584 | | |
585 | | wc_bool |
586 | | wc_is_ucs_digit(wc_uint32 ucs) |
587 | 0 | { |
588 | 0 | return (ucs <= WC_C_UCS2_END && |
589 | 0 | wc_map_range_search((wc_uint16)ucs, |
590 | 0 | ucs_isdigit_map, N_ucs_isdigit_map) != NULL); |
591 | 0 | } |
592 | | |
593 | | wc_bool |
594 | | wc_is_ucs_alnum(wc_uint32 ucs) |
595 | 0 | { |
596 | 0 | return (wc_is_ucs_alpha(ucs) || wc_is_ucs_digit(ucs)); |
597 | 0 | } |
598 | | |
599 | | wc_bool |
600 | | wc_is_ucs_lower(wc_uint32 ucs) |
601 | 0 | { |
602 | 0 | return (ucs <= WC_C_UCS2_END && |
603 | 0 | wc_map_range_search((wc_uint16)ucs, |
604 | 0 | ucs_islower_map, N_ucs_islower_map) != NULL); |
605 | 0 | } |
606 | | |
607 | | wc_bool |
608 | | wc_is_ucs_upper(wc_uint32 ucs) |
609 | 0 | { |
610 | 0 | return (ucs <= WC_C_UCS2_END && |
611 | 0 | wc_map_range_search((wc_uint16)ucs, |
612 | 0 | ucs_isupper_map, N_ucs_isupper_map) != NULL); |
613 | 0 | } |
614 | | |
615 | | wc_uint32 |
616 | | wc_ucs_toupper(wc_uint32 ucs) |
617 | 0 | { |
618 | 0 | wc_map *conv = NULL; |
619 | 0 | if (ucs <= WC_C_UCS2_END) |
620 | 0 | conv = wc_map_search((wc_uint16)ucs, |
621 | 0 | ucs_toupper_map, N_ucs_toupper_map); |
622 | 0 | return conv ? (wc_uint32)(conv->code2) : ucs; |
623 | 0 | } |
624 | | |
625 | | wc_uint32 |
626 | | wc_ucs_tolower(wc_uint32 ucs) |
627 | 0 | { |
628 | 0 | wc_map *conv = NULL; |
629 | 0 | if (ucs <= WC_C_UCS2_END) |
630 | 0 | conv = wc_map_search((wc_uint16)ucs, |
631 | 0 | ucs_tolower_map, N_ucs_tolower_map); |
632 | 0 | return conv ? (wc_uint32)(conv->code2) : ucs; |
633 | 0 | } |
634 | | |
635 | | wc_uint32 |
636 | | wc_ucs_totitle(wc_uint32 ucs) |
637 | 0 | { |
638 | 0 | wc_map *conv = NULL; |
639 | 0 | if (ucs <= WC_C_UCS2_END) |
640 | 0 | conv = wc_map_search((wc_uint16)ucs, |
641 | 0 | ucs_totitle_map, N_ucs_totitle_map); |
642 | 0 | return conv ? (wc_uint32)(conv->code2) : ucs; |
643 | 0 | } |
644 | | |
645 | | wc_uint32 |
646 | | wc_ucs_precompose(wc_uint32 ucs1, wc_uint32 ucs2) |
647 | 16.2k | { |
648 | 16.2k | wc_map3 *map; |
649 | | |
650 | 16.2k | if (WcOption.use_combining && |
651 | 16.2k | ucs1 <= WC_C_UCS2_END && ucs2 <= WC_C_UCS2_END && |
652 | 16.2k | (map = wc_map3_search((wc_uint16)ucs1, (wc_uint16)ucs2, |
653 | 12.2k | ucs_precompose_map, N_ucs_precompose_map)) != NULL) |
654 | 5.53k | return map->code3; |
655 | 10.7k | return WC_C_UCS4_ERROR; |
656 | 16.2k | } |
657 | | |
658 | | wc_uint32 |
659 | | wc_ucs_to_fullwidth(wc_uint32 ucs) |
660 | 21.9M | { |
661 | 21.9M | wc_map *map; |
662 | | |
663 | 21.9M | if (ucs <= WC_C_UCS2_END && |
664 | 21.9M | (map = wc_map_search((wc_uint16)ucs, |
665 | 21.9M | ucs_fullwidth_map, N_ucs_fullwidth_map)) != NULL) |
666 | 537k | return map->code2; |
667 | 21.4M | return WC_C_UCS4_ERROR; |
668 | 21.9M | } |
669 | | |
670 | | int |
671 | | wc_ucs_put_tag(char *p) |
672 | 3.07k | { |
673 | 3.07k | int i; |
674 | | |
675 | 3.07k | if (p == NULL || *p == '\0') |
676 | 687 | return 0; |
677 | 194k | for (i = 1; i <= n_tag_map; i++) { |
678 | 194k | if (!strcasecmp(p, tag_map[i])) |
679 | 2.13k | return i; |
680 | 194k | } |
681 | 257 | if (n_tag_map + 1 >= MAX_TAG_MAP) |
682 | 2 | return 0; |
683 | 255 | n_tag_map++; |
684 | 255 | tag_map[n_tag_map] = p; |
685 | 255 | return n_tag_map; |
686 | 257 | } |
687 | | |
688 | | char * |
689 | | wc_ucs_get_tag(int ntag) |
690 | 27.6M | { |
691 | 27.6M | if (ntag <= 0 || ntag > n_tag_map) |
692 | 191 | return NULL; |
693 | 27.6M | return tag_map[ntag]; |
694 | 27.6M | } |
695 | | |
696 | | void |
697 | | wtf_push_ucs(Str os, wc_uint32 ucs, wc_status *st) |
698 | 45.5M | { |
699 | 45.5M | wc_ccs ccs; |
700 | | |
701 | 45.5M | if (ucs >= WC_C_LANGUAGE_TAG0 && ucs <= WC_C_CANCEL_TAG) { |
702 | 44.6k | if (! WcOption.use_language_tag) |
703 | 0 | return; |
704 | 44.6k | if (ucs == WC_C_LANGUAGE_TAG) |
705 | 3.83k | if (st->tag) |
706 | 246 | Strclear(st->tag); |
707 | 3.59k | else |
708 | 3.59k | st->tag = Strnew_size(MAX_TAG_LEN); |
709 | 40.7k | else if (ucs == WC_C_CANCEL_TAG) { |
710 | 665 | if (st->tag) |
711 | 469 | Strfree(st->tag); |
712 | 665 | st->tag = NULL; |
713 | 665 | st->ntag = 0; |
714 | 40.1k | } else if (st->tag && st->tag->length < MAX_TAG_LEN && |
715 | 40.1k | ucs >= WC_C_TAG_SPACE) |
716 | 12.2k | Strcat_char(st->tag, (char)(ucs & 0x7f)); |
717 | 44.6k | return; |
718 | 44.6k | } |
719 | 45.4M | if (st->tag) { |
720 | 3.07k | st->ntag = wc_ucs_put_tag(st->tag->ptr); |
721 | 3.07k | st->tag = NULL; |
722 | 3.07k | } |
723 | 45.4M | if (ucs < 0x80) { |
724 | 45.2M | if (st->ntag) |
725 | 45.0M | wtf_push(os, WC_CCS_UCS_TAG, wc_ucs_to_ucs_tag(ucs, st->ntag)); |
726 | 212k | else |
727 | 212k | Strcat_char(os, (char)ucs); |
728 | 45.2M | } else { |
729 | 209k | ccs = wc_ucs_to_ccs(ucs); |
730 | 209k | if (st->ntag && ucs <= WC_C_UNICODE_END) { |
731 | 197k | ccs = wc_ccs_ucs_to_ccs_ucs_tag(ccs); |
732 | 197k | ucs = wc_ucs_to_ucs_tag(ucs, st->ntag); |
733 | 197k | } |
734 | 209k | wtf_push(os, ccs, ucs); |
735 | 209k | } |
736 | 45.4M | } |
737 | | |
738 | | #endif |