Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | #include "wc.h" |
3 | | #include "johab.h" |
4 | | #include "wtf.h" |
5 | | #ifdef USE_UNICODE |
6 | | #include "ucs.h" |
7 | | #endif |
8 | | |
9 | | #define C0 WC_JOHAB_MAP_C0 |
10 | | #define GL WC_JOHAB_MAP_GL |
11 | | #define C1 WC_JOHAB_MAP_C1 |
12 | | #define GH WC_JOHAB_MAP_GH |
13 | | #define GB WC_JOHAB_MAP_GB |
14 | | #define JJ WC_JOHAB_MAP_JJ |
15 | | #define JB WC_JOHAB_MAP_JB |
16 | | #define HB WC_JOHAB_MAP_HB |
17 | | #define CJ WC_JOHAB_MAP_CJ |
18 | | #define CB WC_JOHAB_MAP_CB |
19 | | |
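/*
 * Byte classes by value range.  Row labels are inferred from how the table
 * is consumed by the decoders below (first-byte dispatch, second-byte
 * validation) -- confirm against johab.h.
 *
 *        00-1F 20-30 31-40 41-7E 7F 80 81-83 84-90 91-D3 D4-D7 D8-DE DF E0-F9 FA-FE FF
 * 1st:   C0    GL    GL    GL    C0 -  -     J     J     -     H     -  H     -     -
 * 2nd:   -     -     J     B     -  -  J     J     B     B     B     B  B     B     -
 *
 * class: C0    GL    GH    GB    C0 C1 CJ    JJ    JB    CB    HB    CB HB    CB    C1
 */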
27 | | |
/*
 * Byte classification table for JOHAB, indexed by the byte value (0x00-0xFF).
 *
 * Each entry is one of the WC_JOHAB_MAP_* class values, written here through
 * the short aliases defined above (C0, GL, C1, GH, GB, JJ, JB, HB, CJ, CB).
 * The decoders in this file mask an entry with WC_JOHAB_MAP_1 to dispatch on
 * a first byte, and with WC_JOHAB_MAP_LJ / WC_JOHAB_MAP_LH to validate the
 * second byte that follows a Hangul / Hanja first byte.
 *
 * One initializer row per 16 byte values; the comment before a row names the
 * byte values at which the class changes.
 */
wc_uint8 WC_JOHAB_MAP[ 0x100 ] = {
    /* 00-0F */
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
    /* 10-1F */
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
    /* 20-2F */
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    /* 30 is GL; 31-3F are GH */
    GL, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH,
    /* 40 is GH; 41-4F are GB */
    GH, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB,
    /* 50-5F */
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB,
    /* 60-6F */
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB,
    /* 70-7E are GB; 7F is C0 */
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, C0,

    /* 80 is C1; 81-83 are CJ; 84-8F are JJ */
    C1, CJ, CJ, CJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ,
    /* 90 is JJ; 91-9F are JB */
    JJ, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB,
    /* A0-AF */
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB,
    /* B0-BF */
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB,
    /* C0-CF */
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB,
    /* D0-D3 are JB; D4-D7 are CB; D8-DE are HB; DF is CB */
    JB, JB, JB, JB, CB, CB, CB, CB, HB, HB, HB, HB, HB, HB, HB, CB,
    /* E0-EF */
    HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB,
    /* F0-F9 are HB; FA-FE are CB; FF is C1 */
    HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, CB, CB, CB, CB, CB, C1,
};
54 | | |
55 | | static wc_uint8 johab1_N_map[ 3 ][ 32 ] = { |
56 | | { 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, |
57 | | 15,16,17,18,19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
58 | | { 0, 0, 0, 1, 2, 3, 4, 5, 0, 0, 6, 7, 8, 9,10,11, |
59 | | 0, 0,12,13,14,15,16,17, 0, 0,18,19,20,21, 0, 0 }, |
60 | | { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, |
61 | | 16,17, 0,18,19,20,21,22,23,24,25,26,27,28, 0, 0 } |
62 | | }; |
63 | | |
64 | | static wc_uint8 N_johab1_map[ 3 ][ 32 ] = { |
65 | | { 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17, |
66 | | 18,19,20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
67 | | { 3, 4, 5, 6, 7,10,11,12,13,14,15,18,19,20,21,22, |
68 | | 23,26,27,28,29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
69 | | { 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16, |
70 | | 17,19,20,21,22,23,24,25,26,27,28,29, 0, 0, 0, 0 } |
71 | | }; |
72 | | |
73 | | wc_wchar_t |
74 | | wc_johab_to_ksx1001(wc_wchar_t cc) |
75 | 432k | { |
76 | 432k | #ifdef USE_UNICODE |
77 | 432k | static wc_table *t = NULL; |
78 | 432k | #endif |
79 | | |
80 | 432k | switch (cc.ccs) { |
81 | 216k | case WC_CCS_JOHAB: |
82 | 216k | return wc_johab_to_ksx1001(wc_johab_to_cs128w(cc)); |
83 | 193k | case WC_CCS_JOHAB_1: |
84 | 202k | case WC_CCS_JOHAB_2: |
85 | 202k | #ifdef USE_UNICODE |
86 | 202k | if (WcOption.ucs_conv) { |
87 | 202k | if (t == NULL) |
88 | 1 | t = wc_get_ucs_table(WC_CCS_KS_X_1001); |
89 | 202k | cc = wc_any_to_any(cc, t); |
90 | 202k | } else |
91 | 0 | #endif |
92 | 0 | cc.ccs = WC_CCS_UNKNOWN_W; |
93 | 202k | break; |
94 | 14.0k | case WC_CCS_JOHAB_3: |
95 | 14.0k | if (cc.code >= 0x2121) |
96 | 13.5k | cc.ccs = WC_CCS_KS_X_1001; |
97 | 495 | else |
98 | 495 | cc.ccs = WC_CCS_UNKNOWN_W; |
99 | 14.0k | break; |
100 | 432k | } |
101 | 216k | return cc; |
102 | 432k | } |
103 | | |
104 | | wc_wchar_t |
105 | | wc_ksx1001_to_johab(wc_wchar_t cc) |
106 | 2.67k | { |
107 | 2.67k | cc.code &= 0x7f7f; |
108 | 2.67k | if ((cc.code >= 0x2121 && cc.code < 0x2421) || |
109 | 2.67k | (cc.code > 0x2453 && cc.code <= 0x2C7E) || |
110 | 2.67k | (cc.code >= 0x4A21 && cc.code <= 0x7D7E)) { |
111 | 1.58k | cc.ccs = WC_CCS_JOHAB_3; |
112 | 1.58k | return cc; |
113 | 1.58k | } |
114 | 1.09k | #ifdef USE_UNICODE |
115 | 1.09k | if (WcOption.ucs_conv) |
116 | 1.09k | cc = wc_ucs_to_johab(wc_any_to_ucs(cc)); |
117 | 0 | else |
118 | 0 | #endif |
119 | 0 | cc.ccs = WC_CCS_UNKNOWN_W; |
120 | 1.09k | return cc; |
121 | 2.67k | } |
122 | | |
#ifdef USE_UNICODE
/*
 * Convert a UCS code point to a JOHAB-family character.
 *
 *   - WC_C_UCS2_HANGUL .. WC_C_UCS2_HANGUL_END: the offset from the start of
 *     the range is turned into a JOHAB code with WC_N_JOHAB1 and tagged
 *     WC_CCS_JOHAB.
 *   - 0x3131 .. 0x3163: looked up in the WC_CCS_JOHAB_2 UCS table.
 *   - anything else: looked up in the WC_CCS_JOHAB_3 UCS table.
 */
wc_wchar_t
wc_ucs_to_johab(wc_uint32 ucs)
{
    wc_wchar_t cc;
    wc_table *table;
    wc_uint32 syllable;

    if (ucs >= WC_C_UCS2_HANGUL && ucs <= WC_C_UCS2_HANGUL_END) {
        syllable = ucs - WC_C_UCS2_HANGUL;
        cc.code = WC_N_JOHAB1(syllable);
        cc.ccs = WC_CCS_JOHAB;
        return cc;
    }

    if (ucs >= 0x3131 && ucs <= 0x3163)
        table = wc_get_ucs_table(WC_CCS_JOHAB_2);
    else
        table = wc_get_ucs_table(WC_CCS_JOHAB_3);
    cc = wc_ucs_to_any(ucs, table);
    return cc;
}
#endif
144 | | |
145 | | wc_uint32 |
146 | | wc_johab1_to_N(wc_uint32 code) |
147 | 895k | { |
148 | 895k | wc_uint32 a, b, c; |
149 | | |
150 | 895k | a = johab1_N_map[0][(code >> 10) & 0x1F]; |
151 | 895k | b = johab1_N_map[1][(code >> 5) & 0x1F]; |
152 | 895k | c = johab1_N_map[2][ code & 0x1F]; |
153 | 895k | if (a && b && c) |
154 | 849k | return ((a - 1) * 21 + (b - 1)) * 28 + (c - 1); |
155 | 46.0k | return WC_C_JOHAB_ERROR; |
156 | 895k | } |
157 | | |
158 | | wc_uint32 |
159 | | wc_N_to_johab1(wc_uint32 code) |
160 | 352k | { |
161 | 352k | wc_uint32 a, b, c; |
162 | | |
163 | 352k | a = N_johab1_map[0][(code / 28) / 21 & 0x1F]; |
164 | 352k | b = N_johab1_map[1][(code / 28) % 21 & 0x1F]; |
165 | 352k | c = N_johab1_map[2][ code % 28 & 0x1F]; |
166 | 352k | return 0x8000 | (a << 10) | (b << 5) | c; |
167 | 352k | } |
168 | | |
/*
 * johab3_to_ksx1001(ub, lb)
 *
 * Rewrite, in place, the two bytes of a JOHAB code whose first byte is
 * >= 0xD8 into 7-bit row/cell form.  Each JOHAB first byte covers two
 * rows: second bytes below 0xA1 land in the first row (0x31.. and 0x91..
 * both fold down into 0x21-0x7E), second bytes from 0xA1 up land in the
 * next row.
 *
 * Results lie in 0x1F21 - 0x2C7E and 0x4A21 - 0x7D7E
 * (0x1F21 - 0x207E are not in KS X 1001).
 *
 * ub and lb must be modifiable lvalues without side effects; both are
 * evaluated more than once.  The body is wrapped in do { } while (0) so the
 * macro behaves as a single statement, including inside an unbraced
 * if ... else.
 */
#define johab3_to_ksx1001(ub, lb) \
    do { \
        if ((ub) < 0xe0) { \
            (ub) = (((ub) - 0xd8) << 1) + 0x1f; \
        } else { \
            (ub) = (((ub) - 0xe0) << 1) + 0x4a; \
        } \
        if ((lb) < 0xa1) { \
            (lb) -= ((lb) < 0x91) ? 0x10 : 0x22; \
        } else { \
            (ub)++; \
            (lb) -= 0x80; \
        } \
    } while (0)
185 | | |
/*
 * ksx1001_to_johab3(ub, lb)
 *
 * Inverse of johab3_to_ksx1001: rewrite, in place, a 7-bit row/cell pair
 * into the two bytes of a JOHAB code.  Rows below 0x4A are counted from
 * 0x1F and packed two-per-first-byte starting at 0xD8; rows from 0x4A up
 * are counted from 0x4A and packed starting at 0xE0.  The odd row of each
 * pair uses second bytes lb + 0x80; the even row uses lb + 0x10 for cells
 * below 0x6F and lb + 0x22 for the rest.
 *
 * ub and lb must be modifiable lvalues without side effects; both are
 * evaluated more than once.  The body is wrapped in do { } while (0) so the
 * macro behaves as a single statement, including inside an unbraced
 * if ... else.
 */
#define ksx1001_to_johab3(ub, lb) \
    do { \
        if ((ub) < 0x4a) { \
            (ub) -= 0x1f; \
            (lb) += ((ub) & 0x1) ? 0x80 : (((lb) < 0x6f) ? 0x10 : 0x22); \
            (ub) = ((ub) >> 1) + 0xd8; \
        } else { \
            (ub) -= 0x4a; \
            (lb) += ((ub) & 0x1) ? 0x80 : (((lb) < 0x6f) ? 0x10 : 0x22); \
            (ub) = ((ub) >> 1) + 0xe0; \
        } \
    } while (0)
198 | | |
199 | | wc_wchar_t |
200 | | wc_johab_to_cs128w(wc_wchar_t cc) |
201 | 944k | { |
202 | 944k | wc_uint32 n; |
203 | 944k | wc_uchar ub, lb; |
204 | | |
205 | 944k | if (cc.code < 0xD800) { |
206 | 895k | n = WC_JOHAB1_N(cc.code); |
207 | 895k | if (n != WC_C_JOHAB_ERROR) { |
208 | 849k | cc.code = WC_N_CS94x128(n); |
209 | 849k | cc.ccs = WC_CCS_JOHAB_1; |
210 | 849k | } else { |
211 | 46.0k | n = WC_JOHAB2_N(cc.code); |
212 | 46.0k | cc.code = WC_N_CS128W(n); |
213 | 46.0k | cc.ccs = WC_CCS_JOHAB_2; |
214 | 46.0k | } |
215 | 895k | } else { |
216 | 48.9k | ub = cc.code >> 8; |
217 | 48.9k | lb = cc.code & 0xff; |
218 | 48.9k | johab3_to_ksx1001(ub, lb); |
219 | 48.9k | cc.code = ((wc_uint32)ub << 8) | lb; |
220 | 48.9k | cc.ccs = WC_CCS_JOHAB_3; |
221 | 48.9k | } |
222 | 944k | return cc; |
223 | 944k | } |
224 | | |
225 | | wc_wchar_t |
226 | | wc_cs128w_to_johab(wc_wchar_t cc) |
227 | 3.63M | { |
228 | 3.63M | wc_uint32 n; |
229 | 3.63M | wc_uchar ub, lb; |
230 | | |
231 | 3.63M | switch (cc.ccs) { |
232 | 350k | case WC_CCS_JOHAB_1: |
233 | 350k | n = WC_CS94x128_N(cc.code); |
234 | 350k | cc.code = WC_N_JOHAB1(n); |
235 | 350k | break; |
236 | 18.6k | case WC_CCS_JOHAB_2: |
237 | 18.6k | n = WC_CS128W_N(cc.code); |
238 | 18.6k | cc.code = WC_N_JOHAB2(n); |
239 | 18.6k | break; |
240 | 3.26M | case WC_CCS_JOHAB_3: |
241 | 3.26M | ub = (cc.code >> 8) & 0x7f; |
242 | 3.26M | lb = cc.code & 0x7f; |
243 | 3.26M | ksx1001_to_johab3(ub, lb); |
244 | 3.26M | cc.code = ((wc_uint32)ub << 8) | lb; |
245 | 3.63M | } |
246 | 3.63M | cc.ccs = WC_CCS_JOHAB; |
247 | 3.63M | return cc; |
248 | 3.63M | } |
249 | | |
250 | | Str |
251 | | wc_conv_from_johab(Str is, wc_ces ces) |
252 | 344 | { |
253 | 344 | Str os; |
254 | 344 | wc_uchar *sp = (wc_uchar *)is->ptr; |
255 | 344 | wc_uchar *ep = sp + is->length; |
256 | 344 | wc_uchar *p; |
257 | 344 | int state = WC_JOHAB_NOSTATE; |
258 | | |
259 | 612 | for (p = sp; p < ep && *p < 0x80; p++) |
260 | 268 | ; |
261 | 344 | if (p == ep) |
262 | 26 | return is; |
263 | 318 | os = Strnew_size(is->length); |
264 | 318 | if (p > sp) |
265 | 11 | Strcat_charp_n(os, is->ptr, (int)(p - sp)); |
266 | | |
267 | 1.18M | for (; p < ep; p++) { |
268 | 1.18M | switch (state) { |
269 | 756k | case WC_JOHAB_NOSTATE: |
270 | 756k | switch (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_1) { |
271 | 402k | case WC_JOHAB_MAP_UJ: |
272 | 402k | state = WC_JOHAB_HANGUL1; |
273 | 402k | break; |
274 | 30.5k | case WC_JOHAB_MAP_UH: |
275 | 30.5k | state = WC_JOHAB_HANJA1; |
276 | 30.5k | break; |
277 | 98.3k | case WC_JOHAB_MAP_C1: |
278 | 98.3k | wtf_push_unknown(os, p, 1); |
279 | 98.3k | break; |
280 | 224k | default: |
281 | 224k | Strcat_char(os, (char)*p); |
282 | 224k | break; |
283 | 756k | } |
284 | 756k | break; |
285 | 756k | case WC_JOHAB_HANGUL1: |
286 | 402k | if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LJ) |
287 | 368k | wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p); |
288 | 33.6k | else |
289 | 33.6k | wtf_push_unknown(os, p-1, 2); |
290 | 402k | state = WC_JOHAB_NOSTATE; |
291 | 402k | break; |
292 | 30.5k | case WC_JOHAB_HANJA1: |
293 | 30.5k | if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LH) |
294 | 24.2k | wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p); |
295 | 6.30k | else |
296 | 6.30k | wtf_push_unknown(os, p-1, 2); |
297 | 30.5k | state = WC_JOHAB_NOSTATE; |
298 | 30.5k | break; |
299 | 1.18M | } |
300 | 1.18M | } |
301 | 318 | switch (state) { |
302 | 14 | case WC_JOHAB_HANGUL1: |
303 | 25 | case WC_JOHAB_HANJA1: |
304 | 25 | wtf_push_unknown(os, p-1, 1); |
305 | 25 | break; |
306 | 318 | } |
307 | 318 | return os; |
308 | 318 | } |
309 | | |
310 | | void |
311 | | wc_push_to_johab(Str os, wc_wchar_t cc, wc_status *st) |
312 | 8.75M | { |
313 | 17.2M | while (1) { |
314 | 17.2M | switch (cc.ccs) { |
315 | 2.02M | case WC_CCS_US_ASCII: |
316 | 2.02M | Strcat_char(os, (char)cc.code); |
317 | 2.02M | return; |
318 | 0 | case WC_CCS_JOHAB_1: |
319 | 209 | case WC_CCS_JOHAB_2: |
320 | 3.24M | case WC_CCS_JOHAB_3: |
321 | 3.24M | cc = wc_cs128w_to_johab(cc); |
322 | 3.24M | case WC_CCS_JOHAB: |
323 | 3.24M | Strcat_char(os, (char)(cc.code >> 8)); |
324 | 3.24M | Strcat_char(os, (char)(cc.code & 0xff)); |
325 | 3.24M | return; |
326 | 2.67k | case WC_CCS_KS_X_1001: |
327 | 2.67k | cc = wc_ksx1001_to_johab(cc); |
328 | 2.67k | continue; |
329 | 3.20M | case WC_CCS_UNKNOWN_W: |
330 | 3.20M | if (!WcOption.no_replace) |
331 | 3.20M | Strcat_charp(os, WC_REPLACE_W); |
332 | 3.20M | return; |
333 | 283k | case WC_CCS_UNKNOWN: |
334 | 283k | if (!WcOption.no_replace) |
335 | 283k | Strcat_charp(os, WC_REPLACE); |
336 | 283k | return; |
337 | 8.48M | default: |
338 | 8.48M | #ifdef USE_UNICODE |
339 | 8.48M | if (WcOption.ucs_conv) |
340 | 8.48M | cc = wc_any_to_any_ces(cc, st); |
341 | 0 | else |
342 | 0 | #endif |
343 | 0 | cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; |
344 | 8.48M | continue; |
345 | 17.2M | } |
346 | 17.2M | } |
347 | 8.75M | } |
348 | | |
349 | | Str |
350 | | wc_char_conv_from_johab(wc_uchar c, wc_status *st) |
351 | 0 | { |
352 | 0 | static Str os; |
353 | 0 | static wc_uchar johabu; |
354 | |
|
355 | 0 | if (st->state == -1) { |
356 | 0 | st->state = WC_JOHAB_NOSTATE; |
357 | 0 | os = Strnew_size(8); |
358 | 0 | } |
359 | |
|
360 | 0 | switch (st->state) { |
361 | 0 | case WC_JOHAB_NOSTATE: |
362 | 0 | switch (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_1) { |
363 | 0 | case WC_JOHAB_MAP_UJ: |
364 | 0 | johabu = c; |
365 | 0 | st->state = WC_JOHAB_HANGUL1; |
366 | 0 | return NULL; |
367 | 0 | case WC_JOHAB_MAP_UH: |
368 | 0 | johabu = c; |
369 | 0 | st->state = WC_JOHAB_HANJA1; |
370 | 0 | return NULL; |
371 | 0 | case WC_JOHAB_MAP_C1: |
372 | 0 | break; |
373 | 0 | default: |
374 | 0 | Strcat_char(os, (char)c); |
375 | 0 | break; |
376 | 0 | } |
377 | 0 | break; |
378 | 0 | case WC_JOHAB_HANGUL1: |
379 | 0 | if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LJ) |
380 | 0 | wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c); |
381 | 0 | break; |
382 | 0 | case WC_JOHAB_HANJA1: |
383 | 0 | if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LH) |
384 | 0 | wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c); |
385 | 0 | break; |
386 | 0 | } |
387 | 0 | st->state = -1; |
388 | 0 | return os; |
389 | 0 | } |