Line | Count | Source |
1 | | |
2 | | #include "wc.h" |
3 | | #include "johab.h" |
4 | | #include "wtf.h" |
5 | | #ifdef USE_UNICODE |
6 | | #include "ucs.h" |
7 | | #endif |
8 | | |
/* Short aliases for the byte-class constants; they keep the
 * WC_JOHAB_MAP initializer below compact. */
#define C0 WC_JOHAB_MAP_C0
#define GL WC_JOHAB_MAP_GL
#define C1 WC_JOHAB_MAP_C1
#define GH WC_JOHAB_MAP_GH
#define GB WC_JOHAB_MAP_GB
#define JJ WC_JOHAB_MAP_JJ
#define JB WC_JOHAB_MAP_JB
#define HB WC_JOHAB_MAP_HB
#define CJ WC_JOHAB_MAP_CJ
#define CB WC_JOHAB_MAP_CB

/*
 * Legend for WC_JOHAB_MAP, by byte-value range.
 *
 * The last row is the class stored in the table for each range.
 * NOTE(review): the two rows above it appear to give the role of the
 * byte as a first byte and as a second byte of a two-byte character
 * (J = Hangul, H = Hanja, B = both, '-' = not valid in that position);
 * confirm against the WC_JOHAB_MAP_* definitions in johab.h.
 *
  00-1F 20-30 31-40 41-7E 7F 80 81-83 84-90 91-D3 D4-D7 D8-DE DF E0-F9 FA-FE FF
  C0    GL    GL    GL    C0 -  -     J     J     -     H     -  H     -     -
  -     -     J     B     -  -  J     J     B     B     B     B  B     B     -

  C0    GL    GH    GB    C0 C1 CJ    JJ    JB    CB    HB    CB HB    CB    C1
*/

/* Class of every possible byte value, indexed by the byte itself. */
wc_uint8 WC_JOHAB_MAP[ 0x100 ] = {
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
/* 20 */
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
/* 30 31 */
    GL, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH, GH,
/* 40 41 */
    GH, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB,
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB,
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB,
    GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, GB, C0,

/* 80 83 84 */
    C1, CJ, CJ, CJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ, JJ,
/* 90 91 */
    JJ, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB,
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB,
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB,
    JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB, JB,
/* D3 D4 D7 D8 DF */
    JB, JB, JB, JB, CB, CB, CB, CB, HB, HB, HB, HB, HB, HB, HB, CB,
    HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, HB,
/* F9 FA FE FF */
    HB, HB, HB, HB, HB, HB, HB, HB, HB, HB, CB, CB, CB, CB, CB, C1,
};
54 | | |
55 | | static wc_uint8 johab1_N_map[ 3 ][ 32 ] = { |
56 | | { 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14, |
57 | | 15,16,17,18,19, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
58 | | { 0, 0, 0, 1, 2, 3, 4, 5, 0, 0, 6, 7, 8, 9,10,11, |
59 | | 0, 0,12,13,14,15,16,17, 0, 0,18,19,20,21, 0, 0 }, |
60 | | { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15, |
61 | | 16,17, 0,18,19,20,21,22,23,24,25,26,27,28, 0, 0 } |
62 | | }; |
63 | | |
64 | | static wc_uint8 N_johab1_map[ 3 ][ 32 ] = { |
65 | | { 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16,17, |
66 | | 18,19,20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
67 | | { 3, 4, 5, 6, 7,10,11,12,13,14,15,18,19,20,21,22, |
68 | | 23,26,27,28,29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, |
69 | | { 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15,16, |
70 | | 17,19,20,21,22,23,24,25,26,27,28,29, 0, 0, 0, 0 } |
71 | | }; |
72 | | |
73 | | wc_wchar_t |
74 | | wc_johab_to_ksx1001(wc_wchar_t cc) |
75 | 3.65k | { |
76 | 3.65k | #ifdef USE_UNICODE |
77 | 3.65k | static wc_table *t = NULL; |
78 | 3.65k | #endif |
79 | | |
80 | 3.65k | switch (cc.ccs) { |
81 | 1.82k | case WC_CCS_JOHAB: |
82 | 1.82k | return wc_johab_to_ksx1001(wc_johab_to_cs128w(cc)); |
83 | 758 | case WC_CCS_JOHAB_1: |
84 | 1.33k | case WC_CCS_JOHAB_2: |
85 | 1.33k | #ifdef USE_UNICODE |
86 | 1.33k | if (WcOption.ucs_conv) { |
87 | 1.33k | if (t == NULL) |
88 | 1 | t = wc_get_ucs_table(WC_CCS_KS_X_1001); |
89 | 1.33k | cc = wc_any_to_any(cc, t); |
90 | 1.33k | } else |
91 | 0 | #endif |
92 | 0 | cc.ccs = WC_CCS_UNKNOWN_W; |
93 | 1.33k | break; |
94 | 496 | case WC_CCS_JOHAB_3: |
95 | 496 | if (cc.code >= 0x2121) |
96 | 210 | cc.ccs = WC_CCS_KS_X_1001; |
97 | 286 | else |
98 | 286 | cc.ccs = WC_CCS_UNKNOWN_W; |
99 | 496 | break; |
100 | 3.65k | } |
101 | 1.82k | return cc; |
102 | 3.65k | } |
103 | | |
104 | | wc_wchar_t |
105 | | wc_ksx1001_to_johab(wc_wchar_t cc) |
106 | 23.6k | { |
107 | 23.6k | cc.code &= 0x7f7f; |
108 | 23.6k | if ((cc.code >= 0x2121 && cc.code < 0x2421) || |
109 | 20.8k | (cc.code > 0x2453 && cc.code <= 0x2C7E) || |
110 | 20.0k | (cc.code >= 0x4A21 && cc.code <= 0x7D7E)) { |
111 | 20.0k | cc.ccs = WC_CCS_JOHAB_3; |
112 | 20.0k | return cc; |
113 | 20.0k | } |
114 | 3.67k | #ifdef USE_UNICODE |
115 | 3.67k | if (WcOption.ucs_conv) |
116 | 3.67k | cc = wc_ucs_to_johab(wc_any_to_ucs(cc)); |
117 | 0 | else |
118 | 0 | #endif |
119 | 0 | cc.ccs = WC_CCS_UNKNOWN_W; |
120 | 3.67k | return cc; |
121 | 23.6k | } |
122 | | |
#ifdef USE_UNICODE
/*
 * Convert a UCS code point to a JOHAB-family character.
 *
 *  - WC_C_UCS2_HANGUL .. WC_C_UCS2_HANGUL_END: the offset from the start
 *    of that range is encoded with WC_N_JOHAB1() and tagged WC_CCS_JOHAB.
 *  - 0x3131 .. 0x3163: looked up in the WC_CCS_JOHAB_2 UCS table.
 *  - anything else: looked up in the WC_CCS_JOHAB_3 UCS table.
 */
wc_wchar_t
wc_ucs_to_johab(wc_uint32 ucs)
{
    wc_wchar_t cc;
    wc_table *table;

    if (ucs >= WC_C_UCS2_HANGUL && ucs <= WC_C_UCS2_HANGUL_END) {
        ucs -= WC_C_UCS2_HANGUL;
        cc.code = WC_N_JOHAB1(ucs);
        cc.ccs = WC_CCS_JOHAB;
        return cc;
    }

    if (ucs >= 0x3131 && ucs <= 0x3163)
        table = wc_get_ucs_table(WC_CCS_JOHAB_2);
    else
        table = wc_get_ucs_table(WC_CCS_JOHAB_3);

    cc = wc_ucs_to_any(ucs, table);
    return cc;
}
#endif
144 | | |
145 | | wc_uint32 |
146 | | wc_johab1_to_N(wc_uint32 code) |
147 | 491k | { |
148 | 491k | wc_uint32 a, b, c; |
149 | | |
150 | 491k | a = johab1_N_map[0][(code >> 10) & 0x1F]; |
151 | 491k | b = johab1_N_map[1][(code >> 5) & 0x1F]; |
152 | 491k | c = johab1_N_map[2][ code & 0x1F]; |
153 | 491k | if (a && b && c) |
154 | 424k | return ((a - 1) * 21 + (b - 1)) * 28 + (c - 1); |
155 | 67.7k | return WC_C_JOHAB_ERROR; |
156 | 491k | } |
157 | | |
158 | | wc_uint32 |
159 | | wc_N_to_johab1(wc_uint32 code) |
160 | 217k | { |
161 | 217k | wc_uint32 a, b, c; |
162 | | |
163 | 217k | a = N_johab1_map[0][(code / 28) / 21 & 0x1F]; |
164 | 217k | b = N_johab1_map[1][(code / 28) % 21 & 0x1F]; |
165 | 217k | c = N_johab1_map[2][ code % 28 & 0x1F]; |
166 | 217k | return 0x8000 | (a << 10) | (b << 5) | c; |
167 | 217k | } |
168 | | |
/*
 * johab3_to_ksx1001(ub, lb): rewrite a JOHAB lead/trail byte pair in
 * place as a 7-bit row/column pair.
 *
 * Resulting codes: 0x1F21 - 0x2C7E (lead bytes below 0xE0) and
 * 0x4A21 - 0x7D7E (lead bytes 0xE0 and above).
 * (0x1F21 - 0x207E are not in KS X 1001.)
 *
 * Each JOHAB lead byte covers two rows: trail bytes below 0xA1 stay in
 * the first row (subtract 0x10 below 0x91, else 0x22), trail bytes from
 * 0xA1 up go to the next row (subtract 0x80).
 *
 * Both arguments must be side-effect-free lvalues; they are evaluated
 * more than once.  The body is wrapped in do { } while (0) so the macro
 * behaves as a single statement, and the doubling is written as "* 2"
 * rather than "<< 1" so that a lead byte below the expected range does
 * not left-shift a negative value (undefined behaviour).
 */
#define johab3_to_ksx1001(ub, lb) \
    do { \
        if ((ub) < 0xe0) { \
            (ub) = (((ub) - 0xd8) * 2) + 0x1f; \
        } else { \
            (ub) = (((ub) - 0xe0) * 2) + 0x4a; \
        } \
        if ((lb) < 0xa1) { \
            (lb) -= ((lb) < 0x91) ? 0x10 : 0x22; \
        } else { \
            (ub)++; \
            (lb) -= 0x80; \
        } \
    } while (0)
185 | | |
/*
 * ksx1001_to_johab3(ub, lb): rewrite a 7-bit row/column pair in place
 * as a JOHAB lead/trail byte pair.
 *
 * Rows below 0x4A are counted from 0x1F and land on lead bytes from
 * 0xD8; rows from 0x4A land on lead bytes from 0xE0.  Two rows share
 * one lead byte: the odd row of each pair has 0x80 added to the column,
 * the even row has 0x10 (column below 0x6F) or 0x22 added.
 *
 * Both arguments must be side-effect-free lvalues; they are evaluated
 * more than once.  The body is wrapped in do { } while (0) so the macro
 * behaves as a single statement.
 */
#define ksx1001_to_johab3(ub, lb) \
    do { \
        if ((ub) < 0x4a) { \
            (ub) -= 0x1f; \
            (lb) += ((ub) & 0x1) ? 0x80 : (((lb) < 0x6f) ? 0x10 : 0x22); \
            (ub) = ((ub) >> 1) + 0xd8; \
        } else { \
            (ub) -= 0x4a; \
            (lb) += ((ub) & 0x1) ? 0x80 : (((lb) < 0x6f) ? 0x10 : 0x22); \
            (ub) = ((ub) >> 1) + 0xe0; \
        } \
    } while (0)
198 | | |
199 | | wc_wchar_t |
200 | | wc_johab_to_cs128w(wc_wchar_t cc) |
201 | 498k | { |
202 | 498k | wc_uint32 n; |
203 | 498k | wc_uchar ub, lb; |
204 | | |
205 | 498k | if (cc.code < 0xD800) { |
206 | 491k | n = WC_JOHAB1_N(cc.code); |
207 | 491k | if (n != WC_C_JOHAB_ERROR) { |
208 | 424k | cc.code = WC_N_CS94x128(n); |
209 | 424k | cc.ccs = WC_CCS_JOHAB_1; |
210 | 424k | } else { |
211 | 67.7k | n = WC_JOHAB2_N(cc.code); |
212 | 67.7k | cc.code = WC_N_CS128W(n); |
213 | 67.7k | cc.ccs = WC_CCS_JOHAB_2; |
214 | 67.7k | } |
215 | 491k | } else { |
216 | 6.66k | ub = cc.code >> 8; |
217 | 6.66k | lb = cc.code & 0xff; |
218 | 6.66k | johab3_to_ksx1001(ub, lb); |
219 | 6.66k | cc.code = ((wc_uint32)ub << 8) | lb; |
220 | 6.66k | cc.ccs = WC_CCS_JOHAB_3; |
221 | 6.66k | } |
222 | 498k | return cc; |
223 | 498k | } |
224 | | |
225 | | wc_wchar_t |
226 | | wc_cs128w_to_johab(wc_wchar_t cc) |
227 | 387k | { |
228 | 387k | wc_uint32 n; |
229 | 387k | wc_uchar ub, lb; |
230 | | |
231 | 387k | switch (cc.ccs) { |
232 | 212k | case WC_CCS_JOHAB_1: |
233 | 212k | n = WC_CS94x128_N(cc.code); |
234 | 212k | cc.code = WC_N_JOHAB1(n); |
235 | 212k | break; |
236 | 33.6k | case WC_CCS_JOHAB_2: |
237 | 33.6k | n = WC_CS128W_N(cc.code); |
238 | 33.6k | cc.code = WC_N_JOHAB2(n); |
239 | 33.6k | break; |
240 | 141k | case WC_CCS_JOHAB_3: |
241 | 141k | ub = (cc.code >> 8) & 0x7f; |
242 | 141k | lb = cc.code & 0x7f; |
243 | 141k | ksx1001_to_johab3(ub, lb); |
244 | 141k | cc.code = ((wc_uint32)ub << 8) | lb; |
245 | 387k | } |
246 | 387k | cc.ccs = WC_CCS_JOHAB; |
247 | 387k | return cc; |
248 | 387k | } |
249 | | |
250 | | Str |
251 | | wc_conv_from_johab(Str is, wc_ces ces) |
252 | 336 | { |
253 | 336 | Str os; |
254 | 336 | wc_uchar *sp = (wc_uchar *)is->ptr; |
255 | 336 | wc_uchar *ep = sp + is->length; |
256 | 336 | wc_uchar *p; |
257 | 336 | int state = WC_JOHAB_NOSTATE; |
258 | | |
259 | 547 | for (p = sp; p < ep && *p < 0x80; p++) |
260 | 211 | ; |
261 | 336 | if (p == ep) |
262 | 24 | return is; |
263 | 312 | os = Strnew_size(is->length); |
264 | 312 | if (p > sp) |
265 | 11 | Strcat_charp_n(os, is->ptr, (int)(p - sp)); |
266 | | |
267 | 564k | for (; p < ep; p++) { |
268 | 563k | switch (state) { |
269 | 311k | case WC_JOHAB_NOSTATE: |
270 | 311k | switch (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_1) { |
271 | 248k | case WC_JOHAB_MAP_UJ: |
272 | 248k | state = WC_JOHAB_HANGUL1; |
273 | 248k | break; |
274 | 3.82k | case WC_JOHAB_MAP_UH: |
275 | 3.82k | state = WC_JOHAB_HANJA1; |
276 | 3.82k | break; |
277 | 13.4k | case WC_JOHAB_MAP_C1: |
278 | 13.4k | wtf_push_unknown(os, p, 1); |
279 | 13.4k | break; |
280 | 46.0k | default: |
281 | 46.0k | Strcat_char(os, (char)*p); |
282 | 46.0k | break; |
283 | 311k | } |
284 | 311k | break; |
285 | 311k | case WC_JOHAB_HANGUL1: |
286 | 248k | if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LJ) |
287 | 245k | wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p); |
288 | 2.88k | else |
289 | 2.88k | wtf_push_unknown(os, p-1, 2); |
290 | 248k | state = WC_JOHAB_NOSTATE; |
291 | 248k | break; |
292 | 3.81k | case WC_JOHAB_HANJA1: |
293 | 3.81k | if (WC_JOHAB_MAP[*p] & WC_JOHAB_MAP_LH) |
294 | 3.18k | wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)*(p-1) << 8) | *p); |
295 | 631 | else |
296 | 631 | wtf_push_unknown(os, p-1, 2); |
297 | 3.81k | state = WC_JOHAB_NOSTATE; |
298 | 3.81k | break; |
299 | 563k | } |
300 | 563k | } |
301 | 312 | switch (state) { |
302 | 13 | case WC_JOHAB_HANGUL1: |
303 | 20 | case WC_JOHAB_HANJA1: |
304 | 20 | wtf_push_unknown(os, p-1, 1); |
305 | 20 | break; |
306 | 312 | } |
307 | 312 | return os; |
308 | 312 | } |
309 | | |
310 | | void |
311 | | wc_push_to_johab(Str os, wc_wchar_t cc, wc_status *st) |
312 | 1.83M | { |
313 | 3.52M | while (1) { |
314 | 3.52M | switch (cc.ccs) { |
315 | 1.49M | case WC_CCS_US_ASCII: |
316 | 1.49M | Strcat_char(os, (char)cc.code); |
317 | 1.49M | return; |
318 | 0 | case WC_CCS_JOHAB_1: |
319 | 221 | case WC_CCS_JOHAB_2: |
320 | 138k | case WC_CCS_JOHAB_3: |
321 | 138k | cc = wc_cs128w_to_johab(cc); |
322 | 143k | case WC_CCS_JOHAB: |
323 | 143k | Strcat_char(os, (char)(cc.code >> 8)); |
324 | 143k | Strcat_char(os, (char)(cc.code & 0xff)); |
325 | 143k | return; |
326 | 23.6k | case WC_CCS_KS_X_1001: |
327 | 23.6k | cc = wc_ksx1001_to_johab(cc); |
328 | 23.6k | continue; |
329 | 33.1k | case WC_CCS_UNKNOWN_W: |
330 | 33.1k | if (!WcOption.no_replace) |
331 | 33.1k | Strcat_charp(os, WC_REPLACE_W); |
332 | 33.1k | return; |
333 | 166k | case WC_CCS_UNKNOWN: |
334 | 166k | if (!WcOption.no_replace) |
335 | 166k | Strcat_charp(os, WC_REPLACE); |
336 | 166k | return; |
337 | 1.66M | default: |
338 | 1.66M | #ifdef USE_UNICODE |
339 | 1.66M | if (WcOption.ucs_conv) |
340 | 1.66M | cc = wc_any_to_any_ces(cc, st); |
341 | 0 | else |
342 | 0 | #endif |
343 | 0 | cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; |
344 | 1.66M | continue; |
345 | 3.52M | } |
346 | 3.52M | } |
347 | 1.83M | } |
348 | | |
349 | | Str |
350 | | wc_char_conv_from_johab(wc_uchar c, wc_status *st) |
351 | 0 | { |
352 | 0 | static Str os; |
353 | 0 | static wc_uchar johabu; |
354 | |
|
355 | 0 | if (st->state == -1) { |
356 | 0 | st->state = WC_JOHAB_NOSTATE; |
357 | 0 | os = Strnew_size(8); |
358 | 0 | } |
359 | |
|
360 | 0 | switch (st->state) { |
361 | 0 | case WC_JOHAB_NOSTATE: |
362 | 0 | switch (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_1) { |
363 | 0 | case WC_JOHAB_MAP_UJ: |
364 | 0 | johabu = c; |
365 | 0 | st->state = WC_JOHAB_HANGUL1; |
366 | 0 | return NULL; |
367 | 0 | case WC_JOHAB_MAP_UH: |
368 | 0 | johabu = c; |
369 | 0 | st->state = WC_JOHAB_HANJA1; |
370 | 0 | return NULL; |
371 | 0 | case WC_JOHAB_MAP_C1: |
372 | 0 | break; |
373 | 0 | default: |
374 | 0 | Strcat_char(os, (char)c); |
375 | 0 | break; |
376 | 0 | } |
377 | 0 | break; |
378 | 0 | case WC_JOHAB_HANGUL1: |
379 | 0 | if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LJ) |
380 | 0 | wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c); |
381 | 0 | break; |
382 | 0 | case WC_JOHAB_HANJA1: |
383 | 0 | if (WC_JOHAB_MAP[c] & WC_JOHAB_MAP_LH) |
384 | 0 | wtf_push(os, WC_CCS_JOHAB, ((wc_uint32)johabu << 8) | c); |
385 | 0 | break; |
386 | 0 | } |
387 | 0 | st->state = -1; |
388 | 0 | return os; |
389 | 0 | } |