Line | Count | Source |
1 | | |
2 | | #include "wc.h" |
3 | | #include "uhc.h" |
4 | | #include "wtf.h" |
5 | | #ifdef USE_UNICODE |
6 | | #include "ucs.h" |
7 | | #endif |
8 | | |
/* Short local aliases for the byte-class constants used in the table below. */
#define C0 WC_UHC_MAP_C0
#define GL WC_UHC_MAP_GL
#define C1 WC_UHC_MAP_C1
#define LB WC_UHC_MAP_LB
#define UB WC_UHC_MAP_UB

/*
 * Byte classification table for UHC input, indexed by the byte value
 * (16 entries per row).  The converters in this file treat a UB byte as
 * the first byte of a two-byte sequence and accept any byte whose class
 * has the LB bit set as its second byte.
 */
wc_uint8 WC_UHC_MAP[ 0x100 ] = {
    /* 0x00 - 0x1F: C0 */
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
    /* 0x20 - 0x3F: GL */
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    /* 0x40: GL, 0x41 - 0x5A: LB, 0x5B - 0x5F: GL */
    GL, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, GL, GL, GL, GL, GL,
    /* 0x60: GL, 0x61 - 0x7A: LB, 0x7B - 0x7E: GL, 0x7F: C0 */
    GL, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, GL, GL, GL, GL, C0,

    /* 0x80: C1, 0x81 - 0xFE: UB, 0xFF: C1 */
    C1, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1,
};
34 | | |
35 | | wc_wchar_t |
36 | | wc_uhc_to_cs128w(wc_wchar_t cc) |
37 | 2.91M | { |
38 | 2.91M | cc.code = WC_UHC_N(cc.code); |
39 | 2.91M | if (cc.code < 0x4000) |
40 | 2.88M | cc.ccs = WC_CCS_UHC_1; |
41 | 35.5k | else { |
42 | 35.5k | cc.ccs = WC_CCS_UHC_2; |
43 | 35.5k | cc.code -= 0x4000; |
44 | 35.5k | } |
45 | 2.91M | cc.code = WC_N_CS128W(cc.code); |
46 | 2.91M | return cc; |
47 | 2.91M | } |
48 | | |
49 | | wc_wchar_t |
50 | | wc_cs128w_to_uhc(wc_wchar_t cc) |
51 | 2.91M | { |
52 | 2.91M | cc.code = WC_CS128W_N(cc.code); |
53 | 2.91M | if (cc.ccs == WC_CCS_UHC_2) |
54 | 35.5k | cc.code += 0x4000; |
55 | 2.91M | cc.ccs = WC_CCS_UHC; |
56 | 2.91M | cc.code = WC_N_UHC(cc.code); |
57 | 2.91M | return cc; |
58 | 2.91M | } |
59 | | |
60 | | wc_uint32 |
61 | | wc_uhc_to_N(wc_uint32 c) |
62 | 875k | { |
63 | 875k | if (c <= 0xA1A0) /* 0x8141 - 0xA1A0 */ |
64 | 842k | return WC_UHC_N(c); |
65 | 32.5k | if (c <= 0xA2A0) /* 0xA240 - 0xA2A0 */ |
66 | 1.25k | return WC_UHC_N(c) - 0x5E; |
67 | 31.2k | if (c <= 0xA2E7) /* 0xA2E6 - 0xA2E7 */ |
68 | 376 | return WC_UHC_N(0xA2A0) - 0x5E + c - 0xA2E5; |
69 | | /* 0xA340 - 0xFEA0 */ |
70 | 30.8k | return WC_UHC_N(c) - ((c >> 8) - 0xA1) * 0x5E + 2; |
71 | 31.2k | } |
72 | | |
73 | | Str |
74 | | wc_conv_from_uhc(Str is, wc_ces ces) |
75 | 241 | { |
76 | 241 | Str os; |
77 | 241 | wc_uchar *sp = (wc_uchar *)is->ptr; |
78 | 241 | wc_uchar *ep = sp + is->length; |
79 | 241 | wc_uchar *p; |
80 | 241 | int state = WC_UHC_NOSTATE; |
81 | 241 | wc_uint32 uhc; |
82 | | |
83 | 700 | for (p = sp; p < ep && *p < 0x80; p++) |
84 | 459 | ; |
85 | 241 | if (p == ep) |
86 | 13 | return is; |
87 | 228 | os = Strnew_size(is->length); |
88 | 228 | if (p > sp) |
89 | 29 | Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp)); |
90 | | |
91 | 5.95M | for (; p < ep; p++) { |
92 | 5.95M | switch (state) { |
93 | 3.42M | case WC_UHC_NOSTATE: |
94 | 3.42M | switch (WC_UHC_MAP[*p]) { |
95 | 2.53M | case UB: |
96 | 2.53M | state = WC_UHC_MBYTE1; |
97 | 2.53M | break; |
98 | 206k | case C1: |
99 | 206k | wtf_push_unknown(os, p, 1); |
100 | 206k | break; |
101 | 685k | default: |
102 | 685k | Strcat_char(os, (char)*p); |
103 | 685k | break; |
104 | 3.42M | } |
105 | 3.42M | break; |
106 | 3.42M | case WC_UHC_MBYTE1: |
107 | 2.53M | if (WC_UHC_MAP[*p] & LB) { |
108 | 2.38M | uhc = ((wc_uint32)*(p-1) << 8) | *p; |
109 | 2.38M | if (*(p-1) >= 0xA1 && *p >= 0xA1 && |
110 | 874k | uhc != 0xA2E6 && uhc != 0xA2E7) |
111 | 873k | wtf_push(os, WC_CCS_KS_X_1001, uhc); |
112 | 1.51M | else |
113 | 1.51M | wtf_push(os, WC_CCS_UHC, uhc); |
114 | 2.38M | } else |
115 | 144k | wtf_push_unknown(os, p-1, 2); |
116 | 2.53M | state = WC_UHC_NOSTATE; |
117 | 2.53M | break; |
118 | 5.95M | } |
119 | 5.95M | } |
120 | 228 | switch (state) { |
121 | 39 | case WC_UHC_MBYTE1: |
122 | 39 | wtf_push_unknown(os, p-1, 1); |
123 | 39 | break; |
124 | 228 | } |
125 | 228 | return os; |
126 | 228 | } |
127 | | |
128 | | void |
129 | | wc_push_to_uhc(Str os, wc_wchar_t cc, wc_status *st) |
130 | 8.99M | { |
131 | 16.2M | while (1) { |
132 | 16.2M | switch (cc.ccs) { |
133 | 5.33M | case WC_CCS_US_ASCII: |
134 | 5.33M | Strcat_char(os, (char)cc.code); |
135 | 5.33M | return; |
136 | 392k | case WC_CCS_KS_X_1001: |
137 | 392k | Strcat_char(os, (char)((cc.code >> 8) | 0x80)); |
138 | 392k | Strcat_char(os, (char)((cc.code & 0xff) | 0x80)); |
139 | 392k | return; |
140 | 1.40M | case WC_CCS_UHC_1: |
141 | 1.40M | case WC_CCS_UHC_2: |
142 | 1.40M | cc = wc_cs128w_to_uhc(cc); |
143 | 1.99M | case WC_CCS_UHC: |
144 | 1.99M | Strcat_char(os, (char)(cc.code >> 8)); |
145 | 1.99M | Strcat_char(os, (char)(cc.code & 0xff)); |
146 | 1.99M | return; |
147 | 216k | case WC_CCS_UNKNOWN_W: |
148 | 216k | if (!WcOption.no_replace) |
149 | 216k | Strcat_charp(os, WC_REPLACE_W); |
150 | 216k | return; |
151 | 1.05M | case WC_CCS_UNKNOWN: |
152 | 1.05M | if (!WcOption.no_replace) |
153 | 1.05M | Strcat_charp(os, WC_REPLACE); |
154 | 1.05M | return; |
155 | 7.29M | default: |
156 | 7.29M | #ifdef USE_UNICODE |
157 | 7.29M | if (WcOption.ucs_conv) |
158 | 7.29M | cc = wc_any_to_any_ces(cc, st); |
159 | 0 | else |
160 | 0 | #endif |
161 | 0 | cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; |
162 | 7.29M | continue; |
163 | 16.2M | } |
164 | 16.2M | } |
165 | 8.99M | } |
166 | | |
167 | | Str |
168 | | wc_char_conv_from_uhc(wc_uchar c, wc_status *st) |
169 | 0 | { |
170 | 0 | static Str os; |
171 | 0 | static wc_uchar uhcu; |
172 | 0 | wc_uint32 uhc; |
173 | |
|
174 | 0 | if (st->state == -1) { |
175 | 0 | st->state = WC_UHC_NOSTATE; |
176 | 0 | os = Strnew_size(8); |
177 | 0 | } |
178 | |
|
179 | 0 | switch (st->state) { |
180 | 0 | case WC_UHC_NOSTATE: |
181 | 0 | switch (WC_UHC_MAP[c]) { |
182 | 0 | case UB: |
183 | 0 | uhcu = c; |
184 | 0 | st->state = WC_UHC_MBYTE1; |
185 | 0 | return NULL; |
186 | 0 | case C1: |
187 | 0 | break; |
188 | 0 | default: |
189 | 0 | Strcat_char(os, (char)c); |
190 | 0 | break; |
191 | 0 | } |
192 | 0 | break; |
193 | 0 | case WC_UHC_MBYTE1: |
194 | 0 | if (WC_UHC_MAP[c] & LB) { |
195 | 0 | uhc = ((wc_uint32)uhcu << 8) | c; |
196 | 0 | if (uhcu >= 0xA1 && c >= 0xA1 && |
197 | 0 | uhc != 0xA2E6 && uhc != 0xA2E7) |
198 | 0 | wtf_push(os, WC_CCS_KS_X_1001, uhc); |
199 | 0 | else |
200 | 0 | wtf_push(os, WC_CCS_UHC, uhc); |
201 | 0 | } |
202 | 0 | break; |
203 | 0 | } |
204 | 0 | st->state = -1; |
205 | 0 | return os; |
206 | 0 | } |