Line | Count | Source |
1 | | |
2 | | #include "wc.h" |
3 | | #include "big5.h" |
4 | | #include "hkscs.h" |
5 | | #include "search.h" |
6 | | #include "wtf.h" |
7 | | #ifdef USE_UNICODE |
8 | | #include "ucs.h" |
9 | | #endif |
10 | | |
11 | | #define C0 WC_HKSCS_MAP_C0 |
12 | | #define GL WC_HKSCS_MAP_GL |
13 | 151k | #define C1 WC_HKSCS_MAP_C1 |
14 | 687k | #define LB WC_HKSCS_MAP_LB |
15 | 435k | #define UB WC_HKSCS_MAP_UB |
16 | 687k | #define UH WC_HKSCS_MAP_UH |
17 | | |
18 | | wc_uint8 WC_HKSCS_MAP[ 0x100 ] = { |
19 | | C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, |
20 | | C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, |
21 | | GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, |
22 | | GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, |
23 | | LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, |
24 | | LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, |
25 | | LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, |
26 | | LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0, |
27 | | |
28 | | C1, C1, C1, C1, C1, C1, C1, C1, UH, UH, UH, UH, UH, UH, UH, UH, |
29 | | UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, UH, |
30 | | UH, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
31 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
32 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
33 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
34 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
35 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1, |
36 | | }; |
37 | | |
38 | | wc_wchar_t |
39 | | wc_hkscs_to_cs128w(wc_wchar_t cc) |
40 | 107k | { |
41 | 107k | cc.code = WC_HKSCS_N(cc.code); |
42 | 107k | if (cc.code < 0x4000) |
43 | 86.7k | cc.ccs = WC_CCS_HKSCS_1; |
44 | 20.5k | else { |
45 | 20.5k | cc.ccs = WC_CCS_HKSCS_2; |
46 | 20.5k | cc.code -= 0x4000; |
47 | 20.5k | } |
48 | 107k | cc.code = WC_N_CS128W(cc.code); |
49 | 107k | return cc; |
50 | 107k | } |
51 | | |
52 | | wc_wchar_t |
53 | | wc_cs128w_to_hkscs(wc_wchar_t cc) |
54 | 107k | { |
55 | 107k | cc.code = WC_CS128W_N(cc.code); |
56 | 107k | if (cc.ccs == WC_CCS_HKSCS_2) |
57 | 20.5k | cc.code += 0x4000; |
58 | 107k | cc.ccs = WC_CCS_HKSCS; |
59 | 107k | cc.code = WC_N_HKSCS(cc.code); |
60 | 107k | return cc; |
61 | 107k | } |
62 | | |
63 | | wc_uint32 |
64 | | wc_hkscs_to_N(wc_uint32 c) |
65 | 6.58k | { |
66 | 6.58k | if (c < 0xA140) /* 0x8840 - 0xA0FE */ |
67 | 5.32k | return WC_HKSCS_N(c); |
68 | | /* 0xFA40 - 0xFEFE */ |
69 | 1.26k | return WC_HKSCS_N(c) - 0x59 * 0x9D; |
70 | 6.58k | } |
71 | | |
72 | | Str |
73 | | wc_conv_from_hkscs(Str is, wc_ces ces) |
74 | 186 | { |
75 | 186 | Str os; |
76 | 186 | wc_uchar *sp = (wc_uchar *)is->ptr; |
77 | 186 | wc_uchar *ep = sp + is->length; |
78 | 186 | wc_uchar *p; |
79 | 186 | int state = WC_HKSCS_NOSTATE; |
80 | 186 | wc_uint32 hkscs; |
81 | | |
82 | 480 | for (p = sp; p < ep && *p < 0x80; p++) |
83 | 294 | ; |
84 | 186 | if (p == ep) |
85 | 29 | return is; |
86 | 157 | os = Strnew_size(is->length); |
87 | 157 | if (p > sp) |
88 | 16 | Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp)); |
89 | | |
90 | 1.84M | for (; p < ep; p++) { |
91 | 1.84M | switch (state) { |
92 | 1.16M | case WC_HKSCS_NOSTATE: |
93 | 1.16M | switch (WC_HKSCS_MAP[*p]) { |
94 | 435k | case UB: |
95 | 687k | case UH: |
96 | 687k | state = WC_HKSCS_MBYTE1; |
97 | 687k | break; |
98 | 151k | case C1: |
99 | 151k | wtf_push_unknown(os, p, 1); |
100 | 151k | break; |
101 | 322k | default: |
102 | 322k | Strcat_char(os, (char)*p); |
103 | 322k | break; |
104 | 1.16M | } |
105 | 1.16M | break; |
106 | 1.16M | case WC_HKSCS_MBYTE1: |
107 | 687k | if (WC_HKSCS_MAP[*p] & LB) { |
108 | 349k | hkscs = ((wc_uint32)*(p-1) << 8) | *p; |
109 | 349k | if (*(p-1) >= 0xA1 && *(p-1) <= 0xF9) |
110 | 323k | wtf_push(os, WC_CCS_BIG5, hkscs); |
111 | 25.5k | else |
112 | 25.5k | wtf_push(os, WC_CCS_HKSCS, hkscs); |
113 | 349k | } else |
114 | 338k | wtf_push_unknown(os, p-1, 2); |
115 | 687k | state = WC_HKSCS_NOSTATE; |
116 | 687k | break; |
117 | 1.84M | } |
118 | 1.84M | } |
119 | 157 | switch (state) { |
120 | 22 | case WC_HKSCS_MBYTE1: |
121 | 22 | wtf_push_unknown(os, p-1, 1); |
122 | 22 | break; |
123 | 157 | } |
124 | 157 | return os; |
125 | 157 | } |
126 | | |
127 | | void |
128 | | wc_push_to_hkscs(Str os, wc_wchar_t cc, wc_status *st) |
129 | 10.8M | { |
130 | 20.5M | while (1) { |
131 | 20.5M | switch (cc.ccs) { |
132 | 1.89M | case WC_CCS_US_ASCII: |
133 | 1.89M | Strcat_char(os, (char)cc.code); |
134 | 1.89M | return; |
135 | 3.72M | case WC_CCS_BIG5_1: |
136 | 4.65M | case WC_CCS_BIG5_2: |
137 | 4.65M | cc = wc_cs94w_to_big5(cc); |
138 | 4.94M | case WC_CCS_BIG5: |
139 | 4.94M | Strcat_char(os, (char)(cc.code >> 8)); |
140 | 4.94M | Strcat_char(os, (char)(cc.code & 0xff)); |
141 | 4.94M | return; |
142 | 70.2k | case WC_CCS_HKSCS_1: |
143 | 81.6k | case WC_CCS_HKSCS_2: |
144 | 81.6k | cc = wc_cs128w_to_hkscs(cc); |
145 | 98.1k | case WC_CCS_HKSCS: |
146 | 98.1k | Strcat_char(os, (char)(cc.code >> 8)); |
147 | 98.1k | Strcat_char(os, (char)(cc.code & 0xff)); |
148 | 98.1k | return; |
149 | 2.89M | case WC_CCS_UNKNOWN_W: |
150 | 2.89M | if (!WcOption.no_replace) |
151 | 2.89M | Strcat_charp(os, WC_REPLACE_W); |
152 | 2.89M | return; |
153 | 1.00M | case WC_CCS_UNKNOWN: |
154 | 1.00M | if (!WcOption.no_replace) |
155 | 1.00M | Strcat_charp(os, WC_REPLACE); |
156 | 1.00M | return; |
157 | 9.67M | default: |
158 | 9.67M | #ifdef USE_UNICODE |
159 | 9.67M | if (WcOption.ucs_conv) |
160 | 9.67M | cc = wc_any_to_any_ces(cc, st); |
161 | 0 | else |
162 | 0 | #endif |
163 | 0 | cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; |
164 | 9.67M | continue; |
165 | 20.5M | } |
166 | 20.5M | } |
167 | 10.8M | } |
168 | | |
169 | | Str |
170 | | wc_char_conv_from_hkscs(wc_uchar c, wc_status *st) |
171 | 0 | { |
172 | 0 | static Str os; |
173 | 0 | static wc_uchar hkscsu; |
174 | 0 | wc_uint32 hkscs; |
175 | |
|
176 | 0 | if (st->state == -1) { |
177 | 0 | st->state = WC_HKSCS_NOSTATE; |
178 | 0 | os = Strnew_size(8); |
179 | 0 | } |
180 | |
|
181 | 0 | switch (st->state) { |
182 | 0 | case WC_HKSCS_NOSTATE: |
183 | 0 | switch (WC_HKSCS_MAP[c]) { |
184 | 0 | case UB: |
185 | 0 | case UH: |
186 | 0 | hkscsu = c; |
187 | 0 | st->state = WC_HKSCS_MBYTE1; |
188 | 0 | return NULL; |
189 | 0 | case C1: |
190 | 0 | break; |
191 | 0 | default: |
192 | 0 | Strcat_char(os, (char)c); |
193 | 0 | break; |
194 | 0 | } |
195 | 0 | break; |
196 | 0 | case WC_HKSCS_MBYTE1: |
197 | 0 | if (WC_HKSCS_MAP[c] & LB) { |
198 | 0 | hkscs = ((wc_uint32)hkscsu << 8) | c; |
199 | 0 | if (hkscsu >= 0xA1 && hkscsu <= 0xF9 && c >= 0xA1) |
200 | 0 | wtf_push(os, WC_CCS_BIG5, hkscs); |
201 | 0 | else |
202 | 0 | wtf_push(os, WC_CCS_HKSCS, hkscs); |
203 | 0 | } |
204 | 0 | break; |
205 | 0 | } |
206 | 0 | st->state = -1; |
207 | 0 | return os; |
208 | 0 | } |