Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | #include "wc.h" |
3 | | #include "gbk.h" |
4 | | #include "search.h" |
5 | | #include "wtf.h" |
6 | | #ifdef USE_UNICODE |
7 | | #include "ucs.h" |
8 | | #endif |
9 | | |
10 | | #include "map/gb2312_gbk.map" |
11 | | |
12 | | #define C0 WC_GBK_MAP_C0 |
13 | | #define GL WC_GBK_MAP_GL |
14 | 0 | #define C1 WC_GBK_MAP_C1 |
15 | 0 | #define LB WC_GBK_MAP_LB |
16 | 0 | #define UB WC_GBK_MAP_UB |
17 | 0 | #define C80 WC_GBK_MAP_80 |
18 | | |
19 | | wc_uint8 WC_GBK_MAP[ 0x100 ] = { |
20 | | C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, |
21 | | C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, |
22 | | GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, |
23 | | GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, |
24 | | LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, |
25 | | LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, |
26 | | LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, |
27 | | LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0, |
28 | | |
29 | | C80,UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
30 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
31 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
32 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
33 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
34 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
35 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
36 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1, |
37 | | }; |
38 | | |
39 | | wc_ccs |
40 | 0 | wc_gb2312_or_gbk(wc_uint16 code) { |
41 | 0 | return wc_map_range_search(code, |
42 | 0 | gb2312_gbk_map, N_gb2312_gbk_map) |
43 | 0 | ? WC_CCS_GBK : WC_CCS_GB_2312; |
44 | 0 | } |
45 | | |
46 | | wc_wchar_t |
47 | | wc_gbk_to_cs128w(wc_wchar_t cc) |
48 | 437 | { |
49 | 437 | cc.code = WC_GBK_N(cc.code); |
50 | 437 | if (cc.code < 0x4000) |
51 | 437 | cc.ccs = WC_CCS_GBK_1; |
52 | 0 | else { |
53 | 0 | cc.ccs = WC_CCS_GBK_2; |
54 | 0 | cc.code -= 0x4000; |
55 | 0 | } |
56 | 437 | cc.code = WC_N_CS128W(cc.code); |
57 | 437 | return cc; |
58 | 437 | } |
59 | | |
60 | | wc_wchar_t |
61 | | wc_cs128w_to_gbk(wc_wchar_t cc) |
62 | 437 | { |
63 | 437 | cc.code = WC_CS128W_N(cc.code); |
64 | 437 | if (cc.ccs == WC_CCS_GBK_2) |
65 | 0 | cc.code += 0x4000; |
66 | 437 | cc.ccs = WC_CCS_GBK; |
67 | 437 | cc.code = WC_N_GBK(cc.code); |
68 | 437 | return cc; |
69 | 437 | } |
70 | | |
71 | | wc_uint32 |
72 | | wc_gbk_to_N(wc_uint32 c) |
73 | 0 | { |
74 | 0 | if (c <= 0xA1A0) /* 0x8140 - 0xA1A0 */ |
75 | 0 | return WC_GBK_N(c); |
76 | 0 | if (c <= 0xA2AA) /* 0xA240 - 0xA2A0, 0xA2A1 - 0xA2AA */ |
77 | 0 | return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E; |
78 | 0 | if (c <= 0xA6A0) /* 0xA240 - 0xA6A0 */ |
79 | 0 | return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A; |
80 | 0 | if (c <= 0xA6F5) /* 0xA6E0 - 0xA6F5 */ |
81 | 0 | return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A - 0x3F; |
82 | 0 | if (c <= 0xA8A0) /* 0xA7A0 - 0xA8A0 */ |
83 | 0 | return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16; |
84 | 0 | if (c <= 0xA8C0) /* 0xA8BB - 0xA8C0 */ |
85 | 0 | return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 - 0x1A; |
86 | | /* 0xA940 - 0xFEA0 */ |
87 | 0 | return WC_GBK_N(c) - ((c >> 8) - 0xA1) * 0x5E + 0x0A + 0x16 + 0x06; |
88 | 0 | } |
89 | | |
90 | | Str |
91 | | wc_conv_from_gbk(Str is, wc_ces ces) |
92 | 0 | { |
93 | 0 | Str os; |
94 | 0 | wc_uchar *sp = (wc_uchar *)is->ptr; |
95 | 0 | wc_uchar *ep = sp + is->length; |
96 | 0 | wc_uchar *p; |
97 | 0 | int state = WC_GBK_NOSTATE; |
98 | 0 | wc_uint32 gbk; |
99 | |
|
100 | 0 | for (p = sp; p < ep && *p < 0x80; p++) |
101 | 0 | ; |
102 | 0 | if (p == ep) |
103 | 0 | return is; |
104 | 0 | os = Strnew_size(is->length); |
105 | 0 | if (p > sp) |
106 | 0 | Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp)); |
107 | |
|
108 | 0 | for (; p < ep; p++) { |
109 | 0 | switch (state) { |
110 | 0 | case WC_GBK_NOSTATE: |
111 | 0 | switch (WC_GBK_MAP[*p]) { |
112 | 0 | case UB: |
113 | 0 | state = WC_GBK_MBYTE1; |
114 | 0 | break; |
115 | 0 | case C80: |
116 | 0 | wtf_push(os, WC_CCS_GBK_80, *p); |
117 | 0 | break; |
118 | 0 | case C1: |
119 | 0 | wtf_push_unknown(os, p, 1); |
120 | 0 | break; |
121 | 0 | default: |
122 | 0 | Strcat_char(os, (char)*p); |
123 | 0 | break; |
124 | 0 | } |
125 | 0 | break; |
126 | 0 | case WC_GBK_MBYTE1: |
127 | 0 | if (WC_GBK_MAP[*p] & LB) { |
128 | 0 | gbk = ((wc_uint32)*(p-1) << 8) | *p; |
129 | 0 | if (*(p-1) >= 0xA1 && *p >= 0xA1) |
130 | 0 | wtf_push(os, wc_gb2312_or_gbk(gbk), gbk); |
131 | 0 | else |
132 | 0 | wtf_push(os, WC_CCS_GBK, gbk); |
133 | 0 | } else |
134 | 0 | wtf_push_unknown(os, p-1, 2); |
135 | 0 | state = WC_GBK_NOSTATE; |
136 | 0 | break; |
137 | 0 | } |
138 | 0 | } |
139 | 0 | switch (state) { |
140 | 0 | case WC_GBK_MBYTE1: |
141 | 0 | wtf_push_unknown(os, p-1, 1); |
142 | 0 | break; |
143 | 0 | } |
144 | 0 | return os; |
145 | 0 | } |
146 | | |
147 | | void |
148 | | wc_push_to_gbk(Str os, wc_wchar_t cc, wc_status *st) |
149 | 928k | { |
150 | 1.85M | while (1) { |
151 | 1.85M | switch (cc.ccs) { |
152 | 16 | case WC_CCS_US_ASCII: |
153 | 16 | Strcat_char(os, (char)cc.code); |
154 | 16 | return; |
155 | 9.69k | case WC_CCS_GB_2312: |
156 | 9.69k | Strcat_char(os, (char)((cc.code >> 8) | 0x80)); |
157 | 9.69k | Strcat_char(os, (char)((cc.code & 0xff) | 0x80)); |
158 | 9.69k | return; |
159 | 0 | case WC_CCS_GBK_80: |
160 | 0 | Strcat_char(os, (char)(cc.code | 0x80)); |
161 | 0 | return; |
162 | 437 | case WC_CCS_GBK_1: |
163 | 437 | case WC_CCS_GBK_2: |
164 | 437 | cc = wc_cs128w_to_gbk(cc); |
165 | 437 | case WC_CCS_GBK: |
166 | 437 | Strcat_char(os, (char)(cc.code >> 8)); |
167 | 437 | Strcat_char(os, (char)(cc.code & 0xff)); |
168 | 437 | return; |
169 | 869k | case WC_CCS_UNKNOWN_W: |
170 | 869k | if (!WcOption.no_replace) |
171 | 869k | Strcat_charp(os, WC_REPLACE_W); |
172 | 869k | return; |
173 | 48.2k | case WC_CCS_UNKNOWN: |
174 | 48.2k | if (!WcOption.no_replace) |
175 | 48.2k | Strcat_charp(os, WC_REPLACE); |
176 | 48.2k | return; |
177 | 927k | default: |
178 | 927k | #ifdef USE_UNICODE |
179 | 927k | if (WcOption.ucs_conv) |
180 | 927k | cc = wc_any_to_any_ces(cc, st); |
181 | 0 | else |
182 | 0 | #endif |
183 | 0 | cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; |
184 | 927k | continue; |
185 | 1.85M | } |
186 | 1.85M | } |
187 | 928k | } |
188 | | |
189 | | Str |
190 | | wc_char_conv_from_gbk(wc_uchar c, wc_status *st) |
191 | 0 | { |
192 | 0 | static Str os; |
193 | 0 | static wc_uchar gbku; |
194 | 0 | wc_uint32 gbk; |
195 | |
|
196 | 0 | if (st->state == -1) { |
197 | 0 | st->state = WC_GBK_NOSTATE; |
198 | 0 | os = Strnew_size(8); |
199 | 0 | } |
200 | |
|
201 | 0 | switch (st->state) { |
202 | 0 | case WC_GBK_NOSTATE: |
203 | 0 | switch (WC_GBK_MAP[c]) { |
204 | 0 | case UB: |
205 | 0 | gbku = c; |
206 | 0 | st->state = WC_GBK_MBYTE1; |
207 | 0 | return NULL; |
208 | 0 | case C80: |
209 | 0 | wtf_push(os, WC_CCS_GBK_80, c); |
210 | 0 | break; |
211 | 0 | case C1: |
212 | 0 | break; |
213 | 0 | default: |
214 | 0 | Strcat_char(os, (char)c); |
215 | 0 | break; |
216 | 0 | } |
217 | 0 | break; |
218 | 0 | case WC_GBK_MBYTE1: |
219 | 0 | if (WC_GBK_MAP[c] & LB) { |
220 | 0 | gbk = ((wc_uint32)gbku << 8) | c; |
221 | 0 | if (gbku >= 0xA1 && c >= 0xA1) |
222 | 0 | wtf_push(os, wc_gb2312_or_gbk(gbk), gbk); |
223 | 0 | else |
224 | 0 | wtf_push(os, WC_CCS_GBK, gbk); |
225 | 0 | } |
226 | 0 | break; |
227 | 0 | } |
228 | 0 | st->state = -1; |
229 | 0 | return os; |
230 | 0 | } |