Line | Count | Source |
1 | | |
2 | | #include "wc.h" |
3 | | #include "viet.h" |
4 | | #include "wtf.h" |
5 | | #include "search.h" |
6 | | #ifdef USE_UNICODE |
7 | | #include "ucs.h" |
8 | | #endif |
9 | | #include "map/tcvn57123_tcvn5712.map" |
10 | | |
11 | | wc_uint8 wc_c0_tcvn57122_map[ 0x20 ] = { |
12 | | 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
13 | | 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, |
14 | | }; |
15 | | wc_uint8 wc_c0_viscii112_map[ 0x20 ] = { |
16 | | 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
17 | | 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, |
18 | | }; |
19 | | wc_uint8 wc_c0_vps2_map[ 0x20 ] = { |
20 | | 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
21 | | 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, |
22 | | }; |
23 | | static wc_uint8 tcvn5712_precompose_map[ 0x100 ] = { |
24 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
25 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
26 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
27 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
28 | | /* A E I O */ |
29 | | 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, |
30 | | /* U Y */ |
31 | | 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, |
32 | | /* a e i o */ |
33 | | 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, |
34 | | /* u y */ |
35 | | 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, |
36 | | |
37 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
38 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
39 | | /* A( A^ E^ O^ O+ U+ a( a^ e^ o^ o+ u+ */ |
40 | | 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, |
41 | | /* ` ? ~ ' . */ |
42 | | 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
43 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
44 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
45 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
46 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
47 | | }; |
48 | | static wc_uint8 cp1258_precompose_map[ 0x100 ] = { |
49 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
50 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
51 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
52 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
53 | | /* A E I O */ |
54 | | 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, |
55 | | /* U Y */ |
56 | | 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, |
57 | | /* a e i o */ |
58 | | 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, |
59 | | /* u y */ |
60 | | 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, |
61 | | |
62 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
63 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
64 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
65 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
66 | | /* A^ A( E^ ` */ |
67 | | 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, |
68 | | /* ? O^ O+ U+ ~ */ |
69 | | 0, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 2, 0, |
70 | | /* a^ a( e^ ' */ |
71 | | 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, |
72 | | /* . o^ o+ u+ */ |
73 | | 0, 0, 2, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, |
74 | | }; |
75 | | |
76 | | wc_uint32 |
77 | | wc_tcvn5712_precompose(wc_uchar c1, wc_uchar c2) |
78 | 19.7k | { |
79 | 19.7k | if (tcvn5712_precompose_map[c1] == 1 && tcvn5712_precompose_map[c2] == 2) |
80 | 2.13k | return ((wc_uint32)c1 << 8) | c2; |
81 | 17.6k | else |
82 | 17.6k | return 0; |
83 | 19.7k | } |
84 | | |
85 | | wc_wchar_t |
86 | | wc_tcvn57123_to_tcvn5712(wc_wchar_t cc) |
87 | 2.14k | { |
88 | 2.14k | wc_map *map; |
89 | | |
90 | 2.14k | map = wc_map_search((wc_uint16)(cc.code & 0x7f7f), |
91 | 2.14k | tcvn57123_tcvn5712_map, N_tcvn57123_tcvn5712_map); |
92 | 2.14k | if (map) { |
93 | 0 | cc.ccs = (map->code2 < 0x20) ? WC_CCS_TCVN_5712_2 : WC_CCS_TCVN_5712_1; |
94 | 0 | cc.code = map->code2 | 0x80; |
95 | 2.14k | } else { |
96 | 2.14k | cc.ccs = WC_CCS_UNKNOWN; |
97 | 2.14k | } |
98 | 2.14k | return cc; |
99 | 2.14k | } |
100 | | |
101 | | wc_uint32 |
102 | | wc_cp1258_precompose(wc_uchar c1, wc_uchar c2) |
103 | 40.2k | { |
104 | 40.2k | if (cp1258_precompose_map[c1] == 1 && cp1258_precompose_map[c2] == 2) |
105 | 3.97k | return ((wc_uint32)c1 << 8) | c2; |
106 | 36.2k | else |
107 | 36.2k | return 0; |
108 | 40.2k | } |
109 | | |
110 | | Str |
111 | | wc_conv_from_viet(Str is, wc_ces ces) |
112 | 383 | { |
113 | 383 | Str os; |
114 | 383 | wc_uchar *sp = (wc_uchar *)is->ptr; |
115 | 383 | wc_uchar *ep = sp + is->length; |
116 | 383 | wc_uchar *p; |
117 | 383 | wc_ccs ccs1 = WcCesInfo[WC_CCS_INDEX(ces)].gset[1].ccs; |
118 | 383 | wc_ccs ccs2 = WcCesInfo[WC_CCS_INDEX(ces)].gset[2].ccs; |
119 | 383 | wc_uint8 *map = NULL; |
120 | | |
121 | 383 | switch (ces) { |
122 | 109 | case WC_CES_TCVN_5712: |
123 | 109 | map = wc_c0_tcvn57122_map; |
124 | 109 | break; |
125 | 262 | case WC_CES_VISCII_11: |
126 | 262 | map = wc_c0_viscii112_map; |
127 | 262 | break; |
128 | 12 | case WC_CES_VPS: |
129 | 12 | map = wc_c0_vps2_map; |
130 | 12 | break; |
131 | 383 | } |
132 | | |
133 | 383 | wc_create_detect_map(ces, WC_FALSE); |
134 | 17.9k | for (p = sp; p < ep && ! WC_DETECT_MAP[*p]; p++) |
135 | 17.5k | ; |
136 | 383 | if (p == ep) |
137 | 82 | return is; |
138 | 301 | os = Strnew_size(is->length); |
139 | 301 | if (p > sp) |
140 | 44 | Strcat_charp_n(os, is->ptr, (int)(p - sp)); |
141 | | |
142 | 14.6M | for (; p < ep; p++) { |
143 | 14.6M | if (*p & 0x80) |
144 | 12.6M | wtf_push(os, ccs1, (wc_uint32)*p); |
145 | 1.91M | else if (*p < 0x20 && map[*p]) |
146 | 81.5k | wtf_push(os, ccs2, (wc_uint32)*p); |
147 | 1.83M | else |
148 | 1.83M | Strcat_char(os, (char)*p); |
149 | 14.6M | } |
150 | 301 | return os; |
151 | 383 | } |
152 | | |
153 | | void |
154 | | wc_push_to_viet(Str os, wc_wchar_t cc, wc_status *st) |
155 | 16.1M | { |
156 | 16.1M | wc_ccs ccs1 = st->ces_info->gset[1].ccs; |
157 | 16.1M | wc_ccs ccs2 = 0, ccs3 = 0; |
158 | 16.1M | wc_uint8 *map = NULL; |
159 | | |
160 | 16.1M | switch (st->ces_info->id) { |
161 | 6.45M | case WC_CES_CP1258: |
162 | 6.45M | ccs3 = st->ces_info->gset[2].ccs; |
163 | 6.45M | break; |
164 | 5.54M | case WC_CES_TCVN_5712: |
165 | 5.54M | map = wc_c0_tcvn57122_map; |
166 | 5.54M | ccs2 = st->ces_info->gset[2].ccs; |
167 | 5.54M | ccs3 = st->ces_info->gset[3].ccs; |
168 | 5.54M | break; |
169 | 1.83M | case WC_CES_VISCII_11: |
170 | 1.83M | map = wc_c0_viscii112_map; |
171 | 1.83M | ccs2 = st->ces_info->gset[2].ccs; |
172 | 1.83M | break; |
173 | 2.33M | case WC_CES_VPS: |
174 | 2.33M | map = wc_c0_vps2_map; |
175 | 2.33M | ccs2 = st->ces_info->gset[2].ccs; |
176 | 2.33M | break; |
177 | 16.1M | } |
178 | | |
179 | 28.3M | while (1) { |
180 | 28.3M | if (cc.ccs == ccs1) { |
181 | 1.86M | Strcat_char(os, (char)(cc.code | 0x80)); |
182 | 1.86M | return; |
183 | 26.4M | } else if (cc.ccs == ccs2) { |
184 | 250k | Strcat_char(os, (char)(cc.code & 0x7f)); |
185 | 250k | return; |
186 | 26.2M | } else if (cc.ccs == ccs3) { |
187 | 6.38M | Strcat_char(os, (char)((cc.code >> 8) & 0xff)); |
188 | 6.38M | Strcat_char(os, (char)(cc.code & 0xff)); |
189 | 6.38M | return; |
190 | 6.38M | } |
191 | 19.8M | switch (cc.ccs) { |
192 | 2.52M | case WC_CCS_US_ASCII: |
193 | 2.52M | if (cc.code < 0x20 && map && map[cc.code]) |
194 | 117k | Strcat_char(os, ' '); |
195 | 2.40M | else |
196 | 2.40M | Strcat_char(os, (char)cc.code); |
197 | 2.52M | return; |
198 | 4.29M | case WC_CCS_UNKNOWN_W: |
199 | 4.29M | if (!WcOption.no_replace) |
200 | 4.29M | Strcat_charp(os, WC_REPLACE_W); |
201 | 4.29M | return; |
202 | 845k | case WC_CCS_UNKNOWN: |
203 | 845k | if (!WcOption.no_replace) |
204 | 845k | Strcat_charp(os, WC_REPLACE); |
205 | 845k | return; |
206 | 12.1M | default: |
207 | 12.1M | #ifdef USE_UNICODE |
208 | 12.1M | if (WcOption.ucs_conv) |
209 | 12.1M | cc = wc_any_to_any_ces(cc, st); |
210 | 0 | else |
211 | 0 | #endif |
212 | 0 | cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; |
213 | 12.1M | continue; |
214 | 19.8M | } |
215 | 19.8M | } |
216 | 16.1M | } |
217 | | |
218 | | Str |
219 | | wc_char_conv_from_viet(wc_uchar c, wc_status *st) |
220 | 0 | { |
221 | 0 | Str os = Strnew_size(1); |
222 | 0 | wc_uint8 *map = NULL; |
223 | |
|
224 | 0 | switch (st->ces_info->id) { |
225 | 0 | case WC_CES_TCVN_5712: |
226 | 0 | map = wc_c0_tcvn57122_map; |
227 | 0 | break; |
228 | 0 | case WC_CES_VISCII_11: |
229 | 0 | map = wc_c0_viscii112_map; |
230 | 0 | break; |
231 | 0 | case WC_CES_VPS: |
232 | 0 | map = wc_c0_vps2_map; |
233 | 0 | break; |
234 | 0 | } |
235 | | |
236 | 0 | if (c & 0x80) |
237 | 0 | wtf_push(os, st->ces_info->gset[1].ccs, (wc_uint32)c); |
238 | 0 | else if (c < 0x20 && map[c]) |
239 | 0 | wtf_push(os, st->ces_info->gset[2].ccs, (wc_uint32)c); |
240 | 0 | else |
241 | | Strcat_char(os, (char)c); |
242 | 0 | return os; |
243 | 0 | } |