Line | Count | Source |
1 | | |
2 | | #include "wc.h" |
3 | | #include "big5.h" |
4 | | #include "search.h" |
5 | | #include "wtf.h" |
6 | | #ifdef USE_UNICODE |
7 | | #include "ucs.h" |
8 | | #endif |
9 | | |
/*
 * Short file-local aliases for the byte-class constants, so that the
 * 16-column table below stays readable.  The same aliases are used as
 * case labels / masks by the conversion routines later in this file.
 */
#define C0 WC_BIG5_MAP_C0
#define GL WC_BIG5_MAP_GL
#define C1 WC_BIG5_MAP_C1
#define LB WC_BIG5_MAP_LB
#define UB WC_BIG5_MAP_UB

/*
 * Byte classification table for Big5 input, indexed by the raw byte value.
 * Each row below covers 16 consecutive byte values.
 *
 * NOTE(review): the decoders test trail bytes with `WC_BIG5_MAP[b] & LB`,
 * so presumably the UB value also has the LB bit set (making 0xA1-0xFE
 * acceptable as a second byte) -- confirm against the definitions in big5.h.
 */
wc_uint8 WC_BIG5_MAP[ 0x100 ] = {
    /* 0x00-0x1F: C0 */
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
    C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0,
    /* 0x20-0x3F: GL */
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL,
    /* 0x40-0x7E: LB; 0x7F is classed as C0 */
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB,
    LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0,

    /* 0x80-0xA0: C1 */
    C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
    C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1,
    /* 0xA1-0xFE: UB; 0xFF is classed as C1 */
    C1, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB,
    UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1,
};
35 | | |
36 | | wc_wchar_t |
37 | | wc_big5_to_cs94w(wc_wchar_t cc) |
38 | 8.96M | { |
39 | 8.96M | cc.code = WC_BIG5_N(cc.code); |
40 | 8.96M | if (cc.code < WC_C_BIG5_2_BASE) |
41 | 7.16M | cc.ccs = WC_CCS_BIG5_1; |
42 | 1.80M | else { |
43 | 1.80M | cc.ccs = WC_CCS_BIG5_2; |
44 | 1.80M | cc.code -= WC_C_BIG5_2_BASE; |
45 | 1.80M | } |
46 | 8.96M | cc.code = WC_N_CS94W(cc.code); |
47 | 8.96M | return cc; |
48 | 8.96M | } |
49 | | |
50 | | wc_wchar_t |
51 | | wc_cs94w_to_big5(wc_wchar_t cc) |
52 | 8.96M | { |
53 | 8.96M | cc.code = WC_CS94W_N(cc.code); |
54 | 8.96M | if (cc.ccs == WC_CCS_BIG5_2) |
55 | 1.80M | cc.code += WC_C_BIG5_2_BASE; |
56 | 8.96M | cc.code = WC_N_BIG5(cc.code); |
57 | 8.96M | cc.ccs = WC_CCS_BIG5; |
58 | 8.96M | return cc; |
59 | 8.96M | } |
60 | | |
61 | | Str |
62 | | wc_conv_from_big5(Str is, wc_ces ces) |
63 | 219 | { |
64 | 219 | Str os; |
65 | 219 | wc_uchar *sp = (wc_uchar *)is->ptr; |
66 | 219 | wc_uchar *ep = sp + is->length; |
67 | 219 | wc_uchar *p; |
68 | 219 | int state = WC_BIG5_NOSTATE; |
69 | | |
70 | 1.04M | for (p = sp; p < ep && *p < 0x80; p++) |
71 | 1.04M | ; |
72 | 219 | if (p == ep) |
73 | 49 | return is; |
74 | 170 | os = Strnew_size(is->length); |
75 | 170 | if (p > sp) |
76 | 20 | Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp)); |
77 | | |
78 | 1.19M | for (; p < ep; p++) { |
79 | 1.19M | switch (state) { |
80 | 774k | case WC_BIG5_NOSTATE: |
81 | 774k | switch (WC_BIG5_MAP[*p]) { |
82 | 420k | case UB: |
83 | 420k | state = WC_BIG5_MBYTE1; |
84 | 420k | break; |
85 | 105k | case C1: |
86 | 105k | wtf_push_unknown(os, p, 1); |
87 | 105k | break; |
88 | 249k | default: |
89 | 249k | Strcat_char(os, (char)*p); |
90 | 249k | break; |
91 | 774k | } |
92 | 774k | break; |
93 | 774k | case WC_BIG5_MBYTE1: |
94 | 420k | if (WC_BIG5_MAP[*p] & LB) |
95 | 372k | wtf_push(os, WC_CCS_BIG5, ((wc_uint32)*(p-1) << 8) | *p); |
96 | 47.5k | else |
97 | 47.5k | wtf_push_unknown(os, p-1, 2); |
98 | 420k | state = WC_BIG5_NOSTATE; |
99 | 420k | break; |
100 | 1.19M | } |
101 | 1.19M | } |
102 | 170 | switch (state) { |
103 | 20 | case WC_BIG5_MBYTE1: |
104 | 20 | wtf_push_unknown(os, p-1, 1); |
105 | 20 | break; |
106 | 170 | } |
107 | 170 | return os; |
108 | 170 | } |
109 | | |
110 | | void |
111 | | wc_push_to_big5(Str os, wc_wchar_t cc, wc_status *st) |
112 | 5.82M | { |
113 | 11.4M | while (1) { |
114 | 11.4M | switch (cc.ccs) { |
115 | 20.5k | case WC_CCS_US_ASCII: |
116 | 20.5k | Strcat_char(os, (char)cc.code); |
117 | 20.5k | return; |
118 | 2.57M | case WC_CCS_BIG5_1: |
119 | 2.60M | case WC_CCS_BIG5_2: |
120 | 2.60M | cc = wc_cs94w_to_big5(cc); |
121 | 2.60M | case WC_CCS_BIG5: |
122 | 2.60M | Strcat_char(os, (char)(cc.code >> 8)); |
123 | 2.60M | Strcat_char(os, (char)(cc.code & 0xff)); |
124 | 2.60M | return; |
125 | 1.66M | case WC_CCS_UNKNOWN_W: |
126 | 1.66M | if (!WcOption.no_replace) |
127 | 1.66M | Strcat_charp(os, WC_REPLACE_W); |
128 | 1.66M | return; |
129 | 1.53M | case WC_CCS_UNKNOWN: |
130 | 1.53M | if (!WcOption.no_replace) |
131 | 1.53M | Strcat_charp(os, WC_REPLACE); |
132 | 1.53M | return; |
133 | 5.62M | default: |
134 | 5.62M | #ifdef USE_UNICODE |
135 | 5.62M | if (WcOption.ucs_conv) |
136 | 5.62M | cc = wc_any_to_any_ces(cc, st); |
137 | 0 | else |
138 | 0 | #endif |
139 | 0 | cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; |
140 | 5.62M | continue; |
141 | 11.4M | } |
142 | 11.4M | } |
143 | 5.82M | } |
144 | | |
145 | | Str |
146 | | wc_char_conv_from_big5(wc_uchar c, wc_status *st) |
147 | 0 | { |
148 | 0 | static Str os; |
149 | 0 | static wc_uchar big5u; |
150 | |
|
151 | 0 | if (st->state == -1) { |
152 | 0 | st->state = WC_BIG5_NOSTATE; |
153 | 0 | os = Strnew_size(8); |
154 | 0 | } |
155 | |
|
156 | 0 | switch (st->state) { |
157 | 0 | case WC_BIG5_NOSTATE: |
158 | 0 | switch (WC_BIG5_MAP[c]) { |
159 | 0 | case UB: |
160 | 0 | big5u = c; |
161 | 0 | st->state = WC_BIG5_MBYTE1; |
162 | 0 | return NULL; |
163 | 0 | case C1: |
164 | 0 | break; |
165 | 0 | default: |
166 | 0 | Strcat_char(os, (char)c); |
167 | 0 | break; |
168 | 0 | } |
169 | 0 | break; |
170 | 0 | case WC_BIG5_MBYTE1: |
171 | 0 | if (WC_BIG5_MAP[c] & LB) |
172 | 0 | wtf_push(os, WC_CCS_BIG5, ((wc_uint32)big5u << 8) | c); |
173 | 0 | break; |
174 | 0 | } |
175 | 0 | st->state = -1; |
176 | 0 | return os; |
177 | 0 | } |