Line | Count | Source |
1 | | |
2 | | #include "wc.h" |
3 | | #include "big5.h" |
4 | | #include "search.h" |
5 | | #include "wtf.h" |
6 | | #ifdef USE_UNICODE |
7 | | #include "ucs.h" |
8 | | #endif |
9 | | |
10 | | #define C0 WC_BIG5_MAP_C0 |
11 | | #define GL WC_BIG5_MAP_GL |
12 | 50.5k | #define C1 WC_BIG5_MAP_C1 |
13 | 124k | #define LB WC_BIG5_MAP_LB |
14 | 124k | #define UB WC_BIG5_MAP_UB |
15 | | |
16 | | wc_uint8 WC_BIG5_MAP[ 0x100 ] = { |
17 | | C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, |
18 | | C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, C0, |
19 | | GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, |
20 | | GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, GL, |
21 | | LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, |
22 | | LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, |
23 | | LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, |
24 | | LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, LB, C0, |
25 | | |
26 | | C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, |
27 | | C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, C1, |
28 | | C1, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
29 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
30 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
31 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
32 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, |
33 | | UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, UB, C1, |
34 | | }; |
35 | | |
36 | | wc_wchar_t |
37 | | wc_big5_to_cs94w(wc_wchar_t cc) |
38 | 5.99M | { |
39 | 5.99M | cc.code = WC_BIG5_N(cc.code); |
40 | 5.99M | if (cc.code < WC_C_BIG5_2_BASE) |
41 | 5.64M | cc.ccs = WC_CCS_BIG5_1; |
42 | 350k | else { |
43 | 350k | cc.ccs = WC_CCS_BIG5_2; |
44 | 350k | cc.code -= WC_C_BIG5_2_BASE; |
45 | 350k | } |
46 | 5.99M | cc.code = WC_N_CS94W(cc.code); |
47 | 5.99M | return cc; |
48 | 5.99M | } |
49 | | |
50 | | wc_wchar_t |
51 | | wc_cs94w_to_big5(wc_wchar_t cc) |
52 | 5.99M | { |
53 | 5.99M | cc.code = WC_CS94W_N(cc.code); |
54 | 5.99M | if (cc.ccs == WC_CCS_BIG5_2) |
55 | 350k | cc.code += WC_C_BIG5_2_BASE; |
56 | 5.99M | cc.code = WC_N_BIG5(cc.code); |
57 | 5.99M | cc.ccs = WC_CCS_BIG5; |
58 | 5.99M | return cc; |
59 | 5.99M | } |
60 | | |
61 | | Str |
62 | | wc_conv_from_big5(Str is, wc_ces ces) |
63 | 236 | { |
64 | 236 | Str os; |
65 | 236 | wc_uchar *sp = (wc_uchar *)is->ptr; |
66 | 236 | wc_uchar *ep = sp + is->length; |
67 | 236 | wc_uchar *p; |
68 | 236 | int state = WC_BIG5_NOSTATE; |
69 | | |
70 | 512 | for (p = sp; p < ep && *p < 0x80; p++) |
71 | 276 | ; |
72 | 236 | if (p == ep) |
73 | 47 | return is; |
74 | 189 | os = Strnew_size(is->length); |
75 | 189 | if (p > sp) |
76 | 14 | Strcat_charp_n(os, (char *)is->ptr, (int)(p - sp)); |
77 | | |
78 | 337k | for (; p < ep; p++) { |
79 | 337k | switch (state) { |
80 | 212k | case WC_BIG5_NOSTATE: |
81 | 212k | switch (WC_BIG5_MAP[*p]) { |
82 | 124k | case UB: |
83 | 124k | state = WC_BIG5_MBYTE1; |
84 | 124k | break; |
85 | 50.5k | case C1: |
86 | 50.5k | wtf_push_unknown(os, p, 1); |
87 | 50.5k | break; |
88 | 37.3k | default: |
89 | 37.3k | Strcat_char(os, (char)*p); |
90 | 37.3k | break; |
91 | 212k | } |
92 | 212k | break; |
93 | 212k | case WC_BIG5_MBYTE1: |
94 | 124k | if (WC_BIG5_MAP[*p] & LB) |
95 | 99.7k | wtf_push(os, WC_CCS_BIG5, ((wc_uint32)*(p-1) << 8) | *p); |
96 | 25.0k | else |
97 | 25.0k | wtf_push_unknown(os, p-1, 2); |
98 | 124k | state = WC_BIG5_NOSTATE; |
99 | 124k | break; |
100 | 337k | } |
101 | 337k | } |
102 | 189 | switch (state) { |
103 | 21 | case WC_BIG5_MBYTE1: |
104 | 21 | wtf_push_unknown(os, p-1, 1); |
105 | 21 | break; |
106 | 189 | } |
107 | 189 | return os; |
108 | 189 | } |
109 | | |
110 | | void |
111 | | wc_push_to_big5(Str os, wc_wchar_t cc, wc_status *st) |
112 | 2.98M | { |
113 | 5.90M | while (1) { |
114 | 5.90M | switch (cc.ccs) { |
115 | 27.5k | case WC_CCS_US_ASCII: |
116 | 27.5k | Strcat_char(os, (char)cc.code); |
117 | 27.5k | return; |
118 | 1.61M | case WC_CCS_BIG5_1: |
119 | 1.61M | case WC_CCS_BIG5_2: |
120 | 1.61M | cc = wc_cs94w_to_big5(cc); |
121 | 1.61M | case WC_CCS_BIG5: |
122 | 1.61M | Strcat_char(os, (char)(cc.code >> 8)); |
123 | 1.61M | Strcat_char(os, (char)(cc.code & 0xff)); |
124 | 1.61M | return; |
125 | 1.24M | case WC_CCS_UNKNOWN_W: |
126 | 1.24M | if (!WcOption.no_replace) |
127 | 1.24M | Strcat_charp(os, WC_REPLACE_W); |
128 | 1.24M | return; |
129 | 93.1k | case WC_CCS_UNKNOWN: |
130 | 93.1k | if (!WcOption.no_replace) |
131 | 93.1k | Strcat_charp(os, WC_REPLACE); |
132 | 93.1k | return; |
133 | 2.92M | default: |
134 | 2.92M | #ifdef USE_UNICODE |
135 | 2.92M | if (WcOption.ucs_conv) |
136 | 2.92M | cc = wc_any_to_any_ces(cc, st); |
137 | 0 | else |
138 | 0 | #endif |
139 | 0 | cc.ccs = WC_CCS_IS_WIDE(cc.ccs) ? WC_CCS_UNKNOWN_W : WC_CCS_UNKNOWN; |
140 | 2.92M | continue; |
141 | 5.90M | } |
142 | 5.90M | } |
143 | 2.98M | } |
144 | | |
145 | | Str |
146 | | wc_char_conv_from_big5(wc_uchar c, wc_status *st) |
147 | 0 | { |
148 | 0 | static Str os; |
149 | 0 | static wc_uchar big5u; |
150 | |
|
151 | 0 | if (st->state == -1) { |
152 | 0 | st->state = WC_BIG5_NOSTATE; |
153 | 0 | os = Strnew_size(8); |
154 | 0 | } |
155 | |
|
156 | 0 | switch (st->state) { |
157 | 0 | case WC_BIG5_NOSTATE: |
158 | 0 | switch (WC_BIG5_MAP[c]) { |
159 | 0 | case UB: |
160 | 0 | big5u = c; |
161 | 0 | st->state = WC_BIG5_MBYTE1; |
162 | 0 | return NULL; |
163 | 0 | case C1: |
164 | 0 | break; |
165 | 0 | default: |
166 | 0 | Strcat_char(os, (char)c); |
167 | 0 | break; |
168 | 0 | } |
169 | 0 | break; |
170 | 0 | case WC_BIG5_MBYTE1: |
171 | 0 | if (WC_BIG5_MAP[c] & LB) |
172 | 0 | wtf_push(os, WC_CCS_BIG5, ((wc_uint32)big5u << 8) | c); |
173 | 0 | break; |
174 | 0 | } |
175 | 0 | st->state = -1; |
176 | 0 | return os; |
177 | 0 | } |