/src/dcmtk/oficonv/libsrc/citrus_gbk2k.c
Line | Count | Source |
1 | | /*- |
2 | | * Copyright (c)2003 Citrus Project, |
3 | | * All rights reserved. |
4 | | * |
5 | | * Redistribution and use in source and binary forms, with or without |
6 | | * modification, are permitted provided that the following conditions |
7 | | * are met: |
8 | | * 1. Redistributions of source code must retain the above copyright |
9 | | * notice, this list of conditions and the following disclaimer. |
10 | | * 2. Redistributions in binary form must reproduce the above copyright |
11 | | * notice, this list of conditions and the following disclaimer in the |
12 | | * documentation and/or other materials provided with the distribution. |
13 | | * |
14 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
15 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
16 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
17 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
18 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
19 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
20 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
21 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
22 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
23 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
24 | | * SUCH DAMAGE. |
25 | | */ |
26 | | |
27 | | #include "dcmtk/config/osconfig.h" |
28 | | #include "citrus_gbk2k.h" |
29 | | |
30 | | #include <sys/types.h> |
31 | | #include <errno.h> |
32 | | #include <limits.h> |
33 | | #include <stdbool.h> |
34 | | #include <stddef.h> |
35 | | #include <stdio.h> |
36 | | #include <stdlib.h> |
37 | | #include <string.h> |
38 | | #include <wchar.h> |
39 | | |
40 | | #include "citrus_bcs.h" |
41 | | #include "citrus_types.h" |
42 | | #include "citrus_module.h" |
43 | | #include "citrus_stdenc.h" |
44 | | |
45 | | |
/* ----------------------------------------------------------------------
 * private definitions used by the templates
 */
49 | | |
/*
 * Conversion state: the bytes of a multibyte character collected so far.
 * The decoder appends to ch[] one byte at a time and resets chlen to 0
 * once a character has been completed.
 */
typedef struct _GBK2KState {
	int chlen;	/* number of bytes currently stored in ch[] */
	char ch[4];	/* buffered bytes; sequences here are 1, 2 or 4 bytes long */
} _GBK2KState;
54 | | |
/*
 * Per-encoding configuration filled in by the module init function.
 */
typedef struct {
	int mb_cur_max;	/* module init stores 4 here, or 2 for the "2byte" option; four-byte sequences are only handled when this is 4 */
} _GBK2KEncodingInfo;
58 | | |
/*
 * Configuration macros for this module.
 * NOTE(review): presumably these are consumed by citrus_stdenc_template.h,
 * which is included at the end of this file -- confirm there.
 */

/* Address of the `ei` member of a wrapper object. */
#define _CEI_TO_EI(_cei_) (&(_cei_)->ei)
/* The `states.s_<func>` member of a wrapper object, selected by token pasting. */
#define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_

/* Builds the module-specific function name _citrus_GBK2K_<m>. */
#define _FUNCNAME(m) _citrus_GBK2K_##m
/* Type names of this module's encoding info and conversion state. */
#define _ENCODING_INFO _GBK2KEncodingInfo
#define _ENCODING_STATE _GBK2KState
/* Reads the configured maximum character length from an encoding info. */
#define _ENCODING_MB_CUR_MAX(_ei_) (_ei_)->mb_cur_max
/* Both flags are constant 0 for this module. */
#define _ENCODING_IS_STATE_DEPENDENT 0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0
68 | | |
69 | | static __inline void |
70 | | /*ARGSUSED*/ |
71 | | _citrus_GBK2K_init_state(_GBK2KEncodingInfo * ei , |
72 | | _GBK2KState * s) |
73 | 0 | { |
74 | 0 | (void) ei; |
75 | 0 | memset(s, 0, sizeof(*s)); |
76 | 0 | } |
77 | | |
/*
 * True when the low eight bits of c form a single-byte character,
 * i.e. a value in 0x00..0x7f.  Higher bits of c are ignored.
 */
static __inline bool
_mb_singlebyte(int c)
{
	const int byte = c & 0xff;

	return (byte < 0x80);
}
84 | | |
/*
 * True when the low eight bits of c lie in 0x81..0xfe, the range this
 * module accepts for the first byte of a multibyte sequence (and for the
 * third byte of a four-byte sequence).  Higher bits of c are ignored.
 */
static __inline bool
_mb_leadbyte(int c)
{
	const int byte = c & 0xff;

	if (byte < 0x81)
		return (false);
	return (byte <= 0xfe);
}
92 | | |
/*
 * True when the low eight bits of c are acceptable as the second byte of
 * a two-byte sequence: 0x40..0xfe with the single value 0x7f excluded.
 * Higher bits of c are ignored.
 */
static __inline bool
_mb_trailbyte(int c)
{
	const int byte = c & 0xff;

	if (byte < 0x40 || byte > 0xfe)
		return (false);
	return (byte != 0x7f);
}
100 | | |
/*
 * True when the low eight bits of c lie in 0x30..0x39.  The conversion
 * routines in this file require such a byte in the second and fourth
 * position of a four-byte sequence.  Higher bits of c are ignored.
 */
static __inline bool
_mb_surrogate(int c)
{
	const int byte = c & 0xff;

	if (byte < 0x30)
		return (false);
	return (byte <= 0x39);
}
108 | | |
109 | | static __inline int |
110 | | _mb_count(_citrus_wc_t v) |
111 | 0 | { |
112 | 0 | uint32_t c; |
113 | |
|
114 | 0 | c = (uint32_t)v; /* XXX */ |
115 | 0 | if (!(c & 0xffffff00)) |
116 | 0 | return (1); |
117 | 0 | if (!(c & 0xffff0000)) |
118 | 0 | return (2); |
119 | 0 | return (4); |
120 | 0 | } |
121 | | |
/*
 * Helpers for the pending-byte buffer.  Both expand to expressions that
 * use a variable named `psenc` (pointer to _GBK2KState) from the
 * surrounding scope.
 */
/* The most recently stored byte, ch[chlen - 1]; requires chlen > 0. */
#define _PSENC (psenc->ch[psenc->chlen - 1])
/* Append c (converted to char), advance chlen, and yield the stored value; performs no bounds check. */
#define _PUSH_PSENC(c) (psenc->ch[psenc->chlen++] = (char)(c))
124 | | |
125 | | static int |
126 | | _citrus_GBK2K_mbrtowc_priv(_GBK2KEncodingInfo * ei, |
127 | | _citrus_wc_t * pwc, char ** s, size_t n, |
128 | | _GBK2KState * psenc, size_t * nresult) |
129 | 0 | { |
130 | 0 | char *s0, *s1; |
131 | 0 | _citrus_wc_t wc; |
132 | 0 | int chlenbak, len; |
133 | |
|
134 | 0 | s0 = *s; |
135 | |
|
136 | 0 | if (s0 == NULL) { |
137 | | /* _citrus_GBK2K_init_state(ei, psenc); */ |
138 | 0 | psenc->chlen = 0; |
139 | 0 | *nresult = 0; |
140 | 0 | return (0); |
141 | 0 | } |
142 | | |
143 | 0 | chlenbak = psenc->chlen; |
144 | |
|
145 | 0 | switch (psenc->chlen) { |
146 | 0 | case 3: |
147 | 0 | if (!_mb_leadbyte (_PSENC)) |
148 | 0 | goto invalid; |
149 | | /* FALLTHROUGH */ |
150 | 0 | case 2: |
151 | 0 | if (!_mb_surrogate(_PSENC) || _mb_trailbyte(_PSENC)) |
152 | 0 | goto invalid; |
153 | | /* FALLTHROUGH */ |
154 | 0 | case 1: |
155 | 0 | if (!_mb_leadbyte (_PSENC)) |
156 | 0 | goto invalid; |
157 | | /* FALLTHOROUGH */ |
158 | 0 | case 0: |
159 | 0 | break; |
160 | 0 | default: |
161 | 0 | goto invalid; |
162 | 0 | } |
163 | | |
164 | 0 | for (;;) { |
165 | 0 | if (n-- < 1) |
166 | 0 | goto restart; |
167 | | |
168 | 0 | _PUSH_PSENC(*s0++); |
169 | |
|
170 | 0 | switch (psenc->chlen) { |
171 | 0 | case 1: |
172 | 0 | if (_mb_singlebyte(_PSENC)) |
173 | 0 | goto convert; |
174 | 0 | if (_mb_leadbyte (_PSENC)) |
175 | 0 | continue; |
176 | 0 | goto ilseq; |
177 | 0 | case 2: |
178 | 0 | if (_mb_trailbyte (_PSENC)) |
179 | 0 | goto convert; |
180 | 0 | if (ei->mb_cur_max == 4 && |
181 | 0 | _mb_surrogate (_PSENC)) |
182 | 0 | continue; |
183 | 0 | goto ilseq; |
184 | 0 | case 3: |
185 | 0 | if (_mb_leadbyte (_PSENC)) |
186 | 0 | continue; |
187 | 0 | goto ilseq; |
188 | 0 | case 4: |
189 | 0 | if (_mb_surrogate (_PSENC)) |
190 | 0 | goto convert; |
191 | 0 | goto ilseq; |
192 | 0 | } |
193 | 0 | } |
194 | | |
195 | 0 | convert: |
196 | 0 | len = psenc->chlen; |
197 | 0 | s1 = &psenc->ch[0]; |
198 | 0 | wc = 0; |
199 | 0 | while (len-- > 0) |
200 | 0 | wc = (wc << 8) | (*s1++ & 0xff); |
201 | |
|
202 | 0 | if (pwc != NULL) |
203 | 0 | *pwc = wc; |
204 | 0 | *s = s0; |
205 | 0 | *nresult = (wc == 0) ? 0 : psenc->chlen - chlenbak; |
206 | | /* _citrus_GBK2K_init_state(ei, psenc); */ |
207 | 0 | psenc->chlen = 0; |
208 | |
|
209 | 0 | return (0); |
210 | | |
211 | 0 | restart: |
212 | 0 | *s = s0; |
213 | 0 | *nresult = (size_t)-2; |
214 | |
|
215 | 0 | return (0); |
216 | | |
217 | 0 | invalid: |
218 | 0 | return (EINVAL); |
219 | | |
220 | 0 | ilseq: |
221 | 0 | *nresult = (size_t)-1; |
222 | 0 | return (EILSEQ); |
223 | 0 | } |
224 | | |
225 | | static int |
226 | | _citrus_GBK2K_wcrtomb_priv(_GBK2KEncodingInfo * ei, |
227 | | char * s, size_t n, _citrus_wc_t wc, _GBK2KState * psenc, |
228 | | size_t * nresult) |
229 | 0 | { |
230 | 0 | size_t len; |
231 | 0 | int ret; |
232 | |
|
233 | 0 | if (psenc->chlen != 0) { |
234 | 0 | ret = EINVAL; |
235 | 0 | goto err; |
236 | 0 | } |
237 | | |
238 | 0 | len = _mb_count(wc); |
239 | 0 | if (n < len) { |
240 | 0 | ret = E2BIG; |
241 | 0 | goto err; |
242 | 0 | } |
243 | | |
244 | 0 | switch (len) { |
245 | 0 | case 1: |
246 | 0 | if (!_mb_singlebyte(_PUSH_PSENC(wc ))) { |
247 | 0 | ret = EILSEQ; |
248 | 0 | goto err; |
249 | 0 | } |
250 | 0 | break; |
251 | 0 | case 2: |
252 | 0 | if (!_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || |
253 | 0 | !_mb_trailbyte (_PUSH_PSENC(wc))) { |
254 | 0 | ret = EILSEQ; |
255 | 0 | goto err; |
256 | 0 | } |
257 | 0 | break; |
258 | 0 | case 4: |
259 | 0 | if (ei->mb_cur_max != 4 || |
260 | 0 | !_mb_leadbyte (_PUSH_PSENC(wc >> 24)) || |
261 | 0 | !_mb_surrogate (_PUSH_PSENC(wc >> 16)) || |
262 | 0 | !_mb_leadbyte (_PUSH_PSENC(wc >> 8)) || |
263 | 0 | !_mb_surrogate (_PUSH_PSENC(wc))) { |
264 | 0 | ret = EILSEQ; |
265 | 0 | goto err; |
266 | 0 | } |
267 | 0 | break; |
268 | 0 | } |
269 | | |
270 | 0 | memcpy(s, psenc->ch, psenc->chlen); |
271 | 0 | *nresult = psenc->chlen; |
272 | | /* _citrus_GBK2K_init_state(ei, psenc); */ |
273 | 0 | psenc->chlen = 0; |
274 | |
|
275 | 0 | return (0); |
276 | | |
277 | 0 | err: |
278 | 0 | *nresult = (size_t)-1; |
279 | 0 | return (ret); |
280 | 0 | } |
281 | | |
282 | | static __inline int |
283 | | /*ARGSUSED*/ |
284 | | _citrus_GBK2K_stdenc_wctocs(_GBK2KEncodingInfo * ei , |
285 | | _citrus_csid_t * csid, _citrus_index_t * idx, _citrus_wc_t wc) |
286 | 0 | { |
287 | 0 | uint8_t ch, cl; |
288 | 0 | (void) ei; |
289 | |
|
290 | 0 | if ((uint32_t)wc < 0x80) { |
291 | | /* ISO646 */ |
292 | 0 | *csid = 0; |
293 | 0 | *idx = (_citrus_index_t)wc; |
294 | 0 | } else if ((uint32_t)wc >= 0x10000) { |
295 | | /* GBKUCS : XXX */ |
296 | 0 | *csid = 3; |
297 | 0 | *idx = (_citrus_index_t)wc; |
298 | 0 | } else { |
299 | 0 | ch = (uint8_t)(wc >> 8); |
300 | 0 | cl = (uint8_t)wc; |
301 | 0 | if (ch >= 0xA1 && cl >= 0xA1) { |
302 | | /* EUC G1 */ |
303 | 0 | *csid = 1; |
304 | 0 | *idx = (_citrus_index_t)wc & 0x7F7FU; |
305 | 0 | } else { |
306 | | /* extended area (0x8140-) */ |
307 | 0 | *csid = 2; |
308 | 0 | *idx = (_citrus_index_t)wc; |
309 | 0 | } |
310 | 0 | } |
311 | |
|
312 | 0 | return (0); |
313 | 0 | } |
314 | | |
315 | | static __inline int |
316 | | /*ARGSUSED*/ |
317 | | _citrus_GBK2K_stdenc_cstowc(_GBK2KEncodingInfo * ei, |
318 | | _citrus_wc_t * wc, _citrus_csid_t csid, _citrus_index_t idx) |
319 | 0 | { |
320 | |
|
321 | 0 | switch (csid) { |
322 | 0 | case 0: |
323 | | /* ISO646 */ |
324 | 0 | *wc = (_citrus_wc_t)idx; |
325 | 0 | break; |
326 | 0 | case 1: |
327 | | /* EUC G1 */ |
328 | 0 | *wc = (_citrus_wc_t)idx | 0x8080U; |
329 | 0 | break; |
330 | 0 | case 2: |
331 | | /* extended area */ |
332 | 0 | *wc = (_citrus_wc_t)idx; |
333 | 0 | break; |
334 | 0 | case 3: |
335 | | /* GBKUCS : XXX */ |
336 | 0 | if (ei->mb_cur_max != 4) |
337 | 0 | return (EINVAL); |
338 | 0 | *wc = (_citrus_wc_t)idx; |
339 | 0 | break; |
340 | 0 | default: |
341 | 0 | return (EILSEQ); |
342 | 0 | } |
343 | | |
344 | 0 | return (0); |
345 | 0 | } |
346 | | |
347 | | static __inline int |
348 | | /*ARGSUSED*/ |
349 | | _citrus_GBK2K_stdenc_get_state_desc_generic(_GBK2KEncodingInfo * ei , |
350 | | _GBK2KState * psenc, int * rstate) |
351 | 0 | { |
352 | 0 | (void) ei; |
353 | 0 | *rstate = (psenc->chlen == 0) ? _CITRUS_STDENC_SDGEN_INITIAL : |
354 | 0 | _CITRUS_STDENC_SDGEN_INCOMPLETE_CHAR; |
355 | 0 | return (0); |
356 | 0 | } |
357 | | |
/*
 * Initialise the encoding info from the module's variable string.
 *
 * ei      encoding info to fill in; zeroed first, then mb_cur_max is
 *         set to 4.
 * var     option text, lenvar bytes long (not assumed NUL-terminated here).
 * lenvar  number of bytes at var.
 *
 * The text is scanned one byte at a time; at each byte whose lower-cased
 * value is '2', MATCH() is invoked with "2byte" and an action that lowers
 * mb_cur_max to 2.
 *
 * Always returns 0.
 */
static int
/*ARGSUSED*/
_citrus_GBK2K_encoding_module_init(_GBK2KEncodingInfo * ei,
    const void * var, size_t lenvar)
{
	const char *p;

	p = var;
	memset((void *)ei, 0, sizeof(*ei));
	ei->mb_cur_max = 4;
	while (lenvar > 0) {
		switch (_citrus_bcs_tolower(*p)) {
		case '2':
			/*
			 * NOTE(review): MATCH is not defined in this file.
			 * Presumably it compares the text at p with the given
			 * keyword and runs the action on a hit -- confirm.
			 * If it also advances p / reduces lenvar itself, the
			 * unconditional p++ / lenvar-- below could step past
			 * the end of var (lenvar is unsigned) when the keyword
			 * is the last thing in the string -- TODO verify.
			 */
			MATCH("2byte", ei->mb_cur_max = 2);
			break;
		}
		p++;
		lenvar--;
	}

	return (0);
}
380 | | |
381 | | static void |
382 | | /*ARGSUSED*/ |
383 | | _citrus_GBK2K_encoding_module_uninit(_GBK2KEncodingInfo *ei ) |
384 | 0 | { |
385 | 0 | (void) ei; |
386 | 0 | } |
387 | | |
388 | | /* ---------------------------------------------------------------------- |
389 | | * public interface for stdenc |
390 | | */ |
391 | | |
/*
 * NOTE(review): these two macros and the template are defined outside this
 * file.  Presumably they declare and define the stdenc operations for the
 * GBK2K module in terms of the _FUNCNAME()/_ENCODING_* macros and the
 * static functions above, which is why the include comes last -- confirm
 * in citrus_stdenc.h and citrus_stdenc_template.h.
 */
_CITRUS_STDENC_DECLS(GBK2K);
_CITRUS_STDENC_DEF_OPS(GBK2K);

#include "citrus_stdenc_template.h"