/src/dcmtk/oficonv/libsrc/citrus_utf7.c
Line | Count | Source |
1 | | /*- |
2 | | * Copyright (c)2004, 2005 Citrus Project, |
3 | | * All rights reserved. |
4 | | * |
5 | | * Redistribution and use in source and binary forms, with or without |
6 | | * modification, are permitted provided that the following conditions |
7 | | * are met: |
8 | | * 1. Redistributions of source code must retain the above copyright |
9 | | * notice, this list of conditions and the following disclaimer. |
10 | | * 2. Redistributions in binary form must reproduce the above copyright |
11 | | * notice, this list of conditions and the following disclaimer in the |
12 | | * documentation and/or other materials provided with the distribution. |
13 | | * |
14 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
15 | | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
16 | | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
17 | | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE |
18 | | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
19 | | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
20 | | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
21 | | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
22 | | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
23 | | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
24 | | * SUCH DAMAGE. |
25 | | * |
26 | | */ |
27 | | |
28 | | #include "dcmtk/config/osconfig.h" |
29 | | #include "citrus_utf7.h" |
30 | | |
31 | | |
32 | | #include <errno.h> |
33 | | #include <limits.h> |
34 | | #include <stdio.h> |
35 | | #include <stdint.h> |
36 | | #include <stdlib.h> |
37 | | #include <string.h> |
38 | | #include <wchar.h> |
39 | | |
40 | | #include "citrus_bcs.h" |
41 | | #include "citrus_types.h" |
42 | | #include "citrus_module.h" |
43 | | #include "citrus_stdenc.h" |
44 | | |
45 | | /* ---------------------------------------------------------------------- |
46 | | * private definitions used by the stdenc template |
47 | | */ |
48 | | |
/*
 * Layout of one _UTF7EncodingInfo cell.  The low byte stores the position
 * of the character in the base64 alphabet plus one (zero meaning "not a
 * base64 character"); the bits above it are character-class flags.
 */
#define EI_MASK		UINT16_C(0x00ff)	/* base64 position + 1 */
#define EI_DIRECT	UINT16_C(0x0100)	/* listed in direct[] */
#define EI_OPTION	UINT16_C(0x0200)	/* listed in option[] */
#define EI_SPACE	UINT16_C(0x0400)	/* listed in spaces[] */
53 | | |
/*
 * Lookup table with one cell per 7-bit character; each cell combines the
 * EI_* class flags with the character's base64 position.
 */
typedef struct {
	uint16_t	cell[128];
} _UTF7EncodingInfo;
57 | | |
/* Shift state carried between conversion calls. */
typedef struct {
	unsigned int	mode : 1;	/* whether base64 mode is active */
	unsigned int	bits : 4;	/* pending bit count, needs to hold 0 - 15 */
	unsigned int	cache : 22;	/* 22 = BASE64_BIT + UTF16_BIT */
	int		chlen;		/* number of bytes currently held in ch[] */
	/* BASE64_IN plus three sextets: 3 * 6 = 18 bits, the closest fit to UTF16_BIT */
	char		ch[4];
} _UTF7State;
66 | | |
/* Accessors for the encoding info and per-function states of a compound info. */
#define _CEI_TO_EI(_cei_)			(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)		(_cei_)->states.s_##_func_

/*
 * Names and properties of this encoding; presumably consumed by the stdenc
 * template included at the end of the file.
 */
#define _FUNCNAME(m)				_citrus_UTF7_##m
#define _ENCODING_INFO				_UTF7EncodingInfo
#define _ENCODING_STATE				_UTF7State
#define _ENCODING_MB_CUR_MAX(_ei_)		4
#define _ENCODING_IS_STATE_DEPENDENT		1
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	0
76 | | |
77 | | static __inline void |
78 | | /*ARGSUSED*/ |
79 | | _citrus_UTF7_init_state(_UTF7EncodingInfo * ei , |
80 | | _UTF7State * s) |
81 | 0 | { |
82 | 0 | (void) ei; |
83 | 0 | memset((void *)s, 0, sizeof(*s)); |
84 | 0 | } |
85 | | |
/* base64 alphabet; the index of a character is the sextet it stands for */
static const char base64[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* characters that receive the EI_DIRECT flag */
static const char direct[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
	"'(),-./:?";

/* characters that receive the EI_OPTION flag */
static const char option[] = "!\"#$%&*;<=>@[]^_`{|}";

/* characters that receive the EI_SPACE flag */
static const char spaces[] = " \t\r\n";
98 | | |
/* width in bits of one base64 sextet and of one UTF-16 code unit */
#define BASE64_BIT	6
#define UTF16_BIT	16

/* largest sextet, UTF-16 code unit and code point values */
#define BASE64_MAX	0x3f
#define UTF16_MAX	UINT16_C(0xffff)
#define UTF32_MAX	UINT32_C(0x10ffff)

/* characters that switch base64 mode on and off */
#define BASE64_IN	'+'
#define BASE64_OUT	'-'
108 | | |
/* Non-zero when c does not fit in 7 bits. */
#define SHIFT7BIT(c)	((c) >> 7)

/* NUL and the base64 introducer are always accepted by ISDIRECT/ISSAFE. */
#define ISSPECIAL(c)	((c) == '\0' || (c) == BASE64_IN)

/*
 * Sextet value of c according to ei's table, or -1 when c is outside the
 * 7-bit range or has no base64 position recorded.
 */
#define FINDLEN(ei, c) \
	(SHIFT7BIT((c)) ? -1 : (((ei)->cell[(c)] & EI_MASK) - 1))

/*
 * True when c is a 7-bit character that is special or carries any of the
 * DIRECT, OPTION or SPACE flags.  Every argument is parenthesized so that
 * arbitrary expressions may be passed; c is evaluated more than once.
 */
#define ISDIRECT(ei, c)	(!SHIFT7BIT((c)) && (ISSPECIAL((c)) || \
	((ei)->cell[(c)] & (EI_DIRECT | EI_OPTION | EI_SPACE))))

/*
 * Like ISDIRECT, but the OPTION flag alone does not qualify.  Every
 * argument is parenthesized; c is evaluated more than once.
 */
#define ISSAFE(ei, c)	(!SHIFT7BIT((c)) && (ISSPECIAL((c)) || \
	((c) < 0x80 && ((ei)->cell[(c)] & (EI_DIRECT | EI_SPACE)))))
120 | | |
/* surrogate pair: offset of the supplementary range and the two unit ranges */
#define SRG_BASE	UINT32_C(0x10000)
#define HISRG_MIN	UINT16_C(0xd800)	/* high (leading) surrogates */
#define HISRG_MAX	UINT16_C(0xdbff)
#define LOSRG_MIN	UINT16_C(0xdc00)	/* low (trailing) surrogates */
#define LOSRG_MAX	UINT16_C(0xdfff)
127 | | |
128 | | static int |
129 | | _citrus_UTF7_mbtoutf16(_UTF7EncodingInfo * ei, |
130 | | uint16_t * u16, char ** s, size_t n, |
131 | | _UTF7State * psenc, size_t * nresult) |
132 | 0 | { |
133 | 0 | char *s0; |
134 | 0 | int done, i, len; |
135 | |
|
136 | 0 | *nresult = 0; |
137 | 0 | s0 = *s; |
138 | |
|
139 | 0 | for (i = 0, done = 0; done == 0; i++) { |
140 | 0 | if (i == psenc->chlen) { |
141 | 0 | if (n-- < 1) { |
142 | 0 | *nresult = (size_t)-2; |
143 | 0 | *s = s0; |
144 | 0 | return (0); |
145 | 0 | } |
146 | 0 | psenc->ch[psenc->chlen++] = *s0++; |
147 | 0 | } |
148 | 0 | if (SHIFT7BIT((int)psenc->ch[i])) |
149 | 0 | goto ilseq; |
150 | 0 | if (!psenc->mode) { |
151 | 0 | if (psenc->bits > 0 || psenc->cache > 0) |
152 | 0 | return (EINVAL); |
153 | 0 | if (psenc->ch[i] == BASE64_IN) |
154 | 0 | psenc->mode = 1; |
155 | 0 | else { |
156 | 0 | if (!ISDIRECT(ei, (int)psenc->ch[i])) |
157 | 0 | goto ilseq; |
158 | 0 | *u16 = (uint16_t)psenc->ch[i]; |
159 | 0 | done = 1; |
160 | 0 | continue; |
161 | 0 | } |
162 | 0 | } else { |
163 | 0 | if (psenc->ch[i] == BASE64_OUT && psenc->cache == 0) { |
164 | 0 | psenc->mode = 0; |
165 | 0 | *u16 = (uint16_t)BASE64_IN; |
166 | 0 | done = 1; |
167 | 0 | continue; |
168 | 0 | } |
169 | 0 | len = FINDLEN(ei, (int)psenc->ch[i]); |
170 | 0 | if (len < 0) { |
171 | 0 | if (psenc->bits >= BASE64_BIT) |
172 | 0 | return (EINVAL); |
173 | 0 | psenc->mode = 0; |
174 | 0 | psenc->bits = psenc->cache = 0; |
175 | 0 | if (psenc->ch[i] != BASE64_OUT) { |
176 | 0 | if (!ISDIRECT(ei, (int)psenc->ch[i])) |
177 | 0 | goto ilseq; |
178 | 0 | *u16 = (uint16_t)psenc->ch[i]; |
179 | 0 | done = 1; |
180 | 0 | } else { |
181 | 0 | psenc->chlen--; |
182 | 0 | i--; |
183 | 0 | } |
184 | 0 | } else { |
185 | 0 | psenc->cache = |
186 | 0 | (psenc->cache << BASE64_BIT) | len; |
187 | 0 | switch (psenc->bits) { |
188 | 0 | case 0: case 2: case 4: case 6: case 8: |
189 | 0 | psenc->bits += BASE64_BIT; |
190 | 0 | break; |
191 | 0 | case 10: case 12: case 14: |
192 | 0 | psenc->bits -= (UTF16_BIT - BASE64_BIT); |
193 | 0 | *u16 = (psenc->cache >> psenc->bits) & |
194 | 0 | UTF16_MAX; |
195 | 0 | done = 1; |
196 | 0 | break; |
197 | 0 | default: |
198 | 0 | return (EINVAL); |
199 | 0 | } |
200 | 0 | } |
201 | 0 | } |
202 | 0 | } |
203 | | |
204 | 0 | if (psenc->chlen > i) |
205 | 0 | return (EINVAL); |
206 | 0 | psenc->chlen = 0; |
207 | 0 | *nresult = (size_t)((*u16 == 0) ? 0 : s0 - *s); |
208 | 0 | *s = s0; |
209 | |
|
210 | 0 | return (0); |
211 | | |
212 | 0 | ilseq: |
213 | 0 | *nresult = (size_t)-1; |
214 | 0 | return (EILSEQ); |
215 | 0 | } |
216 | | |
217 | | static int |
218 | | _citrus_UTF7_mbrtowc_priv(_UTF7EncodingInfo * ei, |
219 | | _citrus_wc_t * pwc, char ** s, size_t n, |
220 | | _UTF7State * psenc, size_t * nresult) |
221 | 0 | { |
222 | 0 | uint32_t u32; |
223 | 0 | uint16_t hi, lo; |
224 | 0 | size_t nr, siz; |
225 | 0 | int err; |
226 | |
|
227 | 0 | if (*s == NULL) { |
228 | 0 | _citrus_UTF7_init_state(ei, psenc); |
229 | 0 | *nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT; |
230 | 0 | return (0); |
231 | 0 | } |
232 | 0 | err = _citrus_UTF7_mbtoutf16(ei, &hi, s, n, psenc, &nr); |
233 | 0 | if (nr == (size_t)-1 || nr == (size_t)-2) { |
234 | 0 | *nresult = nr; |
235 | 0 | return (err); |
236 | 0 | } |
237 | 0 | if (err != 0) |
238 | 0 | return (err); |
239 | 0 | n -= nr; |
240 | 0 | siz = nr; |
241 | 0 | if (hi < HISRG_MIN || hi > HISRG_MAX) { |
242 | 0 | u32 = (uint32_t)hi; |
243 | 0 | goto done; |
244 | 0 | } |
245 | 0 | err = _citrus_UTF7_mbtoutf16(ei, &lo, s, n, psenc, &nr); |
246 | 0 | if (nr == (size_t)-1 || nr == (size_t)-2) { |
247 | 0 | psenc->chlen = 1; /* make get_state_desc return incomplete */ |
248 | 0 | *nresult = nr; |
249 | 0 | return (err); |
250 | 0 | } |
251 | 0 | if (err != 0) |
252 | 0 | return (err); |
253 | 0 | if (lo < LOSRG_MIN || lo > LOSRG_MAX) { |
254 | 0 | *nresult = (size_t)-1; |
255 | 0 | return (EILSEQ); |
256 | 0 | } |
257 | 0 | hi -= HISRG_MIN; |
258 | 0 | lo -= LOSRG_MIN; |
259 | 0 | u32 = (hi << 10 | lo) + SRG_BASE; |
260 | 0 | siz += nr; |
261 | 0 | done: |
262 | 0 | if (pwc != NULL) |
263 | 0 | *pwc = (_citrus_wc_t)u32; |
264 | 0 | if (u32 == (uint32_t)0) { |
265 | 0 | *nresult = (size_t)0; |
266 | 0 | _citrus_UTF7_init_state(ei, psenc); |
267 | 0 | } else { |
268 | 0 | *nresult = siz; |
269 | 0 | } |
270 | 0 | return (err); |
271 | 0 | } |
272 | | |
273 | | static int |
274 | | _citrus_UTF7_utf16tomb(_UTF7EncodingInfo * ei, |
275 | | char * s, size_t n , uint16_t u16, |
276 | | _UTF7State * psenc, size_t * nresult) |
277 | 0 | { |
278 | 0 | int bits, i; |
279 | |
|
280 | 0 | (void) n; |
281 | |
|
282 | 0 | if (psenc->chlen != 0 || psenc->bits > BASE64_BIT) |
283 | 0 | return (EINVAL); |
284 | | |
285 | 0 | if (ISSAFE(ei, u16)) { |
286 | 0 | if (psenc->mode) { |
287 | 0 | if (psenc->bits > 0) { |
288 | 0 | bits = BASE64_BIT - psenc->bits; |
289 | 0 | i = (psenc->cache << bits) & BASE64_MAX; |
290 | 0 | psenc->ch[psenc->chlen++] = base64[i]; |
291 | 0 | psenc->bits = psenc->cache = 0; |
292 | 0 | } |
293 | 0 | if (u16 == BASE64_OUT || FINDLEN(ei, u16) >= 0) |
294 | 0 | psenc->ch[psenc->chlen++] = BASE64_OUT; |
295 | 0 | psenc->mode = 0; |
296 | 0 | } |
297 | 0 | if (psenc->bits != 0) |
298 | 0 | return (EINVAL); |
299 | 0 | psenc->ch[psenc->chlen++] = (char)u16; |
300 | 0 | if (u16 == BASE64_IN) |
301 | 0 | psenc->ch[psenc->chlen++] = BASE64_OUT; |
302 | 0 | } else { |
303 | 0 | if (!psenc->mode) { |
304 | 0 | if (psenc->bits > 0) |
305 | 0 | return (EINVAL); |
306 | 0 | psenc->ch[psenc->chlen++] = BASE64_IN; |
307 | 0 | psenc->mode = 1; |
308 | 0 | } |
309 | 0 | psenc->cache = (psenc->cache << UTF16_BIT) | u16; |
310 | 0 | bits = UTF16_BIT + psenc->bits; |
311 | 0 | psenc->bits = bits % BASE64_BIT; |
312 | 0 | while ((bits -= BASE64_BIT) >= 0) { |
313 | 0 | i = (psenc->cache >> bits) & BASE64_MAX; |
314 | 0 | psenc->ch[psenc->chlen++] = base64[i]; |
315 | 0 | } |
316 | 0 | } |
317 | 0 | memcpy(s, psenc->ch, psenc->chlen); |
318 | 0 | *nresult = psenc->chlen; |
319 | 0 | psenc->chlen = 0; |
320 | |
|
321 | 0 | return (0); |
322 | 0 | } |
323 | | |
324 | | static int |
325 | | _citrus_UTF7_wcrtomb_priv(_UTF7EncodingInfo * ei, |
326 | | char * s, size_t n, _citrus_wc_t wchar, |
327 | | _UTF7State * psenc, size_t * nresult) |
328 | 0 | { |
329 | 0 | uint32_t u32; |
330 | 0 | uint16_t u16[2]; |
331 | 0 | int err, i, len; |
332 | 0 | size_t nr, siz; |
333 | |
|
334 | 0 | u32 = (uint32_t)wchar; |
335 | 0 | if (u32 <= UTF16_MAX) { |
336 | 0 | u16[0] = (uint16_t)u32; |
337 | 0 | len = 1; |
338 | 0 | } else if (u32 <= UTF32_MAX) { |
339 | 0 | u32 -= SRG_BASE; |
340 | 0 | u16[0] = (uint16_t) ((u32 >> 10) + HISRG_MIN); |
341 | 0 | u16[1] = ((uint16_t)(u32 & UINT32_C(0x3ff))) + LOSRG_MIN; |
342 | 0 | len = 2; |
343 | 0 | } else { |
344 | 0 | *nresult = (size_t)-1; |
345 | 0 | return (EILSEQ); |
346 | 0 | } |
347 | 0 | siz = 0; |
348 | 0 | for (i = 0; i < len; ++i) { |
349 | 0 | err = _citrus_UTF7_utf16tomb(ei, s, n, u16[i], psenc, &nr); |
350 | 0 | if (err != 0) |
351 | 0 | return (err); /* XXX: state has been modified */ |
352 | 0 | s += nr; |
353 | 0 | n -= nr; |
354 | 0 | siz += nr; |
355 | 0 | } |
356 | 0 | *nresult = siz; |
357 | |
|
358 | 0 | return (0); |
359 | 0 | } |
360 | | |
361 | | static int |
362 | | /* ARGSUSED */ |
363 | | _citrus_UTF7_put_state_reset(_UTF7EncodingInfo * ei , |
364 | | char * s, size_t n, _UTF7State * psenc, |
365 | | size_t * nresult) |
366 | 0 | { |
367 | 0 | int bits, pos; |
368 | 0 | (void) ei; |
369 | |
|
370 | 0 | if (psenc->chlen != 0 || psenc->bits > BASE64_BIT) |
371 | 0 | return (EINVAL); |
372 | | |
373 | 0 | if (psenc->mode) { |
374 | 0 | if (psenc->bits > 0) { |
375 | 0 | if (n-- < 1) |
376 | 0 | return (E2BIG); |
377 | 0 | bits = BASE64_BIT - psenc->bits; |
378 | 0 | pos = (psenc->cache << bits) & BASE64_MAX; |
379 | 0 | psenc->ch[psenc->chlen++] = base64[pos]; |
380 | 0 | psenc->ch[psenc->chlen++] = BASE64_OUT; |
381 | 0 | psenc->bits = psenc->cache = 0; |
382 | 0 | } |
383 | 0 | psenc->mode = 0; |
384 | 0 | } |
385 | 0 | if (psenc->bits != 0) |
386 | 0 | return (EINVAL); |
387 | 0 | if (n-- < 1) |
388 | 0 | return (E2BIG); |
389 | | |
390 | 0 | *nresult = (size_t)psenc->chlen; |
391 | 0 | if (psenc->chlen > 0) { |
392 | 0 | memcpy(s, psenc->ch, psenc->chlen); |
393 | 0 | psenc->chlen = 0; |
394 | 0 | } |
395 | |
|
396 | 0 | return (0); |
397 | 0 | } |
398 | | |
399 | | static __inline int |
400 | | /*ARGSUSED*/ |
401 | | _citrus_UTF7_stdenc_wctocs(_UTF7EncodingInfo * ei , |
402 | | _citrus_csid_t * csid, _citrus_index_t * idx, _citrus_wc_t wc) |
403 | 0 | { |
404 | 0 | (void) ei; |
405 | |
|
406 | 0 | *csid = 0; |
407 | 0 | *idx = (_citrus_index_t)wc; |
408 | |
|
409 | 0 | return (0); |
410 | 0 | } |
411 | | |
412 | | static __inline int |
413 | | /*ARGSUSED*/ |
414 | | _citrus_UTF7_stdenc_cstowc(_UTF7EncodingInfo * ei , |
415 | | _citrus_wc_t * wc, _citrus_csid_t csid, _citrus_index_t idx) |
416 | 0 | { |
417 | 0 | (void) ei; |
418 | |
|
419 | 0 | if (csid != 0) |
420 | 0 | return (EILSEQ); |
421 | 0 | *wc = (_citrus_wc_t)idx; |
422 | |
|
423 | 0 | return (0); |
424 | 0 | } |
425 | | |
426 | | static __inline int |
427 | | /*ARGSUSED*/ |
428 | | _citrus_UTF7_stdenc_get_state_desc_generic(_UTF7EncodingInfo * ei , |
429 | | _UTF7State * psenc, int * rstate) |
430 | 0 | { |
431 | 0 | (void) ei; |
432 | |
|
433 | 0 | *rstate = (psenc->chlen == 0) ? _CITRUS_STDENC_SDGEN_INITIAL : |
434 | 0 | _CITRUS_STDENC_SDGEN_INCOMPLETE_CHAR; |
435 | 0 | return (0); |
436 | 0 | } |
437 | | |
438 | | static void |
439 | | /*ARGSUSED*/ |
440 | | _citrus_UTF7_encoding_module_uninit(_UTF7EncodingInfo *ei ) |
441 | 0 | { |
442 | 0 | (void) ei; |
443 | | |
444 | | /* ei seems to be unused */ |
445 | 0 | } |
446 | | |
447 | | static int |
448 | | /*ARGSUSED*/ |
449 | | _citrus_UTF7_encoding_module_init(_UTF7EncodingInfo * ei, |
450 | | const void * var , size_t lenvar ) |
451 | 0 | { |
452 | 0 | const char *s; |
453 | 0 | (void) var; |
454 | 0 | (void) lenvar; |
455 | |
|
456 | 0 | memset(ei, 0, sizeof(*ei)); |
457 | |
|
458 | 0 | #define FILL(str, flag) \ |
459 | 0 | do { \ |
460 | 0 | for (s = str; *s != '\0'; s++) \ |
461 | 0 | ei->cell[*s & 0x7f] |= flag; \ |
462 | 0 | } while (/*CONSTCOND*/0) |
463 | |
|
464 | 0 | FILL(base64, (s - base64) + 1); |
465 | 0 | FILL(direct, EI_DIRECT); |
466 | 0 | FILL(option, EI_OPTION); |
467 | 0 | FILL(spaces, EI_SPACE); |
468 | |
|
469 | 0 | return (0); |
470 | 0 | } |
471 | | |
472 | | /* ---------------------------------------------------------------------- |
473 | | * public interface for stdenc |
474 | | */ |
475 | | |
476 | | _CITRUS_STDENC_DECLS(UTF7); |
477 | | _CITRUS_STDENC_DEF_OPS(UTF7); |
478 | | |
479 | | #include "citrus_stdenc_template.h" |