/src/dovecot/src/lib-charset/charset-utf8.c
Line | Count | Source |
1 | | /* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */ |
2 | | |
3 | | #include "lib.h" |
4 | | #include "buffer.h" |
5 | | #include "str.h" |
6 | | #include "charset-utf8-private.h" |
7 | | |
8 | | #include <ctype.h> |
9 | | |
/* Backend function table used by the charset_to_utf8_*() dispatchers in
   this file. It is initialized to point at charset_iconv, which is declared
   outside this file (presumably the iconv-based backend - confirm in
   charset-utf8-private.h). */
const struct charset_utf8_vfuncs *charset_utf8_vfuncs = &charset_iconv;
11 | | |
/* Returns true if the given charset name is one of "us-ascii", "ascii",
   "UTF-8" or "UTF8". The comparison is case-insensitive. */
bool charset_is_utf8(const char *charset)
{
	static const char *const utf8_names[] = {
		"us-ascii",
		"ascii",
		"UTF-8",
		"UTF8"
	};
	const size_t names_count = sizeof(utf8_names) / sizeof(utf8_names[0]);
	size_t i;

	for (i = 0; i < names_count; i++) {
		if (strcasecmp(charset, utf8_names[i]) == 0)
			break;
	}
	/* the loop stopped early only if some name matched */
	return i < names_count;
}
19 | | |
20 | | int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer, |
21 | | const char *input, string_t *output, |
22 | | enum charset_result *result_r) |
23 | 13.2k | { |
24 | 13.2k | struct charset_translation *t; |
25 | 13.2k | size_t len = strlen(input); |
26 | | |
27 | 13.2k | if (charset_to_utf8_begin(charset, normalizer, &t) < 0) |
28 | 6.00k | return -1; |
29 | | |
30 | 7.27k | *result_r = charset_to_utf8(t, (const unsigned char *)input, |
31 | 7.27k | &len, output); |
32 | 7.27k | charset_to_utf8_end(&t); |
33 | 7.27k | return 0; |
34 | 13.2k | } |
35 | | |
36 | | struct charset_translation * |
37 | | charset_utf8_to_utf8_begin(normalizer_func_t *normalizer) |
38 | 0 | { |
39 | 0 | struct charset_translation *trans; |
40 | |
|
41 | 0 | if (charset_to_utf8_begin("UTF-8", normalizer, &trans) < 0) |
42 | 0 | i_unreached(); |
43 | 0 | return trans; |
44 | 0 | } |
45 | | |
46 | | enum charset_result |
47 | | charset_utf8_to_utf8(normalizer_func_t *normalizer, |
48 | | const unsigned char *src, size_t *src_size, buffer_t *dest) |
49 | 278k | { |
50 | 278k | enum charset_result res = CHARSET_RET_OK; |
51 | 278k | size_t pos; |
52 | | |
53 | 278k | uni_utf8_partial_strlen_n(src, *src_size, &pos); |
54 | 278k | if (pos < *src_size) { |
55 | 243 | i_assert(*src_size - pos <= CHARSET_MAX_PENDING_BUF_SIZE); |
56 | 243 | *src_size = pos; |
57 | 243 | res = CHARSET_RET_INCOMPLETE_INPUT; |
58 | 243 | } |
59 | | |
60 | 278k | if (normalizer != NULL) { |
61 | 0 | if (normalizer(src, *src_size, dest) < 0) |
62 | 0 | return CHARSET_RET_INVALID_INPUT; |
63 | 278k | } else if (!uni_utf8_get_valid_data(src, *src_size, dest)) { |
64 | 2.84k | return CHARSET_RET_INVALID_INPUT; |
65 | 275k | } else { |
66 | 275k | buffer_append(dest, src, *src_size); |
67 | 275k | } |
68 | 275k | return res; |
69 | 278k | } |
70 | | |
71 | | int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer, |
72 | | struct charset_translation **t_r) |
73 | 13.2k | { |
74 | 13.2k | return charset_utf8_vfuncs->to_utf8_begin(charset, normalizer, t_r); |
75 | 13.2k | } |
76 | | |
77 | | void charset_to_utf8_end(struct charset_translation **_t) |
78 | 7.27k | { |
79 | 7.27k | struct charset_translation *t = *_t; |
80 | | |
81 | 7.27k | *_t = NULL; |
82 | 7.27k | charset_utf8_vfuncs->to_utf8_end(t); |
83 | 7.27k | } |
84 | | |
85 | | void charset_to_utf8_reset(struct charset_translation *t) |
86 | 0 | { |
87 | 0 | charset_utf8_vfuncs->to_utf8_reset(t); |
88 | 0 | } |
89 | | |
90 | | enum charset_result |
91 | | charset_to_utf8(struct charset_translation *t, |
92 | | const unsigned char *src, size_t *src_size, buffer_t *dest) |
93 | 7.27k | { |
94 | 7.27k | return charset_utf8_vfuncs->to_utf8(t, src, src_size, dest); |
95 | 7.27k | } |