/src/dovecot/src/lib-charset/charset-utf8.c
Line | Count | Source |
1 | | /* Copyright (c) 2002-2018 Dovecot authors, see the included COPYING file */ |
2 | | |
3 | | #include "lib.h" |
4 | | #include "buffer.h" |
5 | | #include "str.h" |
6 | | #include "charset-utf8-private.h" |
7 | | |
8 | | #include <ctype.h> |
9 | | |
/* Function table that every charset_to_utf8_*() wrapper in this file
   dispatches through. It is initialized to point at charset_iconv. */
const struct charset_utf8_vfuncs *charset_utf8_vfuncs = &charset_iconv;
11 | | |
/* Returns true when the charset name equals, ignoring case, one of
   "us-ascii", "ascii", "UTF-8" or "UTF8". Any other name (including the
   empty string) yields false. The name must not be NULL. */
bool charset_is_utf8(const char *charset)
{
	static const char *const accepted_names[] = {
		"us-ascii",
		"ascii",
		"UTF-8",
		"UTF8"
	};
	const size_t count = sizeof(accepted_names) / sizeof(accepted_names[0]);
	size_t i;

	/* Stop at the first name that matches; i == count means no match. */
	for (i = 0; i < count; i++) {
		if (strcasecmp(charset, accepted_names[i]) == 0)
			break;
	}
	return i < count;
}
19 | | |
20 | | int charset_to_utf8_str(const char *charset, normalizer_func_t *normalizer, |
21 | | const char *input, string_t *output, |
22 | | enum charset_result *result_r) |
23 | 22.9k | { |
24 | 22.9k | struct charset_translation *t; |
25 | 22.9k | size_t len = strlen(input); |
26 | | |
27 | 22.9k | if (charset_to_utf8_begin(charset, normalizer, &t) < 0) |
28 | 10.9k | return -1; |
29 | | |
30 | 11.9k | *result_r = charset_to_utf8(t, (const unsigned char *)input, |
31 | 11.9k | &len, output); |
32 | 11.9k | charset_to_utf8_end(&t); |
33 | 11.9k | return 0; |
34 | 22.9k | } |
35 | | |
36 | | struct charset_translation * |
37 | | charset_utf8_to_utf8_begin(normalizer_func_t *normalizer) |
38 | 684 | { |
39 | 684 | struct charset_translation *trans; |
40 | | |
41 | 684 | if (charset_to_utf8_begin("UTF-8", normalizer, &trans) < 0) |
42 | 0 | i_unreached(); |
43 | 684 | return trans; |
44 | 684 | } |
45 | | |
46 | | enum charset_result |
47 | | charset_utf8_to_utf8(normalizer_func_t *normalizer, |
48 | | const unsigned char *src, size_t *src_size, buffer_t *dest) |
49 | 7.35M | { |
50 | 7.35M | enum charset_result res = CHARSET_RET_OK; |
51 | 7.35M | size_t pos; |
52 | | |
53 | 7.35M | uni_utf8_partial_strlen_n(src, *src_size, &pos); |
54 | 7.35M | if (pos < *src_size) { |
55 | 4.88k | i_assert(*src_size - pos <= CHARSET_MAX_PENDING_BUF_SIZE); |
56 | 4.88k | *src_size = pos; |
57 | 4.88k | res = CHARSET_RET_INCOMPLETE_INPUT; |
58 | 4.88k | } |
59 | | |
60 | 7.35M | if (normalizer != NULL) { |
61 | 1.22M | if (normalizer(src, *src_size, dest) < 0) |
62 | 8.25k | return CHARSET_RET_INVALID_INPUT; |
63 | 6.12M | } else if (!uni_utf8_get_valid_data(src, *src_size, dest)) { |
64 | 5.91k | return CHARSET_RET_INVALID_INPUT; |
65 | 6.12M | } else { |
66 | 6.12M | buffer_append(dest, src, *src_size); |
67 | 6.12M | } |
68 | 7.33M | return res; |
69 | 7.35M | } |
70 | | |
71 | | int charset_to_utf8_begin(const char *charset, normalizer_func_t *normalizer, |
72 | | struct charset_translation **t_r) |
73 | 407k | { |
74 | 407k | return charset_utf8_vfuncs->to_utf8_begin(charset, normalizer, t_r); |
75 | 407k | } |
76 | | |
77 | | void charset_to_utf8_end(struct charset_translation **_t) |
78 | 393k | { |
79 | 393k | struct charset_translation *t = *_t; |
80 | | |
81 | 393k | *_t = NULL; |
82 | 393k | charset_utf8_vfuncs->to_utf8_end(t); |
83 | 393k | } |
84 | | |
85 | | void charset_to_utf8_reset(struct charset_translation *t) |
86 | 784 | { |
87 | 784 | charset_utf8_vfuncs->to_utf8_reset(t); |
88 | 784 | } |
89 | | |
90 | | enum charset_result |
91 | | charset_to_utf8(struct charset_translation *t, |
92 | | const unsigned char *src, size_t *src_size, buffer_t *dest) |
93 | 56.2k | { |
94 | 56.2k | return charset_utf8_vfuncs->to_utf8(t, src, src_size, dest); |
95 | 56.2k | } |