/src/dovecot/src/lib/str-sanitize.c
Line | Count | Source |
1 | | /* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */ |
2 | | |
3 | | #include "lib.h" |
4 | | #include "unichar.h" |
5 | | #include "str.h" |
6 | | #include "str-sanitize.h" |
7 | | #include <ctype.h> |
8 | | |
9 | | static size_t str_sanitize_skip_start(const char *src, size_t max_bytes) |
10 | 12.3k | { |
11 | 12.3k | unichar_t chr; |
12 | 12.3k | size_t i; |
13 | | |
14 | 231k | for (i = 0; i < max_bytes && src[i] != '\0'; ) { |
15 | 218k | int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr); |
16 | 218k | if (len <= 0) |
17 | 160 | break; |
18 | 218k | if (i_iscntrl(src[i])) |
19 | 34 | break; |
20 | 218k | i += len; |
21 | 218k | } |
22 | 12.3k | i_assert(i <= max_bytes); |
23 | 12.3k | return i; |
24 | 12.3k | } |
25 | | |
26 | | |
27 | | static size_t |
28 | | str_sanitize_skip_start_utf8(const char *src, uintmax_t max_chars) |
29 | 0 | { |
30 | 0 | unichar_t chr; |
31 | 0 | uintmax_t c; |
32 | 0 | size_t i; |
33 | |
|
34 | 0 | for (i = 0, c = 0; c < max_chars && src[i] != '\0'; ) { |
35 | 0 | int len = uni_utf8_get_char(src+i, &chr); |
36 | 0 | if (len <= 0) |
37 | 0 | break; |
38 | 0 | if (i_iscntrl(src[i])) |
39 | 0 | break; |
40 | 0 | c++; |
41 | 0 | i += len; |
42 | 0 | } |
43 | 0 | i_assert(c <= max_chars); |
44 | 0 | return i; |
45 | 0 | } |
46 | | |
47 | | static void str_sanitize_truncate_char(string_t *dest, unsigned int initial_pos) |
48 | 4.01k | { |
49 | 4.01k | const unsigned char *data = str_data(dest); |
50 | 4.01k | size_t len = str_len(dest); |
51 | | |
52 | 4.01k | i_assert(len >= initial_pos); |
53 | 4.01k | if (len == initial_pos) |
54 | 0 | return; |
55 | | |
56 | 4.01k | data += initial_pos; |
57 | 4.01k | len -= initial_pos; |
58 | 4.01k | str_truncate(dest, initial_pos + |
59 | 4.01k | uni_utf8_data_truncate(data, len, len-1)); |
60 | 4.01k | } |
61 | | |
62 | | void str_sanitize_append(string_t *dest, const char *src, size_t max_bytes) |
63 | 1.51k | { |
64 | 1.51k | size_t initial_pos = str_len(dest); |
65 | 1.51k | unichar_t chr; |
66 | 1.51k | size_t i; |
67 | | |
68 | 172k | for (i = 0; i < max_bytes && src[i] != '\0'; ) { |
69 | 170k | int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr); |
70 | 170k | if (len == 0) |
71 | 9 | break; /* input ended too early */ |
72 | | |
73 | 170k | if (len < 0) { |
74 | | /* invalid UTF-8 */ |
75 | 1.17k | str_append_c(dest, '?'); |
76 | 1.17k | i++; |
77 | 1.17k | continue; |
78 | 1.17k | } |
79 | 169k | if (i_iscntrl(src[i])) |
80 | 128 | str_append_c(dest, '?'); |
81 | 169k | else |
82 | 169k | str_append_data(dest, src+i, len); |
83 | 169k | i += len; |
84 | 169k | } |
85 | | |
86 | 1.51k | if (src[i] != '\0') { |
87 | 1.34k | if (max_bytes < 3) |
88 | 0 | str_truncate(dest, initial_pos); |
89 | 1.34k | else { |
90 | 5.35k | while (str_len(dest) - initial_pos > max_bytes-3) |
91 | 4.01k | str_sanitize_truncate_char(dest, initial_pos); |
92 | 1.34k | } |
93 | 1.34k | str_append(dest, "..."); |
94 | 1.34k | } |
95 | 1.51k | } |
96 | | |
97 | | void str_sanitize_append_utf8(string_t *dest, const char *src, |
98 | | uintmax_t max_cps) |
99 | 0 | { |
100 | 0 | size_t last_pos = 0; |
101 | 0 | unichar_t chr; |
102 | 0 | uintmax_t c; |
103 | 0 | size_t i; |
104 | |
|
105 | 0 | i_assert(max_cps > 0); |
106 | | |
107 | 0 | for (i = 0, c = 0; c < max_cps && src[i] != '\0'; ) { |
108 | 0 | int len = uni_utf8_get_char(src+i, &chr); |
109 | 0 | if (len == 0) |
110 | 0 | break; /* input ended too early */ |
111 | | |
112 | 0 | last_pos = str_len(dest); |
113 | 0 | if (len < 0) { |
114 | | /* invalid UTF-8 */ |
115 | 0 | str_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8); |
116 | 0 | i++; |
117 | 0 | continue; |
118 | 0 | } |
119 | 0 | if (i_iscntrl(src[i])) |
120 | 0 | str_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8); |
121 | 0 | else |
122 | 0 | str_append_data(dest, src+i, len); |
123 | 0 | i += len; |
124 | 0 | c++; |
125 | 0 | } |
126 | |
|
127 | 0 | if (src[i] != '\0') { |
128 | 0 | str_truncate(dest, last_pos); |
129 | 0 | str_append(dest, UNICODE_HORIZONTAL_ELLIPSIS_CHAR_UTF8); |
130 | 0 | } |
131 | 0 | } |
132 | | |
133 | | const char *str_sanitize(const char *src, size_t max_bytes) |
134 | 12.3k | { |
135 | 12.3k | string_t *str; |
136 | 12.3k | size_t i; |
137 | | |
138 | 12.3k | if (src == NULL) |
139 | 0 | return NULL; |
140 | | |
141 | 12.3k | i = str_sanitize_skip_start(src, max_bytes); |
142 | 12.3k | if (src[i] == '\0') |
143 | 10.8k | return src; |
144 | | |
145 | 1.51k | str = t_str_new(I_MIN(max_bytes, 256)); |
146 | 1.51k | str_sanitize_append(str, src, max_bytes); |
147 | 1.51k | return str_c(str); |
148 | 12.3k | } |
149 | | |
150 | | const char *str_sanitize_utf8(const char *src, uintmax_t max_cps) |
151 | 0 | { |
152 | 0 | string_t *str; |
153 | 0 | size_t i; |
154 | |
|
155 | 0 | if (src == NULL) |
156 | 0 | return NULL; |
157 | | |
158 | 0 | i = str_sanitize_skip_start_utf8(src, max_cps); |
159 | 0 | if (src[i] == '\0') |
160 | 0 | return src; |
161 | | |
162 | 0 | str = t_str_new(I_MIN(max_cps, 256)); |
163 | 0 | str_sanitize_append_utf8(str, src, max_cps); |
164 | 0 | return str_c(str); |
165 | 0 | } |
166 | | |