/src/gnutls/lib/str-unicode.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (C) 2016, 2017 Red Hat, Inc. |
3 | | * |
4 | | * Author: Nikos Mavrogiannopoulos |
5 | | * |
6 | | * This file is part of GnuTLS. |
7 | | * |
8 | | * The GnuTLS is free software; you can redistribute it and/or |
9 | | * modify it under the terms of the GNU Lesser General Public License |
10 | | * as published by the Free Software Foundation; either version 2.1 of |
11 | | * the License, or (at your option) any later version. |
12 | | * |
13 | | * This library is distributed in the hope that it will be useful, but |
14 | | * WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 | | * Lesser General Public License for more details. |
17 | | * |
18 | | * You should have received a copy of the GNU Lesser General Public License |
19 | | * along with this program. If not, see <https://www.gnu.org/licenses/> |
20 | | * |
21 | | */ |
22 | | |
23 | | #include "gnutls_int.h" |
24 | | #include "errors.h" |
25 | | #include "str.h" |
26 | | #include <uninorm.h> |
27 | | #include <unistr.h> |
28 | | #include <unictype.h> |
29 | | |
/* Classifies a code point against the exceptions of rfc5892#section-2.6.
 *
 * Returns 1 when the code point is exceptionally allowed, 0 when it is
 * one of the exceptions that is rejected, and -1 when the code point is
 * not listed as an exception at all.
 */
inline static int is_allowed_exception(uint32_t ch)
{
	/* exceptionally allowed code points */
	if (ch == 0xDF || ch == 0x03C2 || ch == 0x06FD || ch == 0x06FE ||
	    ch == 0x0F0B || ch == 0x3007)
		return 1;	/* allowed */

	/* isolated rejected code points */
	if (ch == 0xB7 || ch == 0x0375 || ch == 0x05F3 || ch == 0x05F4 ||
	    ch == 0x30FB || ch == 0x0640 || ch == 0x07FA ||
	    ch == 0x302E || ch == 0x302F || ch == 0x303B)
		return 0;	/* disallowed */

	/* rejected ranges: 0660..0669, 06F0..06F9 and 3031..3035 */
	if ((ch >= 0x0660 && ch <= 0x0669) ||
	    (ch >= 0x06F0 && ch <= 0x06F9) ||
	    (ch >= 0x3031 && ch <= 0x3035))
		return 0;	/* disallowed */

	return -1;		/* not exception */
}
82 | | |
83 | | /* Checks whether the provided string is in the valid set of FreeFormClass (RFC7564 |
84 | | * as an RFC7613 requirement), and converts all spaces to the ASCII-space. */ |
85 | | static int check_for_valid_freeformclass(uint32_t * ucs4, unsigned ucs4_size) |
86 | 0 | { |
87 | 0 | unsigned i; |
88 | 0 | int rc; |
89 | 0 | uint32_t tmp[4]; |
90 | 0 | size_t tmp_size; |
91 | 0 | uint32_t *nrm; |
92 | 0 | uc_general_category_t cat; |
93 | 0 | unsigned is_invalid; |
94 | | |
95 | | /* make the union of Valid categories, excluding any invalid (i.e., control) */ |
96 | 0 | cat = uc_general_category_or(UC_CATEGORY_Ll, UC_CATEGORY_Lu); /* LetterDigits */ |
97 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Lo); |
98 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Nd); |
99 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Lm); |
100 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Mn); |
101 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Mc); |
102 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Lt); /* OtherLetterDigits */ |
103 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Nl); |
104 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_No); |
105 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Me); |
106 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Sm); /* Symbols */ |
107 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Sc); |
108 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_So); |
109 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Sk); |
110 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Pc); /* Punctuation */ |
111 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Pd); |
112 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Ps); |
113 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Pe); |
114 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Pi); |
115 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Pf); |
116 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Po); |
117 | 0 | cat = uc_general_category_or(cat, UC_CATEGORY_Zs); /* Spaces */ |
118 | 0 | cat = uc_general_category_and_not(cat, UC_CATEGORY_Cc); /* Not in Control */ |
119 | | |
120 | | /* check for being in the allowed sets in rfc7564#section-4.3 */ |
121 | 0 | for (i = 0; i < ucs4_size; i++) { |
122 | 0 | is_invalid = 0; |
123 | | |
124 | | /* Disallowed |
125 | | o Old Hangul Jamo characters, i.e., the OldHangulJamo ("I") category |
126 | | (not handled in this code) |
127 | | |
128 | | o Control characters, i.e., the Controls ("L") category |
129 | | |
130 | | o Ignorable characters, i.e., the PrecisIgnorableProperties ("M") |
131 | | */ |
132 | 0 | if (uc_is_property_default_ignorable_code_point(ucs4[i]) || |
133 | 0 | uc_is_property_not_a_character(ucs4[i])) { |
134 | 0 | return gnutls_assert_val(GNUTLS_E_INVALID_UTF8_STRING); |
135 | 0 | } |
136 | | |
137 | | /* Contextual rules - we do not implement them / we reject chars from these sets |
138 | | o A number of characters from the Exceptions ("F") category defined |
139 | | |
140 | | o Joining characters, i.e., the JoinControl ("H") category defined |
141 | | */ |
142 | 0 | rc = is_allowed_exception(ucs4[i]); |
143 | 0 | if (rc == 0 || uc_is_property_join_control(ucs4[i])) |
144 | 0 | return gnutls_assert_val(GNUTLS_E_INVALID_UTF8_STRING); |
145 | | |
146 | 0 | if (rc == 1) /* exceptionally allowed, continue */ |
147 | 0 | continue; |
148 | | |
149 | | /* Replace all spaces; an RFC7613 requirement |
150 | | */ |
151 | 0 | if (uc_is_general_category(ucs4[i], UC_CATEGORY_Zs)) /* replace */ |
152 | 0 | ucs4[i] = 0x20; |
153 | | |
154 | | /* Valid */ |
155 | 0 | if ((ucs4[i] < 0x21 || ucs4[i] > 0x7E) |
156 | 0 | && !uc_is_general_category(ucs4[i], cat)) |
157 | 0 | is_invalid = 1; |
158 | | |
159 | | /* HasCompat */ |
160 | 0 | if (is_invalid) { |
161 | 0 | tmp_size = sizeof(tmp) / sizeof(tmp[0]); |
162 | 0 | nrm = |
163 | 0 | u32_normalize(UNINORM_NFKC, &ucs4[i], 1, tmp, |
164 | 0 | &tmp_size); |
165 | 0 | if (nrm == NULL || (tmp_size == 1 && nrm[0] == ucs4[i])) |
166 | 0 | return |
167 | 0 | gnutls_assert_val |
168 | 0 | (GNUTLS_E_INVALID_UTF8_STRING); |
169 | 0 | } |
170 | 0 | } |
171 | | |
172 | 0 | return 0; |
173 | 0 | } |
174 | | |
175 | | /** |
176 | | * gnutls_utf8_password_normalize: |
177 | | * @password: contain the UTF-8 formatted password |
178 | | * @plen: the length of the provided password |
179 | | * @out: the result in an null-terminated allocated string |
180 | | * @flags: should be zero |
181 | | * |
182 | | * This function will convert the provided UTF-8 password according |
183 | | * to the normalization rules in RFC7613. |
184 | | * |
185 | | * If the flag %GNUTLS_UTF8_IGNORE_ERRS is specified, any UTF-8 encoding |
186 | | * errors will be ignored, and in that case the output will be a copy of the input. |
187 | | * |
188 | | * Returns: %GNUTLS_E_INVALID_UTF8_STRING on invalid UTF-8 data, or 0 on success. |
189 | | * |
190 | | * Since: 3.5.7 |
191 | | **/ |
192 | | int gnutls_utf8_password_normalize(const unsigned char *password, unsigned plen, |
193 | | gnutls_datum_t * out, unsigned flags) |
194 | 0 | { |
195 | 0 | size_t ucs4_size = 0, nrm_size = 0; |
196 | 0 | size_t final_size = 0; |
197 | 0 | uint8_t *final = NULL; |
198 | 0 | uint32_t *ucs4 = NULL; |
199 | 0 | uint32_t *nrm = NULL; |
200 | 0 | uint8_t *nrmu8 = NULL; |
201 | 0 | int ret; |
202 | |
|
203 | 0 | if (plen == 0) { |
204 | 0 | out->data = (uint8_t *) gnutls_strdup(""); |
205 | 0 | out->size = 0; |
206 | 0 | if (out->data == NULL) |
207 | 0 | return gnutls_assert_val(GNUTLS_E_MEMORY_ERROR); |
208 | 0 | return 0; |
209 | 0 | } |
210 | | |
211 | | /* check for invalid UTF-8 */ |
212 | 0 | if (u8_check((uint8_t *) password, plen) != NULL) { |
213 | 0 | gnutls_assert(); |
214 | 0 | if (flags & GNUTLS_UTF8_IGNORE_ERRS) { |
215 | 0 | raw_copy: |
216 | 0 | out->data = gnutls_malloc(plen + 1); |
217 | 0 | if (out->data == NULL) |
218 | 0 | return gnutls_assert_val(GNUTLS_E_MEMORY_ERROR); |
219 | 0 | out->size = plen; |
220 | 0 | memcpy(out->data, password, plen); |
221 | 0 | out->data[plen] = 0; |
222 | 0 | return 0; |
223 | 0 | } else { |
224 | 0 | return GNUTLS_E_INVALID_UTF8_STRING; |
225 | 0 | } |
226 | 0 | } |
227 | | |
228 | | /* convert to UTF-32 */ |
229 | 0 | ucs4 = u8_to_u32((uint8_t *) password, plen, NULL, &ucs4_size); |
230 | 0 | if (ucs4 == NULL) { |
231 | 0 | gnutls_assert(); |
232 | 0 | ret = GNUTLS_E_PARSING_ERROR; |
233 | 0 | goto fail; |
234 | 0 | } |
235 | | |
236 | 0 | ret = check_for_valid_freeformclass(ucs4, ucs4_size); |
237 | 0 | if (ret < 0) { |
238 | 0 | gnutls_assert(); |
239 | 0 | if (flags & GNUTLS_UTF8_IGNORE_ERRS) { |
240 | 0 | free(ucs4); |
241 | 0 | goto raw_copy; |
242 | 0 | } |
243 | 0 | if (ret == GNUTLS_E_INVALID_UTF8_STRING) |
244 | 0 | ret = GNUTLS_E_INVALID_PASSWORD_STRING; |
245 | 0 | goto fail; |
246 | 0 | } |
247 | | |
248 | | /* normalize to NFC */ |
249 | 0 | nrm = u32_normalize(UNINORM_NFC, ucs4, ucs4_size, NULL, &nrm_size); |
250 | 0 | if (nrm == NULL) { |
251 | 0 | gnutls_assert(); |
252 | 0 | ret = GNUTLS_E_INVALID_PASSWORD_STRING; |
253 | 0 | goto fail; |
254 | 0 | } |
255 | | |
256 | | /* convert back to UTF-8 */ |
257 | 0 | final_size = 0; |
258 | 0 | nrmu8 = u32_to_u8(nrm, nrm_size, NULL, &final_size); |
259 | 0 | if (nrmu8 == NULL) { |
260 | 0 | gnutls_assert(); |
261 | 0 | ret = GNUTLS_E_INVALID_PASSWORD_STRING; |
262 | 0 | goto fail; |
263 | 0 | } |
264 | | |
265 | | /* copy to output with null terminator */ |
266 | 0 | final = gnutls_malloc(final_size + 1); |
267 | 0 | if (final == NULL) { |
268 | 0 | gnutls_assert(); |
269 | 0 | ret = GNUTLS_E_MEMORY_ERROR; |
270 | 0 | goto fail; |
271 | 0 | } |
272 | | |
273 | 0 | memcpy(final, nrmu8, final_size); |
274 | 0 | final[final_size] = 0; |
275 | |
|
276 | 0 | free(ucs4); |
277 | 0 | free(nrm); |
278 | 0 | free(nrmu8); |
279 | |
|
280 | 0 | out->data = final; |
281 | 0 | out->size = final_size; |
282 | |
|
283 | 0 | return 0; |
284 | | |
285 | 0 | fail: |
286 | 0 | gnutls_free(final); |
287 | 0 | free(ucs4); |
288 | 0 | free(nrm); |
289 | 0 | free(nrmu8); |
290 | 0 | return ret; |
291 | 0 | } |