/src/gnutls/lib/str-unicode.c
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | /*  | 
2  |  |  * Copyright (C) 2016, 2017 Red Hat, Inc.  | 
3  |  |  *  | 
4  |  |  * Author: Nikos Mavrogiannopoulos  | 
5  |  |  *  | 
6  |  |  * This file is part of GnuTLS.  | 
7  |  |  *  | 
8  |  |  * The GnuTLS is free software; you can redistribute it and/or  | 
9  |  |  * modify it under the terms of the GNU Lesser General Public License  | 
10  |  |  * as published by the Free Software Foundation; either version 2.1 of  | 
11  |  |  * the License, or (at your option) any later version.  | 
12  |  |  *  | 
13  |  |  * This library is distributed in the hope that it will be useful, but  | 
14  |  |  * WITHOUT ANY WARRANTY; without even the implied warranty of  | 
15  |  |  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU  | 
16  |  |  * Lesser General Public License for more details.  | 
17  |  |  *  | 
18  |  |  * You should have received a copy of the GNU Lesser General Public License  | 
19  |  |  * along with this program.  If not, see <https://www.gnu.org/licenses/>  | 
20  |  |  *  | 
21  |  |  */  | 
22  |  |  | 
23  |  | #include "gnutls_int.h"  | 
24  |  | #include "errors.h"  | 
25  |  | #include "str.h"  | 
26  |  | #include <uninorm.h>  | 
27  |  | #include <unistr.h>  | 
28  |  | #include <unictype.h>  | 
29  |  |  | 
/* Classifies a code point against the rfc5892#section-2.6 exceptions.
 *
 * Returns 1 when the code point is an exception that is allowed,
 * 0 when it is an exception that is disallowed, and -1 when it is
 * not an exception at all.
 */
inline static int is_allowed_exception(uint32_t ch)
{
  /* exceptions that are explicitly allowed */
  if (ch == 0xDF || ch == 0x03C2 || ch == 0x06FD || ch == 0x06FE ||
      ch == 0x0F0B || ch == 0x3007)
    return 1; /* allowed */

  /* disallowed exceptions forming contiguous ranges */
  if ((ch >= 0x0660 && ch <= 0x0669) || (ch >= 0x06F0 && ch <= 0x06F9) ||
      (ch >= 0x302E && ch <= 0x302F) || (ch >= 0x3031 && ch <= 0x3035))
    return 0; /* disallowed */

  /* disallowed exceptions that stand alone */
  if (ch == 0xB7 || ch == 0x0375 || ch == 0x05F3 || ch == 0x05F4 ||
      ch == 0x0640 || ch == 0x07FA || ch == 0x303B || ch == 0x30FB)
    return 0; /* disallowed */

  return -1; /* not exception */
}
82  |  |  | 
83  |  | /* Checks whether the provided string is in the valid set of FreeFormClass (RFC7564  | 
84  |  |  * as an RFC7613 requirement), and converts all spaces to the ASCII-space. */  | 
85  |  | static int check_for_valid_freeformclass(uint32_t *ucs4, unsigned ucs4_size)  | 
86  | 0  | { | 
87  | 0  |   unsigned i;  | 
88  | 0  |   int rc;  | 
89  | 0  |   uint32_t tmp[4];  | 
90  | 0  |   size_t tmp_size;  | 
91  | 0  |   uint32_t *nrm;  | 
92  | 0  |   uc_general_category_t cat;  | 
93  | 0  |   unsigned is_invalid;  | 
94  |  |  | 
95  |  |   /* make the union of Valid categories, excluding any invalid (i.e., control) */  | 
96  | 0  |   cat = uc_general_category_or(UC_CATEGORY_Ll,  | 
97  | 0  |              UC_CATEGORY_Lu); /* LetterDigits */  | 
98  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Lo);  | 
99  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Nd);  | 
100  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Lm);  | 
101  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Mn);  | 
102  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Mc);  | 
103  | 0  |   cat = uc_general_category_or(cat,  | 
104  | 0  |              UC_CATEGORY_Lt); /* OtherLetterDigits */  | 
105  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Nl);  | 
106  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_No);  | 
107  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Me);  | 
108  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Sm); /* Symbols */  | 
109  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Sc);  | 
110  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_So);  | 
111  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Sk);  | 
112  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Pc); /* Punctuation */  | 
113  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Pd);  | 
114  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Ps);  | 
115  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Pe);  | 
116  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Pi);  | 
117  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Pf);  | 
118  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Po);  | 
119  | 0  |   cat = uc_general_category_or(cat, UC_CATEGORY_Zs); /* Spaces */  | 
120  | 0  |   cat = uc_general_category_and_not(cat,  | 
121  | 0  |             UC_CATEGORY_Cc); /* Not in Control */  | 
122  |  |  | 
123  |  |   /* check for being in the allowed sets in rfc7564#section-4.3 */  | 
124  | 0  |   for (i = 0; i < ucs4_size; i++) { | 
125  | 0  |     is_invalid = 0;  | 
126  |  |  | 
127  |  |     /* Disallowed   | 
128  |  |        o  Old Hangul Jamo characters, i.e., the OldHangulJamo ("I") category | 
129  |  |        (not handled in this code)  | 
130  |  |  | 
131  |  |        o  Control characters, i.e., the Controls ("L") category | 
132  |  |  | 
133  |  |        o  Ignorable characters, i.e., the PrecisIgnorableProperties ("M") | 
134  |  |      */  | 
135  | 0  |     if (uc_is_property_default_ignorable_code_point(ucs4[i]) ||  | 
136  | 0  |         uc_is_property_not_a_character(ucs4[i])) { | 
137  | 0  |       return gnutls_assert_val(GNUTLS_E_INVALID_UTF8_STRING);  | 
138  | 0  |     }  | 
139  |  |  | 
140  |  |     /* Contextual rules - we do not implement them / we reject chars from these sets  | 
141  |  |        o  A number of characters from the Exceptions ("F") category defined | 
142  |  |  | 
143  |  |        o  Joining characters, i.e., the JoinControl ("H") category defined | 
144  |  |      */  | 
145  | 0  |     rc = is_allowed_exception(ucs4[i]);  | 
146  | 0  |     if (rc == 0 || uc_is_property_join_control(ucs4[i]))  | 
147  | 0  |       return gnutls_assert_val(GNUTLS_E_INVALID_UTF8_STRING);  | 
148  |  |  | 
149  | 0  |     if (rc == 1) /* exceptionally allowed, continue */  | 
150  | 0  |       continue;  | 
151  |  |  | 
152  |  |     /* Replace all spaces; an RFC7613 requirement  | 
153  |  |      */  | 
154  | 0  |     if (uc_is_general_category(ucs4[i],  | 
155  | 0  |              UC_CATEGORY_Zs)) /* replace */  | 
156  | 0  |       ucs4[i] = 0x20;  | 
157  |  |  | 
158  |  |     /* Valid */  | 
159  | 0  |     if ((ucs4[i] < 0x21 || ucs4[i] > 0x7E) &&  | 
160  | 0  |         !uc_is_general_category(ucs4[i], cat))  | 
161  | 0  |       is_invalid = 1;  | 
162  |  |  | 
163  |  |     /* HasCompat */  | 
164  | 0  |     if (is_invalid) { | 
165  | 0  |       tmp_size = sizeof(tmp) / sizeof(tmp[0]);  | 
166  | 0  |       nrm = u32_normalize(UNINORM_NFKC, &ucs4[i], 1, tmp,  | 
167  | 0  |               &tmp_size);  | 
168  | 0  |       if (nrm == NULL || (tmp_size == 1 && nrm[0] == ucs4[i]))  | 
169  | 0  |         return gnutls_assert_val(  | 
170  | 0  |           GNUTLS_E_INVALID_UTF8_STRING);  | 
171  | 0  |     }  | 
172  | 0  |   }  | 
173  |  |  | 
174  | 0  |   return 0;  | 
175  | 0  | }  | 
176  |  |  | 
177  |  | /**  | 
178  |  |  * gnutls_utf8_password_normalize:  | 
179  |  |  * @password: contain the UTF-8 formatted password  | 
180  |  |  * @plen: the length of the provided password  | 
181  |  |  * @out: the result in an null-terminated allocated string  | 
182  |  |  * @flags: should be zero  | 
183  |  |  *  | 
184  |  |  * This function will convert the provided UTF-8 password according  | 
185  |  |  * to the normalization rules in RFC7613.  | 
186  |  |  *  | 
187  |  |  * If the flag %GNUTLS_UTF8_IGNORE_ERRS is specified, any UTF-8 encoding  | 
188  |  |  * errors will be ignored, and in that case the output will be a copy of the input.  | 
189  |  |  *  | 
190  |  |  * Returns: %GNUTLS_E_INVALID_UTF8_STRING on invalid UTF-8 data, or 0 on success.  | 
191  |  |  *  | 
192  |  |  * Since: 3.5.7  | 
193  |  |  **/  | 
194  |  | int gnutls_utf8_password_normalize(const unsigned char *password, unsigned plen,  | 
195  |  |            gnutls_datum_t *out, unsigned flags)  | 
196  | 0  | { | 
197  | 0  |   size_t ucs4_size = 0, nrm_size = 0;  | 
198  | 0  |   size_t final_size = 0;  | 
199  | 0  |   uint8_t *final = NULL;  | 
200  | 0  |   uint32_t *ucs4 = NULL;  | 
201  | 0  |   uint32_t *nrm = NULL;  | 
202  | 0  |   uint8_t *nrmu8 = NULL;  | 
203  | 0  |   int ret;  | 
204  |  | 
  | 
205  | 0  |   if (plen == 0) { | 
206  | 0  |     out->data = (uint8_t *)gnutls_strdup(""); | 
207  | 0  |     out->size = 0;  | 
208  | 0  |     if (out->data == NULL)  | 
209  | 0  |       return gnutls_assert_val(GNUTLS_E_MEMORY_ERROR);  | 
210  | 0  |     return 0;  | 
211  | 0  |   }  | 
212  |  |  | 
213  |  |   /* check for invalid UTF-8 */  | 
214  | 0  |   if (u8_check((uint8_t *)password, plen) != NULL) { | 
215  | 0  |     gnutls_assert();  | 
216  | 0  |     if (flags & GNUTLS_UTF8_IGNORE_ERRS) { | 
217  | 0  |     raw_copy:  | 
218  | 0  |       out->data = gnutls_malloc(plen + 1);  | 
219  | 0  |       if (out->data == NULL)  | 
220  | 0  |         return gnutls_assert_val(GNUTLS_E_MEMORY_ERROR);  | 
221  | 0  |       out->size = plen;  | 
222  | 0  |       memcpy(out->data, password, plen);  | 
223  | 0  |       out->data[plen] = 0;  | 
224  | 0  |       return 0;  | 
225  | 0  |     } else { | 
226  | 0  |       return GNUTLS_E_INVALID_UTF8_STRING;  | 
227  | 0  |     }  | 
228  | 0  |   }  | 
229  |  |  | 
230  |  |   /* convert to UTF-32 */  | 
231  | 0  |   ucs4 = u8_to_u32((uint8_t *)password, plen, NULL, &ucs4_size);  | 
232  | 0  |   if (ucs4 == NULL) { | 
233  | 0  |     gnutls_assert();  | 
234  | 0  |     ret = GNUTLS_E_PARSING_ERROR;  | 
235  | 0  |     goto fail;  | 
236  | 0  |   }  | 
237  |  |  | 
238  | 0  |   ret = check_for_valid_freeformclass(ucs4, ucs4_size);  | 
239  | 0  |   if (ret < 0) { | 
240  | 0  |     gnutls_assert();  | 
241  | 0  |     if (flags & GNUTLS_UTF8_IGNORE_ERRS) { | 
242  | 0  |       free(ucs4);  | 
243  | 0  |       goto raw_copy;  | 
244  | 0  |     }  | 
245  | 0  |     if (ret == GNUTLS_E_INVALID_UTF8_STRING)  | 
246  | 0  |       ret = GNUTLS_E_INVALID_PASSWORD_STRING;  | 
247  | 0  |     goto fail;  | 
248  | 0  |   }  | 
249  |  |  | 
250  |  |   /* normalize to NFC */  | 
251  | 0  |   nrm = u32_normalize(UNINORM_NFC, ucs4, ucs4_size, NULL, &nrm_size);  | 
252  | 0  |   if (nrm == NULL) { | 
253  | 0  |     gnutls_assert();  | 
254  | 0  |     ret = GNUTLS_E_INVALID_PASSWORD_STRING;  | 
255  | 0  |     goto fail;  | 
256  | 0  |   }  | 
257  |  |  | 
258  |  |   /* convert back to UTF-8 */  | 
259  | 0  |   final_size = 0;  | 
260  | 0  |   nrmu8 = u32_to_u8(nrm, nrm_size, NULL, &final_size);  | 
261  | 0  |   if (nrmu8 == NULL) { | 
262  | 0  |     gnutls_assert();  | 
263  | 0  |     ret = GNUTLS_E_INVALID_PASSWORD_STRING;  | 
264  | 0  |     goto fail;  | 
265  | 0  |   }  | 
266  |  |  | 
267  |  |   /* copy to output with null terminator */  | 
268  | 0  |   final = gnutls_malloc(final_size + 1);  | 
269  | 0  |   if (final == NULL) { | 
270  | 0  |     gnutls_assert();  | 
271  | 0  |     ret = GNUTLS_E_MEMORY_ERROR;  | 
272  | 0  |     goto fail;  | 
273  | 0  |   }  | 
274  |  |  | 
275  | 0  |   memcpy(final, nrmu8, final_size);  | 
276  | 0  |   final[final_size] = 0;  | 
277  |  | 
  | 
278  | 0  |   free(ucs4);  | 
279  | 0  |   free(nrm);  | 
280  | 0  |   free(nrmu8);  | 
281  |  | 
  | 
282  | 0  |   out->data = final;  | 
283  | 0  |   out->size = final_size;  | 
284  |  | 
  | 
285  | 0  |   return 0;  | 
286  |  |  | 
287  | 0  | fail:  | 
288  | 0  |   gnutls_free(final);  | 
289  | 0  |   free(ucs4);  | 
290  | 0  |   free(nrm);  | 
291  | 0  |   free(nrmu8);  | 
292  | 0  |   return ret;  | 
293  | 0  | }  |