Line | Count | Source (jump to first uncovered line) |
1 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
2 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
3 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
4 | | |
5 | | #include "seccomon.h" |
6 | | #include "secport.h" |
7 | | |
8 | | /* |
9 | | * From RFC 2044: |
10 | | * |
11 | | * UCS-4 range (hex.) UTF-8 octet sequence (binary) |
12 | | * 0000 0000-0000 007F 0xxxxxxx |
13 | | * 0000 0080-0000 07FF 110xxxxx 10xxxxxx |
14 | | * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx |
15 | | * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
16 | | * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
17 | | * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx |
18 | | */ |
19 | | |
20 | | /* |
21 | | * From http://www.imc.org/draft-hoffman-utf16 |
22 | | * |
23 | | * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000 |
24 | | * |
25 | | * U' = yyyyyyyyyyxxxxxxxxxx |
26 | | * W1 = 110110yyyyyyyyyy |
27 | | * W2 = 110111xxxxxxxxxx |
28 | | */ |
29 | | |
30 | | /* |
31 | | * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit |
32 | | * character values. If you wish to use this code for working with |
33 | | * host byte order values, define the following: |
34 | | * |
35 | | * #if IS_BIG_ENDIAN |
36 | | * #define L_0 0 |
37 | | * #define L_1 1 |
38 | | * #define L_2 2 |
39 | | * #define L_3 3 |
40 | | * #define H_0 0 |
41 | | * #define H_1 1 |
42 | | * #else / * not everyone has elif * / |
43 | | * #if IS_LITTLE_ENDIAN |
44 | | * #define L_0 3 |
45 | | * #define L_1 2 |
46 | | * #define L_2 1 |
47 | | * #define L_3 0 |
48 | | * #define H_0 1 |
49 | | * #define H_1 0 |
50 | | * #else |
51 | | * #error "PDP and NUXI support deferred" |
52 | | * #endif / * IS_LITTLE_ENDIAN * / |
53 | | * #endif / * IS_BIG_ENDIAN * / |
54 | | */ |
55 | | |
/*
 * Byte offsets inside one 4-byte UCS-4 unit.  With these values L_0 is
 * the first byte of the unit and receives the most significant bits
 * (network byte order); L_3 is the last byte and holds the low 8 bits.
 */
#define L_0 0
#define L_1 1
#define L_2 2
#define L_3 3

/*
 * Byte offsets inside one 2-byte UCS-2/UTF-16 unit: H_0 is the high
 * byte, H_1 the low byte (network byte order).
 */
#define H_0 0
#define H_1 1

/* Sentinel returned by the UTF-8 reader when the input is not valid. */
#define BAD_UTF8 ((PRUint32)-1)
64 | | |
65 | | /* |
66 | | * Parse a single UTF-8 character per the spec. in section 3.9 (D36) |
67 | | * of Unicode 4.0.0. |
68 | | * |
69 | | * Parameters: |
70 | | * index - Points to the byte offset in inBuf of character to read. On success, |
71 | | * updated to the offset of the following character. |
72 | | * inBuf - Input buffer, UTF-8 encoded |
73 | | * inbufLen - Length of input buffer, in bytes. |
74 | | * |
75 | | * Returns: |
76 | | * Success - The UCS4 encoded character |
77 | | * Failure - BAD_UTF8 |
78 | | */ |
79 | | static PRUint32 |
80 | | sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen) |
81 | 0 | { |
82 | 0 | PRUint32 result; |
83 | 0 | unsigned int i = *index; |
84 | 0 | int bytes_left; |
85 | 0 | PRUint32 min_value; |
86 | |
|
87 | 0 | PORT_Assert(i < inBufLen); |
88 | |
|
89 | 0 | if ((inBuf[i] & 0x80) == 0x00) { |
90 | 0 | result = inBuf[i++]; |
91 | 0 | bytes_left = 0; |
92 | 0 | min_value = 0; |
93 | 0 | } else if ((inBuf[i] & 0xE0) == 0xC0) { |
94 | 0 | result = inBuf[i++] & 0x1F; |
95 | 0 | bytes_left = 1; |
96 | 0 | min_value = 0x80; |
97 | 0 | } else if ((inBuf[i] & 0xF0) == 0xE0) { |
98 | 0 | result = inBuf[i++] & 0x0F; |
99 | 0 | bytes_left = 2; |
100 | 0 | min_value = 0x800; |
101 | 0 | } else if ((inBuf[i] & 0xF8) == 0xF0) { |
102 | 0 | result = inBuf[i++] & 0x07; |
103 | 0 | bytes_left = 3; |
104 | 0 | min_value = 0x10000; |
105 | 0 | } else { |
106 | 0 | return BAD_UTF8; |
107 | 0 | } |
108 | | |
109 | 0 | while (bytes_left--) { |
110 | 0 | if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) |
111 | 0 | return BAD_UTF8; |
112 | 0 | result = (result << 6) | (inBuf[i++] & 0x3F); |
113 | 0 | } |
114 | | |
115 | | /* Check for overlong sequences, surrogates, and outside unicode range */ |
116 | 0 | if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) { |
117 | 0 | return BAD_UTF8; |
118 | 0 | } |
119 | | |
120 | 0 | *index = i; |
121 | 0 | return result; |
122 | 0 | } |
123 | | |
124 | | PRBool |
125 | | sec_port_ucs4_utf8_conversion_function( |
126 | | PRBool toUnicode, |
127 | | unsigned char *inBuf, |
128 | | unsigned int inBufLen, |
129 | | unsigned char *outBuf, |
130 | | unsigned int maxOutBufLen, |
131 | | unsigned int *outBufLen) |
132 | 0 | { |
133 | 0 | PORT_Assert((unsigned int *)NULL != outBufLen); |
134 | |
|
135 | 0 | if (toUnicode) { |
136 | 0 | unsigned int i, len = 0; |
137 | |
|
138 | 0 | for (i = 0; i < inBufLen;) { |
139 | 0 | if ((inBuf[i] & 0x80) == 0x00) |
140 | 0 | i += 1; |
141 | 0 | else if ((inBuf[i] & 0xE0) == 0xC0) |
142 | 0 | i += 2; |
143 | 0 | else if ((inBuf[i] & 0xF0) == 0xE0) |
144 | 0 | i += 3; |
145 | 0 | else if ((inBuf[i] & 0xF8) == 0xF0) |
146 | 0 | i += 4; |
147 | 0 | else |
148 | 0 | return PR_FALSE; |
149 | | |
150 | 0 | len += 4; |
151 | 0 | } |
152 | | |
153 | 0 | if (len > maxOutBufLen) { |
154 | 0 | *outBufLen = len; |
155 | 0 | return PR_FALSE; |
156 | 0 | } |
157 | | |
158 | 0 | len = 0; |
159 | |
|
160 | 0 | for (i = 0; i < inBufLen;) { |
161 | 0 | PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); |
162 | |
|
163 | 0 | if (ucs4 == BAD_UTF8) |
164 | 0 | return PR_FALSE; |
165 | | |
166 | 0 | outBuf[len + L_0] = 0x00; |
167 | 0 | outBuf[len + L_1] = (unsigned char)(ucs4 >> 16); |
168 | 0 | outBuf[len + L_2] = (unsigned char)(ucs4 >> 8); |
169 | 0 | outBuf[len + L_3] = (unsigned char)ucs4; |
170 | |
|
171 | 0 | len += 4; |
172 | 0 | } |
173 | | |
174 | 0 | *outBufLen = len; |
175 | 0 | return PR_TRUE; |
176 | 0 | } else { |
177 | 0 | unsigned int i, len = 0; |
178 | 0 | PORT_Assert((inBufLen % 4) == 0); |
179 | 0 | if ((inBufLen % 4) != 0) { |
180 | 0 | *outBufLen = 0; |
181 | 0 | return PR_FALSE; |
182 | 0 | } |
183 | | |
184 | 0 | for (i = 0; i < inBufLen; i += 4) { |
185 | 0 | if ((inBuf[i + L_0] > 0x00) || (inBuf[i + L_1] > 0x10)) { |
186 | 0 | *outBufLen = 0; |
187 | 0 | return PR_FALSE; |
188 | 0 | } else if (inBuf[i + L_1] >= 0x01) |
189 | 0 | len += 4; |
190 | 0 | else if (inBuf[i + L_2] >= 0x08) |
191 | 0 | len += 3; |
192 | 0 | else if ((inBuf[i + L_2] > 0x00) || (inBuf[i + L_3] >= 0x80)) |
193 | 0 | len += 2; |
194 | 0 | else |
195 | 0 | len += 1; |
196 | 0 | } |
197 | | |
198 | 0 | if (len > maxOutBufLen) { |
199 | 0 | *outBufLen = len; |
200 | 0 | return PR_FALSE; |
201 | 0 | } |
202 | | |
203 | 0 | len = 0; |
204 | |
|
205 | 0 | for (i = 0; i < inBufLen; i += 4) { |
206 | 0 | if (inBuf[i + L_1] >= 0x01) { |
207 | | /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
208 | | /* 00000000 000abcde fghijklm nopqrstu -> |
209 | | 11110abc 10defghi 10jklmno 10pqrstu */ |
210 | |
|
211 | 0 | outBuf[len + 0] = 0xF0 | ((inBuf[i + L_1] & 0x1C) >> 2); |
212 | 0 | outBuf[len + 1] = 0x80 | ((inBuf[i + L_1] & 0x03) << 4) | ((inBuf[i + L_2] & 0xF0) >> 4); |
213 | 0 | outBuf[len + 2] = 0x80 | ((inBuf[i + L_2] & 0x0F) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6); |
214 | 0 | outBuf[len + 3] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0); |
215 | |
|
216 | 0 | len += 4; |
217 | 0 | } else if (inBuf[i + L_2] >= 0x08) { |
218 | | /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ |
219 | | /* 00000000 00000000 abcdefgh ijklmnop -> |
220 | | 1110abcd 10efghij 10klmnop */ |
221 | |
|
222 | 0 | outBuf[len + 0] = 0xE0 | ((inBuf[i + L_2] & 0xF0) >> 4); |
223 | 0 | outBuf[len + 1] = 0x80 | ((inBuf[i + L_2] & 0x0F) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6); |
224 | 0 | outBuf[len + 2] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0); |
225 | |
|
226 | 0 | len += 3; |
227 | 0 | } else if ((inBuf[i + L_2] > 0x00) || (inBuf[i + L_3] >= 0x80)) { |
228 | | /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ |
229 | | /* 00000000 00000000 00000abc defghijk -> |
230 | | 110abcde 10fghijk */ |
231 | |
|
232 | 0 | outBuf[len + 0] = 0xC0 | ((inBuf[i + L_2] & 0x07) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6); |
233 | 0 | outBuf[len + 1] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0); |
234 | |
|
235 | 0 | len += 2; |
236 | 0 | } else { |
237 | | /* 0000 0000-0000 007F -> 0xxxxxx */ |
238 | | /* 00000000 00000000 00000000 0abcdefg -> |
239 | | 0abcdefg */ |
240 | |
|
241 | 0 | outBuf[len + 0] = (inBuf[i + L_3] & 0x7F); |
242 | |
|
243 | 0 | len += 1; |
244 | 0 | } |
245 | 0 | } |
246 | |
|
247 | 0 | *outBufLen = len; |
248 | 0 | return PR_TRUE; |
249 | 0 | } |
250 | 0 | } |
251 | | |
252 | | PRBool |
253 | | sec_port_ucs2_utf8_conversion_function( |
254 | | PRBool toUnicode, |
255 | | unsigned char *inBuf, |
256 | | unsigned int inBufLen, |
257 | | unsigned char *outBuf, |
258 | | unsigned int maxOutBufLen, |
259 | | unsigned int *outBufLen) |
260 | 0 | { |
261 | 0 | PORT_Assert((unsigned int *)NULL != outBufLen); |
262 | |
|
263 | 0 | if (toUnicode) { |
264 | 0 | unsigned int i, len = 0; |
265 | |
|
266 | 0 | for (i = 0; i < inBufLen;) { |
267 | 0 | if ((inBuf[i] & 0x80) == 0x00) { |
268 | 0 | i += 1; |
269 | 0 | len += 2; |
270 | 0 | } else if ((inBuf[i] & 0xE0) == 0xC0) { |
271 | 0 | i += 2; |
272 | 0 | len += 2; |
273 | 0 | } else if ((inBuf[i] & 0xF0) == 0xE0) { |
274 | 0 | i += 3; |
275 | 0 | len += 2; |
276 | 0 | } else if ((inBuf[i] & 0xF8) == 0xF0) { |
277 | 0 | i += 4; |
278 | 0 | len += 4; |
279 | 0 | } else |
280 | 0 | return PR_FALSE; |
281 | 0 | } |
282 | | |
283 | 0 | if (len > maxOutBufLen) { |
284 | 0 | *outBufLen = len; |
285 | 0 | return PR_FALSE; |
286 | 0 | } |
287 | | |
288 | 0 | len = 0; |
289 | |
|
290 | 0 | for (i = 0; i < inBufLen;) { |
291 | 0 | PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); |
292 | |
|
293 | 0 | if (ucs4 == BAD_UTF8) |
294 | 0 | return PR_FALSE; |
295 | | |
296 | 0 | if (ucs4 < 0x10000) { |
297 | 0 | outBuf[len + H_0] = (unsigned char)(ucs4 >> 8); |
298 | 0 | outBuf[len + H_1] = (unsigned char)ucs4; |
299 | 0 | len += 2; |
300 | 0 | } else { |
301 | 0 | ucs4 -= 0x10000; |
302 | 0 | outBuf[len + 0 + H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3)); |
303 | 0 | outBuf[len + 0 + H_1] = (unsigned char)(ucs4 >> 10); |
304 | 0 | outBuf[len + 2 + H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3)); |
305 | 0 | outBuf[len + 2 + H_1] = (unsigned char)ucs4; |
306 | 0 | len += 4; |
307 | 0 | } |
308 | 0 | } |
309 | | |
310 | 0 | *outBufLen = len; |
311 | 0 | return PR_TRUE; |
312 | 0 | } else { |
313 | 0 | unsigned int i, len = 0; |
314 | 0 | PORT_Assert((inBufLen % 2) == 0); |
315 | 0 | if ((inBufLen % 2) != 0) { |
316 | 0 | *outBufLen = 0; |
317 | 0 | return PR_FALSE; |
318 | 0 | } |
319 | | |
320 | 0 | for (i = 0; i < inBufLen; i += 2) { |
321 | 0 | if ((inBuf[i + H_0] == 0x00) && ((inBuf[i + H_1] & 0x80) == 0x00)) |
322 | 0 | len += 1; |
323 | 0 | else if (inBuf[i + H_0] < 0x08) |
324 | 0 | len += 2; |
325 | 0 | else if (((inBuf[i + H_0] & 0xFC) == 0xD8)) { |
326 | 0 | if (((inBufLen - i) > 2) && ((inBuf[i + 2 + H_0] & 0xFC) == 0xDC)) { |
327 | 0 | i += 2; |
328 | 0 | len += 4; |
329 | 0 | } else { |
330 | 0 | return PR_FALSE; |
331 | 0 | } |
332 | 0 | } else if ((inBuf[i + H_0] & 0xFC) == 0xDC) { |
333 | 0 | return PR_FALSE; |
334 | 0 | } else { |
335 | 0 | len += 3; |
336 | 0 | } |
337 | 0 | } |
338 | | |
339 | 0 | if (len > maxOutBufLen) { |
340 | 0 | *outBufLen = len; |
341 | 0 | return PR_FALSE; |
342 | 0 | } |
343 | | |
344 | 0 | len = 0; |
345 | |
|
346 | 0 | for (i = 0; i < inBufLen; i += 2) { |
347 | 0 | if ((inBuf[i + H_0] == 0x00) && ((inBuf[i + H_1] & 0x80) == 0x00)) { |
348 | | /* 0000-007F -> 0xxxxxx */ |
349 | | /* 00000000 0abcdefg -> 0abcdefg */ |
350 | |
|
351 | 0 | outBuf[len] = inBuf[i + H_1] & 0x7F; |
352 | |
|
353 | 0 | len += 1; |
354 | 0 | } else if (inBuf[i + H_0] < 0x08) { |
355 | | /* 0080-07FF -> 110xxxxx 10xxxxxx */ |
356 | | /* 00000abc defghijk -> 110abcde 10fghijk */ |
357 | |
|
358 | 0 | outBuf[len + 0] = 0xC0 | ((inBuf[i + H_0] & 0x07) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6); |
359 | 0 | outBuf[len + 1] = 0x80 | ((inBuf[i + H_1] & 0x3F) >> 0); |
360 | |
|
361 | 0 | len += 2; |
362 | 0 | } else if ((inBuf[i + H_0] & 0xFC) == 0xD8) { |
363 | 0 | int abcde, BCDE; |
364 | |
|
365 | 0 | PORT_Assert(((inBufLen - i) > 2) && ((inBuf[i + 2 + H_0] & 0xFC) == 0xDC)); |
366 | | |
367 | | /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
368 | | /* 110110BC DEfghijk 110111lm nopqrstu -> |
369 | | { Let abcde = BCDE + 1 } |
370 | | 11110abc 10defghi 10jklmno 10pqrstu */ |
371 | |
|
372 | 0 | BCDE = ((inBuf[i + H_0] & 0x03) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6); |
373 | 0 | abcde = BCDE + 1; |
374 | |
|
375 | 0 | outBuf[len + 0] = 0xF0 | ((abcde & 0x1C) >> 2); |
376 | 0 | outBuf[len + 1] = 0x80 | ((abcde & 0x03) << 4) | ((inBuf[i + 0 + H_1] & 0x3C) >> 2); |
377 | 0 | outBuf[len + 2] = 0x80 | ((inBuf[i + 0 + H_1] & 0x03) << 4) | ((inBuf[i + 2 + H_0] & 0x03) << 2) | ((inBuf[i + 2 + H_1] & 0xC0) >> 6); |
378 | 0 | outBuf[len + 3] = 0x80 | ((inBuf[i + 2 + H_1] & 0x3F) >> 0); |
379 | |
|
380 | 0 | i += 2; |
381 | 0 | len += 4; |
382 | 0 | } else { |
383 | | /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ |
384 | | /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */ |
385 | |
|
386 | 0 | outBuf[len + 0] = 0xE0 | ((inBuf[i + H_0] & 0xF0) >> 4); |
387 | 0 | outBuf[len + 1] = 0x80 | ((inBuf[i + H_0] & 0x0F) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6); |
388 | 0 | outBuf[len + 2] = 0x80 | ((inBuf[i + H_1] & 0x3F) >> 0); |
389 | |
|
390 | 0 | len += 3; |
391 | 0 | } |
392 | 0 | } |
393 | |
|
394 | 0 | *outBufLen = len; |
395 | 0 | return PR_TRUE; |
396 | 0 | } |
397 | 0 | } |
398 | | |
399 | | PRBool |
400 | | sec_port_iso88591_utf8_conversion_function( |
401 | | const unsigned char *inBuf, |
402 | | unsigned int inBufLen, |
403 | | unsigned char *outBuf, |
404 | | unsigned int maxOutBufLen, |
405 | | unsigned int *outBufLen) |
406 | 0 | { |
407 | 0 | unsigned int i, len = 0; |
408 | |
|
409 | 0 | PORT_Assert((unsigned int *)NULL != outBufLen); |
410 | |
|
411 | 0 | for (i = 0; i < inBufLen; i++) { |
412 | 0 | if ((inBuf[i] & 0x80) == 0x00) |
413 | 0 | len += 1; |
414 | 0 | else |
415 | 0 | len += 2; |
416 | 0 | } |
417 | |
|
418 | 0 | if (len > maxOutBufLen) { |
419 | 0 | *outBufLen = len; |
420 | 0 | return PR_FALSE; |
421 | 0 | } |
422 | | |
423 | 0 | len = 0; |
424 | |
|
425 | 0 | for (i = 0; i < inBufLen; i++) { |
426 | 0 | if ((inBuf[i] & 0x80) == 0x00) { |
427 | | /* 00-7F -> 0xxxxxxx */ |
428 | | /* 0abcdefg -> 0abcdefg */ |
429 | |
|
430 | 0 | outBuf[len] = inBuf[i]; |
431 | 0 | len += 1; |
432 | 0 | } else { |
433 | | /* 80-FF <- 110xxxxx 10xxxxxx */ |
434 | | /* 00000000 abcdefgh -> 110000ab 10cdefgh */ |
435 | |
|
436 | 0 | outBuf[len + 0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6); |
437 | 0 | outBuf[len + 1] = 0x80 | ((inBuf[i] & 0x3F) >> 0); |
438 | |
|
439 | 0 | len += 2; |
440 | 0 | } |
441 | 0 | } |
442 | |
|
443 | 0 | *outBufLen = len; |
444 | 0 | return PR_TRUE; |
445 | 0 | } |