Line | Count | Source |
1 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
2 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
3 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
4 | | |
5 | | #include "seccomon.h" |
6 | | #include "secport.h" |
7 | | #include <limits.h> |
8 | | |
9 | | /* |
10 | | * From RFC 2044: |
11 | | * |
12 | | * UCS-4 range (hex.) UTF-8 octet sequence (binary) |
13 | | * 0000 0000-0000 007F 0xxxxxxx |
14 | | * 0000 0080-0000 07FF 110xxxxx 10xxxxxx |
15 | | * 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx |
16 | | * 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
17 | | * 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
18 | | * 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx |
19 | | */ |
20 | | |
21 | | /* |
22 | | * From http://www.imc.org/draft-hoffman-utf16 |
23 | | * |
24 | | * For U on [0x00010000,0x0010FFFF]: Let U' = U - 0x00010000 |
25 | | * |
26 | | * U' = yyyyyyyyyyxxxxxxxxxx |
27 | | * W1 = 110110yyyyyyyyyy |
28 | | * W2 = 110111xxxxxxxxxx |
29 | | */ |
30 | | |
31 | | /* |
32 | | * This code is assuming NETWORK BYTE ORDER for the 16- and 32-bit |
33 | | * character values. If you wish to use this code for working with |
34 | | * host byte order values, define the following: |
35 | | * |
36 | | * #if IS_BIG_ENDIAN |
37 | | * #define L_0 0 |
38 | | * #define L_1 1 |
39 | | * #define L_2 2 |
40 | | * #define L_3 3 |
41 | | * #define H_0 0 |
42 | | * #define H_1 1 |
43 | | * #else / * not everyone has elif * / |
44 | | * #if IS_LITTLE_ENDIAN |
45 | | * #define L_0 3 |
46 | | * #define L_1 2 |
47 | | * #define L_2 1 |
48 | | * #define L_3 0 |
49 | | * #define H_0 1 |
50 | | * #define H_1 0 |
51 | | * #else |
52 | | * #error "PDP and NUXI support deferred" |
53 | | * #endif / * IS_LITTLE_ENDIAN * / |
54 | | * #endif / * IS_BIG_ENDIAN * / |
55 | | */ |
56 | | |
/*
 * Byte offsets inside one 4-byte UCS-4 unit.  The conversion code writes
 * 0x00 at L_0 and the least significant byte of the code point at L_3,
 * i.e. the most significant byte comes first (network byte order).
 */
#define L_0 0
#define L_1 1
#define L_2 2
#define L_3 3
/*
 * Byte offsets inside one 2-byte UCS-2 / UTF-16 unit: H_0 indexes the
 * high-order byte, H_1 the low-order byte.
 */
#define H_0 0
#define H_1 1

/* Sentinel returned by sec_port_read_utf8 when it cannot decode a character. */
#define BAD_UTF8 ((PRUint32)-1)
65 | | |
66 | | /* |
67 | | * Parse a single UTF-8 character per the spec. in section 3.9 (D36) |
68 | | * of Unicode 4.0.0. |
69 | | * |
70 | | * Parameters: |
71 | | * index - Points to the byte offset in inBuf of character to read. On success, |
72 | | * updated to the offset of the following character. |
73 | | * inBuf - Input buffer, UTF-8 encoded |
74 | | * inbufLen - Length of input buffer, in bytes. |
75 | | * |
76 | | * Returns: |
77 | | * Success - The UCS4 encoded character |
78 | | * Failure - BAD_UTF8 |
79 | | */ |
80 | | static PRUint32 |
81 | | sec_port_read_utf8(unsigned int *index, unsigned char *inBuf, unsigned int inBufLen) |
82 | 0 | { |
83 | 0 | PRUint32 result; |
84 | 0 | unsigned int i = *index; |
85 | 0 | int bytes_left; |
86 | 0 | PRUint32 min_value; |
87 | |
|
88 | 0 | PORT_Assert(i < inBufLen); |
89 | |
|
90 | 0 | if ((inBuf[i] & 0x80) == 0x00) { |
91 | 0 | result = inBuf[i++]; |
92 | 0 | bytes_left = 0; |
93 | 0 | min_value = 0; |
94 | 0 | } else if ((inBuf[i] & 0xE0) == 0xC0) { |
95 | 0 | result = inBuf[i++] & 0x1F; |
96 | 0 | bytes_left = 1; |
97 | 0 | min_value = 0x80; |
98 | 0 | } else if ((inBuf[i] & 0xF0) == 0xE0) { |
99 | 0 | result = inBuf[i++] & 0x0F; |
100 | 0 | bytes_left = 2; |
101 | 0 | min_value = 0x800; |
102 | 0 | } else if ((inBuf[i] & 0xF8) == 0xF0) { |
103 | 0 | result = inBuf[i++] & 0x07; |
104 | 0 | bytes_left = 3; |
105 | 0 | min_value = 0x10000; |
106 | 0 | } else { |
107 | 0 | return BAD_UTF8; |
108 | 0 | } |
109 | | |
110 | 0 | while (bytes_left--) { |
111 | 0 | if (i >= inBufLen || (inBuf[i] & 0xC0) != 0x80) |
112 | 0 | return BAD_UTF8; |
113 | 0 | result = (result << 6) | (inBuf[i++] & 0x3F); |
114 | 0 | } |
115 | | |
116 | | /* Check for overlong sequences, surrogates, and outside unicode range */ |
117 | 0 | if (result < min_value || (result & 0xFFFFF800) == 0xD800 || result > 0x10FFFF) { |
118 | 0 | return BAD_UTF8; |
119 | 0 | } |
120 | | |
121 | 0 | *index = i; |
122 | 0 | return result; |
123 | 0 | } |
124 | | |
125 | | PRBool |
126 | | sec_port_ucs4_utf8_conversion_function( |
127 | | PRBool toUnicode, |
128 | | unsigned char *inBuf, |
129 | | unsigned int inBufLen, |
130 | | unsigned char *outBuf, |
131 | | unsigned int maxOutBufLen, |
132 | | unsigned int *outBufLen) |
133 | 2.80k | { |
134 | 2.80k | PORT_Assert((unsigned int *)NULL != outBufLen); |
135 | | |
136 | 2.80k | if (toUnicode) { |
137 | 0 | unsigned int i, len = 0; |
138 | |
|
139 | 0 | if (inBufLen > UINT_MAX / 4) { |
140 | 0 | *outBufLen = 0; |
141 | 0 | return PR_FALSE; |
142 | 0 | } |
143 | | |
144 | 0 | for (i = 0; i < inBufLen;) { |
145 | 0 | if ((inBuf[i] & 0x80) == 0x00) |
146 | 0 | i += 1; |
147 | 0 | else if ((inBuf[i] & 0xE0) == 0xC0) |
148 | 0 | i += 2; |
149 | 0 | else if ((inBuf[i] & 0xF0) == 0xE0) |
150 | 0 | i += 3; |
151 | 0 | else if ((inBuf[i] & 0xF8) == 0xF0) |
152 | 0 | i += 4; |
153 | 0 | else |
154 | 0 | return PR_FALSE; |
155 | | |
156 | 0 | len += 4; |
157 | 0 | } |
158 | | |
159 | 0 | if (len > maxOutBufLen) { |
160 | 0 | *outBufLen = len; |
161 | 0 | return PR_FALSE; |
162 | 0 | } |
163 | | |
164 | 0 | len = 0; |
165 | |
|
166 | 0 | for (i = 0; i < inBufLen;) { |
167 | 0 | PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); |
168 | |
|
169 | 0 | if (ucs4 == BAD_UTF8) |
170 | 0 | return PR_FALSE; |
171 | | |
172 | 0 | outBuf[len + L_0] = 0x00; |
173 | 0 | outBuf[len + L_1] = (unsigned char)(ucs4 >> 16); |
174 | 0 | outBuf[len + L_2] = (unsigned char)(ucs4 >> 8); |
175 | 0 | outBuf[len + L_3] = (unsigned char)ucs4; |
176 | |
|
177 | 0 | len += 4; |
178 | 0 | } |
179 | | |
180 | 0 | *outBufLen = len; |
181 | 0 | return PR_TRUE; |
182 | 2.80k | } else { |
183 | 2.80k | unsigned int i, len = 0; |
184 | 2.80k | PORT_Assert((inBufLen % 4) == 0); |
185 | 2.80k | if ((inBufLen % 4) != 0) { |
186 | 0 | *outBufLen = 0; |
187 | 0 | return PR_FALSE; |
188 | 0 | } |
189 | | |
190 | 6.50k | for (i = 0; i < inBufLen; i += 4) { |
191 | 4.69k | if ((inBuf[i + L_0] > 0x00) || (inBuf[i + L_1] > 0x10)) { |
192 | 1.00k | *outBufLen = 0; |
193 | 1.00k | return PR_FALSE; |
194 | 3.69k | } else if (inBuf[i + L_1] >= 0x01) |
195 | 973 | len += 4; |
196 | 2.72k | else if (inBuf[i + L_2] >= 0x08) |
197 | 701 | len += 3; |
198 | 2.02k | else if ((inBuf[i + L_2] > 0x00) || (inBuf[i + L_3] >= 0x80)) |
199 | 1.03k | len += 2; |
200 | 983 | else |
201 | 983 | len += 1; |
202 | 4.69k | } |
203 | | |
204 | 1.80k | if (len > maxOutBufLen) { |
205 | 0 | *outBufLen = len; |
206 | 0 | return PR_FALSE; |
207 | 0 | } |
208 | | |
209 | 1.80k | len = 0; |
210 | | |
211 | 5.13k | for (i = 0; i < inBufLen; i += 4) { |
212 | 3.33k | if (inBuf[i + L_1] >= 0x01) { |
213 | | /* 0001 0000-001F FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
214 | | /* 00000000 000abcde fghijklm nopqrstu -> |
215 | | 11110abc 10defghi 10jklmno 10pqrstu */ |
216 | | |
217 | 840 | outBuf[len + 0] = 0xF0 | ((inBuf[i + L_1] & 0x1C) >> 2); |
218 | 840 | outBuf[len + 1] = 0x80 | ((inBuf[i + L_1] & 0x03) << 4) | ((inBuf[i + L_2] & 0xF0) >> 4); |
219 | 840 | outBuf[len + 2] = 0x80 | ((inBuf[i + L_2] & 0x0F) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6); |
220 | 840 | outBuf[len + 3] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0); |
221 | | |
222 | 840 | len += 4; |
223 | 2.49k | } else if (inBuf[i + L_2] >= 0x08) { |
224 | | /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ |
225 | | /* 00000000 00000000 abcdefgh ijklmnop -> |
226 | | 1110abcd 10efghij 10klmnop */ |
227 | | |
228 | 601 | outBuf[len + 0] = 0xE0 | ((inBuf[i + L_2] & 0xF0) >> 4); |
229 | 601 | outBuf[len + 1] = 0x80 | ((inBuf[i + L_2] & 0x0F) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6); |
230 | 601 | outBuf[len + 2] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0); |
231 | | |
232 | 601 | len += 3; |
233 | 1.89k | } else if ((inBuf[i + L_2] > 0x00) || (inBuf[i + L_3] >= 0x80)) { |
234 | | /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */ |
235 | | /* 00000000 00000000 00000abc defghijk -> |
236 | | 110abcde 10fghijk */ |
237 | | |
238 | 990 | outBuf[len + 0] = 0xC0 | ((inBuf[i + L_2] & 0x07) << 2) | ((inBuf[i + L_3] & 0xC0) >> 6); |
239 | 990 | outBuf[len + 1] = 0x80 | ((inBuf[i + L_3] & 0x3F) >> 0); |
240 | | |
241 | 990 | len += 2; |
242 | 990 | } else { |
243 | | /* 0000 0000-0000 007F -> 0xxxxxx */ |
244 | | /* 00000000 00000000 00000000 0abcdefg -> |
245 | | 0abcdefg */ |
246 | | |
247 | 900 | outBuf[len + 0] = (inBuf[i + L_3] & 0x7F); |
248 | | |
249 | 900 | len += 1; |
250 | 900 | } |
251 | 3.33k | } |
252 | | |
253 | 1.80k | *outBufLen = len; |
254 | 1.80k | return PR_TRUE; |
255 | 1.80k | } |
256 | 2.80k | } |
257 | | |
258 | | PRBool |
259 | | sec_port_ucs2_utf8_conversion_function( |
260 | | PRBool toUnicode, |
261 | | unsigned char *inBuf, |
262 | | unsigned int inBufLen, |
263 | | unsigned char *outBuf, |
264 | | unsigned int maxOutBufLen, |
265 | | unsigned int *outBufLen) |
266 | 28.6k | { |
267 | 28.6k | PORT_Assert((unsigned int *)NULL != outBufLen); |
268 | | |
269 | 28.6k | if (toUnicode) { |
270 | 0 | unsigned int i, len = 0; |
271 | |
|
272 | 0 | if (inBufLen > UINT_MAX / 2) { |
273 | 0 | *outBufLen = 0; |
274 | 0 | return PR_FALSE; |
275 | 0 | } |
276 | | |
277 | 0 | for (i = 0; i < inBufLen;) { |
278 | 0 | if ((inBuf[i] & 0x80) == 0x00) { |
279 | 0 | i += 1; |
280 | 0 | len += 2; |
281 | 0 | } else if ((inBuf[i] & 0xE0) == 0xC0) { |
282 | 0 | i += 2; |
283 | 0 | len += 2; |
284 | 0 | } else if ((inBuf[i] & 0xF0) == 0xE0) { |
285 | 0 | i += 3; |
286 | 0 | len += 2; |
287 | 0 | } else if ((inBuf[i] & 0xF8) == 0xF0) { |
288 | 0 | i += 4; |
289 | 0 | len += 4; |
290 | 0 | } else |
291 | 0 | return PR_FALSE; |
292 | 0 | } |
293 | | |
294 | 0 | if (len > maxOutBufLen) { |
295 | 0 | *outBufLen = len; |
296 | 0 | return PR_FALSE; |
297 | 0 | } |
298 | | |
299 | 0 | len = 0; |
300 | |
|
301 | 0 | for (i = 0; i < inBufLen;) { |
302 | 0 | PRUint32 ucs4 = sec_port_read_utf8(&i, inBuf, inBufLen); |
303 | |
|
304 | 0 | if (ucs4 == BAD_UTF8) |
305 | 0 | return PR_FALSE; |
306 | | |
307 | 0 | if (ucs4 < 0x10000) { |
308 | 0 | outBuf[len + H_0] = (unsigned char)(ucs4 >> 8); |
309 | 0 | outBuf[len + H_1] = (unsigned char)ucs4; |
310 | 0 | len += 2; |
311 | 0 | } else { |
312 | 0 | ucs4 -= 0x10000; |
313 | 0 | outBuf[len + 0 + H_0] = (unsigned char)(0xD8 | ((ucs4 >> 18) & 0x3)); |
314 | 0 | outBuf[len + 0 + H_1] = (unsigned char)(ucs4 >> 10); |
315 | 0 | outBuf[len + 2 + H_0] = (unsigned char)(0xDC | ((ucs4 >> 8) & 0x3)); |
316 | 0 | outBuf[len + 2 + H_1] = (unsigned char)ucs4; |
317 | 0 | len += 4; |
318 | 0 | } |
319 | 0 | } |
320 | | |
321 | 0 | *outBufLen = len; |
322 | 0 | return PR_TRUE; |
323 | 28.6k | } else { |
324 | 28.6k | unsigned int i, len = 0; |
325 | 28.6k | PORT_Assert((inBufLen % 2) == 0); |
326 | 28.6k | if ((inBufLen % 2) != 0) { |
327 | 0 | *outBufLen = 0; |
328 | 0 | return PR_FALSE; |
329 | 0 | } |
330 | | |
331 | 28.6k | if (inBufLen / 2 > UINT_MAX / 3) { |
332 | 0 | *outBufLen = 0; |
333 | 0 | return PR_FALSE; |
334 | 0 | } |
335 | | |
336 | 48.6k | for (i = 0; i < inBufLen; i += 2) { |
337 | 20.9k | if ((inBuf[i + H_0] == 0x00) && ((inBuf[i + H_1] & 0x80) == 0x00)) |
338 | 2.20k | len += 1; |
339 | 18.7k | else if (inBuf[i + H_0] < 0x08) |
340 | 4.09k | len += 2; |
341 | 14.6k | else if (((inBuf[i + H_0] & 0xFC) == 0xD8)) { |
342 | 1.13k | if (((inBufLen - i) > 2) && ((inBuf[i + 2 + H_0] & 0xFC) == 0xDC)) { |
343 | 479 | i += 2; |
344 | 479 | len += 4; |
345 | 657 | } else { |
346 | 657 | return PR_FALSE; |
347 | 657 | } |
348 | 13.5k | } else if ((inBuf[i + H_0] & 0xFC) == 0xDC) { |
349 | 312 | return PR_FALSE; |
350 | 13.2k | } else { |
351 | 13.2k | len += 3; |
352 | 13.2k | } |
353 | 20.9k | } |
354 | | |
355 | 27.6k | if (len > maxOutBufLen) { |
356 | 0 | *outBufLen = len; |
357 | 0 | return PR_FALSE; |
358 | 0 | } |
359 | | |
360 | 27.6k | len = 0; |
361 | | |
362 | 44.9k | for (i = 0; i < inBufLen; i += 2) { |
363 | 17.3k | if ((inBuf[i + H_0] == 0x00) && ((inBuf[i + H_1] & 0x80) == 0x00)) { |
364 | | /* 0000-007F -> 0xxxxxx */ |
365 | | /* 00000000 0abcdefg -> 0abcdefg */ |
366 | | |
367 | 2.05k | outBuf[len] = inBuf[i + H_1] & 0x7F; |
368 | | |
369 | 2.05k | len += 1; |
370 | 15.2k | } else if (inBuf[i + H_0] < 0x08) { |
371 | | /* 0080-07FF -> 110xxxxx 10xxxxxx */ |
372 | | /* 00000abc defghijk -> 110abcde 10fghijk */ |
373 | | |
374 | 3.69k | outBuf[len + 0] = 0xC0 | ((inBuf[i + H_0] & 0x07) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6); |
375 | 3.69k | outBuf[len + 1] = 0x80 | ((inBuf[i + H_1] & 0x3F) >> 0); |
376 | | |
377 | 3.69k | len += 2; |
378 | 11.5k | } else if ((inBuf[i + H_0] & 0xFC) == 0xD8) { |
379 | 417 | int abcde, BCDE; |
380 | | |
381 | 417 | PORT_Assert(((inBufLen - i) > 2) && ((inBuf[i + 2 + H_0] & 0xFC) == 0xDC)); |
382 | | |
383 | | /* D800-DBFF DC00-DFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ |
384 | | /* 110110BC DEfghijk 110111lm nopqrstu -> |
385 | | { Let abcde = BCDE + 1 } |
386 | | 11110abc 10defghi 10jklmno 10pqrstu */ |
387 | | |
388 | 417 | BCDE = ((inBuf[i + H_0] & 0x03) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6); |
389 | 417 | abcde = BCDE + 1; |
390 | | |
391 | 417 | outBuf[len + 0] = 0xF0 | ((abcde & 0x1C) >> 2); |
392 | 417 | outBuf[len + 1] = 0x80 | ((abcde & 0x03) << 4) | ((inBuf[i + 0 + H_1] & 0x3C) >> 2); |
393 | 417 | outBuf[len + 2] = 0x80 | ((inBuf[i + 0 + H_1] & 0x03) << 4) | ((inBuf[i + 2 + H_0] & 0x03) << 2) | ((inBuf[i + 2 + H_1] & 0xC0) >> 6); |
394 | 417 | outBuf[len + 3] = 0x80 | ((inBuf[i + 2 + H_1] & 0x3F) >> 0); |
395 | | |
396 | 417 | i += 2; |
397 | 417 | len += 4; |
398 | 11.1k | } else { |
399 | | /* 0800-FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */ |
400 | | /* abcdefgh ijklmnop -> 1110abcd 10efghij 10klmnop */ |
401 | | |
402 | 11.1k | outBuf[len + 0] = 0xE0 | ((inBuf[i + H_0] & 0xF0) >> 4); |
403 | 11.1k | outBuf[len + 1] = 0x80 | ((inBuf[i + H_0] & 0x0F) << 2) | ((inBuf[i + H_1] & 0xC0) >> 6); |
404 | 11.1k | outBuf[len + 2] = 0x80 | ((inBuf[i + H_1] & 0x3F) >> 0); |
405 | | |
406 | 11.1k | len += 3; |
407 | 11.1k | } |
408 | 17.3k | } |
409 | | |
410 | 27.6k | *outBufLen = len; |
411 | 27.6k | return PR_TRUE; |
412 | 27.6k | } |
413 | 28.6k | } |
414 | | |
415 | | PRBool |
416 | | sec_port_iso88591_utf8_conversion_function( |
417 | | const unsigned char *inBuf, |
418 | | unsigned int inBufLen, |
419 | | unsigned char *outBuf, |
420 | | unsigned int maxOutBufLen, |
421 | | unsigned int *outBufLen) |
422 | 6.52k | { |
423 | 6.52k | unsigned int i, len = 0; |
424 | | |
425 | 6.52k | PORT_Assert((unsigned int *)NULL != outBufLen); |
426 | | |
427 | 6.52k | if (inBufLen > UINT_MAX / 2) { |
428 | 0 | *outBufLen = 0; |
429 | 0 | return PR_FALSE; |
430 | 0 | } |
431 | | |
432 | 220k | for (i = 0; i < inBufLen; i++) { |
433 | 214k | if ((inBuf[i] & 0x80) == 0x00) |
434 | 157k | len += 1; |
435 | 56.2k | else |
436 | 56.2k | len += 2; |
437 | 214k | } |
438 | | |
439 | 6.52k | if (len > maxOutBufLen) { |
440 | 0 | *outBufLen = len; |
441 | 0 | return PR_FALSE; |
442 | 0 | } |
443 | | |
444 | 6.52k | len = 0; |
445 | | |
446 | 220k | for (i = 0; i < inBufLen; i++) { |
447 | 214k | if ((inBuf[i] & 0x80) == 0x00) { |
448 | | /* 00-7F -> 0xxxxxxx */ |
449 | | /* 0abcdefg -> 0abcdefg */ |
450 | | |
451 | 157k | outBuf[len] = inBuf[i]; |
452 | 157k | len += 1; |
453 | 157k | } else { |
454 | | /* 80-FF <- 110xxxxx 10xxxxxx */ |
455 | | /* 00000000 abcdefgh -> 110000ab 10cdefgh */ |
456 | | |
457 | 56.2k | outBuf[len + 0] = 0xC0 | ((inBuf[i] & 0xC0) >> 6); |
458 | 56.2k | outBuf[len + 1] = 0x80 | ((inBuf[i] & 0x3F) >> 0); |
459 | | |
460 | 56.2k | len += 2; |
461 | 56.2k | } |
462 | 214k | } |
463 | | |
464 | 6.52k | *outBufLen = len; |
465 | 6.52k | return PR_TRUE; |
466 | 6.52k | } |