/src/FreeRDP/winpr/libwinpr/crt/unicode_builtin.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright 2001-2004 Unicode, Inc. |
3 | | * |
4 | | * Disclaimer |
5 | | * |
6 | | * This source code is provided as is by Unicode, Inc. No claims are |
7 | | * made as to fitness for any particular purpose. No warranties of any |
8 | | * kind are expressed or implied. The recipient agrees to determine |
9 | | * applicability of information provided. If this file has been |
10 | | * purchased on magnetic or optical media from Unicode, Inc., the |
11 | | * sole remedy for any claim will be exchange of defective media |
12 | | * within 90 days of receipt. |
13 | | * |
14 | | * Limitations on Rights to Redistribute This Code |
15 | | * |
16 | | * Unicode, Inc. hereby grants the right to freely use the information |
17 | | * supplied in this file in the creation of products supporting the |
18 | | * Unicode Standard, and to make copies of this file in any form |
19 | | * for internal or external distribution as long as this notice |
20 | | * remains attached. |
21 | | */ |
22 | | |
23 | | /* --------------------------------------------------------------------- |
24 | | |
25 | | Conversions between UTF32, UTF-16, and UTF-8. Source code file. |
26 | | Author: Mark E. Davis, 1994. |
27 | | Rev History: Rick McGowan, fixes & updates May 2001. |
28 | | Sept 2001: fixed const & error conditions per |
29 | | mods suggested by S. Parent & A. Lillich. |
30 | | June 2002: Tim Dodd added detection and handling of incomplete |
31 | | source sequences, enhanced error detection, added casts |
32 | | to eliminate compiler warnings. |
33 | | July 2003: slight mods to back out aggressive FFFE detection. |
34 | | Jan 2004: updated switches in from-UTF8 conversions. |
35 | | Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. |
36 | | |
37 | | See the header file "utf.h" for complete documentation. |
38 | | |
39 | | ------------------------------------------------------------------------ */ |
40 | | |
41 | | #include <winpr/wtypes.h> |
42 | | #include <winpr/string.h> |
43 | | #include <winpr/assert.h> |
44 | | #include <winpr/cast.h> |
45 | | |
46 | | #include "unicode.h" |
47 | | |
48 | | #include "../log.h" |
49 | | #define TAG WINPR_TAG("unicode") |
50 | | |
51 | | /* |
52 | | * Character Types: |
53 | | * |
54 | | * UTF8: uint8_t 8 bits |
55 | | * UTF16: uint16_t 16 bits |
56 | | * UTF32: uint32_t 32 bits |
57 | | */ |
58 | | |
59 | | /* Some fundamental constants */ |
/*
 * Code point limits used by the converters.
 * Every expansion is fully parenthesized so the cast binds as one unit
 * regardless of the operators surrounding the macro at the use site.
 */
#define UNI_REPLACEMENT_CHAR ((uint32_t)0x0000FFFD) /* substituted for unconvertible input */
#define UNI_MAX_BMP ((uint32_t)0x0000FFFF)          /* largest value that fits one UTF-16 unit */
#define UNI_MAX_UTF16 ((uint32_t)0x0010FFFF)        /* largest value encodable as UTF-16 */
#define UNI_MAX_UTF32 ((uint32_t)0x7FFFFFFF)
#define UNI_MAX_LEGAL_UTF32 ((uint32_t)0x0010FFFF)
65 | | |
/* Outcome reported by the internal conversion routines. */
typedef enum
{
    /* the whole input was converted */
    conversionOK = 0,
    /* the input ends in the middle of a character */
    sourceExhausted = 1,
    /* the output buffer is too small for the next character */
    targetExhausted = 2,
    /* the input contains an illegal/malformed sequence */
    sourceIllegal = 3
} ConversionResult;
73 | | |
/* Selects how the converters react to illegal surrogate values. */
typedef enum
{
    /* stop with sourceIllegal */
    strictConversion = 0,
    /* keep converting instead of stopping */
    lenientConversion = 1
} ConversionFlags;
79 | | |
/* Constants for splitting/joining a supplementary code point and its surrogate pair. */

/* each surrogate carries 10 payload bits */
static const int halfShift = 10;

/* first code point above the BMP; added/subtracted around the surrogate split */
static const uint32_t halfBase = 0x10000UL;

/* mask selecting the low 10 payload bits */
static const uint32_t halfMask = 0x03FFUL;
84 | | |
/*
 * UTF-16 surrogate ranges: high (lead) units are 0xD800..0xDBFF,
 * low (trail) units are 0xDC00..0xDFFF.
 * Expansions are parenthesized so the cast always binds as one unit.
 */
#define UNI_SUR_HIGH_START ((uint32_t)0xD800)
#define UNI_SUR_HIGH_END ((uint32_t)0xDBFF)
#define UNI_SUR_LOW_START ((uint32_t)0xDC00)
#define UNI_SUR_LOW_END ((uint32_t)0xDFFF)
89 | | |
90 | | /* --------------------------------------------------------------------- */ |
91 | | |
/*
 * Number of trailing bytes expected after a given UTF-8 lead byte; index
 * the table with the first byte of a sequence.
 * Legal UTF-8 never has 4 or 5 trailing bytes (5- and 6-byte sequences);
 * those entries are kept for anyone who wants to handle the older,
 * longer encodings.
 */
static const char trailingBytesForUTF8[256] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xD0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xF0 */ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};
109 | | |
/*
 * Value subtracted from the accumulated byte sum of a UTF-8 sequence to
 * obtain the code point. Indexed by the number of trailing bytes, so there
 * is one entry per possible trailing-byte count (0..5).
 */
static const uint32_t offsetsFromUTF8[6] = {
    0x00000000UL, /* no trailing byte */
    0x00003080UL, /* 1 trailing byte */
    0x000E2080UL, /* 2 trailing bytes */
    0x03C82080UL, /* 3 trailing bytes */
    0xFA082080UL, /* 4 trailing bytes */
    0x82082080UL  /* 5 trailing bytes */
};
117 | | |
/*
 * Marker bits OR-ed into the lead byte of an encoded UTF-8 sequence,
 * indexed by the total number of bytes in the sequence (index 0 is unused
 * padding). Legal UTF-8 uses at most 4 bytes per sequence.
 */
static const uint8_t firstByteMark[7] = {
    0x00, /* unused */
    0x00, /* 1 byte  */
    0xC0, /* 2 bytes */
    0xE0, /* 3 bytes */
    0xF0, /* 4 bytes */
    0xF8, /* 5 bytes */
    0xFC  /* 6 bytes */
};
126 | | |
127 | | /* --------------------------------------------------------------------- */ |
128 | | |
129 | | /* The interface converts a whole buffer to avoid function-call overhead. |
130 | | * Constants have been gathered. Loops & conditionals have been removed as |
131 | | * much as possible for efficiency, in favor of drop-through switches. |
132 | | * (See "Note A" at the bottom of the file for equivalent code.) |
133 | | * If your compiler supports it, the "isLegalUTF8" call can be turned |
134 | | * into an inline function. |
135 | | */ |
136 | | |
137 | | /* --------------------------------------------------------------------- */ |
138 | | |
139 | | static ConversionResult winpr_ConvertUTF16toUTF8_Internal(const uint16_t** sourceStart, |
140 | | const uint16_t* sourceEnd, |
141 | | uint8_t** targetStart, uint8_t* targetEnd, |
142 | | ConversionFlags flags) |
143 | 360 | { |
144 | 360 | bool computeLength = (!targetEnd) ? true : false; |
145 | 360 | const uint16_t* source = *sourceStart; |
146 | 360 | uint8_t* target = *targetStart; |
147 | 360 | ConversionResult result = conversionOK; |
148 | | |
149 | 660k | while (source < sourceEnd) |
150 | 660k | { |
151 | 660k | uint32_t ch = 0; |
152 | 660k | unsigned short bytesToWrite = 0; |
153 | 660k | const uint32_t byteMask = 0xBF; |
154 | 660k | const uint32_t byteMark = 0x80; |
155 | 660k | const uint16_t* oldSource = |
156 | 660k | source; /* In case we have to back up because of target overflow. */ |
157 | | |
158 | 660k | ch = *source++; |
159 | | |
160 | | /* If we have a surrogate pair, convert to UTF32 first. */ |
161 | 660k | if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) |
162 | 845 | { |
163 | | /* If the 16 bits following the high surrogate are in the source buffer... */ |
164 | 845 | if (source < sourceEnd) |
165 | 844 | { |
166 | 844 | uint32_t ch2 = *source; |
167 | | |
168 | | /* If it's a low surrogate, convert to UTF32. */ |
169 | 844 | if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) |
170 | 800 | { |
171 | 800 | ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) + |
172 | 800 | halfBase; |
173 | 800 | ++source; |
174 | 800 | } |
175 | 44 | else if (flags == strictConversion) |
176 | 44 | { |
177 | | /* it's an unpaired high surrogate */ |
178 | 44 | --source; /* return to the illegal value itself */ |
179 | 44 | result = sourceIllegal; |
180 | 44 | break; |
181 | 44 | } |
182 | 844 | } |
183 | 1 | else |
184 | 1 | { |
185 | | /* We don't have the 16 bits following the high surrogate. */ |
186 | 1 | --source; /* return to the high surrogate */ |
187 | 1 | result = sourceExhausted; |
188 | 1 | break; |
189 | 1 | } |
190 | 845 | } |
191 | 659k | else if (flags == strictConversion) |
192 | 659k | { |
193 | | /* UTF-16 surrogate values are illegal in UTF-32 */ |
194 | 659k | if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) |
195 | 23 | { |
196 | 23 | --source; /* return to the illegal value itself */ |
197 | 23 | result = sourceIllegal; |
198 | 23 | break; |
199 | 23 | } |
200 | 659k | } |
201 | | |
202 | | /* Figure out how many bytes the result will require */ |
203 | 660k | if (ch < (uint32_t)0x80) |
204 | 953 | { |
205 | 953 | bytesToWrite = 1; |
206 | 953 | } |
207 | 659k | else if (ch < (uint32_t)0x800) |
208 | 45.0k | { |
209 | 45.0k | bytesToWrite = 2; |
210 | 45.0k | } |
211 | 614k | else if (ch < (uint32_t)0x10000) |
212 | 613k | { |
213 | 613k | bytesToWrite = 3; |
214 | 613k | } |
215 | 800 | else if (ch < (uint32_t)0x110000) |
216 | 800 | { |
217 | 800 | bytesToWrite = 4; |
218 | 800 | } |
219 | 0 | else |
220 | 0 | { |
221 | 0 | bytesToWrite = 3; |
222 | 0 | ch = UNI_REPLACEMENT_CHAR; |
223 | 0 | } |
224 | | |
225 | 660k | target += bytesToWrite; |
226 | | |
227 | 660k | if ((target > targetEnd) && (!computeLength)) |
228 | 0 | { |
229 | 0 | source = oldSource; /* Back up source pointer! */ |
230 | 0 | target -= bytesToWrite; |
231 | 0 | result = targetExhausted; |
232 | 0 | break; |
233 | 0 | } |
234 | | |
235 | 660k | if (!computeLength) |
236 | 329k | { |
237 | 329k | switch (bytesToWrite) |
238 | 329k | { |
239 | | /* note: everything falls through. */ |
240 | 399 | case 4: |
241 | 399 | *--target = (uint8_t)((ch | byteMark) & byteMask); |
242 | 399 | ch >>= 6; |
243 | | /* fallthrough */ |
244 | 399 | WINPR_FALLTHROUGH |
245 | 306k | case 3: |
246 | 306k | *--target = (uint8_t)((ch | byteMark) & byteMask); |
247 | 306k | ch >>= 6; |
248 | | /* fallthrough */ |
249 | 306k | WINPR_FALLTHROUGH |
250 | | |
251 | 329k | case 2: |
252 | 329k | *--target = (uint8_t)((ch | byteMark) & byteMask); |
253 | 329k | ch >>= 6; |
254 | | /* fallthrough */ |
255 | 329k | WINPR_FALLTHROUGH |
256 | | |
257 | 329k | case 1: |
258 | 329k | *--target = (uint8_t)(ch | firstByteMark[bytesToWrite]); |
259 | 329k | } |
260 | 329k | } |
261 | 330k | else |
262 | 330k | { |
263 | 330k | switch (bytesToWrite) |
264 | 330k | { |
265 | | /* note: everything falls through. */ |
266 | 401 | case 4: |
267 | 401 | --target; |
268 | | /* fallthrough */ |
269 | 401 | WINPR_FALLTHROUGH |
270 | | |
271 | 307k | case 3: |
272 | 307k | --target; |
273 | | /* fallthrough */ |
274 | 307k | WINPR_FALLTHROUGH |
275 | | |
276 | 330k | case 2: |
277 | 330k | --target; |
278 | | /* fallthrough */ |
279 | 330k | WINPR_FALLTHROUGH |
280 | | |
281 | 330k | case 1: |
282 | 330k | --target; |
283 | 330k | } |
284 | 330k | } |
285 | | |
286 | 660k | target += bytesToWrite; |
287 | 660k | } |
288 | | |
289 | 360 | *sourceStart = source; |
290 | 360 | *targetStart = target; |
291 | 360 | return result; |
292 | 360 | } |
293 | | |
294 | | /* --------------------------------------------------------------------- */ |
295 | | |
/*
 * Tell whether the `length` bytes at `source` form one legal UTF-8 sequence.
 *
 * `length` must be the sequence length implied by the first byte; outside of
 * the converters it can be obtained with
 *     length = trailingBytesForUTF8[*source] + 1;
 * and the caller must make sure that many bytes are readable.
 *
 * Returns false for any length outside 1..4 (Unicode defines UTF-8 only up
 * to 4-byte sequences), for trailing bytes outside 0x80..0xBF, for overlong
 * forms, for encoded surrogates (lead 0xED, second byte > 0x9F) and for
 * values above 0x10FFFF (lead 0xF4 with second byte > 0x8F, or lead > 0xF4).
 */
static bool isLegalUTF8(const uint8_t* source, int length)
{
    uint8_t lead = 0;

    if ((length < 1) || (length > 4))
        return false;

    lead = source[0];

    /* Third and fourth byte (if present) are plain continuation bytes. */
    for (int idx = length - 1; idx >= 2; idx--)
    {
        const uint8_t cont = source[idx];

        if ((cont < 0x80) || (cont > 0xBF))
            return false;
    }

    if (length >= 2)
    {
        const uint8_t second = source[1];

        /*
         * The second byte must always be a continuation byte. The lower bound
         * has to be enforced for every lead byte, including 0xED and 0xF4
         * whose special rules below only tighten the upper bound.
         */
        if ((second < 0x80) || (second > 0xBF))
            return false;

        switch (lead)
        {
            case 0xE0: /* reject overlong 3-byte forms */
                if (second < 0xA0)
                    return false;
                break;

            case 0xED: /* reject encoded surrogates */
                if (second > 0x9F)
                    return false;
                break;

            case 0xF0: /* reject overlong 4-byte forms */
                if (second < 0x90)
                    return false;
                break;

            case 0xF4: /* reject values above 0x10FFFF */
                if (second > 0x8F)
                    return false;
                break;

            default:
                break;
        }
    }

    /* 0x80..0xBF are continuation bytes, 0xC0/0xC1 only start overlong forms. */
    if ((lead >= 0x80) && (lead < 0xC2))
        return false;

    /* Lead bytes above 0xF4 would encode values beyond 0x10FFFF. */
    if (lead > 0xF4)
        return false;

    return true;
}
379 | | |
380 | | /* --------------------------------------------------------------------- */ |
381 | | |
382 | | static ConversionResult winpr_ConvertUTF8toUTF16_Internal(const uint8_t** sourceStart, |
383 | | const uint8_t* sourceEnd, |
384 | | uint16_t** targetStart, |
385 | | uint16_t* targetEnd, |
386 | | ConversionFlags flags) |
387 | 1.74k | { |
388 | 1.74k | bool computeLength = (!targetEnd) ? true : false; |
389 | 1.74k | ConversionResult result = conversionOK; |
390 | 1.74k | const uint8_t* source = *sourceStart; |
391 | 1.74k | uint16_t* target = *targetStart; |
392 | | |
393 | 4.39M | while (source < sourceEnd) |
394 | 4.39M | { |
395 | 4.39M | uint32_t ch = 0; |
396 | 4.39M | unsigned short extraBytesToRead = |
397 | 8.78M | WINPR_ASSERTING_INT_CAST(unsigned short, trailingBytesForUTF8[*source]); |
398 | | |
399 | 4.39M | if ((source + extraBytesToRead) >= sourceEnd) |
400 | 2 | { |
401 | 2 | result = sourceExhausted; |
402 | 2 | break; |
403 | 2 | } |
404 | | |
405 | | /* Do this check whether lenient or strict */ |
406 | 4.39M | if (!isLegalUTF8(source, extraBytesToRead + 1)) |
407 | 84 | { |
408 | 84 | result = sourceIllegal; |
409 | 84 | break; |
410 | 84 | } |
411 | | |
412 | | /* |
413 | | * The cases all fall through. See "Note A" below. |
414 | | */ |
415 | 4.39M | switch (extraBytesToRead) |
416 | 4.39M | { |
417 | 0 | case 5: |
418 | 0 | ch += *source++; |
419 | 0 | ch <<= 6; /* remember, illegal UTF-8 */ |
420 | | /* fallthrough */ |
421 | 0 | WINPR_FALLTHROUGH |
422 | |
|
423 | 0 | case 4: |
424 | 0 | ch += *source++; |
425 | 0 | ch <<= 6; /* remember, illegal UTF-8 */ |
426 | | /* fallthrough */ |
427 | 0 | WINPR_FALLTHROUGH |
428 | |
|
429 | 972 | case 3: |
430 | 972 | ch += *source++; |
431 | 972 | ch <<= 6; |
432 | | /* fallthrough */ |
433 | 972 | WINPR_FALLTHROUGH |
434 | | |
435 | 1.84k | case 2: |
436 | 1.84k | ch += *source++; |
437 | 1.84k | ch <<= 6; |
438 | | /* fallthrough */ |
439 | 1.84k | WINPR_FALLTHROUGH |
440 | | |
441 | 2.18k | case 1: |
442 | 2.18k | ch += *source++; |
443 | 2.18k | ch <<= 6; |
444 | | /* fallthrough */ |
445 | 2.18k | WINPR_FALLTHROUGH |
446 | | |
447 | 4.39M | case 0: |
448 | 4.39M | ch += *source++; |
449 | 4.39M | } |
450 | | |
451 | 4.39M | ch -= offsetsFromUTF8[extraBytesToRead]; |
452 | | |
453 | 4.39M | if ((target >= targetEnd) && (!computeLength)) |
454 | 0 | { |
455 | 0 | source -= (extraBytesToRead + 1); /* Back up source pointer! */ |
456 | 0 | result = targetExhausted; |
457 | 0 | break; |
458 | 0 | } |
459 | | |
460 | 4.39M | if (ch <= UNI_MAX_BMP) |
461 | 4.38M | { |
462 | | /* Target is a character <= 0xFFFF */ |
463 | | /* UTF-16 surrogate values are illegal in UTF-32 */ |
464 | 4.38M | if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) |
465 | 0 | { |
466 | 0 | if (flags == strictConversion) |
467 | 0 | { |
468 | 0 | source -= (extraBytesToRead + 1); /* return to the illegal value itself */ |
469 | 0 | result = sourceIllegal; |
470 | 0 | break; |
471 | 0 | } |
472 | 0 | else |
473 | 0 | { |
474 | 0 | if (!computeLength) |
475 | 0 | *target++ = UNI_REPLACEMENT_CHAR; |
476 | 0 | else |
477 | 0 | target++; |
478 | 0 | } |
479 | 0 | } |
480 | 4.38M | else |
481 | 4.38M | { |
482 | 4.38M | if (!computeLength) |
483 | 2.15M | *target++ = (uint16_t)ch; /* normal case */ |
484 | 2.23M | else |
485 | 2.23M | target++; |
486 | 4.38M | } |
487 | 4.38M | } |
488 | 972 | else if (ch > UNI_MAX_UTF16) |
489 | 0 | { |
490 | 0 | if (flags == strictConversion) |
491 | 0 | { |
492 | 0 | result = sourceIllegal; |
493 | 0 | source -= (extraBytesToRead + 1); /* return to the start */ |
494 | 0 | break; /* Bail out; shouldn't continue */ |
495 | 0 | } |
496 | 0 | else |
497 | 0 | { |
498 | 0 | if (!computeLength) |
499 | 0 | *target++ = UNI_REPLACEMENT_CHAR; |
500 | 0 | else |
501 | 0 | target++; |
502 | 0 | } |
503 | 0 | } |
504 | 972 | else |
505 | 972 | { |
506 | | /* target is a character in range 0xFFFF - 0x10FFFF. */ |
507 | 972 | if ((target + 1 >= targetEnd) && (!computeLength)) |
508 | 0 | { |
509 | 0 | source -= (extraBytesToRead + 1); /* Back up source pointer! */ |
510 | 0 | result = targetExhausted; |
511 | 0 | break; |
512 | 0 | } |
513 | | |
514 | 972 | ch -= halfBase; |
515 | | |
516 | 972 | if (!computeLength) |
517 | 479 | { |
518 | 479 | *target++ = (uint16_t)((ch >> halfShift) + UNI_SUR_HIGH_START); |
519 | 479 | *target++ = (uint16_t)((ch & halfMask) + UNI_SUR_LOW_START); |
520 | 479 | } |
521 | 493 | else |
522 | 493 | { |
523 | 493 | target++; |
524 | 493 | target++; |
525 | 493 | } |
526 | 972 | } |
527 | 4.39M | } |
528 | | |
529 | 1.74k | *sourceStart = source; |
530 | 1.74k | *targetStart = target; |
531 | 1.74k | return result; |
532 | 1.74k | } |
533 | | |
534 | | /** |
535 | | * WinPR built-in Unicode API |
536 | | */ |
537 | | |
538 | | static int winpr_ConvertUTF8toUTF16(const uint8_t* src, int cchSrc, uint16_t* dst, int cchDst) |
539 | 1.74k | { |
540 | 1.74k | size_t length = 0; |
541 | 1.74k | uint16_t* dstBeg = NULL; |
542 | 1.74k | uint16_t* dstEnd = NULL; |
543 | 1.74k | const uint8_t* srcBeg = NULL; |
544 | 1.74k | const uint8_t* srcEnd = NULL; |
545 | 1.74k | ConversionResult result = sourceIllegal; |
546 | | |
547 | 1.74k | if (cchSrc == -1) |
548 | 0 | cchSrc = (int)strnlen((const char*)src, INT32_MAX - 1) + 1; |
549 | | |
550 | 1.74k | srcBeg = src; |
551 | 1.74k | srcEnd = &src[cchSrc]; |
552 | | |
553 | 1.74k | if (cchDst == 0) |
554 | 915 | { |
555 | 915 | result = |
556 | 915 | winpr_ConvertUTF8toUTF16_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion); |
557 | | |
558 | 915 | length = dstBeg - (uint16_t*)NULL; |
559 | 915 | } |
560 | 829 | else |
561 | 829 | { |
562 | 829 | dstBeg = dst; |
563 | 829 | dstEnd = &dst[cchDst]; |
564 | | |
565 | 829 | result = |
566 | 829 | winpr_ConvertUTF8toUTF16_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion); |
567 | | |
568 | 829 | length = dstBeg - dst; |
569 | 829 | } |
570 | | |
571 | 1.74k | if (result == targetExhausted) |
572 | 0 | { |
573 | 0 | SetLastError(ERROR_INSUFFICIENT_BUFFER); |
574 | 0 | return 0; |
575 | 0 | } |
576 | | |
577 | 1.74k | return (result == conversionOK) ? WINPR_ASSERTING_INT_CAST(int, length) : 0; |
578 | 1.74k | } |
579 | | |
580 | | static int winpr_ConvertUTF16toUTF8(const uint16_t* src, int cchSrc, uint8_t* dst, int cchDst) |
581 | 360 | { |
582 | 360 | size_t length = 0; |
583 | 360 | uint8_t* dstBeg = NULL; |
584 | 360 | uint8_t* dstEnd = NULL; |
585 | 360 | const uint16_t* srcBeg = NULL; |
586 | 360 | const uint16_t* srcEnd = NULL; |
587 | 360 | ConversionResult result = sourceIllegal; |
588 | | |
589 | 360 | if (cchSrc == -1) |
590 | 0 | cchSrc = (int)_wcsnlen((const WCHAR*)src, INT32_MAX - 1) + 1; |
591 | | |
592 | 360 | srcBeg = src; |
593 | 360 | srcEnd = &src[cchSrc]; |
594 | | |
595 | 360 | if (cchDst == 0) |
596 | 214 | { |
597 | 214 | result = |
598 | 214 | winpr_ConvertUTF16toUTF8_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion); |
599 | | |
600 | 214 | length = dstBeg - ((uint8_t*)NULL); |
601 | 214 | } |
602 | 146 | else |
603 | 146 | { |
604 | 146 | dstBeg = dst; |
605 | 146 | dstEnd = &dst[cchDst]; |
606 | | |
607 | 146 | result = |
608 | 146 | winpr_ConvertUTF16toUTF8_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion); |
609 | | |
610 | 146 | length = dstBeg - dst; |
611 | 146 | } |
612 | | |
613 | 360 | if (result == targetExhausted) |
614 | 0 | { |
615 | 0 | SetLastError(ERROR_INSUFFICIENT_BUFFER); |
616 | 0 | return 0; |
617 | 0 | } |
618 | | |
619 | 360 | return (result == conversionOK) ? WINPR_ASSERTING_INT_CAST(int, length) : 0; |
620 | 360 | } |
621 | | |
622 | | /* --------------------------------------------------------------------- */ |
623 | | |
624 | | int int_MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte, |
625 | | LPWSTR lpWideCharStr, int cchWideChar) |
626 | 1.74k | { |
627 | 1.74k | size_t cbCharLen = (size_t)cbMultiByte; |
628 | | |
629 | 1.74k | WINPR_UNUSED(dwFlags); |
630 | | |
631 | | /* If cbMultiByte is 0, the function fails */ |
632 | 1.74k | if ((cbMultiByte == 0) || (cbMultiByte < -1)) |
633 | 0 | return 0; |
634 | | |
635 | 1.74k | if (cchWideChar < 0) |
636 | 0 | return -1; |
637 | | |
638 | 1.74k | if (cbMultiByte < 0) |
639 | 0 | { |
640 | 0 | const size_t len = strlen(lpMultiByteStr); |
641 | 0 | if (len >= INT32_MAX) |
642 | 0 | return 0; |
643 | 0 | cbCharLen = (int)len + 1; |
644 | 0 | } |
645 | 1.74k | else |
646 | 1.74k | cbCharLen = cbMultiByte; |
647 | | |
648 | 1.74k | WINPR_ASSERT(lpMultiByteStr); |
649 | 1.74k | switch (CodePage) |
650 | 1.74k | { |
651 | 0 | case CP_ACP: |
652 | 1.74k | case CP_UTF8: |
653 | 1.74k | break; |
654 | | |
655 | 0 | default: |
656 | 0 | WLog_ERR(TAG, "Unsupported encoding %u", CodePage); |
657 | 0 | return 0; |
658 | 1.74k | } |
659 | | |
660 | 1.74k | return winpr_ConvertUTF8toUTF16((const uint8_t*)lpMultiByteStr, |
661 | 1.74k | WINPR_ASSERTING_INT_CAST(int, cbCharLen), |
662 | 0 | (uint16_t*)lpWideCharStr, cchWideChar); |
663 | 1.74k | } |
664 | | |
665 | | int int_WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar, |
666 | | LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar, |
667 | | LPBOOL lpUsedDefaultChar) |
668 | 360 | { |
669 | 360 | size_t cbCharLen = (size_t)cchWideChar; |
670 | | |
671 | 360 | WINPR_UNUSED(dwFlags); |
672 | | /* If cchWideChar is 0, the function fails */ |
673 | 360 | if ((cchWideChar == 0) || (cchWideChar < -1)) |
674 | 0 | return 0; |
675 | | |
676 | 360 | if (cbMultiByte < 0) |
677 | 0 | return -1; |
678 | | |
679 | 360 | WINPR_ASSERT(lpWideCharStr); |
680 | | /* If cchWideChar is -1, the string is null-terminated */ |
681 | 360 | if (cchWideChar == -1) |
682 | 0 | { |
683 | 0 | const size_t len = _wcslen(lpWideCharStr); |
684 | 0 | if (len >= INT32_MAX) |
685 | 0 | return 0; |
686 | 0 | cbCharLen = (int)len + 1; |
687 | 0 | } |
688 | 360 | else |
689 | 360 | cbCharLen = cchWideChar; |
690 | | |
691 | | /* |
692 | | * if cbMultiByte is 0, the function returns the required buffer size |
693 | | * in bytes for lpMultiByteStr and makes no use of the output parameter itself. |
694 | | */ |
695 | | |
696 | 360 | return winpr_ConvertUTF16toUTF8((const uint16_t*)lpWideCharStr, |
697 | 360 | WINPR_ASSERTING_INT_CAST(int, cbCharLen), |
698 | 0 | (uint8_t*)lpMultiByteStr, cbMultiByte); |
699 | 360 | } |