/src/FreeRDP/winpr/libwinpr/crt/unicode_builtin.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright 2001-2004 Unicode, Inc. |
3 | | * |
4 | | * Disclaimer |
5 | | * |
6 | | * This source code is provided as is by Unicode, Inc. No claims are |
7 | | * made as to fitness for any particular purpose. No warranties of any |
8 | | * kind are expressed or implied. The recipient agrees to determine |
9 | | * applicability of information provided. If this file has been |
10 | | * purchased on magnetic or optical media from Unicode, Inc., the |
11 | | * sole remedy for any claim will be exchange of defective media |
12 | | * within 90 days of receipt. |
13 | | * |
14 | | * Limitations on Rights to Redistribute This Code |
15 | | * |
16 | | * Unicode, Inc. hereby grants the right to freely use the information |
17 | | * supplied in this file in the creation of products supporting the |
18 | | * Unicode Standard, and to make copies of this file in any form |
19 | | * for internal or external distribution as long as this notice |
20 | | * remains attached. |
21 | | */ |
22 | | |
23 | | /* --------------------------------------------------------------------- |
24 | | |
25 | | Conversions between UTF32, UTF-16, and UTF-8. Source code file. |
26 | | Author: Mark E. Davis, 1994. |
27 | | Rev History: Rick McGowan, fixes & updates May 2001. |
28 | | Sept 2001: fixed const & error conditions per |
29 | | mods suggested by S. Parent & A. Lillich. |
30 | | June 2002: Tim Dodd added detection and handling of incomplete |
31 | | source sequences, enhanced error detection, added casts |
32 | | to eliminate compiler warnings. |
33 | | July 2003: slight mods to back out aggressive FFFE detection. |
34 | | Jan 2004: updated switches in from-UTF8 conversions. |
35 | | Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions. |
36 | | |
37 | | See the header file "utf.h" for complete documentation. |
38 | | |
39 | | ------------------------------------------------------------------------ */ |
40 | | |
41 | | #include <winpr/wtypes.h> |
42 | | #include <winpr/string.h> |
43 | | #include <winpr/assert.h> |
44 | | #include <winpr/cast.h> |
45 | | |
46 | | #include "unicode.h" |
47 | | |
48 | | #include "../log.h" |
49 | | #define TAG WINPR_TAG("unicode") |
50 | | |
51 | | /* |
52 | | * Character Types: |
53 | | * |
54 | | * UTF8: uint8_t 8 bits |
55 | | * UTF16: uint16_t 16 bits |
56 | | * UTF32: uint32_t 32 bits |
57 | | */ |
58 | | |
59 | | /* Some fundamental constants */ |
/*
 * Code point limits used by the converters below.
 * Every expansion is fully parenthesized so the macros behave as a single
 * operand in any expression context (e.g. after sizeof or next to a postfix
 * operator).
 */
#define UNI_REPLACEMENT_CHAR ((uint32_t)0x0000FFFD) /* written in place of unconvertible input */
#define UNI_MAX_BMP ((uint32_t)0x0000FFFF)          /* largest value stored as one UTF-16 unit */
#define UNI_MAX_UTF16 ((uint32_t)0x0010FFFF)        /* largest value stored as a surrogate pair */
#define UNI_MAX_UTF32 ((uint32_t)0x7FFFFFFF)
#define UNI_MAX_LEGAL_UTF32 ((uint32_t)0x0010FFFF)
65 | | |
/* Status reported by the internal conversion loops. */
typedef enum
{
	conversionOK = 0,    /* the whole source range was converted */
	sourceExhausted = 1, /* source ended in the middle of a character */
	targetExhausted = 2, /* not enough room left in the target */
	sourceIllegal = 3    /* source holds an illegal/malformed sequence */
} ConversionResult;
73 | | |
/* Selects how the conversion loops treat input they consider illegal. */
typedef enum
{
	strictConversion = 0, /* stop and report sourceIllegal */
	lenientConversion = 1 /* keep going instead of failing */
} ConversionFlags;
79 | | |
/* Number of payload bits carried by each half of a surrogate pair. */
static const int halfShift = 10;

/* Value removed from / added to a code point when splitting / joining a surrogate pair. */
static const uint32_t halfBase = 0x00010000UL;
/* Mask selecting the low halfShift bits of a value. */
static const uint32_t halfMask = 0x000003FFUL;
84 | | |
/*
 * Boundaries of the UTF-16 surrogate ranges.
 * The expansions are fully parenthesized so each macro is a single operand
 * wherever it is used.
 */
#define UNI_SUR_HIGH_START ((uint32_t)0xD800) /* first high (leading) surrogate */
#define UNI_SUR_HIGH_END ((uint32_t)0xDBFF)   /* last high (leading) surrogate */
#define UNI_SUR_LOW_START ((uint32_t)0xDC00)  /* first low (trailing) surrogate */
#define UNI_SUR_LOW_END ((uint32_t)0xDFFF)    /* last low (trailing) surrogate */
89 | | |
90 | | /* --------------------------------------------------------------------- */ |
91 | | |
92 | | /* |
93 | | * Index into the table below with the first byte of a UTF-8 sequence to |
94 | | * get the number of trailing bytes that are supposed to follow it. |
95 | | * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is |
96 | | * left as-is for anyone who may want to do such conversion, which was |
97 | | * allowed in earlier algorithms. |
98 | | */ |
static const char trailingBytesForUTF8[256] = {
	/* 0x00 - 0xBF: no trailing bytes */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x30 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x40 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x50 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x60 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x70 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0 */
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xB0 */
	/* 0xC0 - 0xDF: one trailing byte */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xC0 */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0xD0 */
	/* 0xE0 - 0xEF: two trailing bytes */
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xE0 */
	/* 0xF0 - 0xF7: three, 0xF8 - 0xFB: four, 0xFC - 0xFF: five trailing bytes */
	3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5 /* 0xF0 */
};
109 | | |
110 | | /* |
111 | | * Magic values subtracted from a buffer value during UTF8 conversion. |
112 | | * This table contains as many values as there might be trailing bytes |
113 | | * in a UTF-8 sequence. |
114 | | */ |
/* Indexed by the number of trailing bytes of the sequence (0 to 5). */
static const uint32_t offsetsFromUTF8[6] = {
	0x00000000UL, /* 0 trailing bytes */
	0x00003080UL, /* 1 trailing byte */
	0x000E2080UL, /* 2 trailing bytes */
	0x03C82080UL, /* 3 trailing bytes */
	0xFA082080UL, /* 4 trailing bytes */
	0x82082080UL  /* 5 trailing bytes */
};
117 | | |
118 | | /* |
119 | | * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed |
120 | | * into the first byte, depending on how many bytes follow. There are |
121 | | * as many entries in this table as there are UTF-8 sequence types. |
122 | | * (I.e., one byte sequence, two byte... etc.). Remember that sequence |
123 | | * for *legal* UTF-8 will be 4 or fewer bytes total. |
124 | | */ |
/* Indexed by the total number of bytes in the sequence; index 0 is unused padding. */
static const uint8_t firstByteMark[7] = {
	0x00, /* 0: not a sequence length */
	0x00, /* 1 byte  */
	0xC0, /* 2 bytes */
	0xE0, /* 3 bytes */
	0xF0, /* 4 bytes */
	0xF8, /* 5 bytes */
	0xFC  /* 6 bytes */
};
126 | | |
127 | | /* --------------------------------------------------------------------- */ |
128 | | |
129 | | /* The interface converts a whole buffer to avoid function-call overhead. |
130 | | * Constants have been gathered. Loops & conditionals have been removed as |
131 | | * much as possible for efficiency, in favor of drop-through switches. |
132 | | * (See "Note A" at the bottom of the file for equivalent code.) |
133 | | * If your compiler supports it, the "isLegalUTF8" call can be turned |
134 | | * into an inline function. |
135 | | */ |
136 | | |
137 | | /* --------------------------------------------------------------------- */ |
138 | | |
139 | | static ConversionResult winpr_ConvertUTF16toUTF8_Internal(const uint16_t** sourceStart, |
140 | | const uint16_t* sourceEnd, |
141 | | uint8_t** targetStart, uint8_t* targetEnd, |
142 | | ConversionFlags flags) |
143 | 0 | { |
144 | 0 | bool computeLength = (!targetEnd) ? true : false; |
145 | 0 | const uint16_t* source = *sourceStart; |
146 | 0 | uint8_t* target = *targetStart; |
147 | 0 | ConversionResult result = conversionOK; |
148 | |
|
149 | 0 | while (source < sourceEnd) |
150 | 0 | { |
151 | 0 | uint32_t ch = 0; |
152 | 0 | unsigned short bytesToWrite = 0; |
153 | 0 | const uint32_t byteMask = 0xBF; |
154 | 0 | const uint32_t byteMark = 0x80; |
155 | 0 | const uint16_t* oldSource = |
156 | 0 | source; /* In case we have to back up because of target overflow. */ |
157 | |
|
158 | 0 | ch = *source++; |
159 | | |
160 | | /* If we have a surrogate pair, convert to UTF32 first. */ |
161 | 0 | if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) |
162 | 0 | { |
163 | | /* If the 16 bits following the high surrogate are in the source buffer... */ |
164 | 0 | if (source < sourceEnd) |
165 | 0 | { |
166 | 0 | uint32_t ch2 = *source; |
167 | | |
168 | | /* If it's a low surrogate, convert to UTF32. */ |
169 | 0 | if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) |
170 | 0 | { |
171 | 0 | ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) + |
172 | 0 | halfBase; |
173 | 0 | ++source; |
174 | 0 | } |
175 | 0 | else if (flags == strictConversion) |
176 | 0 | { |
177 | | /* it's an unpaired high surrogate */ |
178 | 0 | --source; /* return to the illegal value itself */ |
179 | 0 | result = sourceIllegal; |
180 | 0 | break; |
181 | 0 | } |
182 | 0 | } |
183 | 0 | else |
184 | 0 | { |
185 | | /* We don't have the 16 bits following the high surrogate. */ |
186 | 0 | --source; /* return to the high surrogate */ |
187 | 0 | result = sourceExhausted; |
188 | 0 | break; |
189 | 0 | } |
190 | 0 | } |
191 | 0 | else if (flags == strictConversion) |
192 | 0 | { |
193 | | /* UTF-16 surrogate values are illegal in UTF-32 */ |
194 | 0 | if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) |
195 | 0 | { |
196 | 0 | --source; /* return to the illegal value itself */ |
197 | 0 | result = sourceIllegal; |
198 | 0 | break; |
199 | 0 | } |
200 | 0 | } |
201 | | |
202 | | /* Figure out how many bytes the result will require */ |
203 | 0 | if (ch < (uint32_t)0x80) |
204 | 0 | { |
205 | 0 | bytesToWrite = 1; |
206 | 0 | } |
207 | 0 | else if (ch < (uint32_t)0x800) |
208 | 0 | { |
209 | 0 | bytesToWrite = 2; |
210 | 0 | } |
211 | 0 | else if (ch < (uint32_t)0x10000) |
212 | 0 | { |
213 | 0 | bytesToWrite = 3; |
214 | 0 | } |
215 | 0 | else if (ch < (uint32_t)0x110000) |
216 | 0 | { |
217 | 0 | bytesToWrite = 4; |
218 | 0 | } |
219 | 0 | else |
220 | 0 | { |
221 | 0 | bytesToWrite = 3; |
222 | 0 | ch = UNI_REPLACEMENT_CHAR; |
223 | 0 | } |
224 | |
|
225 | 0 | target += bytesToWrite; |
226 | |
|
227 | 0 | if ((target > targetEnd) && (!computeLength)) |
228 | 0 | { |
229 | 0 | source = oldSource; /* Back up source pointer! */ |
230 | 0 | target -= bytesToWrite; |
231 | 0 | result = targetExhausted; |
232 | 0 | break; |
233 | 0 | } |
234 | | |
235 | 0 | if (!computeLength) |
236 | 0 | { |
237 | 0 | switch (bytesToWrite) |
238 | 0 | { |
239 | | /* note: everything falls through. */ |
240 | 0 | case 4: |
241 | 0 | *--target = (uint8_t)((ch | byteMark) & byteMask); |
242 | 0 | ch >>= 6; |
243 | | /* fallthrough */ |
244 | 0 | WINPR_FALLTHROUGH |
245 | 0 | case 3: |
246 | 0 | *--target = (uint8_t)((ch | byteMark) & byteMask); |
247 | 0 | ch >>= 6; |
248 | | /* fallthrough */ |
249 | 0 | WINPR_FALLTHROUGH |
250 | |
|
251 | 0 | case 2: |
252 | 0 | *--target = (uint8_t)((ch | byteMark) & byteMask); |
253 | 0 | ch >>= 6; |
254 | | /* fallthrough */ |
255 | 0 | WINPR_FALLTHROUGH |
256 | |
|
257 | 0 | case 1: |
258 | 0 | *--target = (uint8_t)(ch | firstByteMark[bytesToWrite]); |
259 | 0 | } |
260 | 0 | } |
261 | 0 | else |
262 | 0 | { |
263 | 0 | switch (bytesToWrite) |
264 | 0 | { |
265 | | /* note: everything falls through. */ |
266 | 0 | case 4: |
267 | 0 | --target; |
268 | | /* fallthrough */ |
269 | 0 | WINPR_FALLTHROUGH |
270 | |
|
271 | 0 | case 3: |
272 | 0 | --target; |
273 | | /* fallthrough */ |
274 | 0 | WINPR_FALLTHROUGH |
275 | |
|
276 | 0 | case 2: |
277 | 0 | --target; |
278 | | /* fallthrough */ |
279 | 0 | WINPR_FALLTHROUGH |
280 | |
|
281 | 0 | case 1: |
282 | 0 | --target; |
283 | 0 | } |
284 | 0 | } |
285 | |
|
286 | 0 | target += bytesToWrite; |
287 | 0 | } |
288 | | |
289 | 0 | *sourceStart = source; |
290 | 0 | *targetStart = target; |
291 | 0 | return result; |
292 | 0 | } |
293 | | |
294 | | /* --------------------------------------------------------------------- */ |
295 | | |
296 | | /* |
297 | | * Utility routine to tell whether a sequence of bytes is legal UTF-8. |
298 | | * This must be called with the length pre-determined by the first byte. |
299 | | * If not calling this from ConvertUTF8to*, then the length can be set by: |
300 | | * length = trailingBytesForUTF8[*source]+1; |
301 | | * and the sequence is illegal right away if there aren't that many bytes |
302 | | * available. |
303 | | * If presented with a length > 4, this returns false. The Unicode |
304 | | * definition of UTF-8 goes up to 4-byte sequences. |
305 | | */ |
306 | | |
/**
 * Check whether the bytes at source form a legal UTF-8 sequence of the given length.
 *
 * The length is expected to be derived from the lead byte by the caller; this function does
 * not verify that the lead byte announces exactly that many bytes.
 *
 * @param source pointer to the lead byte, at least length bytes must be readable
 * @param length total number of bytes in the sequence; anything outside 1..4 is rejected
 *
 * @return true if the sequence is legal, false otherwise
 */
static bool isLegalUTF8(const uint8_t* source, int length)
{
	if ((length < 1) || (length > 4))
		return false;

	const uint8_t lead = source[0];

	/* Third and fourth byte (when present) are plain continuation bytes. */
	for (int idx = length - 1; idx >= 2; idx--)
	{
		if ((source[idx] < 0x80) || (source[idx] > 0xBF))
			return false;
	}

	if (length >= 2)
	{
		/*
		 * The second byte is a continuation byte whose range is narrowed for some lead
		 * bytes. Both bounds are always enforced: checking only the upper bound for
		 * 0xED / 0xF4 would let a non-continuation byte (< 0x80) slip through.
		 */
		uint8_t lowest = 0x80;
		uint8_t highest = 0xBF;

		switch (lead)
		{
			case 0xE0:
				lowest = 0xA0; /* smaller values would be an overlong 3 byte form */
				break;

			case 0xED:
				highest = 0x9F; /* larger values would encode a UTF-16 surrogate */
				break;

			case 0xF0:
				lowest = 0x90; /* smaller values would be an overlong 4 byte form */
				break;

			case 0xF4:
				highest = 0x8F; /* larger values would exceed 0x10FFFF */
				break;

			default:
				break;
		}

		if ((source[1] < lowest) || (source[1] > highest))
			return false;
	}

	/* 0x80..0xBF are continuation bytes, 0xC0/0xC1 only start overlong forms. */
	if ((lead >= 0x80) && (lead < 0xC2))
		return false;

	/* Lead bytes above 0xF4 would encode values beyond 0x10FFFF. */
	if (lead > 0xF4)
		return false;

	return true;
}
379 | | |
380 | | /* --------------------------------------------------------------------- */ |
381 | | |
382 | | static ConversionResult winpr_ConvertUTF8toUTF16_Internal(const uint8_t** sourceStart, |
383 | | const uint8_t* sourceEnd, |
384 | | uint16_t** targetStart, |
385 | | uint16_t* targetEnd, |
386 | | ConversionFlags flags) |
387 | 0 | { |
388 | 0 | bool computeLength = (!targetEnd) ? true : false; |
389 | 0 | ConversionResult result = conversionOK; |
390 | 0 | const uint8_t* source = *sourceStart; |
391 | 0 | uint16_t* target = *targetStart; |
392 | |
|
393 | 0 | while (source < sourceEnd) |
394 | 0 | { |
395 | 0 | uint32_t ch = 0; |
396 | 0 | unsigned short extraBytesToRead = |
397 | 0 | WINPR_ASSERTING_INT_CAST(unsigned short, trailingBytesForUTF8[*source]); |
398 | | |
399 | 0 | if ((source + extraBytesToRead) >= sourceEnd) |
400 | 0 | { |
401 | 0 | result = sourceExhausted; |
402 | 0 | break; |
403 | 0 | } |
404 | | |
405 | | /* Do this check whether lenient or strict */ |
406 | 0 | if (!isLegalUTF8(source, extraBytesToRead + 1)) |
407 | 0 | { |
408 | 0 | result = sourceIllegal; |
409 | 0 | break; |
410 | 0 | } |
411 | | |
412 | | /* |
413 | | * The cases all fall through. See "Note A" below. |
414 | | */ |
415 | 0 | switch (extraBytesToRead) |
416 | 0 | { |
417 | 0 | case 5: |
418 | 0 | ch += *source++; |
419 | 0 | ch <<= 6; /* remember, illegal UTF-8 */ |
420 | | /* fallthrough */ |
421 | 0 | WINPR_FALLTHROUGH |
422 | |
|
423 | 0 | case 4: |
424 | 0 | ch += *source++; |
425 | 0 | ch <<= 6; /* remember, illegal UTF-8 */ |
426 | | /* fallthrough */ |
427 | 0 | WINPR_FALLTHROUGH |
428 | |
|
429 | 0 | case 3: |
430 | 0 | ch += *source++; |
431 | 0 | ch <<= 6; |
432 | | /* fallthrough */ |
433 | 0 | WINPR_FALLTHROUGH |
434 | |
|
435 | 0 | case 2: |
436 | 0 | ch += *source++; |
437 | 0 | ch <<= 6; |
438 | | /* fallthrough */ |
439 | 0 | WINPR_FALLTHROUGH |
440 | |
|
441 | 0 | case 1: |
442 | 0 | ch += *source++; |
443 | 0 | ch <<= 6; |
444 | | /* fallthrough */ |
445 | 0 | WINPR_FALLTHROUGH |
446 | |
|
447 | 0 | case 0: |
448 | 0 | ch += *source++; |
449 | 0 | } |
450 | |
|
451 | 0 | ch -= offsetsFromUTF8[extraBytesToRead]; |
452 | |
|
453 | 0 | if ((target >= targetEnd) && (!computeLength)) |
454 | 0 | { |
455 | 0 | source -= (extraBytesToRead + 1); /* Back up source pointer! */ |
456 | 0 | result = targetExhausted; |
457 | 0 | break; |
458 | 0 | } |
459 | | |
460 | 0 | if (ch <= UNI_MAX_BMP) |
461 | 0 | { |
462 | | /* Target is a character <= 0xFFFF */ |
463 | | /* UTF-16 surrogate values are illegal in UTF-32 */ |
464 | 0 | if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) |
465 | 0 | { |
466 | 0 | if (flags == strictConversion) |
467 | 0 | { |
468 | 0 | source -= (extraBytesToRead + 1); /* return to the illegal value itself */ |
469 | 0 | result = sourceIllegal; |
470 | 0 | break; |
471 | 0 | } |
472 | 0 | else |
473 | 0 | { |
474 | 0 | if (!computeLength) |
475 | 0 | *target++ = UNI_REPLACEMENT_CHAR; |
476 | 0 | else |
477 | 0 | target++; |
478 | 0 | } |
479 | 0 | } |
480 | 0 | else |
481 | 0 | { |
482 | 0 | if (!computeLength) |
483 | 0 | *target++ = (uint16_t)ch; /* normal case */ |
484 | 0 | else |
485 | 0 | target++; |
486 | 0 | } |
487 | 0 | } |
488 | 0 | else if (ch > UNI_MAX_UTF16) |
489 | 0 | { |
490 | 0 | if (flags == strictConversion) |
491 | 0 | { |
492 | 0 | result = sourceIllegal; |
493 | 0 | source -= (extraBytesToRead + 1); /* return to the start */ |
494 | 0 | break; /* Bail out; shouldn't continue */ |
495 | 0 | } |
496 | 0 | else |
497 | 0 | { |
498 | 0 | if (!computeLength) |
499 | 0 | *target++ = UNI_REPLACEMENT_CHAR; |
500 | 0 | else |
501 | 0 | target++; |
502 | 0 | } |
503 | 0 | } |
504 | 0 | else |
505 | 0 | { |
506 | | /* target is a character in range 0xFFFF - 0x10FFFF. */ |
507 | 0 | if ((target + 1 >= targetEnd) && (!computeLength)) |
508 | 0 | { |
509 | 0 | source -= (extraBytesToRead + 1); /* Back up source pointer! */ |
510 | 0 | result = targetExhausted; |
511 | 0 | break; |
512 | 0 | } |
513 | | |
514 | 0 | ch -= halfBase; |
515 | |
|
516 | 0 | if (!computeLength) |
517 | 0 | { |
518 | 0 | *target++ = (uint16_t)((ch >> halfShift) + UNI_SUR_HIGH_START); |
519 | 0 | *target++ = (uint16_t)((ch & halfMask) + UNI_SUR_LOW_START); |
520 | 0 | } |
521 | 0 | else |
522 | 0 | { |
523 | 0 | target++; |
524 | 0 | target++; |
525 | 0 | } |
526 | 0 | } |
527 | 0 | } |
528 | | |
529 | 0 | *sourceStart = source; |
530 | 0 | *targetStart = target; |
531 | 0 | return result; |
532 | 0 | } |
533 | | |
534 | | /** |
535 | | * WinPR built-in Unicode API |
536 | | */ |
537 | | |
538 | | static int winpr_ConvertUTF8toUTF16(const uint8_t* src, int cchSrc, uint16_t* dst, int cchDst) |
539 | 0 | { |
540 | 0 | size_t length = 0; |
541 | 0 | uint16_t* dstBeg = NULL; |
542 | 0 | uint16_t* dstEnd = NULL; |
543 | 0 | const uint8_t* srcBeg = NULL; |
544 | 0 | const uint8_t* srcEnd = NULL; |
545 | 0 | ConversionResult result = sourceIllegal; |
546 | |
|
547 | 0 | if (cchSrc == -1) |
548 | 0 | cchSrc = (int)strnlen((const char*)src, INT32_MAX - 1) + 1; |
549 | |
|
550 | 0 | srcBeg = src; |
551 | 0 | srcEnd = &src[cchSrc]; |
552 | |
|
553 | 0 | if (cchDst == 0) |
554 | 0 | { |
555 | 0 | result = |
556 | 0 | winpr_ConvertUTF8toUTF16_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion); |
557 | |
|
558 | 0 | length = dstBeg - (uint16_t*)NULL; |
559 | 0 | } |
560 | 0 | else |
561 | 0 | { |
562 | 0 | dstBeg = dst; |
563 | 0 | dstEnd = &dst[cchDst]; |
564 | |
|
565 | 0 | result = |
566 | 0 | winpr_ConvertUTF8toUTF16_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion); |
567 | |
|
568 | 0 | length = dstBeg - dst; |
569 | 0 | } |
570 | |
|
571 | 0 | if (result == targetExhausted) |
572 | 0 | { |
573 | 0 | SetLastError(ERROR_INSUFFICIENT_BUFFER); |
574 | 0 | return 0; |
575 | 0 | } |
576 | | |
577 | 0 | return (result == conversionOK) ? WINPR_ASSERTING_INT_CAST(int, length) : 0; |
578 | 0 | } |
579 | | |
580 | | static int winpr_ConvertUTF16toUTF8(const uint16_t* src, int cchSrc, uint8_t* dst, int cchDst) |
581 | 0 | { |
582 | 0 | size_t length = 0; |
583 | 0 | uint8_t* dstBeg = NULL; |
584 | 0 | uint8_t* dstEnd = NULL; |
585 | 0 | const uint16_t* srcBeg = NULL; |
586 | 0 | const uint16_t* srcEnd = NULL; |
587 | 0 | ConversionResult result = sourceIllegal; |
588 | |
|
589 | 0 | if (cchSrc == -1) |
590 | 0 | cchSrc = (int)_wcsnlen((const WCHAR*)src, INT32_MAX - 1) + 1; |
591 | |
|
592 | 0 | srcBeg = src; |
593 | 0 | srcEnd = &src[cchSrc]; |
594 | |
|
595 | 0 | if (cchDst == 0) |
596 | 0 | { |
597 | 0 | result = |
598 | 0 | winpr_ConvertUTF16toUTF8_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion); |
599 | |
|
600 | 0 | length = dstBeg - ((uint8_t*)NULL); |
601 | 0 | } |
602 | 0 | else |
603 | 0 | { |
604 | 0 | dstBeg = dst; |
605 | 0 | dstEnd = &dst[cchDst]; |
606 | |
|
607 | 0 | result = |
608 | 0 | winpr_ConvertUTF16toUTF8_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion); |
609 | |
|
610 | 0 | length = dstBeg - dst; |
611 | 0 | } |
612 | |
|
613 | 0 | if (result == targetExhausted) |
614 | 0 | { |
615 | 0 | SetLastError(ERROR_INSUFFICIENT_BUFFER); |
616 | 0 | return 0; |
617 | 0 | } |
618 | | |
619 | 0 | return (result == conversionOK) ? WINPR_ASSERTING_INT_CAST(int, length) : 0; |
620 | 0 | } |
621 | | |
622 | | /* --------------------------------------------------------------------- */ |
623 | | |
624 | | int int_MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte, |
625 | | LPWSTR lpWideCharStr, int cchWideChar) |
626 | 0 | { |
627 | 0 | size_t cbCharLen = (size_t)cbMultiByte; |
628 | |
|
629 | 0 | WINPR_UNUSED(dwFlags); |
630 | | |
631 | | /* If cbMultiByte is 0, the function fails */ |
632 | 0 | if ((cbMultiByte == 0) || (cbMultiByte < -1)) |
633 | 0 | return 0; |
634 | | |
635 | 0 | if (cchWideChar < 0) |
636 | 0 | return -1; |
637 | | |
638 | 0 | if (cbMultiByte < 0) |
639 | 0 | { |
640 | 0 | const size_t len = strlen(lpMultiByteStr); |
641 | 0 | if (len >= INT32_MAX) |
642 | 0 | return 0; |
643 | 0 | cbCharLen = (int)len + 1; |
644 | 0 | } |
645 | 0 | else |
646 | 0 | cbCharLen = cbMultiByte; |
647 | | |
648 | 0 | WINPR_ASSERT(lpMultiByteStr); |
649 | 0 | switch (CodePage) |
650 | 0 | { |
651 | 0 | case CP_ACP: |
652 | 0 | case CP_UTF8: |
653 | 0 | break; |
654 | | |
655 | 0 | default: |
656 | 0 | WLog_ERR(TAG, "Unsupported encoding %u", CodePage); |
657 | 0 | return 0; |
658 | 0 | } |
659 | | |
660 | 0 | return winpr_ConvertUTF8toUTF16((const uint8_t*)lpMultiByteStr, |
661 | 0 | WINPR_ASSERTING_INT_CAST(int, cbCharLen), |
662 | 0 | (uint16_t*)lpWideCharStr, cchWideChar); |
663 | 0 | } |
664 | | |
665 | | int int_WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar, |
666 | | LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar, |
667 | | LPBOOL lpUsedDefaultChar) |
668 | 0 | { |
669 | 0 | size_t cbCharLen = (size_t)cchWideChar; |
670 | |
|
671 | 0 | WINPR_UNUSED(dwFlags); |
672 | | /* If cchWideChar is 0, the function fails */ |
673 | 0 | if ((cchWideChar == 0) || (cchWideChar < -1)) |
674 | 0 | return 0; |
675 | | |
676 | 0 | if (cbMultiByte < 0) |
677 | 0 | return -1; |
678 | | |
679 | 0 | WINPR_ASSERT(lpWideCharStr); |
680 | | /* If cchWideChar is -1, the string is null-terminated */ |
681 | 0 | if (cchWideChar == -1) |
682 | 0 | { |
683 | 0 | const size_t len = _wcslen(lpWideCharStr); |
684 | 0 | if (len >= INT32_MAX) |
685 | 0 | return 0; |
686 | 0 | cbCharLen = (int)len + 1; |
687 | 0 | } |
688 | 0 | else |
689 | 0 | cbCharLen = cchWideChar; |
690 | | |
691 | | /* |
692 | | * if cbMultiByte is 0, the function returns the required buffer size |
693 | | * in bytes for lpMultiByteStr and makes no use of the output parameter itself. |
694 | | */ |
695 | | |
696 | 0 | return winpr_ConvertUTF16toUTF8((const uint16_t*)lpWideCharStr, |
697 | 0 | WINPR_ASSERTING_INT_CAST(int, cbCharLen), |
698 | 0 | (uint8_t*)lpMultiByteStr, cbMultiByte); |
699 | 0 | } |