Coverage Report

Created: 2026-01-09 06:43

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/FreeRDP/winpr/libwinpr/crt/unicode_builtin.c
Line
Count
Source
1
/*
2
 * Copyright 2001-2004 Unicode, Inc.
3
 *
4
 * Disclaimer
5
 *
6
 * This source code is provided as is by Unicode, Inc. No claims are
7
 * made as to fitness for any particular purpose. No warranties of any
8
 * kind are expressed or implied. The recipient agrees to determine
9
 * applicability of information provided. If this file has been
10
 * purchased on magnetic or optical media from Unicode, Inc., the
11
 * sole remedy for any claim will be exchange of defective media
12
 * within 90 days of receipt.
13
 *
14
 * Limitations on Rights to Redistribute This Code
15
 *
16
 * Unicode, Inc. hereby grants the right to freely use the information
17
 * supplied in this file in the creation of products supporting the
18
 * Unicode Standard, and to make copies of this file in any form
19
 * for internal or external distribution as long as this notice
20
 * remains attached.
21
 */
22
23
/* ---------------------------------------------------------------------
24
25
Conversions between UTF32, UTF-16, and UTF-8. Source code file.
26
Author: Mark E. Davis, 1994.
27
Rev History: Rick McGowan, fixes & updates May 2001.
28
Sept 2001: fixed const & error conditions per
29
mods suggested by S. Parent & A. Lillich.
30
June 2002: Tim Dodd added detection and handling of incomplete
31
source sequences, enhanced error detection, added casts
32
to eliminate compiler warnings.
33
July 2003: slight mods to back out aggressive FFFE detection.
34
Jan 2004: updated switches in from-UTF8 conversions.
35
Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
36
37
See the header file "utf.h" for complete documentation.
38
39
------------------------------------------------------------------------ */
40
41
#include <winpr/wtypes.h>
42
#include <winpr/string.h>
43
#include <winpr/assert.h>
44
#include <winpr/cast.h>
45
46
#include "unicode.h"
47
48
#include "../log.h"
49
#define TAG WINPR_TAG("unicode")
50
51
/*
52
 * Character Types:
53
 *
54
 * UTF8:    uint8_t   8 bits
55
 * UTF16: uint16_t  16 bits
56
 * UTF32: uint32_t  32 bits
57
 */
58
59
/* Some fundamental constants */
60
0
/*
 * Code point limits used by the converters.
 * Each replacement list is fully parenthesized so the cast stays bound to its
 * literal regardless of the operator that surrounds the macro at a use site
 * (for example `sizeof UNI_MAX_BMP`).
 */
#define UNI_REPLACEMENT_CHAR ((uint32_t)0x0000FFFD) /* emitted in place of unconvertible input */
#define UNI_MAX_BMP ((uint32_t)0x0000FFFF)          /* largest value that fits one UTF-16 unit */
#define UNI_MAX_UTF16 ((uint32_t)0x0010FFFF)        /* largest value a surrogate pair can encode */
#define UNI_MAX_UTF32 ((uint32_t)0x7FFFFFFF)
#define UNI_MAX_LEGAL_UTF32 ((uint32_t)0x0010FFFF)
65
66
/* Outcome reported by the internal buffer converters. */
typedef enum
{
  conversionOK = 0,    /* every source unit was converted */
  sourceExhausted = 1, /* source ends in the middle of a character */
  targetExhausted = 2, /* target buffer is too small for the result */
  sourceIllegal = 3    /* source holds an illegal/malformed sequence */
} ConversionResult;
73
74
/* Selects how the internal converters react to unpaired surrogates and
 * out-of-range values: strict mode stops with an error, lenient mode
 * carries on. */
typedef enum
{
  strictConversion = 0,
  lenientConversion = 1
} ConversionFlags;
79
80
/* Surrogate arithmetic: a supplementary code point minus halfBase is split
 * into two 10 bit halves that are added to the surrogate range starts. */
static const int halfShift = 10; /* used for shifting by 10 bits */

static const uint32_t halfBase = 0x0010000UL;
static const uint32_t halfMask = 0x3FFUL;

/*
 * UTF-16 surrogate ranges.
 * The replacement lists are fully parenthesized so the cast cannot be
 * separated from its literal by a surrounding operator.
 */
#define UNI_SUR_HIGH_START ((uint32_t)0xD800)
#define UNI_SUR_HIGH_END ((uint32_t)0xDBFF)
#define UNI_SUR_LOW_START ((uint32_t)0xDC00)
#define UNI_SUR_LOW_END ((uint32_t)0xDFFF)
89
90
/* --------------------------------------------------------------------- */
91
92
/*
93
 * Index into the table below with the first byte of a UTF-8 sequence to
94
 * get the number of trailing bytes that are supposed to follow it.
95
 * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
96
 * left as-is for anyone who may want to do such conversion, which was
97
 * allowed in earlier algorithms.
98
 */
99
/* Indexed by the first byte of a sequence; one row per high nibble. */
static const char trailingBytesForUTF8[256] = {
  /* 0x00 - 0x0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 - 0x1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 - 0x2F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 - 0x3F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 - 0x4F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 - 0x5F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x60 - 0x6F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70 - 0x7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x80 - 0x8F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xA0 - 0xAF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xB0 - 0xBF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xC0 - 0xCF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xD0 - 0xDF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xE0 - 0xEF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xF0 - 0xFF */ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};
109
110
/*
111
 * Magic values subtracted from a buffer value during UTF8 conversion.
112
 * This table contains as many values as there might be trailing bytes
113
 * in a UTF-8 sequence.
114
 */
115
/* Indexed by the number of trailing bytes of the sequence. */
static const uint32_t offsetsFromUTF8[6] = {
  0x00000000UL, /* 0 trailing bytes */
  0x00003080UL, /* 1 trailing byte:  (0xC0 << 6) + 0x80 */
  0x000E2080UL, /* 2 trailing bytes: (0xE0 << 12) + (0x80 << 6) + 0x80 */
  0x03C82080UL, /* 3 trailing bytes: (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
  0xFA082080UL, /* 4 trailing bytes */
  0x82082080UL  /* 5 trailing bytes */
};
117
118
/*
119
 * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
120
 * into the first byte, depending on how many bytes follow.  There are
121
 * as many entries in this table as there are UTF-8 sequence types.
122
 * (I.e., one byte sequence, two byte... etc.). Remember that sequence
123
 * for *legal* UTF-8 will be 4 or fewer bytes total.
124
 */
125
/* Indexed by the total number of bytes in the sequence (index 0 is unused padding). */
static const uint8_t firstByteMark[7] = {
  0x00, /* 0: placeholder */
  0x00, /* 1 byte  */
  0xC0, /* 2 bytes */
  0xE0, /* 3 bytes */
  0xF0, /* 4 bytes */
  0xF8, /* 5 bytes */
  0xFC  /* 6 bytes */
};
126
127
/* We always need UTF-16LE, even on big endian systems! */
128
/*
 * Return `w` with its first two bytes exchanged when __BIG_ENDIAN__ is
 * defined, and unchanged otherwise. The converters pass every code unit they
 * read or write through this helper.
 */
static WCHAR setWcharFrom(WCHAR w)
{
#if !defined(__BIG_ENDIAN__)
  /* Nothing to do: the value is handed back untouched. */
  return w;
#else
  union
  {
    WCHAR value;
    char bytes[2];
  } swapper;

  swapper.value = w;

  /* Exchange the two bytes through a temporary. */
  const char first = swapper.bytes[0];
  swapper.bytes[0] = swapper.bytes[1];
  swapper.bytes[1] = first;

  return swapper.value;
#endif
}
146
147
/* --------------------------------------------------------------------- */
148
149
/* The interface converts a whole buffer to avoid function-call overhead.
150
 * Constants have been gathered. Loops & conditionals have been removed as
151
 * much as possible for efficiency, in favor of drop-through switches.
152
 * (See "Note A" at the bottom of the file for equivalent code.)
153
 * If your compiler supports it, the "isLegalUTF8" call can be turned
154
 * into an inline function.
155
 */
156
157
/* --------------------------------------------------------------------- */
158
159
static ConversionResult winpr_ConvertUTF16toUTF8_Internal(const uint16_t** sourceStart,
160
                                                          const uint16_t* sourceEnd,
161
                                                          uint8_t** targetStart,
162
                                                          const uint8_t* targetEnd,
163
                                                          ConversionFlags flags)
164
428
{
165
428
  bool computeLength = (!targetEnd) ? true : false;
166
428
  const uint16_t* source = *sourceStart;
167
428
  uint8_t* target = *targetStart;
168
428
  ConversionResult result = conversionOK;
169
170
585k
  while (source < sourceEnd)
171
585k
  {
172
585k
    uint32_t ch = 0;
173
585k
    unsigned short bytesToWrite = 0;
174
585k
    const uint32_t byteMask = 0xBF;
175
585k
    const uint32_t byteMark = 0x80;
176
585k
    const uint16_t* oldSource =
177
585k
        source; /* In case we have to back up because of target overflow. */
178
179
585k
    ch = setWcharFrom(*source++);
180
181
    /* If we have a surrogate pair, convert to UTF32 first. */
182
585k
    if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
183
939
    {
184
      /* If the 16 bits following the high surrogate are in the source buffer... */
185
939
      if (source < sourceEnd)
186
937
      {
187
937
        uint32_t ch2 = setWcharFrom(*source);
188
189
        /* If it's a low surrogate, convert to UTF32. */
190
937
        if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
191
883
        {
192
883
          ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) +
193
883
               halfBase;
194
883
          ++source;
195
883
        }
196
54
        else if (flags == strictConversion)
197
54
        {
198
          /* it's an unpaired high surrogate */
199
54
          --source; /* return to the illegal value itself */
200
54
          result = sourceIllegal;
201
54
          break;
202
54
        }
203
937
      }
204
2
      else
205
2
      {
206
        /* We don't have the 16 bits following the high surrogate. */
207
2
        --source; /* return to the high surrogate */
208
2
        result = sourceExhausted;
209
2
        break;
210
2
      }
211
939
    }
212
584k
    else if (flags == strictConversion)
213
584k
    {
214
      /* UTF-16 surrogate values are illegal in UTF-32 */
215
584k
      if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
216
34
      {
217
34
        --source; /* return to the illegal value itself */
218
34
        result = sourceIllegal;
219
34
        break;
220
34
      }
221
584k
    }
222
223
    /* Figure out how many bytes the result will require */
224
585k
    if (ch < (uint32_t)0x80)
225
786
    {
226
786
      bytesToWrite = 1;
227
786
    }
228
584k
    else if (ch < (uint32_t)0x800)
229
39.2k
    {
230
39.2k
      bytesToWrite = 2;
231
39.2k
    }
232
545k
    else if (ch < (uint32_t)0x10000)
233
544k
    {
234
544k
      bytesToWrite = 3;
235
544k
    }
236
883
    else if (ch < (uint32_t)0x110000)
237
883
    {
238
883
      bytesToWrite = 4;
239
883
    }
240
0
    else
241
0
    {
242
0
      bytesToWrite = 3;
243
0
      ch = UNI_REPLACEMENT_CHAR;
244
0
    }
245
246
585k
    target += bytesToWrite;
247
248
585k
    if ((target > targetEnd) && (!computeLength))
249
0
    {
250
0
      source = oldSource; /* Back up source pointer! */
251
0
      target -= bytesToWrite;
252
0
      result = targetExhausted;
253
0
      break;
254
0
    }
255
256
585k
    if (!computeLength)
257
291k
    {
258
291k
      switch (bytesToWrite)
259
291k
      {
260
          /* note: everything falls through. */
261
436
        case 4:
262
436
          *--target = (uint8_t)((ch | byteMark) & byteMask);
263
436
          ch >>= 6;
264
          /* fallthrough */
265
436
          WINPR_FALLTHROUGH
266
271k
        case 3:
267
271k
          *--target = (uint8_t)((ch | byteMark) & byteMask);
268
271k
          ch >>= 6;
269
          /* fallthrough */
270
271k
          WINPR_FALLTHROUGH
271
272
291k
        case 2:
273
291k
          *--target = (uint8_t)((ch | byteMark) & byteMask);
274
291k
          ch >>= 6;
275
          /* fallthrough */
276
291k
          WINPR_FALLTHROUGH
277
278
291k
        case 1:
279
291k
          *--target = (uint8_t)(ch | firstByteMark[bytesToWrite]);
280
291k
          break;
281
0
        default:
282
0
          return sourceIllegal;
283
291k
      }
284
291k
    }
285
293k
    else
286
293k
    {
287
293k
      switch (bytesToWrite)
288
293k
      {
289
          /* note: everything falls through. */
290
447
        case 4:
291
447
          --target;
292
          /* fallthrough */
293
447
          WINPR_FALLTHROUGH
294
295
273k
        case 3:
296
273k
          --target;
297
          /* fallthrough */
298
273k
          WINPR_FALLTHROUGH
299
300
292k
        case 2:
301
292k
          --target;
302
          /* fallthrough */
303
292k
          WINPR_FALLTHROUGH
304
305
293k
        case 1:
306
293k
          --target;
307
293k
          break;
308
0
        default:
309
0
          return sourceIllegal;
310
293k
      }
311
293k
    }
312
313
585k
    target += bytesToWrite;
314
585k
  }
315
316
428
  *sourceStart = source;
317
428
  *targetStart = target;
318
428
  return result;
319
428
}
320
321
/* --------------------------------------------------------------------- */
322
323
/*
324
 * Utility routine to tell whether a sequence of bytes is legal UTF-8.
325
 * This must be called with the length pre-determined by the first byte.
326
 * If not calling this from ConvertUTF8to*, then the length can be set by:
327
 *  length = trailingBytesForUTF8[*source]+1;
328
 * and the sequence is illegal right away if there aren't that many bytes
329
 * available.
330
 * If presented with a length > 4, this returns false.  The Unicode
331
 * definition of UTF-8 goes up to 4-byte sequences.
332
 */
333
334
/*
 * Check that the `length` bytes starting at `source` form one well-formed
 * UTF-8 sequence. `length` is expected to be derived from the first byte
 * (number of trailing bytes + 1); every length outside 1..4 is rejected.
 *
 * The second byte is range checked on both sides for every lead byte, so
 * sequences such as ED 41 80 or F4 20 80 80 are rejected as well.
 */
static bool isLegalUTF8(const uint8_t* source, int length)
{
  if ((length < 1) || (length > 4))
    return false;

  const uint8_t lead = source[0];

  /* The third and fourth byte, when present, are plain continuation bytes. */
  for (int idx = length - 1; idx >= 2; idx--)
  {
    const uint8_t tail = source[idx];

    if ((tail < 0x80) || (tail > 0xBF))
      return false;
  }

  if (length >= 2)
  {
    /* The admissible range of the second byte depends on the lead byte:
     * the narrowed ranges exclude overlong forms (E0, F0), surrogates (ED)
     * and values above 0x10FFFF (F4). */
    uint8_t lowest = 0x80;
    uint8_t highest = 0xBF;

    switch (lead)
    {
      case 0xE0:
        lowest = 0xA0;
        break;

      case 0xED:
        highest = 0x9F;
        break;

      case 0xF0:
        lowest = 0x90;
        break;

      case 0xF4:
        highest = 0x8F;
        break;

      default:
        break;
    }

    const uint8_t second = source[1];

    if ((second < lowest) || (second > highest))
      return false;
  }

  /* 0x80..0xC1 can never start a sequence: continuation bytes and the
   * overlong two byte leads C0/C1. */
  if ((lead >= 0x80) && (lead < 0xC2))
    return false;

  /* Lead bytes above F4 would encode values beyond 0x10FFFF. */
  if (lead > 0xF4)
    return false;

  return true;
}
406
407
/* --------------------------------------------------------------------- */
408
409
static ConversionResult winpr_ConvertUTF8toUTF16_Internal(const uint8_t** sourceStart,
410
                                                          const uint8_t* sourceEnd,
411
                                                          uint16_t** targetStart,
412
                                                          const uint16_t* targetEnd,
413
                                                          ConversionFlags flags)
414
2.03k
{
415
2.03k
  bool computeLength = (!targetEnd) ? true : false;
416
2.03k
  ConversionResult result = conversionOK;
417
2.03k
  const uint8_t* source = *sourceStart;
418
2.03k
  uint16_t* target = *targetStart;
419
420
7.64M
  while (source < sourceEnd)
421
7.64M
  {
422
7.64M
    uint32_t ch = 0;
423
7.64M
    unsigned short extraBytesToRead =
424
15.2M
        WINPR_ASSERTING_INT_CAST(unsigned short, trailingBytesForUTF8[*source]);
425
426
15.2M
    if ((source + extraBytesToRead) >= sourceEnd)
427
1
    {
428
1
      result = sourceExhausted;
429
1
      break;
430
1
    }
431
432
    /* Do this check whether lenient or strict */
433
7.64M
    if (!isLegalUTF8(source, extraBytesToRead + 1))
434
78
    {
435
78
      result = sourceIllegal;
436
78
      break;
437
78
    }
438
439
    /*
440
     * The cases all fall through. See "Note A" below.
441
     */
442
7.64M
    switch (extraBytesToRead)
443
7.64M
    {
444
0
      case 5:
445
0
        ch += *source++;
446
0
        ch <<= 6; /* remember, illegal UTF-8 */
447
                  /* fallthrough */
448
0
        WINPR_FALLTHROUGH
449
450
0
      case 4:
451
0
        ch += *source++;
452
0
        ch <<= 6; /* remember, illegal UTF-8 */
453
                  /* fallthrough */
454
0
        WINPR_FALLTHROUGH
455
456
871
      case 3:
457
871
        ch += *source++;
458
871
        ch <<= 6;
459
        /* fallthrough */
460
871
        WINPR_FALLTHROUGH
461
462
1.54k
      case 2:
463
1.54k
        ch += *source++;
464
1.54k
        ch <<= 6;
465
        /* fallthrough */
466
1.54k
        WINPR_FALLTHROUGH
467
468
1.91k
      case 1:
469
1.91k
        ch += *source++;
470
1.91k
        ch <<= 6;
471
        /* fallthrough */
472
1.91k
        WINPR_FALLTHROUGH
473
474
7.64M
      case 0:
475
7.64M
        ch += *source++;
476
7.64M
        break;
477
0
      default:
478
0
        return sourceIllegal;
479
7.64M
    }
480
481
7.64M
    ch -= offsetsFromUTF8[extraBytesToRead];
482
483
7.64M
    if ((target >= targetEnd) && (!computeLength))
484
0
    {
485
0
      source -= (extraBytesToRead + 1); /* Back up source pointer! */
486
0
      result = targetExhausted;
487
0
      break;
488
0
    }
489
490
7.64M
    if (ch <= UNI_MAX_BMP)
491
7.64M
    {
492
      /* Target is a character <= 0xFFFF */
493
      /* UTF-16 surrogate values are illegal in UTF-32 */
494
7.64M
      if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
495
0
      {
496
0
        if (flags == strictConversion)
497
0
        {
498
0
          source -= (extraBytesToRead + 1); /* return to the illegal value itself */
499
0
          result = sourceIllegal;
500
0
          break;
501
0
        }
502
0
        else
503
0
        {
504
0
          if (!computeLength)
505
0
            *target++ = setWcharFrom(UNI_REPLACEMENT_CHAR);
506
0
          else
507
0
            target++;
508
0
        }
509
0
      }
510
7.64M
      else
511
7.64M
      {
512
7.64M
        if (!computeLength)
513
3.76M
          *target++ = setWcharFrom((WCHAR)ch); /* normal case */
514
3.87M
        else
515
3.87M
          target++;
516
7.64M
      }
517
7.64M
    }
518
871
    else if (ch > UNI_MAX_UTF16)
519
0
    {
520
0
      if (flags == strictConversion)
521
0
      {
522
0
        result = sourceIllegal;
523
0
        source -= (extraBytesToRead + 1); /* return to the start */
524
0
        break;                            /* Bail out; shouldn't continue */
525
0
      }
526
0
      else
527
0
      {
528
0
        if (!computeLength)
529
0
          *target++ = setWcharFrom(UNI_REPLACEMENT_CHAR);
530
0
        else
531
0
          target++;
532
0
      }
533
0
    }
534
871
    else
535
871
    {
536
      /* target is a character in range 0xFFFF - 0x10FFFF. */
537
871
      if ((target + 1 >= targetEnd) && (!computeLength))
538
0
      {
539
0
        source -= (extraBytesToRead + 1); /* Back up source pointer! */
540
0
        result = targetExhausted;
541
0
        break;
542
0
      }
543
544
871
      ch -= halfBase;
545
546
871
      if (!computeLength)
547
426
      {
548
426
        *target++ = setWcharFrom((WCHAR)((ch >> halfShift) + UNI_SUR_HIGH_START));
549
426
        *target++ = setWcharFrom((WCHAR)((ch & halfMask) + UNI_SUR_LOW_START));
550
426
      }
551
445
      else
552
445
      {
553
445
        target++;
554
445
        target++;
555
445
      }
556
871
    }
557
7.64M
  }
558
559
2.03k
  *sourceStart = source;
560
2.03k
  *targetStart = target;
561
2.03k
  return result;
562
2.03k
}
563
564
/**
565
 * WinPR built-in Unicode API
566
 */
567
568
static int winpr_ConvertUTF8toUTF16(const uint8_t* src, int cchSrc, uint16_t* dst, int cchDst)
569
2.03k
{
570
2.03k
  size_t length = 0;
571
2.03k
  uint16_t* dstBeg = NULL;
572
2.03k
  uint16_t* dstEnd = NULL;
573
2.03k
  const uint8_t* srcBeg = NULL;
574
2.03k
  const uint8_t* srcEnd = NULL;
575
2.03k
  ConversionResult result = sourceIllegal;
576
577
2.03k
  if (cchSrc == -1)
578
0
    cchSrc = (int)strnlen((const char*)src, INT32_MAX - 1) + 1;
579
580
2.03k
  srcBeg = src;
581
2.03k
  srcEnd = &src[cchSrc];
582
583
2.03k
  if (cchDst == 0)
584
1.05k
  {
585
1.05k
    result =
586
1.05k
        winpr_ConvertUTF8toUTF16_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion);
587
588
1.05k
    length = dstBeg - (uint16_t*)NULL;
589
1.05k
  }
590
980
  else
591
980
  {
592
980
    dstBeg = dst;
593
980
    dstEnd = &dst[cchDst];
594
595
980
    result =
596
980
        winpr_ConvertUTF8toUTF16_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion);
597
598
980
    length = dstBeg - dst;
599
980
  }
600
601
2.03k
  if (result == targetExhausted)
602
0
  {
603
0
    SetLastError(ERROR_INSUFFICIENT_BUFFER);
604
0
    return 0;
605
0
  }
606
607
2.03k
  return (result == conversionOK) ? WINPR_ASSERTING_INT_CAST(int, length) : 0;
608
2.03k
}
609
610
static int winpr_ConvertUTF16toUTF8(const uint16_t* src, int cchSrc, uint8_t* dst, int cchDst)
611
428
{
612
428
  size_t length = 0;
613
428
  uint8_t* dstBeg = NULL;
614
428
  uint8_t* dstEnd = NULL;
615
428
  const uint16_t* srcBeg = NULL;
616
428
  const uint16_t* srcEnd = NULL;
617
428
  ConversionResult result = sourceIllegal;
618
619
428
  if (cchSrc == -1)
620
0
    cchSrc = (int)_wcsnlen((const WCHAR*)src, INT32_MAX - 1) + 1;
621
622
428
  srcBeg = src;
623
428
  srcEnd = &src[cchSrc];
624
625
428
  if (cchDst == 0)
626
259
  {
627
259
    result =
628
259
        winpr_ConvertUTF16toUTF8_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion);
629
630
259
    length = dstBeg - ((uint8_t*)NULL);
631
259
  }
632
169
  else
633
169
  {
634
169
    dstBeg = dst;
635
169
    dstEnd = &dst[cchDst];
636
637
169
    result =
638
169
        winpr_ConvertUTF16toUTF8_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion);
639
640
169
    length = dstBeg - dst;
641
169
  }
642
643
428
  if (result == targetExhausted)
644
0
  {
645
0
    SetLastError(ERROR_INSUFFICIENT_BUFFER);
646
0
    return 0;
647
0
  }
648
649
428
  return (result == conversionOK) ? WINPR_ASSERTING_INT_CAST(int, length) : 0;
650
428
}
651
652
/* --------------------------------------------------------------------- */
653
654
int int_MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte,
655
                            LPWSTR lpWideCharStr, int cchWideChar)
656
2.03k
{
657
2.03k
  size_t cbCharLen = (size_t)cbMultiByte;
658
659
2.03k
  WINPR_UNUSED(dwFlags);
660
661
  /* If cbMultiByte is 0, the function fails */
662
2.03k
  if ((cbMultiByte == 0) || (cbMultiByte < -1))
663
0
    return 0;
664
665
2.03k
  if (cchWideChar < 0)
666
0
    return -1;
667
668
2.03k
  if (cbMultiByte < 0)
669
0
  {
670
0
    const size_t len = strlen(lpMultiByteStr);
671
0
    if (len >= INT32_MAX)
672
0
      return 0;
673
0
    cbCharLen = (int)len + 1;
674
0
  }
675
2.03k
  else
676
2.03k
    cbCharLen = cbMultiByte;
677
678
2.03k
  WINPR_ASSERT(lpMultiByteStr);
679
2.03k
  switch (CodePage)
680
2.03k
  {
681
0
    case CP_ACP:
682
2.03k
    case CP_UTF8:
683
2.03k
      break;
684
685
0
    default:
686
0
      WLog_ERR(TAG, "Unsupported encoding %u", CodePage);
687
0
      return 0;
688
2.03k
  }
689
690
2.03k
  return winpr_ConvertUTF8toUTF16((const uint8_t*)lpMultiByteStr,
691
2.03k
                                  WINPR_ASSERTING_INT_CAST(int, cbCharLen),
692
2.03k
                                  (uint16_t*)lpWideCharStr, cchWideChar);
693
2.03k
}
694
695
int int_WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
696
                            LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
697
                            LPBOOL lpUsedDefaultChar)
698
428
{
699
428
  size_t cbCharLen = (size_t)cchWideChar;
700
701
428
  WINPR_UNUSED(dwFlags);
702
  /* If cchWideChar is 0, the function fails */
703
428
  if ((cchWideChar == 0) || (cchWideChar < -1))
704
0
    return 0;
705
706
428
  if (cbMultiByte < 0)
707
0
    return -1;
708
709
428
  WINPR_ASSERT(lpWideCharStr);
710
  /* If cchWideChar is -1, the string is null-terminated */
711
428
  if (cchWideChar == -1)
712
0
  {
713
0
    const size_t len = _wcslen(lpWideCharStr);
714
0
    if (len >= INT32_MAX)
715
0
      return 0;
716
0
    cbCharLen = (int)len + 1;
717
0
  }
718
428
  else
719
428
    cbCharLen = cchWideChar;
720
721
  /*
722
   * if cbMultiByte is 0, the function returns the required buffer size
723
   * in bytes for lpMultiByteStr and makes no use of the output parameter itself.
724
   */
725
726
428
  return winpr_ConvertUTF16toUTF8((const uint16_t*)lpWideCharStr,
727
428
                                  WINPR_ASSERTING_INT_CAST(int, cbCharLen),
728
428
                                  (uint8_t*)lpMultiByteStr, cbMultiByte);
729
428
}