Coverage Report

Created: 2026-04-12 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/FreeRDP/winpr/libwinpr/crt/unicode_builtin.c
Line
Count
Source
1
/*
2
 * Copyright 2001-2004 Unicode, Inc.
3
 *
4
 * Disclaimer
5
 *
6
 * This source code is provided as is by Unicode, Inc. No claims are
7
 * made as to fitness for any particular purpose. No warranties of any
8
 * kind are expressed or implied. The recipient agrees to determine
9
 * applicability of information provided. If this file has been
10
 * purchased on magnetic or optical media from Unicode, Inc., the
11
 * sole remedy for any claim will be exchange of defective media
12
 * within 90 days of receipt.
13
 *
14
 * Limitations on Rights to Redistribute This Code
15
 *
16
 * Unicode, Inc. hereby grants the right to freely use the information
17
 * supplied in this file in the creation of products supporting the
18
 * Unicode Standard, and to make copies of this file in any form
19
 * for internal or external distribution as long as this notice
20
 * remains attached.
21
 */
22
23
/* ---------------------------------------------------------------------
24
25
Conversions between UTF32, UTF-16, and UTF-8. Source code file.
26
Author: Mark E. Davis, 1994.
27
Rev History: Rick McGowan, fixes & updates May 2001.
28
Sept 2001: fixed const & error conditions per
29
mods suggested by S. Parent & A. Lillich.
30
June 2002: Tim Dodd added detection and handling of incomplete
31
source sequences, enhanced error detection, added casts
32
to eliminate compiler warnings.
33
July 2003: slight mods to back out aggressive FFFE detection.
34
Jan 2004: updated switches in from-UTF8 conversions.
35
Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
36
37
See the header file "utf.h" for complete documentation.
38
39
------------------------------------------------------------------------ */
40
41
#include <winpr/wtypes.h>
42
#include <winpr/string.h>
43
#include <winpr/assert.h>
44
#include <winpr/cast.h>
45
46
#include "unicode.h"
47
48
#include "../log.h"
49
#define TAG WINPR_TAG("unicode")
50
51
/*
52
 * Character Types:
53
 *
54
 * UTF8:    uint8_t   8 bits
55
 * UTF16: uint16_t  16 bits
56
 * UTF32: uint32_t  32 bits
57
 */
58
59
/* Some fundamental constants */
60
0
/*
 * Code point limits.
 * Every expansion is fully parenthesized so the macros behave like a single
 * primary expression wherever they are used (e.g. as a sizeof operand).
 */
#define UNI_REPLACEMENT_CHAR ((uint32_t)0x0000FFFD)
#define UNI_MAX_BMP ((uint32_t)0x0000FFFF)
#define UNI_MAX_UTF16 ((uint32_t)0x0010FFFF)
#define UNI_MAX_UTF32 ((uint32_t)0x7FFFFFFF)
#define UNI_MAX_LEGAL_UTF32 ((uint32_t)0x0010FFFF)

/* Outcome of one of the internal conversion routines */
typedef enum
{
  conversionOK,    /* conversion successful */
  sourceExhausted, /* partial character in source, but hit end */
  targetExhausted, /* insufficient room in target for conversion */
  sourceIllegal    /* source sequence is illegal/malformed */
} ConversionResult;

/* Selects how the conversion routines react to illegal surrogate usage */
typedef enum
{
  strictConversion = 0,
  lenientConversion
} ConversionFlags;

/* Number of payload bits carried by each half of a surrogate pair */
static const int halfShift = 10;

/* First code point that needs a surrogate pair, and the 10 bit payload mask */
static const uint32_t halfBase = 0x0010000UL;
static const uint32_t halfMask = 0x3FFUL;

/* UTF-16 surrogate ranges: high (leading) and low (trailing) code units */
#define UNI_SUR_HIGH_START ((uint32_t)0xD800)
#define UNI_SUR_HIGH_END ((uint32_t)0xDBFF)
#define UNI_SUR_LOW_START ((uint32_t)0xDC00)
#define UNI_SUR_LOW_END ((uint32_t)0xDFFF)
89
90
/* --------------------------------------------------------------------- */
91
92
/*
93
 * Index into the table below with the first byte of a UTF-8 sequence to
94
 * get the number of trailing bytes that are supposed to follow it.
95
 * Note that *legal* UTF-8 values can't have 4 or 5-bytes. The table is
96
 * left as-is for anyone who may want to do such conversion, which was
97
 * allowed in earlier algorithms.
98
 */
99
static const char trailingBytesForUTF8[256] = {
  /* 0x00 - 0x0F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 - 0x1F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 - 0x2F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 - 0x3F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 - 0x4F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 - 0x5F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x60 - 0x6F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70 - 0x7F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x80 - 0x8F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 - 0x9F */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xA0 - 0xAF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xB0 - 0xBF */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xC0 - 0xCF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xD0 - 0xDF */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xE0 - 0xEF */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xF0 - 0xFF */ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5
};

/*
 * Accumulated lead/continuation marker bits that have to be subtracted
 * from the summed-up bytes of a UTF-8 sequence to obtain the code point.
 * Indexed by the number of trailing bytes of the sequence (0 - 5).
 */
static const uint32_t offsetsFromUTF8[6] = {
  0x00000000UL, /* no trailing byte */
  0x00003080UL, /* 1 trailing byte */
  0x000E2080UL, /* 2 trailing bytes */
  0x03C82080UL, /* 3 trailing bytes */
  0xFA082080UL, /* 4 trailing bytes */
  0x82082080UL  /* 5 trailing bytes */
};

/*
 * Marker bits OR-ed into the first byte of an encoded UTF-8 sequence.
 * Indexed by the total number of bytes of the sequence (index 0 is unused).
 * Legal UTF-8 sequences are at most 4 bytes long.
 */
static const uint8_t firstByteMark[7] = {
  0x00, /* unused */
  0x00, /* 1 byte sequence */
  0xC0, /* 2 byte sequence */
  0xE0, /* 3 byte sequence */
  0xF0, /* 4 byte sequence */
  0xF8, /* 5 byte sequence */
  0xFC  /* 6 byte sequence */
};
126
127
/* We always need UTF-16LE, even on big endian systems! */
128
/*
 * Convert a WCHAR between host byte order and little endian byte order.
 * When __BIG_ENDIAN__ is defined the two bytes are swapped, otherwise the
 * value is returned unchanged.
 */
static WCHAR setWcharFrom(WCHAR w)
{
#if !defined(__BIG_ENDIAN__)
  /* Nothing to do, the value is used as is */
  return w;
#else
  union
  {
    WCHAR value;
    char bytes[2];
  } swap;

  swap.value = w;
  const char first = swap.bytes[0];
  swap.bytes[0] = swap.bytes[1];
  swap.bytes[1] = first;
  return swap.value;
#endif
}
146
147
/* --------------------------------------------------------------------- */
148
149
/* The interface converts a whole buffer to avoid function-call overhead.
150
 * Constants have been gathered. Loops & conditionals have been removed as
151
 * much as possible for efficiency, in favor of drop-through switches.
152
 * (See "Note A" at the bottom of the file for equivalent code.)
153
 * If your compiler supports it, the "isLegalUTF8" call can be turned
154
 * into an inline function.
155
 */
156
157
/* --------------------------------------------------------------------- */
158
159
static ConversionResult winpr_ConvertUTF16toUTF8_Internal(const uint16_t** sourceStart,
160
                                                          const uint16_t* sourceEnd,
161
                                                          uint8_t** targetStart,
162
                                                          const uint8_t* targetEnd,
163
                                                          ConversionFlags flags)
164
456k
{
165
456k
  bool computeLength = (!targetEnd) ? true : false;
166
456k
  const uint16_t* source = *sourceStart;
167
456k
  uint8_t* target = *targetStart;
168
456k
  ConversionResult result = conversionOK;
169
170
8.11M
  while (source < sourceEnd)
171
7.67M
  {
172
7.67M
    uint32_t ch = 0;
173
7.67M
    unsigned short bytesToWrite = 0;
174
7.67M
    const uint32_t byteMask = 0xBF;
175
7.67M
    const uint32_t byteMark = 0x80;
176
7.67M
    const uint16_t* oldSource =
177
7.67M
        source; /* In case we have to back up because of target overflow. */
178
179
7.67M
    ch = setWcharFrom(*source++);
180
181
    /* If we have a surrogate pair, convert to UTF32 first. */
182
7.67M
    if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END)
183
11.6k
    {
184
      /* If the 16 bits following the high surrogate are in the source buffer... */
185
11.6k
      if (source < sourceEnd)
186
11.6k
      {
187
11.6k
        uint32_t ch2 = setWcharFrom(*source);
188
189
        /* If it's a low surrogate, convert to UTF32. */
190
11.6k
        if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END)
191
1.21k
        {
192
1.21k
          ch = ((ch - UNI_SUR_HIGH_START) << halfShift) + (ch2 - UNI_SUR_LOW_START) +
193
1.21k
               halfBase;
194
1.21k
          ++source;
195
1.21k
        }
196
10.4k
        else if (flags == strictConversion)
197
10.4k
        {
198
          /* it's an unpaired high surrogate */
199
10.4k
          --source; /* return to the illegal value itself */
200
10.4k
          result = sourceIllegal;
201
10.4k
          break;
202
10.4k
        }
203
11.6k
      }
204
25
      else
205
25
      {
206
        /* We don't have the 16 bits following the high surrogate. */
207
25
        --source; /* return to the high surrogate */
208
25
        result = sourceExhausted;
209
25
        break;
210
25
      }
211
11.6k
    }
212
7.66M
    else if (flags == strictConversion)
213
7.66M
    {
214
      /* UTF-16 surrogate values are illegal in UTF-32 */
215
7.66M
      if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END)
216
210
      {
217
210
        --source; /* return to the illegal value itself */
218
210
        result = sourceIllegal;
219
210
        break;
220
210
      }
221
7.66M
    }
222
223
    /* Figure out how many bytes the result will require */
224
7.66M
    if (ch < (uint32_t)0x80)
225
6.73M
    {
226
6.73M
      bytesToWrite = 1;
227
6.73M
    }
228
927k
    else if (ch < (uint32_t)0x800)
229
54.9k
    {
230
54.9k
      bytesToWrite = 2;
231
54.9k
    }
232
872k
    else if (ch < (uint32_t)0x10000)
233
871k
    {
234
871k
      bytesToWrite = 3;
235
871k
    }
236
1.21k
    else if (ch < (uint32_t)0x110000)
237
1.21k
    {
238
1.21k
      bytesToWrite = 4;
239
1.21k
    }
240
0
    else
241
0
    {
242
0
      bytesToWrite = 3;
243
0
      ch = UNI_REPLACEMENT_CHAR;
244
0
    }
245
246
7.66M
    target += bytesToWrite;
247
248
7.66M
    if ((target > targetEnd) && (!computeLength))
249
1.51k
    {
250
1.51k
      source = oldSource; /* Back up source pointer! */
251
1.51k
      target -= bytesToWrite;
252
1.51k
      result = targetExhausted;
253
1.51k
      break;
254
1.51k
    }
255
256
7.66M
    if (!computeLength)
257
7.11M
    {
258
7.11M
      switch (bytesToWrite)
259
7.11M
      {
260
          /* note: everything falls through. */
261
603
        case 4:
262
603
          *--target = (uint8_t)((ch | byteMark) & byteMask);
263
603
          ch >>= 6;
264
          /* fallthrough */
265
603
          WINPR_FALLTHROUGH
266
561k
        case 3:
267
561k
          *--target = (uint8_t)((ch | byteMark) & byteMask);
268
561k
          ch >>= 6;
269
          /* fallthrough */
270
561k
          WINPR_FALLTHROUGH
271
272
596k
        case 2:
273
596k
          *--target = (uint8_t)((ch | byteMark) & byteMask);
274
596k
          ch >>= 6;
275
          /* fallthrough */
276
596k
          WINPR_FALLTHROUGH
277
278
7.11M
        case 1:
279
7.11M
          *--target = (uint8_t)(ch | firstByteMark[bytesToWrite]);
280
7.11M
          break;
281
0
        default:
282
0
          return sourceIllegal;
283
7.11M
      }
284
7.11M
    }
285
552k
    else
286
552k
    {
287
552k
      switch (bytesToWrite)
288
552k
      {
289
          /* note: everything falls through. */
290
606
        case 4:
291
606
          --target;
292
          /* fallthrough */
293
606
          WINPR_FALLTHROUGH
294
295
308k
        case 3:
296
308k
          --target;
297
          /* fallthrough */
298
308k
          WINPR_FALLTHROUGH
299
300
328k
        case 2:
301
328k
          --target;
302
          /* fallthrough */
303
328k
          WINPR_FALLTHROUGH
304
305
552k
        case 1:
306
552k
          --target;
307
552k
          break;
308
0
        default:
309
0
          return sourceIllegal;
310
552k
      }
311
552k
    }
312
313
7.66M
    target += bytesToWrite;
314
7.66M
  }
315
316
456k
  *sourceStart = source;
317
456k
  *targetStart = target;
318
456k
  return result;
319
456k
}
320
321
/* --------------------------------------------------------------------- */
322
323
/*
324
 * Utility routine to tell whether a sequence of bytes is legal UTF-8.
325
 * This must be called with the length pre-determined by the first byte.
326
 * If not calling this from ConvertUTF8to*, then the length can be set by:
327
 *  length = trailingBytesForUTF8[*source]+1;
328
 * and the sequence is illegal right away if there aren't that many bytes
329
 * available.
330
 * If presented with a length > 4, this returns false.  The Unicode
331
 * definition of UTF-8 goes up to 4-byte sequences.
332
 */
333
334
/*
 * Check whether the 'length' bytes starting at 'source' form a legal UTF-8
 * sequence. 'length' is expected to be derived from the first byte
 * (number of trailing bytes + 1); the caller must guarantee that this many
 * bytes are readable.
 *
 * Returns false for any length outside of 1 - 4, for malformed continuation
 * bytes, overlong encodings, encoded surrogates (0xED 0xA0..0xBF ...) and
 * values above U+10FFFF.
 */
static bool isLegalUTF8(const uint8_t* source, int length)
{
  if ((length < 1) || (length > 4))
    return false;

  const uint8_t lead = source[0];

  /* Third and fourth byte: plain continuation bytes, checked back to front */
  for (int idx = length - 1; idx >= 2; idx--)
  {
    const uint8_t trail = source[idx];

    if ((trail < 0x80) || (trail > 0xBF))
      return false;
  }

  if (length >= 2)
  {
    /*
     * The second byte is always a continuation byte (0x80 - 0xBF), some
     * lead bytes restrict the range further. The lower bound must be
     * enforced for every lead byte, including 0xED and 0xF4 which only
     * tighten the upper bound.
     */
    const uint8_t second = source[1];
    uint8_t lowest = 0x80;
    uint8_t highest = 0xBF;

    switch (lead)
    {
      case 0xE0: /* reject overlong 3 byte encodings */
        lowest = 0xA0;
        break;

      case 0xED: /* reject encoded UTF-16 surrogates */
        highest = 0x9F;
        break;

      case 0xF0: /* reject overlong 4 byte encodings */
        lowest = 0x90;
        break;

      case 0xF4: /* reject values above U+10FFFF */
        highest = 0x8F;
        break;

      default:
        break;
    }

    if ((second < lowest) || (second > highest))
      return false;
  }

  /* 0x80 - 0xBF are continuation bytes, 0xC0 and 0xC1 only encode overlongs */
  if ((lead >= 0x80) && (lead < 0xC2))
    return false;

  /* Lead bytes above 0xF4 would encode values beyond U+10FFFF */
  if (lead > 0xF4)
    return false;

  return true;
}
406
407
/* --------------------------------------------------------------------- */
408
409
static ConversionResult winpr_ConvertUTF8toUTF16_Internal(const uint8_t** sourceStart,
410
                                                          const uint8_t* sourceEnd,
411
                                                          uint16_t** targetStart,
412
                                                          const uint16_t* targetEnd,
413
                                                          ConversionFlags flags)
414
428k
{
415
428k
  bool computeLength = (!targetEnd) ? true : false;
416
428k
  ConversionResult result = conversionOK;
417
428k
  const uint8_t* source = *sourceStart;
418
428k
  uint16_t* target = *targetStart;
419
420
15.1M
  while (source < sourceEnd)
421
14.6M
  {
422
14.6M
    uint32_t ch = 0;
423
14.6M
    unsigned short extraBytesToRead =
424
14.6M
        WINPR_ASSERTING_INT_CAST(unsigned short, trailingBytesForUTF8[*source]);
425
426
14.6M
    if ((source + extraBytesToRead) >= sourceEnd)
427
1
    {
428
1
      result = sourceExhausted;
429
1
      break;
430
1
    }
431
432
    /* Do this check whether lenient or strict */
433
14.6M
    if (!isLegalUTF8(source, extraBytesToRead + 1))
434
79
    {
435
79
      result = sourceIllegal;
436
79
      break;
437
79
    }
438
439
    /*
440
     * The cases all fall through. See "Note A" below.
441
     */
442
14.6M
    switch (extraBytesToRead)
443
14.6M
    {
444
0
      case 5:
445
0
        ch += *source++;
446
0
        ch <<= 6; /* remember, illegal UTF-8 */
447
                  /* fallthrough */
448
0
        WINPR_FALLTHROUGH
449
450
0
      case 4:
451
0
        ch += *source++;
452
0
        ch <<= 6; /* remember, illegal UTF-8 */
453
                  /* fallthrough */
454
0
        WINPR_FALLTHROUGH
455
456
800
      case 3:
457
800
        ch += *source++;
458
800
        ch <<= 6;
459
        /* fallthrough */
460
800
        WINPR_FALLTHROUGH
461
462
1.61k
      case 2:
463
1.61k
        ch += *source++;
464
1.61k
        ch <<= 6;
465
        /* fallthrough */
466
1.61k
        WINPR_FALLTHROUGH
467
468
1.96k
      case 1:
469
1.96k
        ch += *source++;
470
1.96k
        ch <<= 6;
471
        /* fallthrough */
472
1.96k
        WINPR_FALLTHROUGH
473
474
14.6M
      case 0:
475
14.6M
        ch += *source++;
476
14.6M
        break;
477
0
      default:
478
0
        return sourceIllegal;
479
14.6M
    }
480
481
14.6M
    ch -= offsetsFromUTF8[extraBytesToRead];
482
483
14.6M
    if ((target >= targetEnd) && (!computeLength))
484
0
    {
485
0
      source -= (extraBytesToRead + 1); /* Back up source pointer! */
486
0
      result = targetExhausted;
487
0
      break;
488
0
    }
489
490
14.6M
    if (ch <= UNI_MAX_BMP)
491
14.6M
    {
492
      /* Target is a character <= 0xFFFF */
493
      /* UTF-16 surrogate values are illegal in UTF-32 */
494
14.6M
      if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END)
495
0
      {
496
0
        if (flags == strictConversion)
497
0
        {
498
0
          source -= (extraBytesToRead + 1); /* return to the illegal value itself */
499
0
          result = sourceIllegal;
500
0
          break;
501
0
        }
502
0
        else
503
0
        {
504
0
          if (!computeLength)
505
0
            *target++ = setWcharFrom(UNI_REPLACEMENT_CHAR);
506
0
          else
507
0
            target++;
508
0
        }
509
0
      }
510
14.6M
      else
511
14.6M
      {
512
14.6M
        if (!computeLength)
513
11.3M
          *target++ = setWcharFrom((WCHAR)ch); /* normal case */
514
3.33M
        else
515
3.33M
          target++;
516
14.6M
      }
517
14.6M
    }
518
800
    else if (ch > UNI_MAX_UTF16)
519
0
    {
520
0
      if (flags == strictConversion)
521
0
      {
522
0
        result = sourceIllegal;
523
0
        source -= (extraBytesToRead + 1); /* return to the start */
524
0
        break;                            /* Bail out; shouldn't continue */
525
0
      }
526
0
      else
527
0
      {
528
0
        if (!computeLength)
529
0
          *target++ = setWcharFrom(UNI_REPLACEMENT_CHAR);
530
0
        else
531
0
          target++;
532
0
      }
533
0
    }
534
800
    else
535
800
    {
536
      /* target is a character in range 0xFFFF - 0x10FFFF. */
537
800
      if ((target + 1 >= targetEnd) && (!computeLength))
538
0
      {
539
0
        source -= (extraBytesToRead + 1); /* Back up source pointer! */
540
0
        result = targetExhausted;
541
0
        break;
542
0
      }
543
544
800
      ch -= halfBase;
545
546
800
      if (!computeLength)
547
392
      {
548
392
        *target++ = setWcharFrom((WCHAR)((ch >> halfShift) + UNI_SUR_HIGH_START));
549
392
        *target++ = setWcharFrom((WCHAR)((ch & halfMask) + UNI_SUR_LOW_START));
550
392
      }
551
408
      else
552
408
      {
553
408
        target++;
554
408
        target++;
555
408
      }
556
800
    }
557
14.6M
  }
558
559
428k
  *sourceStart = source;
560
428k
  *targetStart = target;
561
428k
  return result;
562
428k
}
563
564
/**
565
 * WinPR built-in Unicode API
566
 */
567
568
static int winpr_ConvertUTF8toUTF16(const uint8_t* src, int cchSrc, uint16_t* dst, int cchDst)
569
428k
{
570
428k
  size_t length = 0;
571
428k
  uint16_t* dstBeg = nullptr;
572
428k
  uint16_t* dstEnd = nullptr;
573
428k
  const uint8_t* srcBeg = nullptr;
574
428k
  const uint8_t* srcEnd = nullptr;
575
428k
  ConversionResult result = sourceIllegal;
576
577
428k
  if (cchSrc == -1)
578
0
    cchSrc = (int)strnlen((const char*)src, INT32_MAX - 1) + 1;
579
580
428k
  srcBeg = src;
581
428k
  srcEnd = &src[cchSrc];
582
583
428k
  if (cchDst == 0)
584
1.06k
  {
585
1.06k
    result =
586
1.06k
        winpr_ConvertUTF8toUTF16_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion);
587
588
1.06k
    length = dstBeg - (uint16_t*)nullptr;
589
1.06k
  }
590
427k
  else
591
427k
  {
592
427k
    dstBeg = dst;
593
427k
    dstEnd = &dst[cchDst];
594
595
427k
    result =
596
427k
        winpr_ConvertUTF8toUTF16_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion);
597
598
427k
    length = dstBeg - dst;
599
427k
  }
600
601
428k
  if (result == targetExhausted)
602
0
  {
603
0
    SetLastError(ERROR_INSUFFICIENT_BUFFER);
604
0
    return 0;
605
0
  }
606
607
428k
  return (result == conversionOK) ? WINPR_ASSERTING_INT_CAST(int, length) : 0;
608
428k
}
609
610
static int winpr_ConvertUTF16toUTF8(const uint16_t* src, int cchSrc, uint8_t* dst, int cchDst)
611
456k
{
612
456k
  size_t length = 0;
613
456k
  uint8_t* dstBeg = nullptr;
614
456k
  uint8_t* dstEnd = nullptr;
615
456k
  const uint16_t* srcBeg = nullptr;
616
456k
  const uint16_t* srcEnd = nullptr;
617
456k
  ConversionResult result = sourceIllegal;
618
619
456k
  if (cchSrc == -1)
620
0
    cchSrc = (int)_wcsnlen((const WCHAR*)src, INT32_MAX - 1) + 1;
621
622
456k
  srcBeg = src;
623
456k
  srcEnd = &src[cchSrc];
624
625
456k
  if (cchDst == 0)
626
54.3k
  {
627
54.3k
    result =
628
54.3k
        winpr_ConvertUTF16toUTF8_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion);
629
630
54.3k
    length = dstBeg - ((uint8_t*)nullptr);
631
54.3k
  }
632
402k
  else
633
402k
  {
634
402k
    dstBeg = dst;
635
402k
    dstEnd = &dst[cchDst];
636
637
402k
    result =
638
402k
        winpr_ConvertUTF16toUTF8_Internal(&srcBeg, srcEnd, &dstBeg, dstEnd, strictConversion);
639
640
402k
    length = dstBeg - dst;
641
402k
  }
642
643
456k
  if (result == targetExhausted)
644
1.51k
  {
645
1.51k
    SetLastError(ERROR_INSUFFICIENT_BUFFER);
646
1.51k
    return 0;
647
1.51k
  }
648
649
455k
  return (result == conversionOK) ? WINPR_ASSERTING_INT_CAST(int, length) : 0;
650
456k
}
651
652
/* --------------------------------------------------------------------- */
653
654
int int_MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte,
655
                            LPWSTR lpWideCharStr, int cchWideChar)
656
428k
{
657
428k
  size_t cbCharLen = (size_t)cbMultiByte;
658
659
428k
  WINPR_UNUSED(dwFlags);
660
661
  /* If cbMultiByte is 0, the function fails */
662
428k
  if ((cbMultiByte == 0) || (cbMultiByte < -1))
663
0
    return 0;
664
665
428k
  if (cchWideChar < 0)
666
0
    return -1;
667
668
428k
  if (cbMultiByte < 0)
669
0
  {
670
0
    const size_t len = strlen(lpMultiByteStr);
671
0
    if (len >= INT32_MAX)
672
0
      return 0;
673
0
    cbCharLen = (int)len + 1;
674
0
  }
675
428k
  else
676
428k
    cbCharLen = cbMultiByte;
677
678
428k
  WINPR_ASSERT(lpMultiByteStr);
679
428k
  switch (CodePage)
680
428k
  {
681
0
    case CP_ACP:
682
428k
    case CP_UTF8:
683
428k
      break;
684
685
0
    default:
686
0
      WLog_ERR(TAG, "Unsupported encoding %u", CodePage);
687
0
      return 0;
688
428k
  }
689
690
428k
  return winpr_ConvertUTF8toUTF16((const uint8_t*)lpMultiByteStr,
691
428k
                                  WINPR_ASSERTING_INT_CAST(int, cbCharLen),
692
428k
                                  (uint16_t*)lpWideCharStr, cchWideChar);
693
428k
}
694
695
int int_WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
696
                            LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
697
                            LPBOOL lpUsedDefaultChar)
698
456k
{
699
456k
  size_t cbCharLen = (size_t)cchWideChar;
700
701
456k
  WINPR_UNUSED(dwFlags);
702
  /* If cchWideChar is 0, the function fails */
703
456k
  if ((cchWideChar == 0) || (cchWideChar < -1))
704
0
    return 0;
705
706
456k
  if (cbMultiByte < 0)
707
0
    return -1;
708
709
456k
  WINPR_ASSERT(lpWideCharStr);
710
  /* If cchWideChar is -1, the string is null-terminated */
711
456k
  if (cchWideChar == -1)
712
0
  {
713
0
    const size_t len = _wcslen(lpWideCharStr);
714
0
    if (len >= INT32_MAX)
715
0
      return 0;
716
0
    cbCharLen = (int)len + 1;
717
0
  }
718
456k
  else
719
456k
    cbCharLen = cchWideChar;
720
721
  /*
722
   * if cbMultiByte is 0, the function returns the required buffer size
723
   * in bytes for lpMultiByteStr and makes no use of the output parameter itself.
724
   */
725
726
456k
  return winpr_ConvertUTF16toUTF8((const uint16_t*)lpWideCharStr,
727
456k
                                  WINPR_ASSERTING_INT_CAST(int, cbCharLen),
728
456k
                                  (uint8_t*)lpMultiByteStr, cbMultiByte);
729
456k
}