/src/FreeRDP/winpr/libwinpr/crt/unicode.c

Source
/**
 * WinPR: Windows Portable Runtime
 * Unicode Conversion (CRT)
 *
 * Copyright 2012 Marc-Andre Moreau <marcandre.moreau@gmail.com>
 * Copyright 2022 Armin Novak <anovak@thincast.com>
 * Copyright 2022 Thincast Technologies GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <winpr/config.h>
#include <winpr/assert.h>

#include <errno.h>
#include <wctype.h>

#include <winpr/crt.h>
#include <winpr/error.h>
#include <winpr/print.h>

#ifndef _WIN32

#include "unicode.h"

/**
 * Notes on cross-platform Unicode portability:
 *
 * Unicode has many possible Unicode Transformation Format (UTF) encodings,
 * where some of the most commonly used are UTF-8, UTF-16 and sometimes UTF-32.
 *
 * The number in the UTF encoding name (8, 16, 32) refers to the number of bits
 * per code unit. A code unit is the minimal bit combination that can represent
 * a unit of encoded text in the given encoding. For instance, UTF-8 encodes
 * the English alphabet using 8 bits (or one byte) each, just like in ASCII.
 *
 * However, the total number of code points (values in the Unicode codespace)
 * only fits completely within 32 bits. This means that for UTF-8 and UTF-16,
 * more than one code unit may be required to fully encode a specific value.
 * UTF-8 and UTF-16 are variable-width encodings, while UTF-32 is fixed-width.
 *
 * UTF-8 has the advantage of being backwards compatible with ASCII, and is
 * one of the most commonly used Unicode encodings.
 *
 * UTF-16 is used everywhere in the Windows API. The strategy employed by
 * Microsoft to provide backwards compatibility in their API was to create
 * an ANSI and a Unicode version of the same function, ending with A (ANSI)
 * and W (Wide character, or UTF-16 Unicode). In headers, the original
 * function name is replaced by a macro that defines to either the ANSI
 * or Unicode version based on the definition of the _UNICODE macro.
 *
 * UTF-32 has the advantage of being fixed width, but wastes a lot of space
 * for English text (4x more than UTF-8, 2x more than UTF-16).
 *
 * In C, wide character strings are often defined with the wchar_t type.
 * Many functions are provided to deal with those wide character strings,
 * such as wcslen (strlen equivalent) or wprintf (printf equivalent).
 *
 * This may lead to some confusion, since many of these functions exist
 * on both Windows and Linux, but they are *not* the same!
 *
 * This sample hello world is a good example:
 *
 * #include <wchar.h>
 *
 * wchar_t hello[] = L"Hello, World!\n";
 *
 * int main(int argc, char** argv)
 * {
 *  wprintf(hello);
 *  wprintf(L"sizeof(wchar_t): %zu\n", sizeof(wchar_t));
 *  return 0;
 * }
 *
 * There is a reason why the sample prints the size of the wchar_t type:
 * On Windows, wchar_t is two bytes (UTF-16), while on most other systems
 * it is 4 bytes (UTF-32). This means that if you write code on Windows,
 * use L"" to define a string which is meant to be UTF-16 and not UTF-32,
 * you will have a little surprise when trying to port your code to Linux.
 *
 * Since the Windows API uses UTF-16, not UTF-32, WinPR defines the WCHAR
 * type to always be 2-bytes long and uses it instead of wchar_t. Do not
 * ever use wchar_t with WinPR unless you know what you are doing.
 *
 * As for L"", it is unfortunately unusable in a portable way, unless a
 * special option is passed to GCC to define wchar_t as being two bytes.
 * For string constants that must be UTF-16, it is a pain, but they can
 * be defined in a portable way like this:
 *
 * WCHAR hello[] = { 'H','e','l','l','o','\0' };
 *
 * Such strings cannot be passed to native functions like wcslen(), which
 * may expect a different wchar_t size. For this reason, WinPR provides
 * _wcslen, which expects UTF-16 WCHAR strings on all platforms.
 *
 */

/** \deprecated We no longer export this function, see ConvertUtf8ToWChar family of functions for a
 * replacement
 *
 * Conversion to Unicode (UTF-16)
 * MultiByteToWideChar: http://msdn.microsoft.com/en-us/library/windows/desktop/dd319072/
 *
 * cbMultiByte is an input size in bytes (BYTE)
 * cchWideChar is an output size in wide characters (WCHAR)
 *
 * Null-terminated UTF-8 strings:
 *
 * cchWideChar *cannot* be assumed to be cbMultiByte since UTF-8 is variable-width!
 *
 * Instead, obtain the required cchWideChar output size like this:
 * cchWideChar = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) lpMultiByteStr, -1, nullptr, 0);
 *
 * A value of -1 for cbMultiByte indicates that the input string is null-terminated,
 * and the null terminator *will* be processed. The size returned by MultiByteToWideChar
 * will therefore include the null terminator. Equivalent behavior can be obtained by
 * computing the length in bytes of the input buffer, including the null terminator:
 *
 * cbMultiByte = strlen((char*) lpMultiByteStr) + 1;
 *
 * An output buffer of the proper size can then be allocated:
 *
 * lpWideCharStr = (LPWSTR) malloc(cchWideChar * sizeof(WCHAR));
 *
 * Since cchWideChar is an output size in wide characters, the actual buffer size is:
 * (cchWideChar * sizeof(WCHAR)) or (cchWideChar * 2)
 *
 * Finally, perform the conversion:
 *
 * cchWideChar = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) lpMultiByteStr, -1, lpWideCharStr,
 * cchWideChar);
 *
 * The value returned by MultiByteToWideChar corresponds to the number of wide characters written
 * to the output buffer, and should match the value obtained on the first call to
 * MultiByteToWideChar.
 *
 */

#if !defined(WITH_WINPR_DEPRECATED)
static
#endif
    int
    MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte,
                        LPWSTR lpWideCharStr, int cchWideChar)
{
  /* Pure forwarding shim: every argument is handed unchanged to int_MultiByteToWideChar
   * and its result is returned as-is. The symbol only has external linkage when
   * WITH_WINPR_DEPRECATED is defined. */
  const int converted = int_MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte,
                                                lpWideCharStr, cchWideChar);
  return converted;
}

/** \deprecated We no longer export this function, see ConvertWCharToUtf8 family of functions for a
 * replacement
 *
 * Conversion from Unicode (UTF-16)
 * WideCharToMultiByte: http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130/
 *
 * cchWideChar is an input size in wide characters (WCHAR)
 * cbMultiByte is an output size in bytes (BYTE)
 *
 * Null-terminated UTF-16 strings:
 *
 * cbMultiByte *cannot* be assumed to be cchWideChar since UTF-8 is variable-width!
 *
 * Instead, obtain the required cbMultiByte output size like this:
 * cbMultiByte = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) lpWideCharStr, -1, nullptr, 0, nullptr,
 * nullptr);
 *
 * A value of -1 for cchWideChar indicates that the input string is null-terminated,
 * and the null terminator *will* be processed. The size returned by WideCharToMultiByte
 * will therefore include the null terminator. Equivalent behavior can be obtained by
 * computing the length in wide characters of the input buffer, including the null terminator:
 *
 * cchWideChar = _wcslen((WCHAR*) lpWideCharStr) + 1;
 *
 * An output buffer of the proper size can then be allocated:
 * lpMultiByteStr = (LPSTR) malloc(cbMultiByte);
 *
 * Since cbMultiByte is an output size in bytes, it is the same as the buffer size
 *
 * Finally, perform the conversion:
 *
 * cbMultiByte = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) lpWideCharStr, -1, lpMultiByteStr,
 * cbMultiByte, nullptr, nullptr);
 *
 * The value returned by WideCharToMultiByte corresponds to the number of bytes written
 * to the output buffer, and should match the value obtained on the first call to
 * WideCharToMultiByte.
 *
 */

#if !defined(WITH_WINPR_DEPRECATED)
static
#endif
    int
    WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
                        LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
                        LPBOOL lpUsedDefaultChar)
{
  /* Pure forwarding shim: every argument is handed unchanged to int_WideCharToMultiByte
   * and its result is returned as-is. The symbol only has external linkage when
   * WITH_WINPR_DEPRECATED is defined. */
  const int converted =
      int_WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar, lpMultiByteStr,
                              cbMultiByte, lpDefaultChar, lpUsedDefaultChar);
  return converted;
}

#endif

/**
 * ConvertToUnicode is a convenience wrapper for MultiByteToWideChar:
 *
 * If the lpWideCharStr parameter for the converted string points to nullptr
 * or if the cchWideChar parameter is set to 0 this function will automatically
 * allocate the required memory which is guaranteed to be null-terminated
 * after the conversion, even if the source c string isn't.
 *
 * If the cbMultiByte parameter is set to -1 the passed lpMultiByteStr must
 * be null-terminated and the required length for the converted string will be
 * calculated accordingly.
 */
#if defined(WITH_WINPR_DEPRECATED)
int ConvertToUnicode(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte,
                     LPWSTR* lpWideCharStr, int cchWideChar)
{
  /* Both the source string and the slot receiving the result are mandatory. */
  if (!lpMultiByteStr || !lpWideCharStr)
    return 0;

  /* -1 requests "null-terminated input": measure it, counting the terminator,
   * and refuse inputs whose size would not fit into an int. */
  if (cbMultiByte == -1)
  {
    const size_t srcLen = strnlen(lpMultiByteStr, INT_MAX);
    if (srcLen >= INT_MAX)
      return 0;
    cbMultiByte = (int)(srcLen + 1);
  }

  /* The function allocates the output itself when no size was given (the required
   * size is queried first) or when the caller's slot holds no buffer. */
  BOOL ownsBuffer = FALSE;
  if (cchWideChar == 0)
  {
    cchWideChar =
        MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte, nullptr, 0);
    ownsBuffer = TRUE;
  }
  else if (!(*lpWideCharStr))
  {
    ownsBuffer = TRUE;
  }

  if (cchWideChar < 1)
    return 0;

  if (ownsBuffer)
  {
    /* One extra zeroed element keeps the result terminated in any case. */
    LPWSTR fresh = (LPWSTR)calloc((size_t)cchWideChar + 1ull, sizeof(WCHAR));
    *lpWideCharStr = fresh;
    if (!fresh)
      return 0;
  }

  const int written = MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte,
                                          *lpWideCharStr, cchWideChar);

  /* A caller-supplied buffer is never released here; the raw result is reported. */
  if ((written == cchWideChar) || !ownsBuffer)
    return written;

  /* The conversion into our own buffer came up short: discard it and report failure. */
  free(*lpWideCharStr);
  *lpWideCharStr = nullptr;
  return 0;
}
#endif

/**
 * ConvertFromUnicode is a convenience wrapper for WideCharToMultiByte:
 *
 * If the lpMultiByteStr parameter for the converted string points to nullptr
 * or if the cbMultiByte parameter is set to 0 this function will automatically
 * allocate the required memory which is guaranteed to be null-terminated
 * after the conversion, even if the source unicode string isn't.
 *
 * If the cchWideChar parameter is set to -1 the passed lpWideCharStr must
 * be null-terminated and the required length for the converted string will be
 * calculated accordingly.
 */
#if defined(WITH_WINPR_DEPRECATED)
int ConvertFromUnicode(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
                       LPSTR* lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
                       LPBOOL lpUsedDefaultChar)
{
  int status = 0;
  BOOL allocate = FALSE;

  /* Both the source string and the slot receiving the result are mandatory. */
  if (!lpWideCharStr)
    return 0;

  if (!lpMultiByteStr)
    return 0;

  if (cchWideChar == -1)
  {
    /* -1 requests "null-terminated input". Bound the scan so the length (plus the
     * terminator) is guaranteed to fit into an int, the same way ConvertToUnicode
     * bounds its strnlen() scan; an unchecked cast could truncate or turn negative. */
    const size_t len = _wcsnlen(lpWideCharStr, INT_MAX);
    if (len >= INT_MAX)
      return 0;
    cchWideChar = (int)(len + 1);
  }

  if (cbMultiByte == 0)
  {
    /* No output size given: query the required size and allocate the buffer here. */
    cbMultiByte = WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar, nullptr, 0,
                                      nullptr, nullptr);
    allocate = TRUE;
  }
  else if (!(*lpMultiByteStr))
    allocate = TRUE; /* size given, but the caller's slot holds no buffer */

  if (cbMultiByte < 1)
    return 0;

  if (allocate)
  {
    /* One extra zeroed byte keeps the result terminated in any case. */
    *lpMultiByteStr = (LPSTR)calloc(1, (size_t)cbMultiByte + 1ull);

    if (!(*lpMultiByteStr))
      return 0;
  }

  status = WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar, *lpMultiByteStr,
                               cbMultiByte, lpDefaultChar, lpUsedDefaultChar);

  /* For a buffer allocated here, anything but an exact fill counts as a failure. */
  if ((status != cbMultiByte) && allocate)
  {
    status = 0;
  }

  /* Only a buffer allocated here is released on failure; a caller-supplied one is kept. */
  if ((status <= 0) && allocate)
  {
    free(*lpMultiByteStr);
    *lpMultiByteStr = nullptr;
  }

  return status;
}
#endif

/**
 * Swap Unicode byte order (UTF16LE <-> UTF16BE)
 */

const WCHAR* ByteSwapUnicode(WCHAR* wstr, size_t length)
{
  /* A null buffer is only acceptable when there is nothing to swap. */
  WINPR_ASSERT(wstr || (length == 0));

  /* Swap each of the `length` code units in place. */
  WCHAR* cursor = wstr;
  for (size_t remaining = length; remaining > 0; remaining--, cursor++)
    *cursor = _byteswap_ushort(*cursor);

  /* The (now swapped) input buffer is handed back to the caller. */
  return wstr;
}

/**
 * Convert a null-terminated WCHAR string to UTF-8.
 *
 * @param wstr input string; may be null
 * @param str  output buffer
 * @param len  size of the output buffer in bytes
 * @return 0 for a null input (an available output buffer is set to the empty string),
 *         otherwise the result of ConvertWCharNToUtf8 over the string including its terminator
 */
SSIZE_T ConvertWCharToUtf8(const WCHAR* wstr, char* str, size_t len)
{
  if (wstr)
  {
    /* Hand over the terminator as part of the input. */
    const size_t units = _wcslen(wstr) + 1;
    return ConvertWCharNToUtf8(wstr, units, str, len);
  }

  /* No input: produce an empty string if there is room for one. */
  if (str && (len > 0))
    str[0] = '\0';
  return 0;
}

/**
 * Convert at most wlen WCHAR code units to UTF-8.
 *
 * @param wstr input buffer, must not be null when wlen > 0
 * @param wlen number of code units available in wstr
 * @param str  output buffer
 * @param len  size of the output buffer in bytes
 * @return 0 when wlen is 0; -1 when a size exceeds INT32_MAX (last error is set to
 *         ERROR_INVALID_PARAMETER) or when the conversion fails; otherwise the count
 *         reported by the conversion, less the input terminator when one was converted
 */
SSIZE_T ConvertWCharNToUtf8(const WCHAR* wstr, size_t wlen, char* str, size_t len)
{
  if (wlen == 0)
    return 0;

  WINPR_ASSERT(wstr);
  const size_t scanned = _wcsnlen(wstr, wlen);

  /* The conversion routine takes int sizes. */
  if ((wlen > INT32_MAX) || (len > INT32_MAX))
  {
    SetLastError(ERROR_INVALID_PARAMETER);
    return -1;
  }

  /* A terminator inside the given window is converted along with the text. */
  const BOOL hasTerminator = (scanned < wlen) ? TRUE : FALSE;
  const size_t inputUnits = hasTerminator ? (scanned + 1) : scanned;

  WINPR_PRAGMA_DIAG_PUSH
  WINPR_PRAGMA_DIAG_IGNORED_DEPRECATED_DECL
  const int rc =
      WideCharToMultiByte(CP_UTF8, 0, wstr, (int)inputUnits, str, (int)len, nullptr, nullptr);
  WINPR_PRAGMA_DIAG_POP

  if (rc <= 0)
    return -1;

  const size_t produced = (size_t)rc;
  if ((len > 0) && (produced > len))
    return -1;

  if (!hasTerminator)
  {
    /* The input carried no terminator: add one when the buffer has room left. */
    if (str && (produced < len))
      str[produced] = '\0';
    return rc;
  }

  /* Buffer filled exactly and its last byte is not a terminator: report the full count. */
  if ((produced == len) && str && (str[produced - 1] != '\0'))
    return rc;

  /* Do not count the converted terminator. */
  return rc - 1;
}

/**
 * Convert exactly wlen WCHAR code units to UTF-8 without looking for a terminator.
 *
 * @param wstr input buffer, must not be null when wlen > 0
 * @param wlen number of code units to convert
 * @param str  output buffer
 * @param len  size of the output buffer in bytes
 * @return 0 when wlen is 0; -1 when a size exceeds INT32_MAX (last error is set to
 *         ERROR_INVALID_PARAMETER) or when the conversion fails; otherwise the count
 *         reported by the conversion
 */
SSIZE_T ConvertMszWCharNToUtf8(const WCHAR* wstr, size_t wlen, char* str, size_t len)
{
  if (wlen == 0)
    return 0;

  WINPR_ASSERT(wstr);

  /* The conversion routine takes int sizes. */
  if ((wlen > INT32_MAX) || (len > INT32_MAX))
  {
    SetLastError(ERROR_INVALID_PARAMETER);
    return -1;
  }

  const int capacity = (int)len;
  WINPR_PRAGMA_DIAG_PUSH
  WINPR_PRAGMA_DIAG_IGNORED_DEPRECATED_DECL
  const int rc =
      WideCharToMultiByte(CP_UTF8, 0, wstr, (int)wlen, str, capacity, nullptr, nullptr);
  WINPR_PRAGMA_DIAG_POP

  if (rc <= 0)
    return -1;

  /* With a real output buffer the reported count must fit into it. */
  if ((len > 0) && (rc > capacity))
    return -1;

  return rc;
}

/**
 * Convert a null-terminated UTF-8 string to WCHAR.
 *
 * @param str  input string; may be null
 * @param wstr output buffer
 * @param wlen size of the output buffer in WCHAR elements
 * @return 0 for a null input (an available output buffer is set to the empty string),
 *         otherwise the result of ConvertUtf8NToWChar over the string including its terminator
 */
SSIZE_T ConvertUtf8ToWChar(const char* str, WCHAR* wstr, size_t wlen)
{
  if (str)
  {
    /* Hand over the terminator as part of the input. */
    const size_t bytes = strlen(str) + 1;
    return ConvertUtf8NToWChar(str, bytes, wstr, wlen);
  }

  /* No input: produce an empty string if there is room for one. */
  if (wstr && (wlen > 0))
    wstr[0] = 0;
  return 0;
}

/**
 * Convert at most len bytes of UTF-8 to WCHAR.
 *
 * @param str  input buffer, must not be null when len > 0
 * @param len  number of bytes available in str
 * @param wstr output buffer
 * @param wlen size of the output buffer in WCHAR elements
 * @return 0 when len is 0; -1 when a size exceeds INT32_MAX (last error is set to
 *         ERROR_INVALID_PARAMETER) or when the conversion fails; otherwise the count
 *         reported by the conversion, less the input terminator when one was converted
 */
SSIZE_T ConvertUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen)
{
  BOOL isNullTerminated = FALSE;
  if (len == 0)
    return 0;

  WINPR_ASSERT(str);

  /* The conversion routine takes int sizes. */
  if ((len > INT32_MAX) || (wlen > INT32_MAX))
  {
    SetLastError(ERROR_INVALID_PARAMETER);
    return -1;
  }

  /* Scan only after the guards above, so strnlen() is never handed a null pointer
   * (e.g. for a (nullptr, 0) call) and never walks a buffer that is rejected anyway. */
  size_t ilen = strnlen(str, len);
  if (ilen < len)
  {
    /* A terminator inside the given window is converted along with the text. */
    isNullTerminated = TRUE;
    ilen++;
  }

  const int iwlen = (int)wlen;
  WINPR_PRAGMA_DIAG_PUSH
  WINPR_PRAGMA_DIAG_IGNORED_DEPRECATED_DECL
  const int rc = MultiByteToWideChar(CP_UTF8, 0, str, (int)ilen, wstr, iwlen);
  WINPR_PRAGMA_DIAG_POP
  if ((rc <= 0) || ((wlen > 0) && (rc > iwlen)))
    return -1;
  if (!isNullTerminated)
  {
    /* The input carried no terminator: add one when the buffer has room left. */
    if (wstr && (rc < iwlen))
      wstr[rc] = '\0';
    return rc;
  }
  else if (rc == iwlen)
  {
    /* Buffer filled exactly and its last element is not a terminator: report the full count. */
    if (wstr && (wstr[rc - 1] != '\0'))
      return rc;
  }
  /* Do not count the converted terminator. */
  return rc - 1;
}

/**
 * Convert exactly len bytes of UTF-8 to WCHAR without looking for a terminator.
 *
 * @param str  input buffer, must not be null when len > 0
 * @param len  number of bytes to convert
 * @param wstr output buffer
 * @param wlen size of the output buffer in WCHAR elements
 * @return 0 when len is 0; -1 when a size exceeds INT32_MAX (last error is set to
 *         ERROR_INVALID_PARAMETER) or when the conversion fails; otherwise the count
 *         reported by the conversion
 */
SSIZE_T ConvertMszUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen)
{
  if (len == 0)
    return 0;

  WINPR_ASSERT(str);

  /* The conversion routine takes int sizes. */
  if ((wlen > INT32_MAX) || (len > INT32_MAX))
  {
    SetLastError(ERROR_INVALID_PARAMETER);
    return -1;
  }

  const int capacity = (int)wlen;
  WINPR_PRAGMA_DIAG_PUSH
  WINPR_PRAGMA_DIAG_IGNORED_DEPRECATED_DECL
  const int rc = MultiByteToWideChar(CP_UTF8, 0, str, (int)len, wstr, capacity);
  WINPR_PRAGMA_DIAG_POP

  if (rc <= 0)
    return -1;

  /* With a real output buffer the reported count must fit into it. */
  if ((wlen > 0) && (rc > capacity))
    return -1;

  return rc;
}

/**
 * Convert a null-terminated WCHAR string into a newly allocated UTF-8 string.
 *
 * @param wstr           input string
 * @param pUtfCharLength optional; set to 0 on failure, otherwise to the length reported
 *                       by the conversion
 * @return buffer obtained from calloc() (release with free()), or nullptr on failure
 */
char* ConvertWCharToUtf8Alloc(const WCHAR* wstr, size_t* pUtfCharLength)
{
  /* First pass without an output buffer: only determine the required length. */
  const SSIZE_T rc = ConvertWCharToUtf8(wstr, nullptr, 0);

  if (pUtfCharLength)
    *pUtfCharLength = 0;
  if (rc < 0)
    return nullptr;

  /* Reserve one zeroed element beyond the reported length. */
  const size_t capacity = (size_t)rc + 1ull;
  char* utf8 = calloc(capacity, sizeof(char));
  if (!utf8)
    return nullptr;

  /* Second pass: the actual conversion into the new buffer. */
  const SSIZE_T rc2 = ConvertWCharToUtf8(wstr, utf8, capacity);
  if (rc2 < 0)
  {
    free(utf8);
    return nullptr;
  }

  WINPR_ASSERT(rc == rc2);
  if (pUtfCharLength)
    *pUtfCharLength = (size_t)rc2;
  return utf8;
}

/**
 * Convert at most wlen WCHAR code units into a newly allocated UTF-8 string.
 *
 * @param wstr           input buffer
 * @param wlen           number of code units available in wstr
 * @param pUtfCharLength optional; set to 0 on failure, otherwise to the length reported
 *                       by the conversion
 * @return buffer obtained from calloc() (release with free()), or nullptr on failure
 */
char* ConvertWCharNToUtf8Alloc(const WCHAR* wstr, size_t wlen, size_t* pUtfCharLength)
{
  /* First pass without an output buffer: only determine the required length. */
  const SSIZE_T rc = ConvertWCharNToUtf8(wstr, wlen, nullptr, 0);

  if (pUtfCharLength)
    *pUtfCharLength = 0;
  if (rc < 0)
    return nullptr;

  /* Reserve one zeroed element beyond the reported length. */
  const size_t capacity = (size_t)rc + 1ull;
  char* utf8 = calloc(capacity, sizeof(char));
  if (!utf8)
    return nullptr;

  /* Second pass: the actual conversion into the new buffer. */
  const SSIZE_T rc2 = ConvertWCharNToUtf8(wstr, wlen, utf8, capacity);
  if (rc2 < 0)
  {
    free(utf8);
    return nullptr;
  }

  WINPR_ASSERT(rc == rc2);
  if (pUtfCharLength)
    *pUtfCharLength = (size_t)rc2;
  return utf8;
}

/**
 * Convert exactly wlen WCHAR code units into a newly allocated UTF-8 buffer,
 * using ConvertMszWCharNToUtf8 for the conversion.
 *
 * @param wstr           input buffer
 * @param wlen           number of code units to convert
 * @param pUtfCharLength optional; set to 0 on failure, otherwise to the length reported
 *                       by the conversion
 * @return buffer obtained from calloc() (release with free()), or nullptr on failure
 */
char* ConvertMszWCharNToUtf8Alloc(const WCHAR* wstr, size_t wlen, size_t* pUtfCharLength)
{
  /* First pass without an output buffer: only determine the required length. */
  const SSIZE_T rc = ConvertMszWCharNToUtf8(wstr, wlen, nullptr, 0);

  if (pUtfCharLength)
    *pUtfCharLength = 0;
  if (rc < 0)
    return nullptr;

  /* Reserve one zeroed element beyond the reported length. */
  const size_t capacity = (size_t)rc + 1ull;
  char* utf8 = calloc(capacity, sizeof(char));
  if (!utf8)
    return nullptr;

  /* Second pass: the actual conversion into the new buffer. */
  const SSIZE_T rc2 = ConvertMszWCharNToUtf8(wstr, wlen, utf8, capacity);
  if (rc2 < 0)
  {
    free(utf8);
    return nullptr;
  }

  WINPR_ASSERT(rc == rc2);
  if (pUtfCharLength)
    *pUtfCharLength = (size_t)rc2;
  return utf8;
}

/**
 * Convert a null-terminated UTF-8 string into a newly allocated WCHAR string.
 *
 * @param str   input string
 * @param pSize optional; set to 0 on failure, otherwise to the length reported
 *              by the conversion
 * @return buffer obtained from calloc() (release with free()), or nullptr on failure
 */
WCHAR* ConvertUtf8ToWCharAlloc(const char* str, size_t* pSize)
{
  /* First pass without an output buffer: only determine the required length. */
  const SSIZE_T rc = ConvertUtf8ToWChar(str, nullptr, 0);

  if (pSize)
    *pSize = 0;
  if (rc < 0)
    return nullptr;

  /* Reserve one zeroed element beyond the reported length. */
  const size_t capacity = (size_t)rc + 1ull;
  WCHAR* wide = calloc(capacity, sizeof(WCHAR));
  if (!wide)
    return nullptr;

  /* Second pass: the actual conversion into the new buffer. */
  const SSIZE_T rc2 = ConvertUtf8ToWChar(str, wide, capacity);
  if (rc2 < 0)
  {
    free(wide);
    return nullptr;
  }

  WINPR_ASSERT(rc == rc2);
  if (pSize)
    *pSize = (size_t)rc2;
  return wide;
}

/**
 * Convert at most len bytes of UTF-8 into a newly allocated WCHAR string.
 *
 * @param str   input buffer
 * @param len   number of bytes available in str
 * @param pSize optional; set to 0 on failure, otherwise to the length reported
 *              by the conversion
 * @return buffer obtained from calloc() (release with free()), or nullptr on failure
 */
WCHAR* ConvertUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize)
{
  /* First pass without an output buffer: only determine the required length. */
  const SSIZE_T rc = ConvertUtf8NToWChar(str, len, nullptr, 0);

  if (pSize)
    *pSize = 0;
  if (rc < 0)
    return nullptr;

  /* Reserve one zeroed element beyond the reported length. */
  const size_t capacity = (size_t)rc + 1ull;
  WCHAR* wide = calloc(capacity, sizeof(WCHAR));
  if (!wide)
    return nullptr;

  /* Second pass: the actual conversion into the new buffer. */
  const SSIZE_T rc2 = ConvertUtf8NToWChar(str, len, wide, capacity);
  if (rc2 < 0)
  {
    free(wide);
    return nullptr;
  }

  WINPR_ASSERT(rc == rc2);
  if (pSize)
    *pSize = (size_t)rc2;
  return wide;
}

/**
 * Convert exactly len bytes of UTF-8 into a newly allocated WCHAR buffer,
 * using ConvertMszUtf8NToWChar for the conversion.
 *
 * @param str   input buffer
 * @param len   number of bytes to convert
 * @param pSize optional; set to 0 on failure, otherwise to the length reported
 *              by the conversion
 * @return buffer obtained from calloc() (release with free()), or nullptr on failure
 */
WCHAR* ConvertMszUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize)
{
  /* First pass without an output buffer: only determine the required length. */
  const SSIZE_T rc = ConvertMszUtf8NToWChar(str, len, nullptr, 0);

  if (pSize)
    *pSize = 0;
  if (rc < 0)
    return nullptr;

  /* Reserve one zeroed element beyond the reported length. */
  const size_t capacity = (size_t)rc + 1ull;
  WCHAR* wide = calloc(capacity, sizeof(WCHAR));
  if (!wide)
    return nullptr;

  /* Second pass: the actual conversion into the new buffer. */
  const SSIZE_T rc2 = ConvertMszUtf8NToWChar(str, len, wide, capacity);
  if (rc2 < 0)
  {
    free(wide);
    return nullptr;
  }

  WINPR_ASSERT(rc == rc2);
  if (pSize)
    *pSize = (size_t)rc2;
  return wide;
}

Coverage Report

Created: 2026-05-11 06:55

Line	Count	Source
1		/**
2		* WinPR: Windows Portable Runtime
3		* Unicode Conversion (CRT)
4		*
5		* Copyright 2012 Marc-Andre Moreau <marcandre.moreau@gmail.com>
6		* Copyright 2022 Armin Novak <anovak@thincast.com>
7		* Copyright 2022 Thincast Technologies GmbH
8		*
9		* Licensed under the Apache License, Version 2.0 (the "License");
10		* you may not use this file except in compliance with the License.
11		* You may obtain a copy of the License at
12		*
13		* http://www.apache.org/licenses/LICENSE-2.0
14		*
15		* Unless required by applicable law or agreed to in writing, software
16		* distributed under the License is distributed on an "AS IS" BASIS,
17		* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18		* See the License for the specific language governing permissions and
19		* limitations under the License.
20		*/
21
22		#include <winpr/config.h>
23		#include <winpr/assert.h>
24
25		#include <errno.h>
26		#include <wctype.h>
27
28		#include <winpr/crt.h>
29		#include <winpr/error.h>
30		#include <winpr/print.h>
31
32		#ifndef _WIN32
33
34		#include "unicode.h"
35
36		/**
37		* Notes on cross-platform Unicode portability:
38		*
39		* Unicode has many possible Unicode Transformation Format (UTF) encodings,
40		* where some of the most commonly used are UTF-8, UTF-16 and sometimes UTF-32.
41		*
42		* The number in the UTF encoding name (8, 16, 32) refers to the number of bits
43		* per code unit. A code unit is the minimal bit combination that can represent
44		* a unit of encoded text in the given encoding. For instance, UTF-8 encodes
45		* the English alphabet using 8 bits (or one byte) each, just like in ASCII.
46		*
47		* However, the total number of code points (values in the Unicode codespace)
48		* only fits completely within 32 bits. This means that for UTF-8 and UTF-16,
49		* more than one code unit may be required to fully encode a specific value.
50		* UTF-8 and UTF-16 are variable-width encodings, while UTF-32 is fixed-width.
51		*
52		* UTF-8 has the advantage of being backwards compatible with ASCII, and is
53		* one of the most commonly used Unicode encoding.
54		*
55		* UTF-16 is used everywhere in the Windows API. The strategy employed by
56		* Microsoft to provide backwards compatibility in their API was to create
57		* an ANSI and a Unicode version of the same function, ending with A (ANSI)
58		* and W (Wide character, or UTF-16 Unicode). In headers, the original
59		* function name is replaced by a macro that defines to either the ANSI
60		* or Unicode version based on the definition of the _UNICODE macro.
61		*
62		* UTF-32 has the advantage of being fixed width, but wastes a lot of space
63		* for English text (4x more than UTF-8, 2x more than UTF-16).
64		*
65		* In C, wide character strings are often defined with the wchar_t type.
66		* Many functions are provided to deal with those wide character strings,
67		* such as wcslen (strlen equivalent) or wprintf (printf equivalent).
68		*
69		* This may lead to some confusion, since many of these functions exist
70		* on both Windows and Linux, but they are not the same!
71		*
72		* This sample hello world is a good example:
73		*
74		* #include <wchar.h>
75		*
76		* wchar_t hello[] = L"Hello, World!\n";
77		*
78		* int main(int argc, char** argv)
79		* {
80		* wprintf(hello);
81		* wprintf(L"sizeof(wchar_t): %d\n", sizeof(wchar_t));
82		* return 0;
83		* }
84		*
85		* There is a reason why the sample prints the size of the wchar_t type:
86		* On Windows, wchar_t is two bytes (UTF-16), while on most other systems
87		* it is 4 bytes (UTF-32). This means that if you write code on Windows,
88		* use L"" to define a string which is meant to be UTF-16 and not UTF-32,
89		* you will have a little surprise when trying to port your code to Linux.
90		*
91		* Since the Windows API uses UTF-16, not UTF-32, WinPR defines the WCHAR
92		* type to always be 2-bytes long and uses it instead of wchar_t. Do not
93		* ever use wchar_t with WinPR unless you know what you are doing.
94		*
95		* As for L"", it is unfortunately unusable in a portable way, unless a
96		* special option is passed to GCC to define wchar_t as being two bytes.
97		* For string constants that must be UTF-16, it is a pain, but they can
98		* be defined in a portable way like this:
99		*
100		* WCHAR hello[] = { 'H','e','l','l','o','\0' };
101		*
102		* Such strings cannot be passed to native functions like wcslen(), which
103		* may expect a different wchar_t size. For this reason, WinPR provides
104		* _wcslen, which expects UTF-16 WCHAR strings on all platforms.
105		*
106		*/
107
108		/** \deprecated We no longer export this function, see ConvertUtf8ToWChar family of functions for a
109		* replacement
110		*
111		* Conversion to Unicode (UTF-16)
112		* MultiByteToWideChar: http://msdn.microsoft.com/en-us/library/windows/desktop/dd319072/
113		*
114		* cbMultiByte is an input size in bytes (BYTE)
115		* cchWideChar is an output size in wide characters (WCHAR)
116		*
117		* Null-terminated UTF-8 strings:
118		*
119		* cchWideChar cannot be assumed to be cbMultiByte since UTF-8 is variable-width!
120		*
121		* Instead, obtain the required cchWideChar output size like this:
122		* cchWideChar = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) lpMultiByteStr, -1, nullptr, 0);
123		*
124		* A value of -1 for cbMultiByte indicates that the input string is null-terminated,
125		* and the null terminator will be processed. The size returned by MultiByteToWideChar
126		* will therefore include the null terminator. Equivalent behavior can be obtained by
127		* computing the length in bytes of the input buffer, including the null terminator:
128		*
129		* cbMultiByte = strlen((char*) lpMultiByteStr) + 1;
130		*
131		* An output buffer of the proper size can then be allocated:
132		*
133		* lpWideCharStr = (LPWSTR) malloc(cchWideChar * sizeof(WCHAR));
134		*
135		* Since cchWideChar is an output size in wide characters, the actual buffer size is:
136		* (cchWideChar * sizeof(WCHAR)) or (cchWideChar * 2)
137		*
138		* Finally, perform the conversion:
139		*
140		* cchWideChar = MultiByteToWideChar(CP_UTF8, 0, (LPCSTR) lpMultiByteStr, -1, lpWideCharStr,
141		* cchWideChar);
142		*
143		* The value returned by MultiByteToWideChar corresponds to the number of wide characters written
144		* to the output buffer, and should match the value obtained on the first call to
145		* MultiByteToWideChar.
146		*
147		*/
148
149		#if !defined(WITH_WINPR_DEPRECATED)
150		static
151		#endif
152		int
153		MultiByteToWideChar(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte,
154		LPWSTR lpWideCharStr, int cchWideChar)
155	2.04k	{
156	2.04k	return int_MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte, lpWideCharStr,
157	2.04k	cchWideChar);
158	2.04k	}
159
160		/** \deprecated We no longer export this function, see ConvertWCharToUtf8 family of functions for a
161		* replacement
162		*
163		* Conversion from Unicode (UTF-16)
164		* WideCharToMultiByte: http://msdn.microsoft.com/en-us/library/windows/desktop/dd374130/
165		*
166		* cchWideChar is an input size in wide characters (WCHAR)
167		* cbMultiByte is an output size in bytes (BYTE)
168		*
169		* Null-terminated UTF-16 strings:
170		*
171		* cbMultiByte cannot be assumed to be cchWideChar since UTF-8 is variable-width!
172		*
173		* Instead, obtain the required cbMultiByte output size like this:
174		* cbMultiByte = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) lpWideCharStr, -1, nullptr, 0, nullptr,
175		* nullptr);
176		*
177		* A value of -1 for cbMultiByte indicates that the input string is null-terminated,
178		* and the null terminator will be processed. The size returned by WideCharToMultiByte
179		* will therefore include the null terminator. Equivalent behavior can be obtained by
180		* computing the length in bytes of the input buffer, including the null terminator:
181		*
182		* cchWideChar = _wcslen((WCHAR*) lpWideCharStr) + 1;
183		*
184		* An output buffer of the proper size can then be allocated:
185		* lpMultiByteStr = (LPSTR) malloc(cbMultiByte);
186		*
187		* Since cbMultiByte is an output size in bytes, it is the same as the buffer size
188		*
189		* Finally, perform the conversion:
190		*
191		* cbMultiByte = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR) lpWideCharStr, -1, lpMultiByteStr,
192		* cbMultiByte, nullptr, nullptr);
193		*
194		* The value returned by WideCharToMultiByte corresponds to the number of bytes written
195		* to the output buffer, and should match the value obtained on the first call to
196		* WideCharToMultiByte.
197		*
198		*/
199
200		#if !defined(WITH_WINPR_DEPRECATED)
201		static
202		#endif
203		int
204		WideCharToMultiByte(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
205		LPSTR lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
206		LPBOOL lpUsedDefaultChar)
207	460	{
208	460	return int_WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar, lpMultiByteStr,
209	460	cbMultiByte, lpDefaultChar, lpUsedDefaultChar);
210	460	}
211
212		#endif
213
214		/**
215		* ConvertToUnicode is a convenience wrapper for MultiByteToWideChar:
216		*
217		* If the lpWideCharStr parameter for the converted string points to nullptr
218		* or if the cchWideChar parameter is set to 0 this function will automatically
219		* allocate the required memory which is guaranteed to be null-terminated
220		* after the conversion, even if the source c string isn't.
221		*
222		* If the cbMultiByte parameter is set to -1 the passed lpMultiByteStr must
223		* be null-terminated and the required length for the converted string will be
224		* calculated accordingly.
225		*/
226		#if defined(WITH_WINPR_DEPRECATED)
227		int ConvertToUnicode(UINT CodePage, DWORD dwFlags, LPCSTR lpMultiByteStr, int cbMultiByte,
228		LPWSTR* lpWideCharStr, int cchWideChar)
229		{
230		int status = 0;
231		BOOL allocate = FALSE;
232
233		if (!lpMultiByteStr)
234		return 0;
235
236		if (!lpWideCharStr)
237		return 0;
238
239		if (cbMultiByte == -1)
240		{
241		size_t len = strnlen(lpMultiByteStr, INT_MAX);
242		if (len >= INT_MAX)
243		return 0;
244		cbMultiByte = (int)(len + 1);
245		}
246
247		if (cchWideChar == 0)
248		{
249		cchWideChar =
250		MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte, nullptr, 0);
251		allocate = TRUE;
252		}
253		else if (!(*lpWideCharStr))
254		allocate = TRUE;
255
256		if (cchWideChar < 1)
257		return 0;
258
259		if (allocate)
260		{
261		*lpWideCharStr = (LPWSTR)calloc((size_t)cchWideChar + 1ull, sizeof(WCHAR));
262
263		if (!(*lpWideCharStr))
264		{
265		// SetLastError(ERROR_INSUFFICIENT_BUFFER);
266		return 0;
267		}
268		}
269
270		status = MultiByteToWideChar(CodePage, dwFlags, lpMultiByteStr, cbMultiByte, *lpWideCharStr,
271		cchWideChar);
272
273		if (status != cchWideChar)
274		{
275		if (allocate)
276		{
277		free(*lpWideCharStr);
278		*lpWideCharStr = nullptr;
279		status = 0;
280		}
281		}
282
283		return status;
284		}
285		#endif
286
287		/**
288		* ConvertFromUnicode is a convenience wrapper for WideCharToMultiByte:
289		*
290		* If the lpMultiByteStr parameter for the converted string points to nullptr
291		* or if the cbMultiByte parameter is set to 0 this function will automatically
292		* allocate the required memory which is guaranteed to be null-terminated
293		* after the conversion, even if the source unicode string isn't.
294		*
295		* If the cchWideChar parameter is set to -1 the passed lpWideCharStr must
296		* be null-terminated and the required length for the converted string will be
297		* calculated accordingly.
298		*/
299		#if defined(WITH_WINPR_DEPRECATED)
300		int ConvertFromUnicode(UINT CodePage, DWORD dwFlags, LPCWSTR lpWideCharStr, int cchWideChar,
301		LPSTR* lpMultiByteStr, int cbMultiByte, LPCSTR lpDefaultChar,
302		LPBOOL lpUsedDefaultChar)
303		{
304		int status = 0;
305		BOOL allocate = FALSE;
306
307		if (!lpWideCharStr)
308		return 0;
309
310		if (!lpMultiByteStr)
311		return 0;
312
313		if (cchWideChar == -1)
314		cchWideChar = (int)(_wcslen(lpWideCharStr) + 1);
315
316		if (cbMultiByte == 0)
317		{
318		cbMultiByte = WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar, nullptr, 0,
319		nullptr, nullptr);
320		allocate = TRUE;
321		}
322		else if (!(*lpMultiByteStr))
323		allocate = TRUE;
324
325		if (cbMultiByte < 1)
326		return 0;
327
328		if (allocate)
329		{
330		*lpMultiByteStr = (LPSTR)calloc(1, (size_t)cbMultiByte + 1ull);
331
332		if (!(*lpMultiByteStr))
333		{
334		// SetLastError(ERROR_INSUFFICIENT_BUFFER);
335		return 0;
336		}
337		}
338
339		status = WideCharToMultiByte(CodePage, dwFlags, lpWideCharStr, cchWideChar, *lpMultiByteStr,
340		cbMultiByte, lpDefaultChar, lpUsedDefaultChar);
341
342		if ((status != cbMultiByte) && allocate)
343		{
344		status = 0;
345		}
346
347		if ((status <= 0) && allocate)
348		{
349		free(*lpMultiByteStr);
350		*lpMultiByteStr = nullptr;
351		}
352
353		return status;
354		}
355		#endif
356
357		/**
358		* Swap Unicode byte order (UTF16LE <-> UTF16BE)
359		*/
360
361		const WCHAR* ByteSwapUnicode(WCHAR* wstr, size_t length)
362	0	{
363	0	WINPR_ASSERT(wstr \|\| (length == 0));
364
365	0	for (size_t x = 0; x < length; x++)
366	0	wstr[x] = _byteswap_ushort(wstr[x]);
367	0	return wstr;
368	0	}
369
370		SSIZE_T ConvertWCharToUtf8(const WCHAR* wstr, char* str, size_t len)
371	0	{
372	0	if (!wstr)
373	0	{
374	0	if (str && len)
375	0	str[0] = 0;
376	0	return 0;
377	0	}
378
379	0	const size_t wlen = _wcslen(wstr);
380	0	return ConvertWCharNToUtf8(wstr, wlen + 1, str, len);
381	0	}
382
383		SSIZE_T ConvertWCharNToUtf8(const WCHAR* wstr, size_t wlen, char* str, size_t len)
384	460	{
385	460	BOOL isNullTerminated = FALSE;
386	460	if (wlen == 0)
387	0	return 0;
388
389	460	WINPR_ASSERT(wstr);
390	460	size_t iwlen = _wcsnlen(wstr, wlen);
391
392	460	if ((len > INT32_MAX) \|\| (wlen > INT32_MAX))
393	0	{
394	0	SetLastError(ERROR_INVALID_PARAMETER);
395	0	return -1;
396	0	}
397
398	460	if (iwlen < wlen)
399	117	{
400	117	isNullTerminated = TRUE;
401	117	iwlen++;
402	117	}
403	460	WINPR_PRAGMA_DIAG_PUSH
404	460	WINPR_PRAGMA_DIAG_IGNORED_DEPRECATED_DECL
405	460	const int rc =
406	460	WideCharToMultiByte(CP_UTF8, 0, wstr, (int)iwlen, str, (int)len, nullptr, nullptr);
407	460	WINPR_PRAGMA_DIAG_POP
408	460	if ((rc <= 0) \|\| ((len > 0) && ((size_t)rc > len)))
409	102	return -1;
410	358	else if (!isNullTerminated)
411	270	{
412	270	if (str && ((size_t)rc < len))
413	135	str[rc] = '\0';
414	270	return rc;
415	270	}
416	88	else if ((size_t)rc == len)
417	44	{
418	44	if (str && (str[rc - 1] != '\0'))
419	0	return rc;
420	44	}
421	88	return rc - 1;
422	460	}
423
424		SSIZE_T ConvertMszWCharNToUtf8(const WCHAR* wstr, size_t wlen, char* str, size_t len)
425	0	{
426	0	if (wlen == 0)
427	0	return 0;
428
429	0	WINPR_ASSERT(wstr);
430
431	0	if ((len > INT32_MAX) \|\| (wlen > INT32_MAX))
432	0	{
433	0	SetLastError(ERROR_INVALID_PARAMETER);
434	0	return -1;
435	0	}
436
437	0	const int iwlen = (int)len;
438	0	WINPR_PRAGMA_DIAG_PUSH
439	0	WINPR_PRAGMA_DIAG_IGNORED_DEPRECATED_DECL
440	0	const int rc = WideCharToMultiByte(CP_UTF8, 0, wstr, (int)wlen, str, iwlen, nullptr, nullptr);
441	0	WINPR_PRAGMA_DIAG_POP
442	0	if ((rc <= 0) \|\| ((len > 0) && (rc > iwlen)))
443	0	return -1;
444
445	0	return rc;
446	0	}
447
448		SSIZE_T ConvertUtf8ToWChar(const char* str, WCHAR* wstr, size_t wlen)
449	2.14k	{
450	2.14k	if (!str)
451	108	{
452	108	if (wstr && wlen)
453	54	wstr[0] = 0;
454	108	return 0;
455	108	}
456
457	2.04k	const size_t len = strlen(str);
458	2.04k	return ConvertUtf8NToWChar(str, len + 1, wstr, wlen);
459	2.14k	}
460
461		SSIZE_T ConvertUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen)
462	2.04k	{
463	2.04k	size_t ilen = strnlen(str, len);
464	2.04k	BOOL isNullTerminated = FALSE;
465	2.04k	if (len == 0)
466	0	return 0;
467
468	2.04k	WINPR_ASSERT(str);
469
470	2.04k	if ((len > INT32_MAX) \|\| (wlen > INT32_MAX))
471	0	{
472	0	SetLastError(ERROR_INVALID_PARAMETER);
473	0	return -1;
474	0	}
475	2.04k	if (ilen < len)
476	2.04k	{
477	2.04k	isNullTerminated = TRUE;
478	2.04k	ilen++;
479	2.04k	}
480
481	2.04k	const int iwlen = (int)wlen;
482	2.04k	WINPR_PRAGMA_DIAG_PUSH
483	2.04k	WINPR_PRAGMA_DIAG_IGNORED_DEPRECATED_DECL
484	2.04k	const int rc = MultiByteToWideChar(CP_UTF8, 0, str, (int)ilen, wstr, iwlen);
485	2.04k	WINPR_PRAGMA_DIAG_POP
486	2.04k	if ((rc <= 0) \|\| ((wlen > 0) && (rc > iwlen)))
487	80	return -1;
488	1.96k	if (!isNullTerminated)
489	0	{
490	0	if (wstr && (rc < iwlen))
491	0	wstr[rc] = '\0';
492	0	return rc;
493	0	}
494	1.96k	else if (rc == iwlen)
495	980	{
496	980	if (wstr && (wstr[rc - 1] != '\0'))
497	0	return rc;
498	980	}
499	1.96k	return rc - 1;
500	1.96k	}
501
502		SSIZE_T ConvertMszUtf8NToWChar(const char* str, size_t len, WCHAR* wstr, size_t wlen)
503	0	{
504	0	if (len == 0)
505	0	return 0;
506
507	0	WINPR_ASSERT(str);
508
509	0	if ((len > INT32_MAX) \|\| (wlen > INT32_MAX))
510	0	{
511	0	SetLastError(ERROR_INVALID_PARAMETER);
512	0	return -1;
513	0	}
514
515	0	const int iwlen = (int)wlen;
516	0	WINPR_PRAGMA_DIAG_PUSH
517	0	WINPR_PRAGMA_DIAG_IGNORED_DEPRECATED_DECL
518	0	const int rc = MultiByteToWideChar(CP_UTF8, 0, str, (int)len, wstr, iwlen);
519	0	WINPR_PRAGMA_DIAG_POP
520	0	if ((rc <= 0) \|\| ((wlen > 0) && (rc > iwlen)))
521	0	return -1;
522
523	0	return rc;
524	0	}
525
526		char* ConvertWCharToUtf8Alloc(const WCHAR* wstr, size_t* pUtfCharLength)
527	0	{
528	0	char* tmp = nullptr;
529	0	const SSIZE_T rc = ConvertWCharToUtf8(wstr, nullptr, 0);
530	0	if (pUtfCharLength)
531	0	*pUtfCharLength = 0;
532	0	if (rc < 0)
533	0	return nullptr;
534	0	tmp = calloc((size_t)rc + 1ull, sizeof(char));
535	0	if (!tmp)
536	0	return nullptr;
537	0	const SSIZE_T rc2 = ConvertWCharToUtf8(wstr, tmp, (size_t)rc + 1ull);
538	0	if (rc2 < 0)
539	0	{
540	0	free(tmp);
541	0	return nullptr;
542	0	}
543	0	WINPR_ASSERT(rc == rc2);
544	0	if (pUtfCharLength)
545	0	*pUtfCharLength = (size_t)rc2;
546	0	return tmp;
547	0	}
548
549		char* ConvertWCharNToUtf8Alloc(const WCHAR* wstr, size_t wlen, size_t* pUtfCharLength)
550	281	{
551	281	char* tmp = nullptr;
552	281	const SSIZE_T rc = ConvertWCharNToUtf8(wstr, wlen, nullptr, 0);
553
554	281	if (pUtfCharLength)
555	0	*pUtfCharLength = 0;
556	281	if (rc < 0)
557	102	return nullptr;
558	179	tmp = calloc((size_t)rc + 1ull, sizeof(char));
559	179	if (!tmp)
560	0	return nullptr;
561	179	const SSIZE_T rc2 = ConvertWCharNToUtf8(wstr, wlen, tmp, (size_t)rc + 1ull);
562	179	if (rc2 < 0)
563	0	{
564	0	free(tmp);
565	0	return nullptr;
566	0	}
567	179	WINPR_ASSERT(rc == rc2);
568	179	if (pUtfCharLength)
569	0	*pUtfCharLength = (size_t)rc2;
570	179	return tmp;
571	179	}
572
573		char* ConvertMszWCharNToUtf8Alloc(const WCHAR* wstr, size_t wlen, size_t* pUtfCharLength)
574	0	{
575	0	char* tmp = nullptr;
576	0	const SSIZE_T rc = ConvertMszWCharNToUtf8(wstr, wlen, nullptr, 0);
577
578	0	if (pUtfCharLength)
579	0	*pUtfCharLength = 0;
580	0	if (rc < 0)
581	0	return nullptr;
582	0	tmp = calloc((size_t)rc + 1ull, sizeof(char));
583	0	if (!tmp)
584	0	return nullptr;
585	0	const SSIZE_T rc2 = ConvertMszWCharNToUtf8(wstr, wlen, tmp, (size_t)rc + 1ull);
586	0	if (rc2 < 0)
587	0	{
588	0	free(tmp);
589	0	return nullptr;
590	0	}
591	0	WINPR_ASSERT(rc == rc2);
592	0	if (pUtfCharLength)
593	0	*pUtfCharLength = (size_t)rc2;
594	0	return tmp;
595	0	}
596
597		WCHAR* ConvertUtf8ToWCharAlloc(const char* str, size_t* pSize)
598	1.11k	{
599	1.11k	WCHAR* tmp = nullptr;
600	1.11k	const SSIZE_T rc = ConvertUtf8ToWChar(str, nullptr, 0);
601	1.11k	if (pSize)
602	1.11k	*pSize = 0;
603	1.11k	if (rc < 0)
604	80	return nullptr;
605	1.03k	tmp = calloc((size_t)rc + 1ull, sizeof(WCHAR));
606	1.03k	if (!tmp)
607	0	return nullptr;
608	1.03k	const SSIZE_T rc2 = ConvertUtf8ToWChar(str, tmp, (size_t)rc + 1ull);
609	1.03k	if (rc2 < 0)
610	0	{
611	0	free(tmp);
612	0	return nullptr;
613	0	}
614	1.03k	WINPR_ASSERT(rc == rc2);
615	1.03k	if (pSize)
616	1.03k	*pSize = (size_t)rc2;
617	1.03k	return tmp;
618	1.03k	}
619
620		WCHAR* ConvertUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize)
621	0	{
622	0	WCHAR* tmp = nullptr;
623	0	const SSIZE_T rc = ConvertUtf8NToWChar(str, len, nullptr, 0);
624	0	if (pSize)
625	0	*pSize = 0;
626	0	if (rc < 0)
627	0	return nullptr;
628	0	tmp = calloc((size_t)rc + 1ull, sizeof(WCHAR));
629	0	if (!tmp)
630	0	return nullptr;
631	0	const SSIZE_T rc2 = ConvertUtf8NToWChar(str, len, tmp, (size_t)rc + 1ull);
632	0	if (rc2 < 0)
633	0	{
634	0	free(tmp);
635	0	return nullptr;
636	0	}
637	0	WINPR_ASSERT(rc == rc2);
638	0	if (pSize)
639	0	*pSize = (size_t)rc2;
640	0	return tmp;
641	0	}
642
643		WCHAR* ConvertMszUtf8NToWCharAlloc(const char* str, size_t len, size_t* pSize)
644	0	{
645	0	WCHAR* tmp = nullptr;
646	0	const SSIZE_T rc = ConvertMszUtf8NToWChar(str, len, nullptr, 0);
647	0	if (pSize)
648	0	*pSize = 0;
649	0	if (rc < 0)
650	0	return nullptr;
651	0	tmp = calloc((size_t)rc + 1ull, sizeof(WCHAR));
652	0	if (!tmp)
653	0	return nullptr;
654	0	const SSIZE_T rc2 = ConvertMszUtf8NToWChar(str, len, tmp, (size_t)rc + 1ull);
655	0	if (rc2 < 0)
656	0	{
657	0	free(tmp);
658	0	return nullptr;
659	0	}
660	0	WINPR_ASSERT(rc == rc2);
661	0	if (pSize)
662	0	*pSize = (size_t)rc2;
663	0	return tmp;
664	0	}