/src/brpc/src/butil/strings/utf_string_conversions.cc

Source (jump to first uncovered line)
// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "butil/strings/utf_string_conversions.h"

#include "butil/strings/string_piece.h"
#include "butil/strings/string_util.h"
#include "butil/strings/utf_string_conversion_utils.h"

namespace butil {

namespace {

// Generalized Unicode converter -----------------------------------------------

// Converts |src_len| code units of the source Unicode character type, starting
// at |src|, to the destination Unicode character type held by the STL string
// |output|. Every character is emitted through WriteUnicodeCharacter(); this
// function never clears |output| itself, so callers are expected to prepare it
// (clear/reserve) before calling.
//
// Returns true when every input character was read successfully. Returns false
// when at least one character could not be read (U+FFFD is emitted in its
// place and conversion continues), or when |src_len| cannot be represented as
// a non-negative int32_t, in which case nothing is written.
template<typename SRC_CHAR, typename DEST_STRING>
bool ConvertUnicode(const SRC_CHAR* src,
                    size_t src_len,
                    DEST_STRING* output) {
  // ReadUnicodeCharacter() is driven with 32-bit lengths and indices. Reject a
  // length that does not survive the narrowing instead of silently converting
  // a truncated (or, when the result is negative, empty) prefix and then
  // reporting success.
  const int32_t src_len32 = static_cast<int32_t>(src_len);
  if (src_len32 < 0 || static_cast<size_t>(src_len32) != src_len) {
    return false;
  }

  bool success = true;
  // |i| is passed by pointer to ReadUnicodeCharacter(), which presumably
  // advances it across multi-unit characters; the loop increment then moves to
  // the start of the next character.
  for (int32_t i = 0; i < src_len32; i++) {
    uint32_t code_point;
    if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
      WriteUnicodeCharacter(code_point, output);
    } else {
      // Emit U+FFFD in place of the unreadable input and remember the failure,
      // but keep converting the remainder of the input.
      WriteUnicodeCharacter(0xFFFD, output);
      success = false;
    }
  }

  return success;
}

}  // namespace

// UTF-8 <-> Wide --------------------------------------------------------------

// Converts the |src_len| wide characters at |src| to UTF-8 in |output|.
// Returns the success flag reported by ConvertUnicode(); unreadable input
// characters are replaced with U+FFFD rather than aborting the conversion.
bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
  // Let the helper get |output| ready (presumably clearing and sizing it)
  // before the converter starts writing characters into it.
  PrepareForUTF8Output(src, src_len, output);
  const bool all_valid = ConvertUnicode(src, src_len, output);
  return all_valid;
}

// Convenience overload: returns the UTF-8 conversion of |wide| by value.
std::string WideToUTF8(const std::wstring& wide) {
  std::string utf8;
  // Best-effort conversion: the success flag is intentionally dropped, so
  // invalid input still yields whatever the converter managed to produce.
  WideToUTF8(wide.data(), wide.size(), &utf8);
  return utf8;
}

// Converts the |src_len| bytes of UTF-8 at |src| to a wide string stored in
// |output|. Returns the success flag reported by ConvertUnicode(); unreadable
// input is replaced with U+FFFD rather than aborting the conversion.
bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
  // Let the helper get |output| ready (presumably clearing and sizing it)
  // before the converter starts writing characters into it.
  PrepareForUTF16Or32Output(src, src_len, output);
  const bool all_valid = ConvertUnicode(src, src_len, output);
  return all_valid;
}

// Convenience overload: returns the wide-string conversion of |utf8| by value.
std::wstring UTF8ToWide(const StringPiece& utf8) {
  std::wstring wide;
  // The success flag is not inspected; the caller receives whatever the
  // converter produced, even for invalid input.
  UTF8ToWide(utf8.data(), utf8.length(), &wide);
  return wide;
}

// UTF-16 <-> Wide -------------------------------------------------------------

#if defined(WCHAR_T_IS_UTF16)

// When wide == UTF-16, then conversions are a NOP.
bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
  output->assign(src, src_len);
  return true;
}

// Wide strings are already UTF-16 in this configuration, so the result is
// simply a copy of |wide|.
string16 WideToUTF16(const std::wstring& wide) {
  string16 same_encoding(wide);
  return same_encoding;
}

// Copies the |src_len| UTF-16 code units at |src| into |output| unchanged and
// always reports success, since no transcoding takes place in this
// configuration.
bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
  std::wstring& dest = *output;
  dest.assign(src, src_len);
  return true;
}

std::wstring UTF16ToWide(const string16& utf16) {
  return utf16;
}

#elif defined(WCHAR_T_IS_UTF32)

bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
  output->clear();
  // Assume that normally we won't have any non-BMP characters so the counts
  // will be the same.
  output->reserve(src_len);
  return ConvertUnicode(src, src_len, output);
}

// Convenience overload: returns the UTF-16 conversion of |wide| by value. The
// success flag of the underlying conversion is not inspected.
string16 WideToUTF16(const std::wstring& wide) {
  string16 utf16;
  WideToUTF16(wide.data(), wide.size(), &utf16);
  return utf16;
}

// Converts the |src_len| UTF-16 code units at |src| to a wide (UTF-32) string,
// replacing the previous contents of |output|. Returns the success flag
// reported by ConvertUnicode().
bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
  std::wstring& dest = *output;
  dest.clear();
  // Sizing guess: input without non-BMP characters yields exactly one wide
  // character per UTF-16 unit, so |src_len| is normally the final length.
  dest.reserve(src_len);
  const bool all_valid = ConvertUnicode(src, src_len, &dest);
  return all_valid;
}

std::wstring UTF16ToWide(const string16& utf16) {
  std::wstring ret;
  UTF16ToWide(utf16.data(), utf16.length(), &ret);
  return ret;
}

#endif  // defined(WCHAR_T_IS_UTF32)

// UTF16 <-> UTF8 --------------------------------------------------------------

#if defined(WCHAR_T_IS_UTF32)

// Converts the |src_len| bytes of UTF-8 at |src| to UTF-16 in |output|.
// Returns the success flag reported by ConvertUnicode(); unreadable input is
// replaced with U+FFFD rather than aborting the conversion.
bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
  // Let the helper get |output| ready (presumably clearing and sizing it)
  // before the converter starts writing characters into it.
  PrepareForUTF16Or32Output(src, src_len, output);
  const bool all_valid = ConvertUnicode(src, src_len, output);
  return all_valid;
}

// Convenience overload: returns the UTF-16 conversion of |utf8| by value.
string16 UTF8ToUTF16(const StringPiece& utf8) {
  string16 utf16;
  // Best-effort conversion: the success flag is intentionally dropped, so
  // invalid input still yields whatever the converter managed to produce.
  UTF8ToUTF16(utf8.data(), utf8.length(), &utf16);
  return utf16;
}

// Converts the |src_len| UTF-16 code units at |src| to UTF-8 in |output|.
// Returns the success flag reported by ConvertUnicode(); unreadable input is
// replaced with U+FFFD rather than aborting the conversion.
bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
  // Let the helper get |output| ready (presumably clearing and sizing it)
  // before the converter starts writing characters into it.
  PrepareForUTF8Output(src, src_len, output);
  const bool all_valid = ConvertUnicode(src, src_len, output);
  return all_valid;
}

std::string UTF16ToUTF8(const string16& utf16) {
  std::string ret;
  // Ignore the success flag of this call, it will do the best it can for
  // invalid input, which is what we want here.
  UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
  return ret;
}

#elif defined(WCHAR_T_IS_UTF16)
// Easy case since we can use the "wide" versions we already wrote above.

// Wide strings are UTF-16 in this configuration, so this forwards directly to
// the UTF-8 -> wide conversion and returns its success flag.
bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
  const bool all_valid = UTF8ToWide(src, src_len, output);
  return all_valid;
}

// Wide strings are UTF-16 in this configuration, so the result is whatever the
// UTF-8 -> wide conversion returns for |utf8|.
string16 UTF8ToUTF16(const StringPiece& utf8) {
  string16 utf16 = UTF8ToWide(utf8);
  return utf16;
}

// Wide strings are UTF-16 in this configuration, so this forwards directly to
// the wide -> UTF-8 conversion and returns its success flag.
bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
  const bool all_valid = WideToUTF8(src, src_len, output);
  return all_valid;
}

// Wide strings are UTF-16 in this configuration, so the result is whatever the
// wide -> UTF-8 conversion returns for |utf16|.
std::string UTF16ToUTF8(const string16& utf16) {
  std::string utf8 = WideToUTF8(utf16);
  return utf8;
}

#endif

// Builds a wide string from |ascii| by copying it character by character
// through an iterator-range construction; no decoding is performed. The input
// is DCHECKed to satisfy IsStringASCII(), and the offending string is streamed
// into the check message when it does not.
std::wstring ASCIIToWide(const StringPiece& ascii) {
  DCHECK(IsStringASCII(ascii)) << ascii;
  std::wstring widened(ascii.begin(), ascii.end());
  return widened;
}

// Builds a UTF-16 string from |ascii| by copying it character by character
// through an iterator-range construction; no decoding is performed. The input
// is DCHECKed to satisfy IsStringASCII(), and the offending string is streamed
// into the check message when it does not.
string16 ASCIIToUTF16(const StringPiece& ascii) {
  DCHECK(IsStringASCII(ascii)) << ascii;
  string16 widened(ascii.begin(), ascii.end());
  return widened;
}

// Builds a narrow string from |utf16| by copying it code unit by code unit
// through an iterator-range construction; no encoding is performed. The input
// is DCHECKed to satisfy IsStringASCII(), and its UTF-8 conversion is streamed
// into the check message when it does not.
std::string UTF16ToASCII(const string16& utf16) {
  DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
  std::string narrowed(utf16.begin(), utf16.end());
  return narrowed;
}

}  // namespace butil

Line	Count	Source (jump to first uncovered line)
1		// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2		// Use of this source code is governed by a BSD-style license that can be
3		// found in the LICENSE file.
4
5		#include "butil/strings/utf_string_conversions.h"
6
7		#include "butil/strings/string_piece.h"
8		#include "butil/strings/string_util.h"
9		#include "butil/strings/utf_string_conversion_utils.h"
10
11		namespace butil {
12
13		namespace {
14
15		// Generalized Unicode converter -----------------------------------------------
16
17		// Converts the given source Unicode character type to the given destination
18		// Unicode character type as a STL string. The given input buffer and size
19		// determine the source, and the given output STL string will be replaced by
20		// the result.
21		template<typename SRC_CHAR, typename DEST_STRING>
22		bool ConvertUnicode(const SRC_CHAR* src,
23		size_t src_len,
24	0	DEST_STRING* output) {
25		// ICU requires 32-bit numbers.
26	0	bool success = true;
27	0	int32_t src_len32 = static_cast<int32_t>(src_len);
28	0	for (int32_t i = 0; i < src_len32; i++) {
29	0	uint32_t code_point;
30	0	if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
31	0	WriteUnicodeCharacter(code_point, output);
32	0	} else {
33	0	WriteUnicodeCharacter(0xFFFD, output);
34	0	success = false;
35	0	}
36	0	}
37
38	0	return success;
39	0	} Unexecuted instantiation: utf_string_conversions.cc:bool butil::(anonymous namespace)::ConvertUnicode<wchar_t, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >(wchar_t const, unsigned long, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >) Unexecuted instantiation: utf_string_conversions.cc:bool butil::(anonymous namespace)::ConvertUnicode<char, std::__cxx11::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > >(char const, unsigned long, std::__cxx11::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >) Unexecuted instantiation: utf_string_conversions.cc:bool butil::(anonymous namespace)::ConvertUnicode<wchar_t, std::__cxx11::basic_string<unsigned short, butil::string16_char_traits, std::allocator<unsigned short> > >(wchar_t const, unsigned long, std::__cxx11::basic_string<unsigned short, butil::string16_char_traits, std::allocator<unsigned short> >) Unexecuted instantiation: utf_string_conversions.cc:bool butil::(anonymous namespace)::ConvertUnicode<unsigned short, std::__cxx11::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > >(unsigned short const, unsigned long, std::__cxx11::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> >) Unexecuted instantiation: utf_string_conversions.cc:bool butil::(anonymous namespace)::ConvertUnicode<char, std::__cxx11::basic_string<unsigned short, butil::string16_char_traits, std::allocator<unsigned short> > >(char const, unsigned long, std::__cxx11::basic_string<unsigned short, butil::string16_char_traits, std::allocator<unsigned short> >) Unexecuted instantiation: utf_string_conversions.cc:bool butil::(anonymous namespace)::ConvertUnicode<unsigned short, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > >(unsigned short const, unsigned long, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >)
40
41		} // namespace
42
43		// UTF-8 <-> Wide --------------------------------------------------------------
44
45	0	bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) {
46	0	PrepareForUTF8Output(src, src_len, output);
47	0	return ConvertUnicode(src, src_len, output);
48	0	}
49
50	0	std::string WideToUTF8(const std::wstring& wide) {
51	0	std::string ret;
52		// Ignore the success flag of this call, it will do the best it can for
53		// invalid input, which is what we want here.
54	0	WideToUTF8(wide.data(), wide.length(), &ret);
55	0	return ret;
56	0	}
57
58	0	bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) {
59	0	PrepareForUTF16Or32Output(src, src_len, output);
60	0	return ConvertUnicode(src, src_len, output);
61	0	}
62
63	0	std::wstring UTF8ToWide(const StringPiece& utf8) {
64	0	std::wstring ret;
65	0	UTF8ToWide(utf8.data(), utf8.length(), &ret);
66	0	return ret;
67	0	}
68
69		// UTF-16 <-> Wide -------------------------------------------------------------
70
71		#if defined(WCHAR_T_IS_UTF16)
72
73		// When wide == UTF-16, then conversions are a NOP.
74		bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
75		output->assign(src, src_len);
76		return true;
77		}
78
79		string16 WideToUTF16(const std::wstring& wide) {
80		return wide;
81		}
82
83		bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
84		output->assign(src, src_len);
85		return true;
86		}
87
88		std::wstring UTF16ToWide(const string16& utf16) {
89		return utf16;
90		}
91
92		#elif defined(WCHAR_T_IS_UTF32)
93
94	0	bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) {
95	0	output->clear();
96		// Assume that normally we won't have any non-BMP characters so the counts
97		// will be the same.
98	0	output->reserve(src_len);
99	0	return ConvertUnicode(src, src_len, output);
100	0	}
101
102	0	string16 WideToUTF16(const std::wstring& wide) {
103	0	string16 ret;
104	0	WideToUTF16(wide.data(), wide.length(), &ret);
105	0	return ret;
106	0	}
107
108	0	bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) {
109	0	output->clear();
110		// Assume that normally we won't have any non-BMP characters so the counts
111		// will be the same.
112	0	output->reserve(src_len);
113	0	return ConvertUnicode(src, src_len, output);
114	0	}
115
116	0	std::wstring UTF16ToWide(const string16& utf16) {
117	0	std::wstring ret;
118	0	UTF16ToWide(utf16.data(), utf16.length(), &ret);
119	0	return ret;
120	0	}
121
122		#endif // defined(WCHAR_T_IS_UTF32)
123
124		// UTF16 <-> UTF8 --------------------------------------------------------------
125
126		#if defined(WCHAR_T_IS_UTF32)
127
128	0	bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
129	0	PrepareForUTF16Or32Output(src, src_len, output);
130	0	return ConvertUnicode(src, src_len, output);
131	0	}
132
133	0	string16 UTF8ToUTF16(const StringPiece& utf8) {
134	0	string16 ret;
135		// Ignore the success flag of this call, it will do the best it can for
136		// invalid input, which is what we want here.
137	0	UTF8ToUTF16(utf8.data(), utf8.length(), &ret);
138	0	return ret;
139	0	}
140
141	0	bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
142	0	PrepareForUTF8Output(src, src_len, output);
143	0	return ConvertUnicode(src, src_len, output);
144	0	}
145
146	0	std::string UTF16ToUTF8(const string16& utf16) {
147	0	std::string ret;
148		// Ignore the success flag of this call, it will do the best it can for
149		// invalid input, which is what we want here.
150	0	UTF16ToUTF8(utf16.data(), utf16.length(), &ret);
151	0	return ret;
152	0	}
153
154		#elif defined(WCHAR_T_IS_UTF16)
155		// Easy case since we can use the "wide" versions we already wrote above.
156
157		bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) {
158		return UTF8ToWide(src, src_len, output);
159		}
160
161		string16 UTF8ToUTF16(const StringPiece& utf8) {
162		return UTF8ToWide(utf8);
163		}
164
165		bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) {
166		return WideToUTF8(src, src_len, output);
167		}
168
169		std::string UTF16ToUTF8(const string16& utf16) {
170		return WideToUTF8(utf16);
171		}
172
173		#endif
174
175	0	std::wstring ASCIIToWide(const StringPiece& ascii) {
176	0	DCHECK(IsStringASCII(ascii)) << ascii;
177	0	return std::wstring(ascii.begin(), ascii.end());
178	0	}
179
180	0	string16 ASCIIToUTF16(const StringPiece& ascii) {
181	0	DCHECK(IsStringASCII(ascii)) << ascii;
182	0	return string16(ascii.begin(), ascii.end());
183	0	}
184
185	0	std::string UTF16ToASCII(const string16& utf16) {
186	0	DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16);
187	0	return std::string(utf16.begin(), utf16.end());
188	0	}
189
190		} // namespace butil

Coverage Report

Created: 2024-09-11 06:42