/src/brpc/src/butil/strings/utf_string_conversions.cc
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright (c) 2010 The Chromium Authors. All rights reserved. |
2 | | // Use of this source code is governed by a BSD-style license that can be |
3 | | // found in the LICENSE file. |
4 | | |
5 | | #include "butil/strings/utf_string_conversions.h" |
6 | | |
7 | | #include "butil/strings/string_piece.h" |
8 | | #include "butil/strings/string_util.h" |
9 | | #include "butil/strings/utf_string_conversion_utils.h" |
10 | | |
11 | | namespace butil { |
12 | | |
13 | | namespace { |
14 | | |
15 | | // Generalized Unicode converter ----------------------------------------------- |
16 | | |
// Converts the |src_len| code units starting at |src| into the encoding
// implied by DEST_STRING. Each decoded code point is handed to
// WriteUnicodeCharacter() together with |output|. This function never resets
// |output| itself; the callers in this file clear/prepare it beforehand.
//
// Any position that ReadUnicodeCharacter() rejects is emitted as U+FFFD
// (REPLACEMENT CHARACTER) and the loop carries on with the rest of the input.
//
// Returns true only when every character was read successfully. Returns false
// if at least one replacement character was emitted, or if |src_len| does not
// fit in the signed 32-bit length/index used by the character readers (in
// that case nothing is converted, instead of silently converting a wrapped,
// truncated length and reporting success).
template<typename SRC_CHAR, typename DEST_STRING>
bool ConvertUnicode(const SRC_CHAR* src,
                    size_t src_len,
                    DEST_STRING* output) {
  // ICU requires 32-bit numbers. Reject lengths that do not survive the
  // narrowing round trip: a wrapped value would be either negative (loop
  // skipped, bogus "success") or a too-small positive count (partial output).
  const int32_t src_len32 = static_cast<int32_t>(src_len);
  if (src_len32 < 0 || static_cast<size_t>(src_len32) != src_len) {
    return false;
  }

  bool success = true;
  for (int32_t i = 0; i < src_len32; i++) {
    uint32_t code_point;
    // ReadUnicodeCharacter() receives &i, so it may move the index forward
    // when one character spans several code units.
    if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) {
      WriteUnicodeCharacter(code_point, output);
    } else {
      WriteUnicodeCharacter(0xFFFD, output);
      success = false;
    }
  }

  return success;
}
40 | | |
41 | | } // namespace |
42 | | |
43 | | // UTF-8 <-> Wide -------------------------------------------------------------- |
44 | | |
45 | 0 | bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { |
46 | 0 | PrepareForUTF8Output(src, src_len, output); |
47 | 0 | return ConvertUnicode(src, src_len, output); |
48 | 0 | } |
49 | | |
50 | 0 | std::string WideToUTF8(const std::wstring& wide) { |
51 | 0 | std::string ret; |
52 | | // Ignore the success flag of this call, it will do the best it can for |
53 | | // invalid input, which is what we want here. |
54 | 0 | WideToUTF8(wide.data(), wide.length(), &ret); |
55 | 0 | return ret; |
56 | 0 | } |
57 | | |
58 | 0 | bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { |
59 | 0 | PrepareForUTF16Or32Output(src, src_len, output); |
60 | 0 | return ConvertUnicode(src, src_len, output); |
61 | 0 | } |
62 | | |
63 | 0 | std::wstring UTF8ToWide(const StringPiece& utf8) { |
64 | 0 | std::wstring ret; |
65 | 0 | UTF8ToWide(utf8.data(), utf8.length(), &ret); |
66 | 0 | return ret; |
67 | 0 | } |
68 | | |
69 | | // UTF-16 <-> Wide ------------------------------------------------------------- |
70 | | |
71 | | #if defined(WCHAR_T_IS_UTF16) |
72 | | |
73 | | // When wide == UTF-16, then conversions are a NOP. |
74 | | bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { |
75 | | output->assign(src, src_len); |
76 | | return true; |
77 | | } |
78 | | |
79 | | string16 WideToUTF16(const std::wstring& wide) { |
80 | | return wide; |
81 | | } |
82 | | |
83 | | bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { |
84 | | output->assign(src, src_len); |
85 | | return true; |
86 | | } |
87 | | |
88 | | std::wstring UTF16ToWide(const string16& utf16) { |
89 | | return utf16; |
90 | | } |
91 | | |
92 | | #elif defined(WCHAR_T_IS_UTF32) |
93 | | |
94 | 0 | bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { |
95 | 0 | output->clear(); |
96 | | // Assume that normally we won't have any non-BMP characters so the counts |
97 | | // will be the same. |
98 | 0 | output->reserve(src_len); |
99 | 0 | return ConvertUnicode(src, src_len, output); |
100 | 0 | } |
101 | | |
102 | 0 | string16 WideToUTF16(const std::wstring& wide) { |
103 | 0 | string16 ret; |
104 | 0 | WideToUTF16(wide.data(), wide.length(), &ret); |
105 | 0 | return ret; |
106 | 0 | } |
107 | | |
108 | 0 | bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { |
109 | 0 | output->clear(); |
110 | | // Assume that normally we won't have any non-BMP characters so the counts |
111 | | // will be the same. |
112 | 0 | output->reserve(src_len); |
113 | 0 | return ConvertUnicode(src, src_len, output); |
114 | 0 | } |
115 | | |
116 | 0 | std::wstring UTF16ToWide(const string16& utf16) { |
117 | 0 | std::wstring ret; |
118 | 0 | UTF16ToWide(utf16.data(), utf16.length(), &ret); |
119 | 0 | return ret; |
120 | 0 | } |
121 | | |
122 | | #endif // defined(WCHAR_T_IS_UTF32) |
123 | | |
124 | | // UTF16 <-> UTF8 -------------------------------------------------------------- |
125 | | |
126 | | #if defined(WCHAR_T_IS_UTF32) |
127 | | |
128 | 0 | bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { |
129 | 0 | PrepareForUTF16Or32Output(src, src_len, output); |
130 | 0 | return ConvertUnicode(src, src_len, output); |
131 | 0 | } |
132 | | |
133 | 0 | string16 UTF8ToUTF16(const StringPiece& utf8) { |
134 | 0 | string16 ret; |
135 | | // Ignore the success flag of this call, it will do the best it can for |
136 | | // invalid input, which is what we want here. |
137 | 0 | UTF8ToUTF16(utf8.data(), utf8.length(), &ret); |
138 | 0 | return ret; |
139 | 0 | } |
140 | | |
141 | 0 | bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { |
142 | 0 | PrepareForUTF8Output(src, src_len, output); |
143 | 0 | return ConvertUnicode(src, src_len, output); |
144 | 0 | } |
145 | | |
146 | 0 | std::string UTF16ToUTF8(const string16& utf16) { |
147 | 0 | std::string ret; |
148 | | // Ignore the success flag of this call, it will do the best it can for |
149 | | // invalid input, which is what we want here. |
150 | 0 | UTF16ToUTF8(utf16.data(), utf16.length(), &ret); |
151 | 0 | return ret; |
152 | 0 | } |
153 | | |
154 | | #elif defined(WCHAR_T_IS_UTF16) |
155 | | // Easy case since we can use the "wide" versions we already wrote above. |
156 | | |
157 | | bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { |
158 | | return UTF8ToWide(src, src_len, output); |
159 | | } |
160 | | |
161 | | string16 UTF8ToUTF16(const StringPiece& utf8) { |
162 | | return UTF8ToWide(utf8); |
163 | | } |
164 | | |
165 | | bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { |
166 | | return WideToUTF8(src, src_len, output); |
167 | | } |
168 | | |
169 | | std::string UTF16ToUTF8(const string16& utf16) { |
170 | | return WideToUTF8(utf16); |
171 | | } |
172 | | |
173 | | #endif |
174 | | |
175 | 0 | std::wstring ASCIIToWide(const StringPiece& ascii) { |
176 | 0 | DCHECK(IsStringASCII(ascii)) << ascii; |
177 | 0 | return std::wstring(ascii.begin(), ascii.end()); |
178 | 0 | } |
179 | | |
180 | 0 | string16 ASCIIToUTF16(const StringPiece& ascii) { |
181 | 0 | DCHECK(IsStringASCII(ascii)) << ascii; |
182 | 0 | return string16(ascii.begin(), ascii.end()); |
183 | 0 | } |
184 | | |
185 | 0 | std::string UTF16ToASCII(const string16& utf16) { |
186 | 0 | DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16); |
187 | 0 | return std::string(utf16.begin(), utf16.end()); |
188 | 0 | } |
189 | | |
190 | | } // namespace butil |