/src/mozilla-central/xpcom/tests/gtest/TestUTF.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #include "mozilla/ArrayUtils.h" |
8 | | |
9 | | #include <stdio.h> |
10 | | #include <stdlib.h> |
11 | | #include "nsString.h" |
12 | | #include "nsStringBuffer.h" |
13 | | #include "nsReadableUtils.h" |
14 | | #include "UTFStrings.h" |
15 | | #include "nsUnicharUtils.h" |
16 | | #include "mozilla/HashFunctions.h" |
17 | | #include "nsUTF8Utils.h" |
18 | | |
19 | | #include "gtest/gtest.h" |
20 | | |
21 | | using namespace mozilla; |
22 | | |
23 | | namespace TestUTF { |
24 | | |
25 | | TEST(UTF, Valid) |
26 | 0 | { |
27 | 0 | for (unsigned int i = 0; i < ArrayLength(ValidStrings); ++i) { |
28 | 0 | nsDependentCString str8(ValidStrings[i].m8); |
29 | 0 | nsDependentString str16(ValidStrings[i].m16); |
30 | 0 |
|
31 | 0 | EXPECT_TRUE(NS_ConvertUTF16toUTF8(str16).Equals(str8)); |
32 | 0 |
|
33 | 0 | EXPECT_TRUE(NS_ConvertUTF8toUTF16(str8).Equals(str16)); |
34 | 0 |
|
35 | 0 | nsCString tmp8("string "); |
36 | 0 | AppendUTF16toUTF8(str16, tmp8); |
37 | 0 | EXPECT_TRUE(tmp8.Equals(NS_LITERAL_CSTRING("string ") + str8)); |
38 | 0 |
|
39 | 0 | nsString tmp16(NS_LITERAL_STRING("string ")); |
40 | 0 | AppendUTF8toUTF16(str8, tmp16); |
41 | 0 | EXPECT_TRUE(tmp16.Equals(NS_LITERAL_STRING("string ") + str16)); |
42 | 0 |
|
43 | 0 | EXPECT_EQ(CompareUTF8toUTF16(str8, str16), 0); |
44 | 0 | } |
45 | 0 | } |
46 | | |
47 | | TEST(UTF, Invalid16) |
48 | 0 | { |
49 | 0 | for (unsigned int i = 0; i < ArrayLength(Invalid16Strings); ++i) { |
50 | 0 | nsDependentString str16(Invalid16Strings[i].m16); |
51 | 0 | nsDependentCString str8(Invalid16Strings[i].m8); |
52 | 0 |
|
53 | 0 | EXPECT_TRUE(NS_ConvertUTF16toUTF8(str16).Equals(str8)); |
54 | 0 |
|
55 | 0 | nsCString tmp8("string "); |
56 | 0 | AppendUTF16toUTF8(str16, tmp8); |
57 | 0 | EXPECT_TRUE(tmp8.Equals(NS_LITERAL_CSTRING("string ") + str8)); |
58 | 0 |
|
59 | 0 | EXPECT_EQ(CompareUTF8toUTF16(str8, str16), 0); |
60 | 0 | } |
61 | 0 | } |
62 | | |
63 | | TEST(UTF, Invalid8) |
64 | 0 | { |
65 | 0 | for (unsigned int i = 0; i < ArrayLength(Invalid8Strings); ++i) { |
66 | 0 | nsDependentString str16(Invalid8Strings[i].m16); |
67 | 0 | nsDependentCString str8(Invalid8Strings[i].m8); |
68 | 0 |
|
69 | 0 | EXPECT_TRUE(NS_ConvertUTF8toUTF16(str8).Equals(str16)); |
70 | 0 |
|
71 | 0 | nsString tmp16(NS_LITERAL_STRING("string ")); |
72 | 0 | AppendUTF8toUTF16(str8, tmp16); |
73 | 0 | EXPECT_TRUE(tmp16.Equals(NS_LITERAL_STRING("string ") + str16)); |
74 | 0 |
|
75 | 0 | EXPECT_EQ(CompareUTF8toUTF16(str8, str16), 0); |
76 | 0 | } |
77 | 0 | } |
78 | | |
79 | | TEST(UTF, Malformed8) |
80 | 0 | { |
81 | 0 | for (unsigned int i = 0; i < ArrayLength(Malformed8Strings); ++i) { |
82 | 0 | nsDependentString str16(Malformed8Strings[i].m16); |
83 | 0 | nsDependentCString str8(Malformed8Strings[i].m8); |
84 | 0 |
|
85 | 0 | EXPECT_TRUE(NS_ConvertUTF8toUTF16(str8).Equals(str16)); |
86 | 0 |
|
87 | 0 | nsString tmp16(NS_LITERAL_STRING("string ")); |
88 | 0 | AppendUTF8toUTF16(str8, tmp16); |
89 | 0 | EXPECT_TRUE(tmp16.Equals(NS_LITERAL_STRING("string ") + str16)); |
90 | 0 |
|
91 | 0 | EXPECT_EQ(CompareUTF8toUTF16(str8, str16), 0); |
92 | 0 | } |
93 | 0 | } |
94 | | |
95 | | TEST(UTF, Hash16) |
96 | 0 | { |
97 | 0 | for (unsigned int i = 0; i < ArrayLength(ValidStrings); ++i) { |
98 | 0 | nsDependentCString str8(ValidStrings[i].m8); |
99 | 0 | bool err; |
100 | 0 | EXPECT_EQ(HashString(ValidStrings[i].m16), |
101 | 0 | HashUTF8AsUTF16(str8.get(), str8.Length(), &err)); |
102 | 0 | EXPECT_FALSE(err); |
103 | 0 | } |
104 | 0 |
|
105 | 0 | for (unsigned int i = 0; i < ArrayLength(Invalid8Strings); ++i) { |
106 | 0 | nsDependentCString str8(Invalid8Strings[i].m8); |
107 | 0 | bool err; |
108 | 0 | EXPECT_EQ(HashUTF8AsUTF16(str8.get(), str8.Length(), &err), 0u); |
109 | 0 | EXPECT_TRUE(err); |
110 | 0 | } |
111 | 0 |
|
112 | 0 | for (unsigned int i = 0; i < ArrayLength(Malformed8Strings); ++i) { |
113 | 0 | nsDependentCString str8(Malformed8Strings[i].m8); |
114 | 0 | bool err; |
115 | 0 | EXPECT_EQ(HashUTF8AsUTF16(str8.get(), str8.Length(), &err), 0u); |
116 | 0 | EXPECT_TRUE(err); |
117 | 0 | } |
118 | 0 | } |
119 | | |
120 | | /** |
121 | | * This tests the handling of a non-ascii character at various locations in a |
122 | | * UTF-16 string that is being converted to UTF-8. |
123 | | */ |
124 | | void NonASCII16_helper(const size_t aStrSize) |
125 | 0 | { |
126 | 0 | const size_t kTestSize = aStrSize; |
127 | 0 | const size_t kMaxASCII = 0x80; |
128 | 0 | const char16_t kUTF16Char = 0xC9; |
129 | 0 | const char kUTF8Surrogates[] = { char(0xC3), char(0x89) }; |
130 | 0 |
|
131 | 0 | // Generate a string containing only ASCII characters. |
132 | 0 | nsString asciiString; |
133 | 0 | asciiString.SetLength(kTestSize); |
134 | 0 | nsCString asciiCString; |
135 | 0 | asciiCString.SetLength(kTestSize); |
136 | 0 |
|
137 | 0 | auto str_buff = asciiString.BeginWriting(); |
138 | 0 | auto cstr_buff = asciiCString.BeginWriting(); |
139 | 0 | for (size_t i = 0; i < kTestSize; i++) { |
140 | 0 | str_buff[i] = i % kMaxASCII; |
141 | 0 | cstr_buff[i] = i % kMaxASCII; |
142 | 0 | } |
143 | 0 |
|
144 | 0 | // Now go through and test conversion when exactly one character will |
145 | 0 | // result in a multibyte sequence. |
146 | 0 | for (size_t i = 0; i < kTestSize; i++) { |
147 | 0 | // Setup the UTF-16 string. |
148 | 0 | nsString unicodeString(asciiString); |
149 | 0 | auto buff = unicodeString.BeginWriting(); |
150 | 0 | buff[i] = kUTF16Char; |
151 | 0 |
|
152 | 0 | // Do the conversion, make sure the length increased by 1. |
153 | 0 | nsCString dest; |
154 | 0 | AppendUTF16toUTF8(unicodeString, dest); |
155 | 0 | EXPECT_EQ(dest.Length(), unicodeString.Length() + 1); |
156 | 0 |
|
157 | 0 | // Build up the expected UTF-8 string. |
158 | 0 | nsCString expected; |
159 | 0 |
|
160 | 0 | // First add the leading ASCII chars. |
161 | 0 | expected.Append(asciiCString.BeginReading(), i); |
162 | 0 |
|
163 | 0 | // Now append the UTF-8 surrogate pair we expect the UTF-16 unicode char to |
164 | 0 | // be converted to. |
165 | 0 | for (auto& c : kUTF8Surrogates) { |
166 | 0 | expected.Append(c); |
167 | 0 | } |
168 | 0 |
|
169 | 0 | // And finish with the trailing ASCII chars. |
170 | 0 | expected.Append(asciiCString.BeginReading() + i + 1, kTestSize - i - 1); |
171 | 0 |
|
172 | 0 | EXPECT_STREQ(dest.BeginReading(), expected.BeginReading()); |
173 | 0 | } |
174 | 0 | } |
175 | | |
176 | | TEST(UTF, UTF8CharEnumerator) |
177 | 0 | { |
178 | 0 | const char* p = "\x61\xC0\xC2\xC2\x80\xE0\x80\x80\xE0\xA0\x80\xE1\x80\x80\xED\xBF\xBF\xED\x9F\xBF\xEE\x80\x80\xEE\x80\xFF\xF0\x90\x80\x80\xF0\x80\x80\x80\xF1\x80\x80\x80\xF4\x8F\xBF\xF4\x8F\xBF\xBF\xF4\xBF\xBF\xBF"; |
179 | 0 | const char* end = p + 49; |
180 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x0061U); |
181 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
182 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
183 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x0080U); |
184 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
185 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
186 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
187 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x0800U); |
188 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x1000U); |
189 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
190 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
191 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
192 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xD7FFU); |
193 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xE000U); |
194 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
195 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
196 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x10000U); |
197 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
198 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
199 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
200 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
201 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x40000U); |
202 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
203 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0x10FFFFU); |
204 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
205 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
206 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
207 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
208 | 0 | EXPECT_EQ(p, end); |
209 | 0 | p = "\xC2"; |
210 | 0 | end = p + 1; |
211 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
212 | 0 | EXPECT_EQ(p, end); |
213 | 0 | p = "\xE1\x80"; |
214 | 0 | end = p + 2; |
215 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
216 | 0 | EXPECT_EQ(p, end); |
217 | 0 | p = "\xF1\x80\x80"; |
218 | 0 | end = p + 3; |
219 | 0 | EXPECT_EQ(UTF8CharEnumerator::NextChar(&p, end), 0xFFFDU); |
220 | 0 | EXPECT_EQ(p, end); |
221 | 0 | } |
222 | | |
223 | | TEST(UTF, UTF16CharEnumerator) |
224 | 0 | { |
225 | 0 | const char16_t* p = u"\u0061\U0001F4A9"; |
226 | 0 | const char16_t* end = p + 3; |
227 | 0 | EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0x0061U); |
228 | 0 | EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0x1F4A9U); |
229 | 0 | EXPECT_EQ(p, end); |
230 | 0 | const char16_t loneHigh = 0xD83D; |
231 | 0 | p = &loneHigh; |
232 | 0 | end = p + 1; |
233 | 0 | EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0xFFFDU); |
234 | 0 | EXPECT_EQ(p, end); |
235 | 0 | const char16_t loneLow = 0xDCA9; |
236 | 0 | p = &loneLow; |
237 | 0 | end = p + 1; |
238 | 0 | EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0xFFFDU); |
239 | 0 | EXPECT_EQ(p, end); |
240 | 0 | const char16_t loneHighStr[] = { 0xD83D, 0x0061 }; |
241 | 0 | p = loneHighStr; |
242 | 0 | end = p + 2; |
243 | 0 | EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0xFFFDU); |
244 | 0 | EXPECT_EQ(UTF16CharEnumerator::NextChar(&p, end), 0x0061U); |
245 | 0 | EXPECT_EQ(p, end); |
246 | 0 | } |
247 | | |
248 | | } // namespace TestUTF |