Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/xpcom/tests/gtest/TestUTF.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3
/* This Source Code Form is subject to the terms of the Mozilla Public
4
 * License, v. 2.0. If a copy of the MPL was not distributed with this
5
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7
#include "mozilla/ArrayUtils.h"
8
9
#include <stdio.h>
10
#include <stdlib.h>
11
#include "nsString.h"
12
#include "nsStringBuffer.h"
13
#include "nsReadableUtils.h"
14
#include "UTFStrings.h"
15
#include "nsUnicharUtils.h"
16
#include "mozilla/HashFunctions.h"
17
#include "nsUTF8Utils.h"
18
19
#include "gtest/gtest.h"
20
21
using namespace mozilla;
22
23
namespace TestUTF {
24
25
// For every entry of ValidStrings, checks that the m8 and m16 members convert
// into each other, both as whole-string conversions and when appended to an
// existing string, and that CompareUTF8toUTF16 reports them as equal.
TEST(UTF, Valid)
{
  for (const auto& sample : ValidStrings) {
    nsDependentCString utf8(sample.m8);
    nsDependentString utf16(sample.m16);

    // Whole-string conversion, UTF-16 -> UTF-8.
    EXPECT_TRUE(NS_ConvertUTF16toUTF8(utf16).Equals(utf8));

    // Whole-string conversion, UTF-8 -> UTF-16.
    EXPECT_TRUE(NS_ConvertUTF8toUTF16(utf8).Equals(utf16));

    // Appending converted text must keep the pre-existing prefix.
    nsCString narrow("string ");
    AppendUTF16toUTF8(utf16, narrow);
    EXPECT_TRUE(narrow.Equals(NS_LITERAL_CSTRING("string ") + utf8));

    nsString wide(NS_LITERAL_STRING("string "));
    AppendUTF8toUTF16(utf8, wide);
    EXPECT_TRUE(wide.Equals(NS_LITERAL_STRING("string ") + utf16));

    // A result of 0 means the two encodings compare equal.
    EXPECT_EQ(CompareUTF8toUTF16(utf8, utf16), 0);
  }
}
46
47
// For every entry of Invalid16Strings, checks that converting the m16 member
// to UTF-8 yields the m8 member, both as a whole-string conversion and when
// appended to an existing string, and that CompareUTF8toUTF16 reports the
// pair as equal.
TEST(UTF, Invalid16)
{
  for (const auto& sample : Invalid16Strings) {
    nsDependentString utf16(sample.m16);
    nsDependentCString utf8(sample.m8);

    // Whole-string conversion, UTF-16 -> UTF-8.
    EXPECT_TRUE(NS_ConvertUTF16toUTF8(utf16).Equals(utf8));

    // Appending converted text must keep the pre-existing prefix.
    nsCString narrow("string ");
    AppendUTF16toUTF8(utf16, narrow);
    EXPECT_TRUE(narrow.Equals(NS_LITERAL_CSTRING("string ") + utf8));

    EXPECT_EQ(CompareUTF8toUTF16(utf8, utf16), 0);
  }
}
62
63
// For every entry of Invalid8Strings, checks that converting the m8 member to
// UTF-16 yields the m16 member, both as a whole-string conversion and when
// appended to an existing string, and that CompareUTF8toUTF16 reports the
// pair as equal.
TEST(UTF, Invalid8)
{
  for (const auto& sample : Invalid8Strings) {
    nsDependentString utf16(sample.m16);
    nsDependentCString utf8(sample.m8);

    // Whole-string conversion, UTF-8 -> UTF-16.
    EXPECT_TRUE(NS_ConvertUTF8toUTF16(utf8).Equals(utf16));

    // Appending converted text must keep the pre-existing prefix.
    nsString wide(NS_LITERAL_STRING("string "));
    AppendUTF8toUTF16(utf8, wide);
    EXPECT_TRUE(wide.Equals(NS_LITERAL_STRING("string ") + utf16));

    EXPECT_EQ(CompareUTF8toUTF16(utf8, utf16), 0);
  }
}
78
79
// For every entry of Malformed8Strings, checks that converting the m8 member
// to UTF-16 yields the m16 member, both as a whole-string conversion and when
// appended to an existing string, and that CompareUTF8toUTF16 reports the
// pair as equal.
TEST(UTF, Malformed8)
{
  for (const auto& sample : Malformed8Strings) {
    nsDependentString utf16(sample.m16);
    nsDependentCString utf8(sample.m8);

    // Whole-string conversion, UTF-8 -> UTF-16.
    EXPECT_TRUE(NS_ConvertUTF8toUTF16(utf8).Equals(utf16));

    // Appending converted text must keep the pre-existing prefix.
    nsString wide(NS_LITERAL_STRING("string "));
    AppendUTF8toUTF16(utf8, wide);
    EXPECT_TRUE(wide.Equals(NS_LITERAL_STRING("string ") + utf16));

    EXPECT_EQ(CompareUTF8toUTF16(utf8, utf16), 0);
  }
}
94
95
// Exercises HashUTF8AsUTF16 against three tables:
//  - ValidStrings: the hash of the m8 member must equal HashString() of the
//    m16 member, and the error flag must be cleared.
//  - Invalid8Strings and Malformed8Strings: the hash must be 0 and the error
//    flag must be set.
TEST(UTF, Hash16)
{
  for (const auto& sample : ValidStrings) {
    nsDependentCString utf8(sample.m8);
    // Written by HashUTF8AsUTF16 through the out-parameter below.
    bool failed;
    EXPECT_EQ(HashString(sample.m16),
              HashUTF8AsUTF16(utf8.get(), utf8.Length(), &failed));
    EXPECT_FALSE(failed);
  }

  for (const auto& sample : Invalid8Strings) {
    nsDependentCString utf8(sample.m8);
    bool failed;
    EXPECT_EQ(HashUTF8AsUTF16(utf8.get(), utf8.Length(), &failed), 0u);
    EXPECT_TRUE(failed);
  }

  for (const auto& sample : Malformed8Strings) {
    nsDependentCString utf8(sample.m8);
    bool failed;
    EXPECT_EQ(HashUTF8AsUTF16(utf8.get(), utf8.Length(), &failed), 0u);
    EXPECT_TRUE(failed);
  }
}
119
120
/**
121
 * This tests the handling of a non-ascii character at various locations in a
122
 * UTF-16 string that is being converted to UTF-8.
123
 */
124
void NonASCII16_helper(const size_t aStrSize)
125
0
{
126
0
  const size_t kTestSize = aStrSize;
127
0
  const size_t kMaxASCII = 0x80;
128
0
  const char16_t kUTF16Char = 0xC9;
129
0
  const char kUTF8Surrogates[] = { char(0xC3), char(0x89) };
130
0
131
0
  // Generate a string containing only ASCII characters.
132
0
  nsString asciiString;
133
0
  asciiString.SetLength(kTestSize);
134
0
  nsCString asciiCString;
135
0
  asciiCString.SetLength(kTestSize);
136
0
137
0
  auto str_buff = asciiString.BeginWriting();
138
0
  auto cstr_buff = asciiCString.BeginWriting();
139
0
  for (size_t i = 0; i < kTestSize; i++) {
140
0
    str_buff[i] = i % kMaxASCII;
141
0
    cstr_buff[i] = i % kMaxASCII;
142
0
  }
143
0
144
0
  // Now go through and test conversion when exactly one character will
145
0
  // result in a multibyte sequence.
146
0
  for (size_t i = 0; i < kTestSize; i++) {
147
0
    // Setup the UTF-16 string.
148
0
    nsString unicodeString(asciiString);
149
0
    auto buff = unicodeString.BeginWriting();
150
0
    buff[i] = kUTF16Char;
151
0
152
0
    // Do the conversion, make sure the length increased by 1.
153
0
    nsCString dest;
154
0
    AppendUTF16toUTF8(unicodeString, dest);
155
0
    EXPECT_EQ(dest.Length(), unicodeString.Length() + 1);
156
0
157
0
    // Build up the expected UTF-8 string.
158
0
    nsCString expected;
159
0
160
0
    // First add the leading ASCII chars.
161
0
    expected.Append(asciiCString.BeginReading(), i);
162
0
163
0
    // Now append the UTF-8 surrogate pair we expect the UTF-16 unicode char to
164
0
    // be converted to.
165
0
    for (auto& c : kUTF8Surrogates) {
166
0
      expected.Append(c);
167
0
    }
168
0
169
0
    // And finish with the trailing ASCII chars.
170
0
    expected.Append(asciiCString.BeginReading() + i + 1, kTestSize - i - 1);
171
0
172
0
    EXPECT_STREQ(dest.BeginReading(), expected.BeginReading());
173
0
  }
174
0
}
175
176
// Feeds a byte string mixing well-formed and ill-formed UTF-8 sequences
// through UTF8CharEnumerator::NextChar and checks the sequence of returned
// values, then checks three inputs that end in the middle of a multi-byte
// sequence.
TEST(UTF, UTF8CharEnumerator)
{
  static const char kInput[] = "\x61\xC0\xC2\xC2\x80\xE0\x80\x80\xE0\xA0\x80\xE1\x80\x80\xED\xBF\xBF\xED\x9F\xBF\xEE\x80\x80\xEE\x80\xFF\xF0\x90\x80\x80\xF0\x80\x80\x80\xF1\x80\x80\x80\xF4\x8F\xBF\xF4\x8F\xBF\xBF\xF4\xBF\xBF\xBF";

  // Values NextChar is expected to return, in order. The byte annotations
  // show which input bytes each row presumably corresponds to, assuming one
  // U+FFFD per maximal ill-formed prefix -- NOTE(review): confirm against
  // the NextChar implementation; only the values and the final cursor
  // position are actually asserted.
  const unsigned int kExpected[] = {
    0x0061U,                             // 61
    0xFFFDU,                             // C0
    0xFFFDU,                             // C2 (next byte is not a trail byte)
    0x0080U,                             // C2 80
    0xFFFDU, 0xFFFDU, 0xFFFDU,           // E0 80 80
    0x0800U,                             // E0 A0 80
    0x1000U,                             // E1 80 80
    0xFFFDU, 0xFFFDU, 0xFFFDU,           // ED BF BF
    0xD7FFU,                             // ED 9F BF
    0xE000U,                             // EE 80 80
    0xFFFDU, 0xFFFDU,                    // EE 80 FF
    0x10000U,                            // F0 90 80 80
    0xFFFDU, 0xFFFDU, 0xFFFDU, 0xFFFDU,  // F0 80 80 80
    0x40000U,                            // F1 80 80 80
    0xFFFDU,                             // F4 8F BF (followed by a lead byte)
    0x10FFFFU,                           // F4 8F BF BF
    0xFFFDU, 0xFFFDU, 0xFFFDU, 0xFFFDU,  // F4 BF BF BF
  };

  const char* cursor = kInput;
  // Exclude the terminating NUL of the literal from the range.
  const char* const limit = kInput + sizeof(kInput) - 1;
  for (size_t i = 0; i < ArrayLength(kExpected); ++i) {
    EXPECT_EQ(UTF8CharEnumerator::NextChar(&cursor, limit), kExpected[i])
      << "at expected value index " << i;
  }
  // Every input byte must have been consumed.
  EXPECT_EQ(cursor, limit);

  // Inputs that stop part-way through a 2-, 3- and 4-byte sequence: each
  // must produce a single U+FFFD and leave the cursor at the end.
  const struct
  {
    const char* mBytes;
    size_t mLength;
  } kTruncated[] = {
    { "\xC2", 1 },
    { "\xE1\x80", 2 },
    { "\xF1\x80\x80", 3 },
  };
  for (const auto& input : kTruncated) {
    const char* pos = input.mBytes;
    const char* const stop = pos + input.mLength;
    EXPECT_EQ(UTF8CharEnumerator::NextChar(&pos, stop), 0xFFFDU);
    EXPECT_EQ(pos, stop);
  }
}
222
223
// Checks UTF16CharEnumerator::NextChar on a well-formed string containing a
// surrogate pair, and on inputs containing an unpaired surrogate, where
// U+FFFD is expected in place of the unpaired code unit.
TEST(UTF, UTF16CharEnumerator)
{
  {
    // U+0061 followed by U+1F4A9: three UTF-16 code units in total.
    const char16_t* cursor = u"\u0061\U0001F4A9";
    const char16_t* const limit = cursor + 3;
    EXPECT_EQ(UTF16CharEnumerator::NextChar(&cursor, limit), 0x0061U);
    EXPECT_EQ(UTF16CharEnumerator::NextChar(&cursor, limit), 0x1F4A9U);
    EXPECT_EQ(cursor, limit);
  }

  {
    // A high surrogate that is the last code unit of the input.
    const char16_t unpairedHigh = 0xD83D;
    const char16_t* cursor = &unpairedHigh;
    const char16_t* const limit = cursor + 1;
    EXPECT_EQ(UTF16CharEnumerator::NextChar(&cursor, limit), 0xFFFDU);
    EXPECT_EQ(cursor, limit);
  }

  {
    // A low surrogate with no preceding high surrogate.
    const char16_t unpairedLow = 0xDCA9;
    const char16_t* cursor = &unpairedLow;
    const char16_t* const limit = cursor + 1;
    EXPECT_EQ(UTF16CharEnumerator::NextChar(&cursor, limit), 0xFFFDU);
    EXPECT_EQ(cursor, limit);
  }

  {
    // A high surrogate followed by a non-surrogate: the following code unit
    // must still be returned on the next call.
    const char16_t highThenAscii[] = { 0xD83D, 0x0061 };
    const char16_t* cursor = highThenAscii;
    const char16_t* const limit = cursor + 2;
    EXPECT_EQ(UTF16CharEnumerator::NextChar(&cursor, limit), 0xFFFDU);
    EXPECT_EQ(UTF16CharEnumerator::NextChar(&cursor, limit), 0x0061U);
    EXPECT_EQ(cursor, limit);
  }
}
247
248
} // namespace TestUTF