/src/mozilla-central/xpcom/tests/gtest/TestEncoding.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* vim: set ts=8 sts=2 et sw=2 tw=80: */ |
3 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
4 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
5 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
6 | | |
7 | | #include <stdlib.h> |
8 | | #include "nsString.h" |
9 | | #include "gtest/gtest.h" |
10 | | |
11 | | TEST(Encoding, GoodSurrogatePair) |
12 | 0 | { |
13 | 0 | // When this string is decoded, the surrogate pair is U+10302 and the rest of |
14 | 0 | // the string is specified by indexes 2 onward. |
15 | 0 | const char16_t goodPairData[] = { 0xD800, 0xDF02, 0x65, 0x78, 0x0 }; |
16 | 0 | nsDependentString goodPair16(goodPairData); |
17 | 0 |
|
18 | 0 | uint32_t byteCount = 0; |
19 | 0 | char* goodPair8 = ToNewUTF8String(goodPair16, &byteCount); |
20 | 0 | EXPECT_TRUE(!!goodPair8); |
21 | 0 |
|
22 | 0 | EXPECT_EQ(byteCount, 6u); |
23 | 0 |
|
24 | 0 | const unsigned char expected8[] = |
25 | 0 | { 0xF0, 0x90, 0x8C, 0x82, 0x65, 0x78, 0x0 }; |
26 | 0 | EXPECT_EQ(0, memcmp(expected8, goodPair8, sizeof(expected8))); |
27 | 0 |
|
28 | 0 | // This takes a different code path from the above, so test it to make sure |
29 | 0 | // the UTF-16 enumeration remains in sync with the UTF-8 enumeration. |
30 | 0 | nsDependentCString expected((const char*)expected8); |
31 | 0 | EXPECT_EQ(0, CompareUTF8toUTF16(expected, goodPair16)); |
32 | 0 |
|
33 | 0 | free(goodPair8); |
34 | 0 | } |
35 | | |
36 | | TEST(Encoding, BackwardsSurrogatePair) |
37 | 0 | { |
38 | 0 | // When this string is decoded, the two surrogates are wrongly ordered and |
39 | 0 | // must each be interpreted as U+FFFD. |
40 | 0 | const char16_t backwardsPairData[] = { 0xDDDD, 0xD863, 0x65, 0x78, 0x0 }; |
41 | 0 | nsDependentString backwardsPair16(backwardsPairData); |
42 | 0 |
|
43 | 0 | uint32_t byteCount = 0; |
44 | 0 | char* backwardsPair8 = ToNewUTF8String(backwardsPair16, &byteCount); |
45 | 0 | EXPECT_TRUE(!!backwardsPair8); |
46 | 0 |
|
47 | 0 | EXPECT_EQ(byteCount, 8u); |
48 | 0 |
|
49 | 0 | const unsigned char expected8[] = |
50 | 0 | { 0xEF, 0xBF, 0xBD, 0xEF, 0xBF, 0xBD, 0x65, 0x78, 0x0 }; |
51 | 0 | EXPECT_EQ(0, memcmp(expected8, backwardsPair8, sizeof(expected8))); |
52 | 0 |
|
53 | 0 | // This takes a different code path from the above, so test it to make sure |
54 | 0 | // the UTF-16 enumeration remains in sync with the UTF-8 enumeration. |
55 | 0 | nsDependentCString expected((const char*)expected8); |
56 | 0 | EXPECT_EQ(0, CompareUTF8toUTF16(expected, backwardsPair16)); |
57 | 0 |
|
58 | 0 | free(backwardsPair8); |
59 | 0 | } |
60 | | |
61 | | TEST(Encoding, MalformedUTF16OrphanHighSurrogate) |
62 | 0 | { |
63 | 0 | // When this string is decoded, the high surrogate should be replaced and the |
64 | 0 | // rest of the string is specified by indexes 1 onward. |
65 | 0 | const char16_t highSurrogateData[] = { 0xD863, 0x74, 0x65, 0x78, 0x74, 0x0 }; |
66 | 0 | nsDependentString highSurrogate16(highSurrogateData); |
67 | 0 |
|
68 | 0 | uint32_t byteCount = 0; |
69 | 0 | char* highSurrogate8 = ToNewUTF8String(highSurrogate16, &byteCount); |
70 | 0 | EXPECT_TRUE(!!highSurrogate8); |
71 | 0 |
|
72 | 0 | EXPECT_EQ(byteCount, 7u); |
73 | 0 |
|
74 | 0 | const unsigned char expected8[] = |
75 | 0 | { 0xEF, 0xBF, 0xBD, 0x74, 0x65, 0x78, 0x74, 0x0 }; |
76 | 0 | EXPECT_EQ(0, memcmp(expected8, highSurrogate8, sizeof(expected8))); |
77 | 0 |
|
78 | 0 | // This takes a different code path from the above, so test it to make sure |
79 | 0 | // the UTF-16 enumeration remains in sync with the UTF-8 enumeration. |
80 | 0 | nsDependentCString expected((const char*)expected8); |
81 | 0 | EXPECT_EQ(0, CompareUTF8toUTF16(expected, highSurrogate16)); |
82 | 0 |
|
83 | 0 | free(highSurrogate8); |
84 | 0 | } |
85 | | |
86 | | TEST(Encoding, MalformedUTF16OrphanLowSurrogate) |
87 | 0 | { |
88 | 0 | // When this string is decoded, the low surrogate should be replaced and the |
89 | 0 | // rest of the string is specified by indexes 1 onward. |
90 | 0 | const char16_t lowSurrogateData[] = { 0xDDDD, 0x74, 0x65, 0x78, 0x74, 0x0 }; |
91 | 0 | nsDependentString lowSurrogate16(lowSurrogateData); |
92 | 0 |
|
93 | 0 | uint32_t byteCount = 0; |
94 | 0 | char* lowSurrogate8 = ToNewUTF8String(lowSurrogate16, &byteCount); |
95 | 0 | EXPECT_TRUE(!!lowSurrogate8); |
96 | 0 |
|
97 | 0 | EXPECT_EQ(byteCount, 7u); |
98 | 0 |
|
99 | 0 | const unsigned char expected8[] = |
100 | 0 | { 0xEF, 0xBF, 0xBD, 0x74, 0x65, 0x78, 0x74, 0x0 }; |
101 | 0 | EXPECT_EQ(0, memcmp(expected8, lowSurrogate8, sizeof(expected8))); |
102 | 0 |
|
103 | 0 | // This takes a different code path from the above, so test it to make sure |
104 | 0 | // the UTF-16 enumeration remains in sync with the UTF-8 enumeration. |
105 | 0 | nsDependentCString expected((const char*)expected8); |
106 | 0 | EXPECT_EQ(0, CompareUTF8toUTF16(expected, lowSurrogate16)); |
107 | 0 |
|
108 | 0 | free(lowSurrogate8); |
109 | 0 | } |