/src/mozilla-central/intl/uconv/nsTextToSubURI.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | #include "nsString.h" |
6 | | #include "nsITextToSubURI.h" |
7 | | #include "nsEscape.h" |
8 | | #include "nsTextToSubURI.h" |
9 | | #include "nsCRT.h" |
10 | | #include "mozilla/ArrayUtils.h" |
11 | | #include "mozilla/Encoding.h" |
12 | | #include "mozilla/Preferences.h" |
13 | | #include "nsISupportsPrimitives.h" |
14 | | |
15 | | using namespace mozilla; |
16 | | |
// Built-in substitute for the "network.IDN.blacklist_chars" preference, used
// when the pref cannot be read or yields nothing usable.
// The contents mirror the pref's default value, minus the two characters
// UnEscapeURIForUI leaves unescaped: SPACE (U+0020) and IDEOGRAPHIC SPACE
// (U+3000). Their slots are kept below as commented-out entries.
static const char16_t sNetworkIDNBlacklistChars[] = {
  /*0x0020,*/
  0x00A0, 0x00BC, 0x00BD, 0x00BE, 0x01C3, 0x02D0, 0x0337, 0x0338,
  0x0589, 0x058A, 0x05C3, 0x05F4, 0x0609, 0x060A, 0x066A, 0x06D4,
  0x0701, 0x0702, 0x0703, 0x0704, 0x115F, 0x1160, 0x1735, 0x2000,
  0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008,
  0x2009, 0x200A, 0x200B, 0x200E, 0x200F, 0x2010, 0x2019, 0x2024,
  0x2027, 0x2028, 0x2029, 0x202A, 0x202B, 0x202C, 0x202D, 0x202E,
  0x202F, 0x2039, 0x203A, 0x2041, 0x2044, 0x2052, 0x205F, 0x2153,
  0x2154, 0x2155, 0x2156, 0x2157, 0x2158, 0x2159, 0x215A, 0x215B,
  0x215C, 0x215D, 0x215E, 0x215F, 0x2215, 0x2236, 0x23AE, 0x2571,
  0x29F6, 0x29F8, 0x2AFB, 0x2AFD, 0x2FF0, 0x2FF1, 0x2FF2, 0x2FF3,
  0x2FF4, 0x2FF5, 0x2FF6, 0x2FF7, 0x2FF8, 0x2FF9, 0x2FFA, 0x2FFB,
  /*0x3000,*/
  0x3002, 0x3014, 0x3015, 0x3033, 0x30A0, 0x3164, 0x321D, 0x321E,
  0x33AE, 0x33AF, 0x33C6, 0x33DF, 0xA789, 0xFE14, 0xFE15, 0xFE3F,
  0xFE5D, 0xFE5E, 0xFEFF, 0xFF0E, 0xFF0F, 0xFF61, 0xFFA0, 0xFFF9,
  0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD
};
39 | | |
40 | | nsTextToSubURI::~nsTextToSubURI() |
41 | 0 | { |
42 | 0 | } |
43 | | |
44 | | NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI) |
45 | | |
46 | | NS_IMETHODIMP |
47 | | nsTextToSubURI::ConvertAndEscape(const nsACString& aCharset, |
48 | | const nsAString& aText, |
49 | | nsACString& aOut) |
50 | 0 | { |
51 | 0 | auto encoding = Encoding::ForLabelNoReplacement(aCharset); |
52 | 0 | if (!encoding) { |
53 | 0 | aOut.Truncate(); |
54 | 0 | return NS_ERROR_UCONV_NOCONV; |
55 | 0 | } |
56 | 0 | nsresult rv; |
57 | 0 | const Encoding* actualEncoding; |
58 | 0 | nsAutoCString intermediate; |
59 | 0 | Tie(rv, actualEncoding) = encoding->Encode(aText, intermediate); |
60 | 0 | Unused << actualEncoding; |
61 | 0 | if (NS_FAILED(rv)) { |
62 | 0 | aOut.Truncate(); |
63 | 0 | return rv; |
64 | 0 | } |
65 | 0 | bool ok = NS_Escape(intermediate, aOut, url_XPAlphas); |
66 | 0 | if (!ok) { |
67 | 0 | aOut.Truncate(); |
68 | 0 | return NS_ERROR_OUT_OF_MEMORY; |
69 | 0 | } |
70 | 0 | return NS_OK; |
71 | 0 | } |
72 | | |
73 | | NS_IMETHODIMP |
74 | | nsTextToSubURI::UnEscapeAndConvert(const nsACString& aCharset, |
75 | | const nsACString& aText, |
76 | | nsAString& aOut) |
77 | 0 | { |
78 | 0 | auto encoding = Encoding::ForLabelNoReplacement(aCharset); |
79 | 0 | if (!encoding) { |
80 | 0 | aOut.Truncate(); |
81 | 0 | return NS_ERROR_UCONV_NOCONV; |
82 | 0 | } |
83 | 0 | nsAutoCString unescaped(aText); |
84 | 0 | NS_UnescapeURL(unescaped); |
85 | 0 | auto rv = encoding->DecodeWithoutBOMHandling(unescaped, aOut); |
86 | 0 | if (NS_SUCCEEDED(rv)) { |
87 | 0 | return NS_OK; |
88 | 0 | } |
89 | 0 | return rv; |
90 | 0 | } |
91 | | |
92 | | static bool statefulCharset(const char *charset) |
93 | 1.79k | { |
94 | 1.79k | // HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in |
95 | 1.79k | // mozilla-central but keeping them here just in case for the benefit of |
96 | 1.79k | // comm-central. |
97 | 1.79k | if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) || |
98 | 1.79k | !nsCRT::strcasecmp(charset, "UTF-7") || |
99 | 1.79k | !nsCRT::strcasecmp(charset, "HZ-GB-2312")) |
100 | 0 | return true; |
101 | 1.79k | |
102 | 1.79k | return false; |
103 | 1.79k | } |
104 | | |
105 | | nsresult |
106 | | nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset, |
107 | | const nsCString& aURI, |
108 | | nsAString& aOut) |
109 | 1.79k | { |
110 | 1.79k | // check for 7bit encoding the data may not be ASCII after we decode |
111 | 1.79k | bool isStatefulCharset = statefulCharset(aCharset.get()); |
112 | 1.79k | |
113 | 1.79k | if (!isStatefulCharset) { |
114 | 1.79k | if (IsASCII(aURI)) { |
115 | 496 | CopyASCIItoUTF16(aURI, aOut); |
116 | 496 | return NS_OK; |
117 | 496 | } |
118 | 1.30k | if (IsUTF8(aURI)) { |
119 | 272 | CopyUTF8toUTF16(aURI, aOut); |
120 | 272 | return NS_OK; |
121 | 272 | } |
122 | 1.03k | } |
123 | 1.03k | |
124 | 1.03k | // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8. |
125 | 1.03k | NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG); |
126 | 1.03k | |
127 | 1.03k | auto encoding = Encoding::ForLabelNoReplacement(aCharset); |
128 | 0 | if (!encoding) { |
129 | 0 | aOut.Truncate(); |
130 | 0 | return NS_ERROR_UCONV_NOCONV; |
131 | 0 | } |
132 | 0 | return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI, aOut); |
133 | 0 | } |
134 | | |
135 | | NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset, |
136 | | const nsACString &aURIFragment, |
137 | | nsAString &_retval) |
138 | 0 | { |
139 | 0 | nsAutoCString unescapedSpec; |
140 | 0 | // skip control octets (0x00 - 0x1f and 0x7f) when unescaping |
141 | 0 | NS_UnescapeURL(PromiseFlatCString(aURIFragment), |
142 | 0 | esc_SkipControl | esc_AlwaysCopy, unescapedSpec); |
143 | 0 |
|
144 | 0 | // in case of failure, return escaped URI |
145 | 0 | // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte |
146 | 0 | // sequences are also considered failure in this context |
147 | 0 | if (convertURItoUnicode( |
148 | 0 | PromiseFlatCString(aCharset), unescapedSpec, _retval) |
149 | 0 | != NS_OK) { |
150 | 0 | // assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8 |
151 | 0 | CopyUTF8toUTF16(aURIFragment, _retval); |
152 | 0 | } |
153 | 0 |
|
154 | 0 | // If there are any characters that are unsafe for URIs, reescape those. |
155 | 0 | if (mUnsafeChars.IsEmpty()) { |
156 | 0 | nsAutoString blacklist; |
157 | 0 | nsresult rv = mozilla::Preferences::GetString("network.IDN.blacklist_chars", |
158 | 0 | blacklist); |
159 | 0 | if (NS_SUCCEEDED(rv)) { |
160 | 0 | // we allow SPACE and IDEOGRAPHIC SPACE in this method |
161 | 0 | blacklist.StripChars(u" \u3000"); |
162 | 0 | mUnsafeChars.AppendElements(static_cast<const char16_t*>(blacklist.Data()), |
163 | 0 | blacklist.Length()); |
164 | 0 | } else { |
165 | 0 | NS_WARNING("Failed to get the 'network.IDN.blacklist_chars' preference"); |
166 | 0 | } |
167 | 0 | // We check IsEmpty() intentionally here because an empty (or just spaces) |
168 | 0 | // pref value is likely a mistake/error of some sort. |
169 | 0 | if (mUnsafeChars.IsEmpty()) { |
170 | 0 | mUnsafeChars.AppendElements(sNetworkIDNBlacklistChars, |
171 | 0 | mozilla::ArrayLength(sNetworkIDNBlacklistChars)); |
172 | 0 | } |
173 | 0 | mUnsafeChars.Sort(); |
174 | 0 | } |
175 | 0 | const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval); |
176 | 0 | nsString reescapedSpec; |
177 | 0 | _retval = NS_EscapeURL(unescapedResult, mUnsafeChars, reescapedSpec); |
178 | 0 |
|
179 | 0 | return NS_OK; |
180 | 0 | } |
181 | | |
182 | | NS_IMETHODIMP |
183 | | nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset, |
184 | | const nsACString& aURIFragment, |
185 | | nsAString& _retval) |
186 | 1.79k | { |
187 | 1.79k | nsAutoCString unescapedSpec; |
188 | 1.79k | NS_UnescapeURL(PromiseFlatCString(aURIFragment), |
189 | 1.79k | esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec); |
190 | 1.79k | // leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII |
191 | 1.79k | // superset since converting "http:" with such an encoding is always a bad |
192 | 1.79k | // idea. |
193 | 1.79k | if (!IsUTF8(unescapedSpec) && |
194 | 1.79k | (aCharset.LowerCaseEqualsLiteral("utf-16") || |
195 | 1.03k | aCharset.LowerCaseEqualsLiteral("utf-16be") || |
196 | 1.03k | aCharset.LowerCaseEqualsLiteral("utf-16le") || |
197 | 1.03k | aCharset.LowerCaseEqualsLiteral("utf-7") || |
198 | 1.03k | aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){ |
199 | 0 | CopyASCIItoUTF16(aURIFragment, _retval); |
200 | 0 | return NS_OK; |
201 | 0 | } |
202 | 1.79k | |
203 | 1.79k | nsresult rv = convertURItoUnicode(PromiseFlatCString(aCharset), |
204 | 1.79k | unescapedSpec, _retval); |
205 | 1.79k | // NS_OK_UDEC_MOREINPUT is a success code, so caller can't catch the error |
206 | 1.79k | // if the string ends with a valid (but incomplete) sequence. |
207 | 1.79k | return rv == NS_OK_UDEC_MOREINPUT ? NS_ERROR_UDEC_ILLEGALINPUT : rv; |
208 | 1.79k | } |
209 | | |
210 | | //---------------------------------------------------------------------- |