/src/mozilla-central/intl/uconv/nsTextToSubURI.cpp

Source (jump to first uncovered line)
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsString.h"
#include "nsITextToSubURI.h"
#include "nsEscape.h"
#include "nsTextToSubURI.h"
#include "nsCRT.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/Encoding.h"
#include "mozilla/Preferences.h"
#include "nsISupportsPrimitives.h"

using namespace mozilla;

// Fallback value for the pref "network.IDN.blacklist_chars".
// UnEscapeURIForUI copies this table into mUnsafeChars only when the pref
// cannot be read or yields no characters. SPACE (U+0020) and IDEOGRAPHIC
// SPACE (U+3000) are deliberately commented out below because
// UnEscapeURIForUI allows them unescaped (it strips the same two characters
// from the pref value); other than that, this is the same as the default
// "network.IDN.blacklist_chars" value.
static const char16_t sNetworkIDNBlacklistChars[] =
{
/*0x0020,*/
          0x00A0, 0x00BC, 0x00BD, 0x00BE, 0x01C3, 0x02D0, 0x0337,
  0x0338, 0x0589, 0x058A, 0x05C3, 0x05F4, 0x0609, 0x060A, 0x066A, 0x06D4,
  0x0701, 0x0702, 0x0703, 0x0704, 0x115F, 0x1160, 0x1735, 0x2000,
  0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008,
  0x2009, 0x200A, 0x200B, 0x200E, 0x200F, 0x2010, 0x2019, 0x2024, 0x2027, 0x2028,
  0x2029, 0x202A, 0x202B, 0x202C, 0x202D, 0x202E, 0x202F, 0x2039,
  0x203A, 0x2041, 0x2044, 0x2052, 0x205F, 0x2153, 0x2154, 0x2155,
  0x2156, 0x2157, 0x2158, 0x2159, 0x215A, 0x215B, 0x215C, 0x215D,
  0x215E, 0x215F, 0x2215, 0x2236, 0x23AE, 0x2571, 0x29F6, 0x29F8,
  0x2AFB, 0x2AFD, 0x2FF0, 0x2FF1, 0x2FF2, 0x2FF3, 0x2FF4, 0x2FF5,
  0x2FF6, 0x2FF7, 0x2FF8, 0x2FF9, 0x2FFA, 0x2FFB, /*0x3000,*/ 0x3002,
  0x3014, 0x3015, 0x3033, 0x30A0, 0x3164, 0x321D, 0x321E, 0x33AE, 0x33AF,
  0x33C6, 0x33DF, 0xA789, 0xFE14, 0xFE15, 0xFE3F, 0xFE5D, 0xFE5E,
  0xFEFF, 0xFF0E, 0xFF0F, 0xFF61, 0xFFA0, 0xFFF9, 0xFFFA, 0xFFFB,
  0xFFFC, 0xFFFD
};

// Nothing to release by hand: the destructor body is empty, so let the
// compiler generate it (members are still destroyed as usual).
nsTextToSubURI::~nsTextToSubURI() = default;

// XPCOM boilerplate: declares nsITextToSubURI as the interface implemented by
// nsTextToSubURI. The macro is defined outside this file; presumably it
// expands to the nsISupports methods (AddRef/Release/QueryInterface).
NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI)

/**
 * Encodes aText into the encoding named by aCharset and URL-escapes the
 * resulting bytes (url_XPAlphas mask) into aOut.
 *
 * @param aCharset  Encoding label to look up.
 * @param aText     UTF-16 text to encode.
 * @param aOut      Receives the escaped bytes; truncated on every failure.
 * @return NS_OK on success; NS_ERROR_UCONV_NOCONV when the label does not
 *         resolve to an encoding; the encoder's failure code when encoding
 *         fails; NS_ERROR_OUT_OF_MEMORY when NS_Escape reports failure.
 */
NS_IMETHODIMP
nsTextToSubURI::ConvertAndEscape(const nsACString& aCharset,
                                 const nsAString& aText,
                                 nsACString& aOut)
{
  auto targetEncoding = Encoding::ForLabelNoReplacement(aCharset);
  if (!targetEncoding) {
    aOut.Truncate();
    return NS_ERROR_UCONV_NOCONV;
  }

  nsAutoCString encodedBytes;
  nsresult encodeStatus;
  const Encoding* usedEncoding;
  Tie(encodeStatus, usedEncoding) = targetEncoding->Encode(aText, encodedBytes);
  // The encoding that was actually used is of no interest to callers.
  Unused << usedEncoding;

  // Funnel both failure modes through one exit so aOut is cleared in one place.
  nsresult status = NS_OK;
  if (NS_FAILED(encodeStatus)) {
    status = encodeStatus;
  } else if (!NS_Escape(encodedBytes, aOut, url_XPAlphas)) {
    status = NS_ERROR_OUT_OF_MEMORY;
  }

  if (NS_FAILED(status)) {
    aOut.Truncate();
  }
  return status;
}

/**
 * Percent-unescapes aText and decodes the resulting bytes from the encoding
 * named by aCharset into aOut (no BOM handling).
 *
 * @param aCharset  Encoding label to look up.
 * @param aText     Escaped input bytes.
 * @param aOut      Receives the decoded text; truncated when the label does
 *                  not resolve to an encoding.
 * @return NS_ERROR_UCONV_NOCONV for an unresolved label; the decoder's
 *         failure code when decoding fails; otherwise NS_OK (any non-failure
 *         decoder result is reported as plain NS_OK).
 */
NS_IMETHODIMP
nsTextToSubURI::UnEscapeAndConvert(const nsACString& aCharset,
                                   const nsACString& aText,
                                   nsAString& aOut)
{
  auto sourceEncoding = Encoding::ForLabelNoReplacement(aCharset);
  if (!sourceEncoding) {
    aOut.Truncate();
    return NS_ERROR_UCONV_NOCONV;
  }

  // Work on a private copy: the unescape below rewrites the buffer in place.
  nsAutoCString rawBytes(aText);
  NS_UnescapeURL(rawBytes);

  const nsresult decodeStatus =
    sourceEncoding->DecodeWithoutBOMHandling(rawBytes, aOut);
  return NS_FAILED(decodeStatus) ? decodeStatus : NS_OK;
}

// Returns true when |charset| (compared case-insensitively) is one of the
// labels treated as stateful here: anything starting with "ISO-2022-",
// "UTF-7", or "HZ-GB-2312".
static bool statefulCharset(const char *charset)
{
  // HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in
  // mozilla-central but keeping them here just in case for the benefit of
  // comm-central.
  static const char kIso2022Prefix[] = "ISO-2022-";

  // Prefix match only: the trailing NUL is excluded from the compared length.
  if (nsCRT::strncasecmp(charset, kIso2022Prefix,
                         sizeof(kIso2022Prefix) - 1) == 0) {
    return true;
  }

  return nsCRT::strcasecmp(charset, "UTF-7") == 0 ||
         nsCRT::strcasecmp(charset, "HZ-GB-2312") == 0;
}

/**
 * Converts the already-unescaped bytes in aURI to UTF-16.
 *
 * Unless aCharset is a stateful charset, pure-ASCII and valid UTF-8 input is
 * copied straight to aOut without consulting aCharset. Everything else is
 * decoded with the encoding named by aCharset (no BOM handling, no
 * replacement characters).
 *
 * @param aCharset  Encoding label; may be empty only if aURI is ASCII/UTF-8.
 * @param aURI      Unescaped bytes to convert.
 * @param aOut      Receives the converted text; truncated when the label does
 *                  not resolve to an encoding.
 * @return NS_OK on the fast paths; NS_ERROR_INVALID_ARG when aCharset is
 *         empty and a decoder is required; NS_ERROR_UCONV_NOCONV for an
 *         unresolved label; otherwise whatever the decoder returns.
 */
nsresult
nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset,
                                    const nsCString& aURI,
                                    nsAString& aOut)
{
  // With a stateful (7-bit) charset the bytes may look like ASCII and still
  // decode to something else, so the shortcuts below are only valid otherwise.
  const bool canShortcut = !statefulCharset(aCharset.get());

  if (canShortcut && IsASCII(aURI)) {
    CopyASCIItoUTF16(aURI, aOut);
    return NS_OK;
  }

  if (canShortcut && IsUTF8(aURI)) {
    CopyUTF8toUTF16(aURI, aOut);
    return NS_OK;
  }

  // An empty charset could have meant UTF-8, but by now we know aURI is not
  // something we were able to treat as UTF-8.
  NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);

  if (auto decoder = Encoding::ForLabelNoReplacement(aCharset)) {
    return decoder->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI, aOut);
  }

  aOut.Truncate();
  return NS_ERROR_UCONV_NOCONV;
}

/**
 * Produces a display form of an escaped URI fragment: unescape it, convert it
 * to UTF-16 using aCharset, then re-escape the characters listed in
 * mUnsafeChars.
 *
 * @param aCharset      Encoding label used when the unescaped bytes are
 *                      neither ASCII nor UTF-8.
 * @param aURIFragment  Escaped URI fragment.
 * @param _retval       Receives the display string.
 * @return Always NS_OK; a failed conversion falls back to the escaped input.
 */
NS_IMETHODIMP  nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,
                                                const nsACString &aURIFragment,
                                                nsAString &_retval)
{
  // Step 1: percent-decode, skipping control octets (0x00 - 0x1f and 0x7f).
  nsAutoCString decodedBytes;
  NS_UnescapeURL(PromiseFlatCString(aURIFragment),
                 esc_SkipControl | esc_AlwaysCopy, decodedBytes);

  // Step 2: convert to UTF-16. Anything other than exactly NS_OK counts as a
  // failure here (not just NS_FAILED), because an incomplete multi-byte
  // sequence must also be rejected.
  const nsresult conversionStatus =
    convertURItoUnicode(PromiseFlatCString(aCharset), decodedBytes, _retval);
  if (conversionStatus != NS_OK) {
    // Hand back the still-escaped fragment. Treat it as UTF-8 rather than
    // ASCII because a hostname (IDN) may be in UTF-8.
    CopyUTF8toUTF16(aURIFragment, _retval);
  }

  // Step 3: fill mUnsafeChars if it is still empty.
  if (mUnsafeChars.IsEmpty()) {
    nsAutoString prefChars;
    const nsresult prefStatus =
      mozilla::Preferences::GetString("network.IDN.blacklist_chars",
                                      prefChars);
    if (NS_FAILED(prefStatus)) {
      NS_WARNING("Failed to get the 'network.IDN.blacklist_chars' preference");
    } else {
      // SPACE and IDEOGRAPHIC SPACE are allowed by this method.
      prefChars.StripChars(u" \u3000");
      mUnsafeChars.AppendElements(
        static_cast<const char16_t*>(prefChars.Data()),
        prefChars.Length());
    }

    // Deliberately keyed on IsEmpty(): an empty (or spaces-only) pref value
    // is most likely a mistake, so use the built-in table in that case too.
    if (mUnsafeChars.IsEmpty()) {
      mUnsafeChars.AppendElements(
        sNetworkIDNBlacklistChars,
        mozilla::ArrayLength(sNetworkIDNBlacklistChars));
    }
    mUnsafeChars.Sort();
  }

  // Step 4: re-escape any characters that are unsafe for URIs.
  const nsPromiseFlatString& flatDecoded = PromiseFlatString(_retval);
  nsString escapedBuffer;
  _retval = NS_EscapeURL(flatDecoded, mUnsafeChars, escapedBuffer);

  return NS_OK;
}

/**
 * Unescapes aURIFragment (esc_OnlyNonASCII) and converts the result to UTF-16
 * using aCharset.
 *
 * @param aCharset      Encoding label for input that is not UTF-8.
 * @param aURIFragment  Escaped URI fragment.
 * @param _retval       Receives the converted text.
 * @return NS_OK when the fragment is returned unchanged or converted; the
 *         result of convertURItoUnicode otherwise, except that
 *         NS_OK_UDEC_MOREINPUT is reported as NS_ERROR_UDEC_ILLEGALINPUT.
 */
NS_IMETHODIMP
nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset,
                                    const nsACString& aURIFragment,
                                    nsAString& _retval)
{
  nsAutoCString decodedBytes;
  NS_UnescapeURL(PromiseFlatCString(aURIFragment),
                 esc_AlwaysCopy | esc_OnlyNonASCII, decodedBytes);

  // If the bytes are not UTF-8 and aCharset is not an ASCII superset, leave
  // the URI exactly as it came in: converting "http:" with such an encoding
  // is always a bad idea.
  if (!IsUTF8(decodedBytes)) {
    const bool notAsciiSuperset =
      aCharset.LowerCaseEqualsLiteral("utf-16") ||
      aCharset.LowerCaseEqualsLiteral("utf-16be") ||
      aCharset.LowerCaseEqualsLiteral("utf-16le") ||
      aCharset.LowerCaseEqualsLiteral("utf-7") ||
      aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7");
    if (notAsciiSuperset) {
      CopyASCIItoUTF16(aURIFragment, _retval);
      return NS_OK;
    }
  }

  const nsresult conversionStatus =
    convertURItoUnicode(PromiseFlatCString(aCharset), decodedBytes, _retval);

  // NS_OK_UDEC_MOREINPUT is a success code, so the caller could not otherwise
  // detect input that ends in a valid but incomplete sequence.
  if (conversionStatus == NS_OK_UDEC_MOREINPUT) {
    return NS_ERROR_UDEC_ILLEGALINPUT;
  }
  return conversionStatus;
}

//----------------------------------------------------------------------

Coverage Report

Created: 2018-09-25 14:53

Line	Count	Source (jump to first uncovered line)
1		/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2		/* This Source Code Form is subject to the terms of the Mozilla Public
3		* License, v. 2.0. If a copy of the MPL was not distributed with this
4		* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5		#include "nsString.h"
6		#include "nsITextToSubURI.h"
7		#include "nsEscape.h"
8		#include "nsTextToSubURI.h"
9		#include "nsCRT.h"
10		#include "mozilla/ArrayUtils.h"
11		#include "mozilla/Encoding.h"
12		#include "mozilla/Preferences.h"
13		#include "nsISupportsPrimitives.h"
14
15		using namespace mozilla;
16
17		// Fallback value for the pref "network.IDN.blacklist_chars".
18		// UnEscapeURIForUI allows unescaped space; other than that, this is
19		// the same as the default "network.IDN.blacklist_chars" value.
20		static const char16_t sNetworkIDNBlacklistChars[] =
21		{
22		/*0x0020,*/
23		0x00A0, 0x00BC, 0x00BD, 0x00BE, 0x01C3, 0x02D0, 0x0337,
24		0x0338, 0x0589, 0x058A, 0x05C3, 0x05F4, 0x0609, 0x060A, 0x066A, 0x06D4,
25		0x0701, 0x0702, 0x0703, 0x0704, 0x115F, 0x1160, 0x1735, 0x2000,
26		0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008,
27		0x2009, 0x200A, 0x200B, 0x200E, 0x200F, 0x2010, 0x2019, 0x2024, 0x2027, 0x2028,
28		0x2029, 0x202A, 0x202B, 0x202C, 0x202D, 0x202E, 0x202F, 0x2039,
29		0x203A, 0x2041, 0x2044, 0x2052, 0x205F, 0x2153, 0x2154, 0x2155,
30		0x2156, 0x2157, 0x2158, 0x2159, 0x215A, 0x215B, 0x215C, 0x215D,
31		0x215E, 0x215F, 0x2215, 0x2236, 0x23AE, 0x2571, 0x29F6, 0x29F8,
32		0x2AFB, 0x2AFD, 0x2FF0, 0x2FF1, 0x2FF2, 0x2FF3, 0x2FF4, 0x2FF5,
33		0x2FF6, 0x2FF7, 0x2FF8, 0x2FF9, 0x2FFA, 0x2FFB, /*0x3000,*/ 0x3002,
34		0x3014, 0x3015, 0x3033, 0x30A0, 0x3164, 0x321D, 0x321E, 0x33AE, 0x33AF,
35		0x33C6, 0x33DF, 0xA789, 0xFE14, 0xFE15, 0xFE3F, 0xFE5D, 0xFE5E,
36		0xFEFF, 0xFF0E, 0xFF0F, 0xFF61, 0xFFA0, 0xFFF9, 0xFFFA, 0xFFFB,
37		0xFFFC, 0xFFFD
38		};
39
40		nsTextToSubURI::~nsTextToSubURI()
41	0	{
42	0	}
43
44		NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI)
45
46		NS_IMETHODIMP
47		nsTextToSubURI::ConvertAndEscape(const nsACString& aCharset,
48		const nsAString& aText,
49		nsACString& aOut)
50	0	{
51	0	auto encoding = Encoding::ForLabelNoReplacement(aCharset);
52	0	if (!encoding) {
53	0	aOut.Truncate();
54	0	return NS_ERROR_UCONV_NOCONV;
55	0	}
56	0	nsresult rv;
57	0	const Encoding* actualEncoding;
58	0	nsAutoCString intermediate;
59	0	Tie(rv, actualEncoding) = encoding->Encode(aText, intermediate);
60	0	Unused << actualEncoding;
61	0	if (NS_FAILED(rv)) {
62	0	aOut.Truncate();
63	0	return rv;
64	0	}
65	0	bool ok = NS_Escape(intermediate, aOut, url_XPAlphas);
66	0	if (!ok) {
67	0	aOut.Truncate();
68	0	return NS_ERROR_OUT_OF_MEMORY;
69	0	}
70	0	return NS_OK;
71	0	}
72
73		NS_IMETHODIMP
74		nsTextToSubURI::UnEscapeAndConvert(const nsACString& aCharset,
75		const nsACString& aText,
76		nsAString& aOut)
77	0	{
78	0	auto encoding = Encoding::ForLabelNoReplacement(aCharset);
79	0	if (!encoding) {
80	0	aOut.Truncate();
81	0	return NS_ERROR_UCONV_NOCONV;
82	0	}
83	0	nsAutoCString unescaped(aText);
84	0	NS_UnescapeURL(unescaped);
85	0	auto rv = encoding->DecodeWithoutBOMHandling(unescaped, aOut);
86	0	if (NS_SUCCEEDED(rv)) {
87	0	return NS_OK;
88	0	}
89	0	return rv;
90	0	}
91
92		static bool statefulCharset(const char *charset)
93	1.79k	{
94	1.79k	// HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in
95	1.79k	// mozilla-central but keeping them here just in case for the benefit of
96	1.79k	// comm-central.
97	1.79k	if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) ||
98	1.79k	!nsCRT::strcasecmp(charset, "UTF-7") ||
99	1.79k	!nsCRT::strcasecmp(charset, "HZ-GB-2312"))
100	0	return true;
101	1.79k
102	1.79k	return false;
103	1.79k	}
104
105		nsresult
106		nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset,
107		const nsCString& aURI,
108		nsAString& aOut)
109	1.79k	{
110	1.79k	// check for 7bit encoding the data may not be ASCII after we decode
111	1.79k	bool isStatefulCharset = statefulCharset(aCharset.get());
112	1.79k
113	1.79k	if (!isStatefulCharset) {
114	1.79k	if (IsASCII(aURI)) {
115	496	CopyASCIItoUTF16(aURI, aOut);
116	496	return NS_OK;
117	496	}
118	1.30k	if (IsUTF8(aURI)) {
119	272	CopyUTF8toUTF16(aURI, aOut);
120	272	return NS_OK;
121	272	}
122	1.03k	}
123	1.03k
124	1.03k	// empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
125	1.03k	NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);
126	1.03k
127	1.03k	auto encoding = Encoding::ForLabelNoReplacement(aCharset);
128	0	if (!encoding) {
129	0	aOut.Truncate();
130	0	return NS_ERROR_UCONV_NOCONV;
131	0	}
132	0	return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI, aOut);
133	0	}
134
135		NS_IMETHODIMP nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,
136		const nsACString &aURIFragment,
137		nsAString &_retval)
138	0	{
139	0	nsAutoCString unescapedSpec;
140	0	// skip control octets (0x00 - 0x1f and 0x7f) when unescaping
141	0	NS_UnescapeURL(PromiseFlatCString(aURIFragment),
142	0	esc_SkipControl | esc_AlwaysCopy, unescapedSpec);
143	0
144	0	// in case of failure, return escaped URI
145	0	// Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
146	0	// sequences are also considered failure in this context
147	0	if (convertURItoUnicode(
148	0	PromiseFlatCString(aCharset), unescapedSpec, _retval)
149	0	!= NS_OK) {
150	0	// assume UTF-8 instead of ASCII because hostname (IDN) may be in UTF-8
151	0	CopyUTF8toUTF16(aURIFragment, _retval);
152	0	}
153	0
154	0	// If there are any characters that are unsafe for URIs, reescape those.
155	0	if (mUnsafeChars.IsEmpty()) {
156	0	nsAutoString blacklist;
157	0	nsresult rv = mozilla::Preferences::GetString("network.IDN.blacklist_chars",
158	0	blacklist);
159	0	if (NS_SUCCEEDED(rv)) {
160	0	// we allow SPACE and IDEOGRAPHIC SPACE in this method
161	0	blacklist.StripChars(u" \u3000");
162	0	mUnsafeChars.AppendElements(static_cast<const char16_t*>(blacklist.Data()),
163	0	blacklist.Length());
164	0	} else {
165	0	NS_WARNING("Failed to get the 'network.IDN.blacklist_chars' preference");
166	0	}
167	0	// We check IsEmpty() intentionally here because an empty (or just spaces)
168	0	// pref value is likely a mistake/error of some sort.
169	0	if (mUnsafeChars.IsEmpty()) {
170	0	mUnsafeChars.AppendElements(sNetworkIDNBlacklistChars,
171	0	mozilla::ArrayLength(sNetworkIDNBlacklistChars));
172	0	}
173	0	mUnsafeChars.Sort();
174	0	}
175	0	const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval);
176	0	nsString reescapedSpec;
177	0	_retval = NS_EscapeURL(unescapedResult, mUnsafeChars, reescapedSpec);
178	0
179	0	return NS_OK;
180	0	}
181
182		NS_IMETHODIMP
183		nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset,
184		const nsACString& aURIFragment,
185		nsAString& _retval)
186	1.79k	{
187	1.79k	nsAutoCString unescapedSpec;
188	1.79k	NS_UnescapeURL(PromiseFlatCString(aURIFragment),
189	1.79k	esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);
190	1.79k	// leave the URI as it is if it's not UTF-8 and aCharset is not a ASCII
191	1.79k	// superset since converting "http:" with such an encoding is always a bad
192	1.79k	// idea.
193	1.79k	if (!IsUTF8(unescapedSpec) &&
194	1.79k	(aCharset.LowerCaseEqualsLiteral("utf-16") ||
195	1.03k	aCharset.LowerCaseEqualsLiteral("utf-16be") ||
196	1.03k	aCharset.LowerCaseEqualsLiteral("utf-16le") ||
197	1.03k	aCharset.LowerCaseEqualsLiteral("utf-7") ||
198	1.03k	aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){
199	0	CopyASCIItoUTF16(aURIFragment, _retval);
200	0	return NS_OK;
201	0	}
202	1.79k
203	1.79k	nsresult rv = convertURItoUnicode(PromiseFlatCString(aCharset),
204	1.79k	unescapedSpec, _retval);
205	1.79k	// NS_OK_UDEC_MOREINPUT is a success code, so caller can't catch the error
206	1.79k	// if the string ends with a valid (but incomplete) sequence.
207	1.79k	return rv == NS_OK_UDEC_MOREINPUT ? NS_ERROR_UDEC_ILLEGALINPUT : rv;
208	1.79k	}
209
210		//----------------------------------------------------------------------