Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/intl/uconv/nsTextToSubURI.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* This Source Code Form is subject to the terms of the Mozilla Public
3
 * License, v. 2.0. If a copy of the MPL was not distributed with this
4
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
#include "nsString.h"
6
#include "nsITextToSubURI.h"
7
#include "nsEscape.h"
8
#include "nsTextToSubURI.h"
9
#include "nsCRT.h"
10
#include "mozilla/ArrayUtils.h"
11
#include "mozilla/Encoding.h"
12
#include "mozilla/Preferences.h"
13
#include "nsISupportsPrimitives.h"
14
15
using namespace mozilla;
16
17
// Built-in list of UTF-16 code units that UnEscapeURIForUI re-escapes when
// the pref below cannot be read or yields an empty list.
// Fallback value for the pref "network.IDN.blacklist_chars".
18
// UnEscapeURIForUI allows unescaped space; other than that, this is
19
// the same as the default "network.IDN.blacklist_chars" value.
// The two commented-out entries (0x0020 SPACE and 0x3000 IDEOGRAPHIC SPACE)
// are the characters UnEscapeURIForUI also strips from the pref value.
20
static const char16_t sNetworkIDNBlacklistChars[] =
21
{
22
/*0x0020,*/
23
          0x00A0, 0x00BC, 0x00BD, 0x00BE, 0x01C3, 0x02D0, 0x0337,
24
  0x0338, 0x0589, 0x058A, 0x05C3, 0x05F4, 0x0609, 0x060A, 0x066A, 0x06D4,
25
  0x0701, 0x0702, 0x0703, 0x0704, 0x115F, 0x1160, 0x1735, 0x2000,
26
  0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008,
27
  0x2009, 0x200A, 0x200B, 0x200E, 0x200F, 0x2010, 0x2019, 0x2024, 0x2027, 0x2028,
28
  0x2029, 0x202A, 0x202B, 0x202C, 0x202D, 0x202E, 0x202F, 0x2039,
29
  0x203A, 0x2041, 0x2044, 0x2052, 0x205F, 0x2153, 0x2154, 0x2155,
30
  0x2156, 0x2157, 0x2158, 0x2159, 0x215A, 0x215B, 0x215C, 0x215D,
31
  0x215E, 0x215F, 0x2215, 0x2236, 0x23AE, 0x2571, 0x29F6, 0x29F8,
32
  0x2AFB, 0x2AFD, 0x2FF0, 0x2FF1, 0x2FF2, 0x2FF3, 0x2FF4, 0x2FF5,
33
  0x2FF6, 0x2FF7, 0x2FF8, 0x2FF9, 0x2FFA, 0x2FFB, /*0x3000,*/ 0x3002,
34
  0x3014, 0x3015, 0x3033, 0x30A0, 0x3164, 0x321D, 0x321E, 0x33AE, 0x33AF,
35
  0x33C6, 0x33DF, 0xA789, 0xFE14, 0xFE15, 0xFE3F, 0xFE5D, 0xFE5E,
36
  0xFEFF, 0xFF0E, 0xFF0F, 0xFF61, 0xFFA0, 0xFFF9, 0xFFFA, 0xFFFB,
37
  0xFFFC, 0xFFFD
38
};
39
40
// Destructor. The body is empty: no explicit cleanup is performed here.
nsTextToSubURI::~nsTextToSubURI()
41
0
{
42
0
}
43
44
// XPCOM boilerplate for nsTextToSubURI, naming nsITextToSubURI as its
// interface.
// NOTE(review): the macro is defined outside this file; presumably it
// supplies the nsISupports method implementations for the class - confirm.
NS_IMPL_ISUPPORTS(nsTextToSubURI, nsITextToSubURI)
45
46
// Encodes aText into the encoding named by aCharset, then URL-escapes the
// encoded bytes into aOut.
//
// aCharset  encoding label, resolved with Encoding::ForLabelNoReplacement.
// aText     UTF-16 text to encode.
// aOut      receives the escaped result; truncated on every failure path.
//
// Returns NS_OK on success; NS_ERROR_UCONV_NOCONV when the label does not
// resolve to an encoding; the failing code from Encode() when encoding
// fails; NS_ERROR_OUT_OF_MEMORY when NS_Escape reports failure.
NS_IMETHODIMP
47
nsTextToSubURI::ConvertAndEscape(const nsACString& aCharset,
48
                                 const nsAString& aText,
49
                                 nsACString& aOut)
50
0
{
51
0
  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
52
0
  if (!encoding) {  // label did not resolve to an encoding
53
0
    aOut.Truncate();
54
0
    return NS_ERROR_UCONV_NOCONV;
55
0
  }
56
0
  nsresult rv;
57
0
  const Encoding* actualEncoding;
58
0
  nsAutoCString intermediate;  // encoded (not yet escaped) bytes
59
0
  Tie(rv, actualEncoding) = encoding->Encode(aText, intermediate);
60
0
  Unused << actualEncoding;  // second result of Encode() is not needed here
61
0
  if (NS_FAILED(rv)) {
62
0
    aOut.Truncate();
63
0
    return rv;
64
0
  }
65
0
  bool ok = NS_Escape(intermediate, aOut, url_XPAlphas);
66
0
  if (!ok) {  // a false result is reported to the caller as out-of-memory
67
0
    aOut.Truncate();
68
0
    return NS_ERROR_OUT_OF_MEMORY;
69
0
  }
70
0
  return NS_OK;
71
0
}
72
73
// URL-unescapes aText and decodes the resulting bytes, using the encoding
// named by aCharset, into aOut.
//
// aCharset  encoding label, resolved with Encoding::ForLabelNoReplacement.
// aText     escaped input bytes.
// aOut      receives the decoded UTF-16 text; explicitly truncated only on
//           the unknown-label path.
//
// Returns NS_ERROR_UCONV_NOCONV when the label does not resolve to an
// encoding. Otherwise every success code from the decoder is collapsed to
// NS_OK, and failure codes are returned unchanged.
NS_IMETHODIMP
74
nsTextToSubURI::UnEscapeAndConvert(const nsACString& aCharset,
75
                                   const nsACString& aText,
76
                                   nsAString& aOut)
77
0
{
78
0
  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
79
0
  if (!encoding) {
80
0
    aOut.Truncate();
81
0
    return NS_ERROR_UCONV_NOCONV;
82
0
  }
83
0
  nsAutoCString unescaped(aText);  // local copy so aText itself is untouched
84
0
  NS_UnescapeURL(unescaped);  // presumably unescapes the copy in place
85
0
  auto rv = encoding->DecodeWithoutBOMHandling(unescaped, aOut);
86
0
  if (NS_SUCCEEDED(rv)) {
87
0
    return NS_OK;  // hide any non-NS_OK success code from the caller
88
0
  }
89
0
  return rv;
90
0
}
91
92
// Returns true when charset names one of the encodings this file treats as
// stateful: any label starting with "ISO-2022-", or exactly "UTF-7" or
// "HZ-GB-2312". All comparisons are case-insensitive. Returns false for
// every other label.
static bool statefulCharset(const char *charset)
93
1.79k
{
94
1.79k
  // HZ, UTF-7 and the CN and KR ISO-2022 variants are no longer in
95
1.79k
  // mozilla-central but keeping them here just in case for the benefit of
96
1.79k
  // comm-central.
97
1.79k
  // sizeof(...)-1 is the prefix length without the terminating NUL, so only
  // the leading "ISO-2022-" part of charset is compared.
  if (!nsCRT::strncasecmp(charset, "ISO-2022-", sizeof("ISO-2022-")-1) ||
98
1.79k
      !nsCRT::strcasecmp(charset, "UTF-7") ||
99
1.79k
      !nsCRT::strcasecmp(charset, "HZ-GB-2312"))
100
0
    return true;
101
1.79k
102
1.79k
  return false;
103
1.79k
}
104
105
// Converts the (already unescaped) bytes in aURI to UTF-16 in aOut.
//
// For charsets that statefulCharset() does not flag, pure-ASCII input and
// valid UTF-8 input are copied straight to UTF-16 without consulting
// aCharset. All other input is decoded with the encoding named by aCharset.
//
// Returns NS_OK from the two fast paths; NS_ERROR_INVALID_ARG (via
// NS_ENSURE_FALSE) when aCharset is empty; NS_ERROR_UCONV_NOCONV, with aOut
// truncated, when the label does not resolve to an encoding; otherwise the
// result of DecodeWithoutBOMHandlingAndWithoutReplacement.
nsresult
106
nsTextToSubURI::convertURItoUnicode(const nsCString& aCharset,
107
                                    const nsCString& aURI,
108
                                    nsAString& aOut)
109
1.79k
{
110
1.79k
  // Check for a 7-bit (stateful) encoding: with one of those, the data may
  // not be ASCII after we decode, so the ASCII/UTF-8 shortcuts are skipped.
111
1.79k
  bool isStatefulCharset = statefulCharset(aCharset.get());
112
1.79k
113
1.79k
  if (!isStatefulCharset) {
114
1.79k
    if (IsASCII(aURI)) {
115
496
      CopyASCIItoUTF16(aURI, aOut);
116
496
      return NS_OK;
117
496
    }
118
1.30k
    if (IsUTF8(aURI)) {
119
272
      CopyUTF8toUTF16(aURI, aOut);
120
272
      return NS_OK;
121
272
    }
122
1.03k
  }
123
1.03k
124
1.03k
  // empty charset could indicate UTF-8, but aURI turns out not to be UTF-8.
125
1.03k
  NS_ENSURE_FALSE(aCharset.IsEmpty(), NS_ERROR_INVALID_ARG);
126
1.03k
127
1.03k
  auto encoding = Encoding::ForLabelNoReplacement(aCharset);
128
0
  if (!encoding) {
129
0
    aOut.Truncate();
130
0
    return NS_ERROR_UCONV_NOCONV;
131
0
  }
132
0
  return encoding->DecodeWithoutBOMHandlingAndWithoutReplacement(aURI, aOut);
133
0
}
134
135
// Produces a display form of aURIFragment: unescapes it, converts it to
// UTF-16 through convertURItoUnicode, then re-escapes the characters listed
// in mUnsafeChars.
//
// aCharset      charset label passed on to convertURItoUnicode.
// aURIFragment  escaped URI text.
// _retval       receives the result. If the conversion does not return
//               exactly NS_OK, the original (still escaped) fragment is
//               used as the text to re-escape.
//
// mUnsafeChars is filled the first time it is found empty: from the pref
// "network.IDN.blacklist_chars" with SPACE and U+3000 removed, or from
// sNetworkIDNBlacklistChars when that leaves it empty. It is then sorted.
//
// Always returns NS_OK.
NS_IMETHODIMP  nsTextToSubURI::UnEscapeURIForUI(const nsACString & aCharset,
136
                                                const nsACString &aURIFragment,
137
                                                nsAString &_retval)
138
0
{
139
0
  nsAutoCString unescapedSpec;
140
0
  // skip control octets (0x00 - 0x1f and 0x7f) when unescaping
141
0
  NS_UnescapeURL(PromiseFlatCString(aURIFragment),
142
0
                 esc_SkipControl | esc_AlwaysCopy, unescapedSpec);
143
0
144
0
  // in case of failure, return escaped URI
145
0
  // Test for != NS_OK rather than NS_FAILED, because incomplete multi-byte
146
0
  // sequences are also considered failure in this context
147
0
  if (convertURItoUnicode(
148
0
                PromiseFlatCString(aCharset), unescapedSpec, _retval)
149
0
      != NS_OK) {
150
0
    // assume UTF-8 instead of ASCII  because hostname (IDN) may be in UTF-8
151
0
    CopyUTF8toUTF16(aURIFragment, _retval);
152
0
  }
153
0
154
0
  // If there are any characters that are unsafe for URIs, reescape those.
155
0
  if (mUnsafeChars.IsEmpty()) {  // list not built yet: populate it now
156
0
    nsAutoString blacklist;
157
0
    nsresult rv = mozilla::Preferences::GetString("network.IDN.blacklist_chars",
158
0
                                                  blacklist);
159
0
    if (NS_SUCCEEDED(rv)) {
160
0
      // we allow SPACE and IDEOGRAPHIC SPACE in this method
161
0
      blacklist.StripChars(u" \u3000");
162
0
      mUnsafeChars.AppendElements(static_cast<const char16_t*>(blacklist.Data()),
163
0
                                  blacklist.Length());
164
0
    } else {
165
0
      NS_WARNING("Failed to get the 'network.IDN.blacklist_chars' preference");
166
0
    }
167
0
    // We check IsEmpty() intentionally here because an empty (or just spaces)
168
0
    // pref value is likely a mistake/error of some sort.
169
0
    if (mUnsafeChars.IsEmpty()) {
170
0
      mUnsafeChars.AppendElements(sNetworkIDNBlacklistChars,
171
0
                                  mozilla::ArrayLength(sNetworkIDNBlacklistChars));
172
0
    }
173
0
    // NOTE(review): presumably NS_EscapeURL expects a sorted list - confirm.
    mUnsafeChars.Sort();
174
0
  }
175
0
  const nsPromiseFlatString& unescapedResult = PromiseFlatString(_retval);
176
0
  nsString reescapedSpec;
177
0
  // The value returned by NS_EscapeURL becomes the final result.
  _retval = NS_EscapeURL(unescapedResult, mUnsafeChars, reescapedSpec);
178
0
179
0
  return NS_OK;
180
0
}
181
182
// Unescapes aURIFragment with the esc_OnlyNonASCII flag and converts the
// result to UTF-16 through convertURItoUnicode.
//
// aCharset      charset label used for the conversion.
// aURIFragment  escaped URI text.
// _retval       receives the converted text, or the untouched fragment when
//               the early-out below is taken.
//
// Returns NS_OK when the fragment is passed through unchanged;
// NS_ERROR_UDEC_ILLEGALINPUT when the conversion yields
// NS_OK_UDEC_MOREINPUT; otherwise whatever convertURItoUnicode returns.
NS_IMETHODIMP
183
nsTextToSubURI::UnEscapeNonAsciiURI(const nsACString& aCharset,
184
                                    const nsACString& aURIFragment,
185
                                    nsAString& _retval)
186
1.79k
{
187
1.79k
  nsAutoCString unescapedSpec;
188
1.79k
  NS_UnescapeURL(PromiseFlatCString(aURIFragment),
189
1.79k
                 esc_AlwaysCopy | esc_OnlyNonASCII, unescapedSpec);
190
1.79k
  // leave the URI as it is if it's not UTF-8 and aCharset is not an ASCII
191
1.79k
  // superset since converting "http:" with such an encoding is always a bad
192
1.79k
  // idea.
193
1.79k
  if (!IsUTF8(unescapedSpec) &&
194
1.79k
      (aCharset.LowerCaseEqualsLiteral("utf-16") ||
195
1.03k
       aCharset.LowerCaseEqualsLiteral("utf-16be") ||
196
1.03k
       aCharset.LowerCaseEqualsLiteral("utf-16le") ||
197
1.03k
       aCharset.LowerCaseEqualsLiteral("utf-7") ||
198
1.03k
       aCharset.LowerCaseEqualsLiteral("x-imap4-modified-utf7"))){
199
0
    CopyASCIItoUTF16(aURIFragment, _retval);  // original, still-escaped text
200
0
    return NS_OK;
201
0
  }
202
1.79k
203
1.79k
  nsresult rv = convertURItoUnicode(PromiseFlatCString(aCharset),
204
1.79k
                                    unescapedSpec, _retval);
205
1.79k
  // NS_OK_UDEC_MOREINPUT is a success code, so caller can't catch the error
206
1.79k
  // if the string ends with a valid (but incomplete) sequence.
207
1.79k
  return rv == NS_OK_UDEC_MOREINPUT ? NS_ERROR_UDEC_ILLEGALINPUT : rv;
208
1.79k
}
209
210
//----------------------------------------------------------------------