Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/browser/components/feeds/nsFeedSniffer.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* This Source Code Form is subject to the terms of the Mozilla Public
3
 * License, v. 2.0. If a copy of the MPL was not distributed with this
4
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6
#include "nsFeedSniffer.h"
7
8
#include "mozilla/Preferences.h"
9
#include "mozilla/Unused.h"
10
11
#include "nsNetCID.h"
12
#include "nsXPCOM.h"
13
#include "nsCOMPtr.h"
14
#include "nsStringStream.h"
15
16
#include "nsBrowserCompsCID.h"
17
18
#include "nsICategoryManager.h"
19
#include "nsIServiceManager.h"
20
#include "nsComponentManagerUtils.h"
21
#include "nsServiceManagerUtils.h"
22
23
#include "nsIStreamConverterService.h"
24
#include "nsIStreamConverter.h"
25
26
#include "nsIStreamListener.h"
27
28
#include "nsIHttpChannel.h"
29
#include "nsIMIMEHeaderParam.h"
30
31
#include "nsMimeTypes.h"
32
#include "nsIURI.h"
33
#include <algorithm>
34
35
0
// Content-Type values that GetMIMETypeFromContent accepts as a feed without
// inspecting the payload.
#define TYPE_ATOM "application/atom+xml"
36
0
#define TYPE_RSS "application/rss+xml"
37
0
// Type written to sniffedType when the response is treated as a feed.
#define TYPE_MAYBE_FEED "application/vnd.mozilla.maybe.feed"
38
39
// Namespace URIs that must both appear in the sniffed data before an
// <rdf:RDF root element is accepted as RSS 1.0.
#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
40
#define NS_RSS "http://purl.org/rss/1.0/"
41
42
0
// Upper bound on the number of bytes examined while sniffing.
#define MAX_BYTES 512u
43
44
// Set to true once sFramingAllowed has been registered as a pref var cache.
static bool sFramePrefCached = false;
45
// Bound to the "browser.feeds.unsafelyFrameFeeds" pref through
// Preferences::AddBoolVarCache; while false, only top-level loads are sniffed.
static bool sFramingAllowed = false;
46
47
using namespace mozilla;
48
49
// Interfaces listed for nsFeedSniffer's nsISupports implementation.
NS_IMPL_ISUPPORTS(nsFeedSniffer,
50
                  nsIContentSniffer,
51
                  nsIStreamListener,
52
                  nsIRequestObserver)
53
54
/**
 * Feeds the sniffed bytes through the stream converter that matches the
 * response's Content-Encoding header, if there is one. The converter is
 * created with this object as its listener, so any decoded output is
 * collected in mDecodedData by OnDataAvailable.
 *
 * mDecodedData is reset on entry. It stays empty when the response carries no
 * Content-Encoding header or when the converter service cannot be obtained;
 * both cases return NS_OK.
 *
 * @param   request
 *          The request being sniffed; must QI to nsIHttpChannel.
 * @param   data
 *          The raw (possibly encoded) bytes.
 * @param   length
 *          Number of bytes available at data.
 * @returns NS_ERROR_NO_INTERFACE if request is not an HTTP channel,
 *          NS_ERROR_FAILURE if the string input stream cannot be created,
 *          the failing code of converter creation, SetData or
 *          OnDataAvailable, and a success code otherwise.
 */
nsresult
nsFeedSniffer::ConvertEncodedData(nsIRequest* request,
                                  const uint8_t* data,
                                  uint32_t length)
{
  mDecodedData = "";

  nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(request));
  if (!httpChannel) {
    return NS_ERROR_NO_INTERFACE;
  }

  // A missing header simply leaves the string empty, so the result of the
  // lookup itself is deliberately ignored.
  nsAutoCString contentEncoding;
  mozilla::Unused << httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Encoding"),
                                                    contentEncoding);
  if (contentEncoding.IsEmpty()) {
    return NS_OK;
  }

  nsCOMPtr<nsIStreamConverterService> converterService(do_GetService(NS_STREAMCONVERTERSERVICE_CONTRACTID));
  if (!converterService) {
    return NS_OK;
  }

  ToLowerCase(contentEncoding);

  nsCOMPtr<nsIStreamListener> converter;
  nsresult rv = converterService->AsyncConvertData(contentEncoding.get(),
                                                   "uncompressed", this, nullptr,
                                                   getter_AddRefs(converter));
  NS_ENSURE_SUCCESS(rv, rv);

  converter->OnStartRequest(request, nullptr);

  // Wrap the raw bytes in a stream so they can be handed to the converter.
  nsCOMPtr<nsIStringInputStream> rawStream =
    do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID);
  if (!rawStream) {
    return NS_ERROR_FAILURE;
  }

  rv = rawStream->SetData(reinterpret_cast<const char*>(data), length);
  NS_ENSURE_SUCCESS(rv, rv);

  rv = converter->OnDataAvailable(request, nullptr, rawStream, 0, length);
  NS_ENSURE_SUCCESS(rv, rv);

  converter->OnStopRequest(request, nullptr, NS_OK);
  return rv;
}
98
99
template<int N>
100
static bool
101
StringBeginsWithLowercaseLiteral(nsAString& aString,
102
                                 const char (&aSubstring)[N])
103
{
104
  return StringHead(aString, N).LowerCaseEqualsLiteral(aSubstring);
105
}
106
107
bool
108
HasAttachmentDisposition(nsIHttpChannel* httpChannel)
109
0
{
110
0
  if (!httpChannel)
111
0
    return false;
112
0
113
0
  uint32_t disp;
114
0
  nsresult rv = httpChannel->GetContentDisposition(&disp);
115
0
116
0
  if (NS_SUCCEEDED(rv) && disp == nsIChannel::DISPOSITION_ATTACHMENT)
117
0
    return true;
118
0
119
0
  return false;
120
0
}
121
122
/**
 * @return the first occurrence of a character within a string buffer,
 *         or nullptr if not found (including when the range is empty or
 *         inverted)
 */
static const char*
FindChar(char c, const char *begin, const char *end)
{
  // An empty or inverted range cannot contain a match; bailing out here also
  // keeps std::find from being handed an invalid range.
  if (begin >= end)
    return nullptr;

  const char* hit = std::find(begin, end, c);
  return hit != end ? hit : nullptr;
}

/**
 *
 * Determine if a substring is the "documentElement" in the document.
 *
 * All of our sniffed substrings: <rss, <feed, <rdf:RDF must be the "document"
 * element within the XML DOM, i.e. the root container element. Otherwise,
 * it's possible that someone embedded one of these tags inside a document of
 * another type, e.g. a HTML document, and we don't want to show the preview
 * page if the document isn't actually a feed.
 *
 * Only processing instructions (<? ... >), comments (<!-- ... -->) and other
 * <! ... > declarations may precede the substring. Comments are skipped as a
 * unit up to their closing "-->", so a '<' or '>' inside a comment does not
 * affect the result.
 *
 * @param   start
 *          The beginning of the data being sniffed
 * @param   end
 *          The end of the data being sniffed, right before the substring that
 *          was found.
 * @returns true if the found substring is the documentElement, false
 *          otherwise (including when a prologue node is still open at end,
 *          which means the substring lies inside that node).
 */
static bool
IsDocumentElement(const char *start, const char* end)
{
  static const char kCommentClose[] = "-->";
  const char* const commentCloseEnd = kCommentClose + 3;

  // For every tag in the buffer, check to see if it's a PI, Doctype or
  // comment, our desired substring or something invalid.
  while ( (start = FindChar('<', start, end)) ) {
    ++start;
    if (start >= end)
      return false;

    // Check to see if the character following the '<' is either '?' or '!'
    // (processing instruction or doctype or comment)... these are valid nodes
    // to have in the prologue.
    if (*start != '?' && *start != '!')
      return false;

    // A comment runs until "-->", not until the first '>'. Skip it as a whole
    // so that markup-like text inside the comment is not mistaken for real
    // nodes, and so that indicator substrings embedded in a comment
    // (<!-- <rdf:RDF .. > -->) are still rejected.
    if (*start == '!' && end - start >= 3 &&
        start[1] == '-' && start[2] == '-') {
      start += 3;
      const char* close =
        std::search(start, end, kCommentClose, commentCloseEnd);
      if (close == end)
        return false;

      start = close + 3;
      continue;
    }

    // Any other prologue node: advance the iterator until the '>'.
    start = FindChar('>', start, end);
    if (!start)
      return false;

    ++start;
  }
  return true;
}
181
182
/**
183
 * Determines whether or not a string exists as the root element in an XML data
184
 * string buffer.
185
 * @param   dataString
186
 *          The data being sniffed
187
 * @param   substring
188
 *          The substring being tested for existence and root-ness.
189
 * @returns true if the substring exists and is the documentElement, false
190
 *          otherwise.
191
 */
192
static bool
193
ContainsTopLevelSubstring(nsACString& dataString, const char *substring)
194
0
{
195
0
  nsACString::const_iterator start, end;
196
0
  dataString.BeginReading(start);
197
0
  dataString.EndReading(end);
198
0
199
0
  if (!FindInReadable(nsCString(substring), start, end)){
200
0
    return false;
201
0
  }
202
0
203
0
  auto offset = start.get() - dataString.Data();
204
0
205
0
  const char *begin = dataString.BeginReading();
206
0
207
0
  // Only do the validation when we find the substring.
208
0
  return IsDocumentElement(begin, begin + offset);
209
0
}
210
211
/**
 * Decides whether the response on an HTTP channel should be treated as a
 * feed.
 *
 * sniffedType is set to TYPE_MAYBE_FEED when the response is considered a
 * feed and is truncated (left empty) in every other non-error case.
 * A response is only considered when all of the following hold:
 *  - the request method is GET;
 *  - unless the "browser.feeds.unsafelyFrameFeeds" pref allows framing, the
 *    load is for the top-level window;
 *  - the original URI is available and is not a view-source URI;
 *  - the response is not served with an attachment disposition.
 * A Content-Type of TYPE_RSS or TYPE_ATOM is trusted without looking at the
 * data. Otherwise, for text/html, application/octet-stream and types
 * containing "xml", at most MAX_BYTES of the (decoded) data are scanned for
 * an <rss, <feed or <rdf:RDF root element.
 *
 * @param   request
 *          The request being sniffed; must QI to nsIHttpChannel.
 * @param   data
 *          The first bytes of the response body.
 * @param   length
 *          Number of bytes available at data.
 * @param   sniffedType
 *          Out: TYPE_MAYBE_FEED or the empty string.
 * @returns NS_ERROR_NO_INTERFACE if request is not an HTTP channel, the
 *          failure code of ConvertEncodedData if decoding fails, NS_OK
 *          otherwise.
 */
NS_IMETHODIMP
nsFeedSniffer::GetMIMETypeFromContent(nsIRequest* request,
                                      const uint8_t* data,
                                      uint32_t length,
                                      nsACString& sniffedType)
{
  nsCOMPtr<nsIHttpChannel> channel(do_QueryInterface(request));
  if (!channel)
    return NS_ERROR_NO_INTERFACE;

  // Check that this is a GET request, since you can't subscribe to a POST...
  nsAutoCString method;
  mozilla::Unused << channel->GetRequestMethod(method);
  if (!method.EqualsLiteral("GET")) {
    sniffedType.Truncate();
    return NS_OK;
  }

  if (!sFramePrefCached) {
    sFramePrefCached = true;
    Preferences::AddBoolVarCache(&sFramingAllowed,
                                 "browser.feeds.unsafelyFrameFeeds");
  }

  if (!sFramingAllowed) {
    // Check that we're the toplevel frame:
    nsCOMPtr<nsILoadInfo> loadInfo = channel->GetLoadInfo();
    if (!loadInfo) {
      sniffedType.Truncate();
      return NS_OK;
    }
    auto frameID = loadInfo->GetFrameOuterWindowID();
    if (!frameID) {
      frameID = loadInfo->GetOuterWindowID();
    }
    if (loadInfo->GetTopOuterWindowID() != frameID) {
      sniffedType.Truncate();
      return NS_OK;
    }
  }

  // We need to find out if this is a load of a view-source document. In this
  // case we do not want to override the content type, since the source display
  // does not need to be converted from feed format to XUL. More importantly,
  // we don't want to change the content type from something
  // nsContentDLF::CreateInstance knows about (e.g. application/xml, text/html
  // etc) to something that only the application fe knows about (maybe.feed)
  // thus deactivating syntax highlighting.
  nsCOMPtr<nsIURI> originalURI;
  channel->GetOriginalURI(getter_AddRefs(originalURI));
  if (!originalURI) {
    // Without the original URI we cannot rule out a view-source load, so
    // leave the content type alone rather than dereference a null pointer.
    sniffedType.Truncate();
    return NS_OK;
  }

  nsAutoCString scheme;
  originalURI->GetScheme(scheme);
  if (scheme.EqualsLiteral("view-source")) {
    sniffedType.Truncate();
    return NS_OK;
  }

  // Check the Content-Type to see if it is set correctly. If it is set to
  // something specific that we think is a reliable indication of a feed, don't
  // bother sniffing since we assume the site maintainer knows what they're
  // doing.
  nsAutoCString contentType;
  channel->GetContentType(contentType);
  bool noSniff = contentType.EqualsLiteral(TYPE_RSS) ||
                 contentType.EqualsLiteral(TYPE_ATOM);

  if (noSniff) {
    // check for an attachment after we have a likely feed.
    if (HasAttachmentDisposition(channel)) {
      sniffedType.Truncate();
      return NS_OK;
    }

    // set the feed header as a response header, since we have good metadata
    // telling us that the feed is supposed to be RSS or Atom
    mozilla::DebugOnly<nsresult> rv =
      channel->SetResponseHeader(NS_LITERAL_CSTRING("X-Moz-Is-Feed"),
                                 NS_LITERAL_CSTRING("1"), false);
    MOZ_ASSERT(NS_SUCCEEDED(rv));
    sniffedType.AssignLiteral(TYPE_MAYBE_FEED);
    return NS_OK;
  }

  // Don't sniff arbitrary types.  Limit sniffing to situations that
  // we think can reasonably arise.
  if (!contentType.EqualsLiteral(TEXT_HTML) &&
      !contentType.EqualsLiteral(APPLICATION_OCTET_STREAM) &&
      // Same criterion as XMLHttpRequest.  Should we be checking for "+xml"
      // and check for text/xml and application/xml by hand instead?
      contentType.Find("xml") == -1) {
    sniffedType.Truncate();
    return NS_OK;
  }

  // Now we need to potentially decompress data served with
  // Content-Encoding: gzip
  nsresult rv = ConvertEncodedData(request, data, length);
  if (NS_FAILED(rv))
    return rv;

  // We cap the number of bytes to scan at MAX_BYTES to prevent picking up
  // false positives by accidentally reading document content, e.g. a "how to
  // make a feed" page.
  const char* testData;
  if (mDecodedData.IsEmpty()) {
    // Nothing was decoded, so sniff the bytes exactly as they were received.
    testData = reinterpret_cast<const char*>(data);
    length = std::min(length, MAX_BYTES);
  } else {
    testData = mDecodedData.get();
    length = std::min(mDecodedData.Length(), MAX_BYTES);
  }

  // The strategy here is based on that described in:
  // http://blogs.msdn.com/rssteam/articles/PublishersGuide.aspx
  // for interoperability purposes.

  // Thus begins the actual sniffing.
  nsDependentCSubstring dataString(testData, length);

  // RSS 0.91/0.92/2.0
  bool isFeed = ContainsTopLevelSubstring(dataString, "<rss");

  // Atom 1.0
  if (!isFeed)
    isFeed = ContainsTopLevelSubstring(dataString, "<feed");

  // RSS 1.0
  if (!isFeed) {
    bool foundNS_RDF = FindInReadable(NS_LITERAL_CSTRING(NS_RDF), dataString);
    bool foundNS_RSS = FindInReadable(NS_LITERAL_CSTRING(NS_RSS), dataString);
    isFeed = ContainsTopLevelSubstring(dataString, "<rdf:RDF") &&
      foundNS_RDF && foundNS_RSS;
  }

  // If we sniffed a feed, coerce our internal type
  if (isFeed && !HasAttachmentDisposition(channel))
    sniffedType.AssignLiteral(TYPE_MAYBE_FEED);
  else
    sniffedType.Truncate();
  return NS_OK;
}
355
356
/**
 * Start notification for the converted stream. Nothing needs to be set up
 * here, so this unconditionally reports success.
 */
NS_IMETHODIMP
nsFeedSniffer::OnStartRequest(nsIRequest* request, nsISupports* context)
{
  // Intentionally a no-op.
  return NS_OK;
}
361
362
/**
 * Segment callback handed to ReadSegments: appends one segment to the
 * nsCString passed as the closure and reports the whole segment as consumed.
 *
 * @param   inputStream
 *          Unused.
 * @param   closure
 *          Pointer to the nsCString that collects the data.
 * @param   rawSegment
 *          Start of the segment.
 * @param   toOffset
 *          Unused.
 * @param   count
 *          Number of bytes in the segment.
 * @param   writeCount
 *          Out: set to count.
 * @returns NS_OK.
 */
nsresult
nsFeedSniffer::AppendSegmentToString(nsIInputStream* inputStream,
                                     void* closure,
                                     const char* rawSegment,
                                     uint32_t toOffset,
                                     uint32_t count,
                                     uint32_t* writeCount)
{
  static_cast<nsCString*>(closure)->Append(rawSegment, count);

  // Every byte of the segment was taken.
  *writeCount = count;
  return NS_OK;
}
375
376
/**
 * Data notification for the converted stream: reads up to count bytes from
 * the stream and appends them to mDecodedData via AppendSegmentToString.
 *
 * @returns the result of ReadSegments.
 */
NS_IMETHODIMP
nsFeedSniffer::OnDataAvailable(nsIRequest* request, nsISupports* context,
                               nsIInputStream* stream, uint64_t offset,
                               uint32_t count)
{
  // Required by ReadSegments; the value itself is not used afterwards.
  uint32_t bytesRead;
  const nsresult readResult =
    stream->ReadSegments(AppendSegmentToString, &mDecodedData, count,
                         &bytesRead);
  return readResult;
}
385
386
/**
 * Stop notification for the converted stream. There is nothing to tear down,
 * so this unconditionally reports success regardless of status.
 */
NS_IMETHODIMP
nsFeedSniffer::OnStopRequest(nsIRequest* request, nsISupports* context,
                             nsresult status)
{
  // Intentionally a no-op.
  return NS_OK;
}