/src/mozilla-central/browser/components/feeds/nsFeedSniffer.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | #include "nsFeedSniffer.h" |
7 | | |
8 | | #include "mozilla/Preferences.h" |
9 | | #include "mozilla/Unused.h" |
10 | | |
11 | | #include "nsNetCID.h" |
12 | | #include "nsXPCOM.h" |
13 | | #include "nsCOMPtr.h" |
14 | | #include "nsStringStream.h" |
15 | | |
16 | | #include "nsBrowserCompsCID.h" |
17 | | |
18 | | #include "nsICategoryManager.h" |
19 | | #include "nsIServiceManager.h" |
20 | | #include "nsComponentManagerUtils.h" |
21 | | #include "nsServiceManagerUtils.h" |
22 | | |
23 | | #include "nsIStreamConverterService.h" |
24 | | #include "nsIStreamConverter.h" |
25 | | |
26 | | #include "nsIStreamListener.h" |
27 | | |
28 | | #include "nsIHttpChannel.h" |
29 | | #include "nsIMIMEHeaderParam.h" |
30 | | |
31 | | #include "nsMimeTypes.h" |
32 | | #include "nsIURI.h" |
33 | | #include <algorithm> |
34 | | |
// Content types that GetMIMETypeFromContent treats as a reliable indication
// of a feed (no content sniffing is performed for them).
#define TYPE_ATOM "application/atom+xml"
#define TYPE_RSS "application/rss+xml"
// Internal type reported through |sniffedType| when a feed is detected.
#define TYPE_MAYBE_FEED "application/vnd.mozilla.maybe.feed"

// Namespace URIs that must both appear in the sniffed data for an <rdf:RDF
// root element to be accepted as an RSS 1.0 feed.
#define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
#define NS_RSS "http://purl.org/rss/1.0/"

// Upper bound on the number of bytes GetMIMETypeFromContent scans.
#define MAX_BYTES 512u

// Set once the "browser.feeds.unsafelyFrameFeeds" bool var cache has been
// registered, so that the registration happens only on the first sniff.
static bool sFramePrefCached = false;
// Cached value of the "browser.feeds.unsafelyFrameFeeds" preference; when
// false, only loads in the top-level frame are sniffed.
static bool sFramingAllowed = false;

using namespace mozilla;

// nsFeedSniffer is both the content sniffer and the stream listener that
// receives the decoded bytes from the stream converter.
NS_IMPL_ISUPPORTS(nsFeedSniffer,
                  nsIContentSniffer,
                  nsIStreamListener,
                  nsIRequestObserver)
53 | | |
54 | | nsresult |
55 | | nsFeedSniffer::ConvertEncodedData(nsIRequest* request, |
56 | | const uint8_t* data, |
57 | | uint32_t length) |
58 | 0 | { |
59 | 0 | nsresult rv = NS_OK; |
60 | 0 |
|
61 | 0 | mDecodedData = ""; |
62 | 0 | nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(request)); |
63 | 0 | if (!httpChannel) |
64 | 0 | return NS_ERROR_NO_INTERFACE; |
65 | 0 | |
66 | 0 | nsAutoCString contentEncoding; |
67 | 0 | mozilla::Unused << httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Encoding"), |
68 | 0 | contentEncoding); |
69 | 0 | if (!contentEncoding.IsEmpty()) { |
70 | 0 | nsCOMPtr<nsIStreamConverterService> converterService(do_GetService(NS_STREAMCONVERTERSERVICE_CONTRACTID)); |
71 | 0 | if (converterService) { |
72 | 0 | ToLowerCase(contentEncoding); |
73 | 0 |
|
74 | 0 | nsCOMPtr<nsIStreamListener> converter; |
75 | 0 | rv = converterService->AsyncConvertData(contentEncoding.get(), |
76 | 0 | "uncompressed", this, nullptr, |
77 | 0 | getter_AddRefs(converter)); |
78 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
79 | 0 |
|
80 | 0 | converter->OnStartRequest(request, nullptr); |
81 | 0 |
|
82 | 0 | nsCOMPtr<nsIStringInputStream> rawStream = |
83 | 0 | do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID); |
84 | 0 | if (!rawStream) |
85 | 0 | return NS_ERROR_FAILURE; |
86 | 0 | |
87 | 0 | rv = rawStream->SetData((const char*)data, length); |
88 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
89 | 0 |
|
90 | 0 | rv = converter->OnDataAvailable(request, nullptr, rawStream, 0, length); |
91 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
92 | 0 |
|
93 | 0 | converter->OnStopRequest(request, nullptr, NS_OK); |
94 | 0 | } |
95 | 0 | } |
96 | 0 | return rv; |
97 | 0 | } |
98 | | |
99 | | template<int N> |
100 | | static bool |
101 | | StringBeginsWithLowercaseLiteral(nsAString& aString, |
102 | | const char (&aSubstring)[N]) |
103 | | { |
104 | | return StringHead(aString, N).LowerCaseEqualsLiteral(aSubstring); |
105 | | } |
106 | | |
107 | | bool |
108 | | HasAttachmentDisposition(nsIHttpChannel* httpChannel) |
109 | 0 | { |
110 | 0 | if (!httpChannel) |
111 | 0 | return false; |
112 | 0 | |
113 | 0 | uint32_t disp; |
114 | 0 | nsresult rv = httpChannel->GetContentDisposition(&disp); |
115 | 0 |
|
116 | 0 | if (NS_SUCCEEDED(rv) && disp == nsIChannel::DISPOSITION_ATTACHMENT) |
117 | 0 | return true; |
118 | 0 | |
119 | 0 | return false; |
120 | 0 | } |
121 | | |
/**
 * Linear search for a character in the half-open range [begin, end).
 *
 * @param c
 *        The character to look for.
 * @param begin
 *        The first position to examine.
 * @param end
 *        One past the last position to examine.
 * @return a pointer to the first occurrence of |c| in the range, or nullptr
 *         if the range is empty or does not contain |c|.
 */
static const char*
FindChar(char c, const char *begin, const char *end)
{
  const char* cursor = begin;
  while (cursor < end) {
    if (*cursor == c) {
      return cursor;
    }
    ++cursor;
  }
  return nullptr;
}
135 | | |
136 | | /** |
137 | | * |
138 | | * Determine if a substring is the "documentElement" in the document. |
139 | | * |
140 | | * All of our sniffed substrings: <rss, <feed, <rdf:RDF must be the "document" |
141 | | * element within the XML DOM, i.e. the root container element. Otherwise, |
142 | | * it's possible that someone embedded one of these tags inside a document of |
143 | | * another type, e.g. a HTML document, and we don't want to show the preview |
144 | | * page if the document isn't actually a feed. |
145 | | * |
146 | | * @param start |
147 | | * The beginning of the data being sniffed |
148 | | * @param end |
149 | | * The end of the data being sniffed, right before the substring that |
150 | | * was found. |
151 | | * @returns true if the found substring is the documentElement, false |
152 | | * otherwise. |
153 | | */ |
154 | | static bool |
155 | | IsDocumentElement(const char *start, const char* end) |
156 | 0 | { |
157 | 0 | // For every tag in the buffer, check to see if it's a PI, Doctype or |
158 | 0 | // comment, our desired substring or something invalid. |
159 | 0 | while ( (start = FindChar('<', start, end)) ) { |
160 | 0 | ++start; |
161 | 0 | if (start >= end) |
162 | 0 | return false; |
163 | 0 | |
164 | 0 | // Check to see if the character following the '<' is either '?' or '!' |
165 | 0 | // (processing instruction or doctype or comment)... these are valid nodes |
166 | 0 | // to have in the prologue. |
167 | 0 | if (*start != '?' && *start != '!') |
168 | 0 | return false; |
169 | 0 | |
170 | 0 | // Now advance the iterator until the '>' (We do this because we don't want |
171 | 0 | // to sniff indicator substrings that are embedded within other nodes, e.g. |
172 | 0 | // comments: <!-- <rdf:RDF .. > --> |
173 | 0 | start = FindChar('>', start, end); |
174 | 0 | if (!start) |
175 | 0 | return false; |
176 | 0 | |
177 | 0 | ++start; |
178 | 0 | } |
179 | 0 | return true; |
180 | 0 | } |
181 | | |
182 | | /** |
183 | | * Determines whether or not a string exists as the root element in an XML data |
184 | | * string buffer. |
185 | | * @param dataString |
186 | | * The data being sniffed |
187 | | * @param substring |
188 | | * The substring being tested for existence and root-ness. |
189 | | * @returns true if the substring exists and is the documentElement, false |
190 | | * otherwise. |
191 | | */ |
192 | | static bool |
193 | | ContainsTopLevelSubstring(nsACString& dataString, const char *substring) |
194 | 0 | { |
195 | 0 | nsACString::const_iterator start, end; |
196 | 0 | dataString.BeginReading(start); |
197 | 0 | dataString.EndReading(end); |
198 | 0 |
|
199 | 0 | if (!FindInReadable(nsCString(substring), start, end)){ |
200 | 0 | return false; |
201 | 0 | } |
202 | 0 | |
203 | 0 | auto offset = start.get() - dataString.Data(); |
204 | 0 |
|
205 | 0 | const char *begin = dataString.BeginReading(); |
206 | 0 |
|
207 | 0 | // Only do the validation when we find the substring. |
208 | 0 | return IsDocumentElement(begin, begin + offset); |
209 | 0 | } |
210 | | |
211 | | NS_IMETHODIMP |
212 | | nsFeedSniffer::GetMIMETypeFromContent(nsIRequest* request, |
213 | | const uint8_t* data, |
214 | | uint32_t length, |
215 | | nsACString& sniffedType) |
216 | 0 | { |
217 | 0 | nsCOMPtr<nsIHttpChannel> channel(do_QueryInterface(request)); |
218 | 0 | if (!channel) |
219 | 0 | return NS_ERROR_NO_INTERFACE; |
220 | 0 | |
221 | 0 | // Check that this is a GET request, since you can't subscribe to a POST... |
222 | 0 | nsAutoCString method; |
223 | 0 | mozilla::Unused << channel->GetRequestMethod(method); |
224 | 0 | if (!method.EqualsLiteral("GET")) { |
225 | 0 | sniffedType.Truncate(); |
226 | 0 | return NS_OK; |
227 | 0 | } |
228 | 0 | |
229 | 0 | if (!sFramePrefCached) { |
230 | 0 | sFramePrefCached = true; |
231 | 0 | Preferences::AddBoolVarCache(&sFramingAllowed, |
232 | 0 | "browser.feeds.unsafelyFrameFeeds"); |
233 | 0 | } |
234 | 0 |
|
235 | 0 | if (!sFramingAllowed) { |
236 | 0 | // Check that we're the toplevel frame: |
237 | 0 | nsCOMPtr<nsILoadInfo> loadInfo = channel->GetLoadInfo(); |
238 | 0 | if (!loadInfo) { |
239 | 0 | sniffedType.Truncate(); |
240 | 0 | return NS_OK; |
241 | 0 | } |
242 | 0 | auto frameID = loadInfo->GetFrameOuterWindowID(); |
243 | 0 | if (!frameID) { |
244 | 0 | frameID = loadInfo->GetOuterWindowID(); |
245 | 0 | } |
246 | 0 | if (loadInfo->GetTopOuterWindowID() != frameID) { |
247 | 0 | sniffedType.Truncate(); |
248 | 0 | return NS_OK; |
249 | 0 | } |
250 | 0 | } |
251 | 0 | |
252 | 0 | // We need to find out if this is a load of a view-source document. In this |
253 | 0 | // case we do not want to override the content type, since the source display |
254 | 0 | // does not need to be converted from feed format to XUL. More importantly, |
255 | 0 | // we don't want to change the content type from something |
256 | 0 | // nsContentDLF::CreateInstance knows about (e.g. application/xml, text/html |
257 | 0 | // etc) to something that only the application fe knows about (maybe.feed) |
258 | 0 | // thus deactivating syntax highlighting. |
259 | 0 | nsCOMPtr<nsIURI> originalURI; |
260 | 0 | channel->GetOriginalURI(getter_AddRefs(originalURI)); |
261 | 0 |
|
262 | 0 | nsAutoCString scheme; |
263 | 0 | originalURI->GetScheme(scheme); |
264 | 0 | if (scheme.EqualsLiteral("view-source")) { |
265 | 0 | sniffedType.Truncate(); |
266 | 0 | return NS_OK; |
267 | 0 | } |
268 | 0 | |
269 | 0 | // Check the Content-Type to see if it is set correctly. If it is set to |
270 | 0 | // something specific that we think is a reliable indication of a feed, don't |
271 | 0 | // bother sniffing since we assume the site maintainer knows what they're |
272 | 0 | // doing. |
273 | 0 | nsAutoCString contentType; |
274 | 0 | channel->GetContentType(contentType); |
275 | 0 | bool noSniff = contentType.EqualsLiteral(TYPE_RSS) || |
276 | 0 | contentType.EqualsLiteral(TYPE_ATOM); |
277 | 0 |
|
278 | 0 | if (noSniff) { |
279 | 0 | // check for an attachment after we have a likely feed. |
280 | 0 | if(HasAttachmentDisposition(channel)) { |
281 | 0 | sniffedType.Truncate(); |
282 | 0 | return NS_OK; |
283 | 0 | } |
284 | 0 | |
285 | 0 | // set the feed header as a response header, since we have good metadata |
286 | 0 | // telling us that the feed is supposed to be RSS or Atom |
287 | 0 | mozilla::DebugOnly<nsresult> rv = |
288 | 0 | channel->SetResponseHeader(NS_LITERAL_CSTRING("X-Moz-Is-Feed"), |
289 | 0 | NS_LITERAL_CSTRING("1"), false); |
290 | 0 | MOZ_ASSERT(NS_SUCCEEDED(rv)); |
291 | 0 | sniffedType.AssignLiteral(TYPE_MAYBE_FEED); |
292 | 0 | return NS_OK; |
293 | 0 | } |
294 | 0 |
|
295 | 0 | // Don't sniff arbitrary types. Limit sniffing to situations that |
296 | 0 | // we think can reasonably arise. |
297 | 0 | if (!contentType.EqualsLiteral(TEXT_HTML) && |
298 | 0 | !contentType.EqualsLiteral(APPLICATION_OCTET_STREAM) && |
299 | 0 | // Same criterion as XMLHttpRequest. Should we be checking for "+xml" |
300 | 0 | // and check for text/xml and application/xml by hand instead? |
301 | 0 | contentType.Find("xml") == -1) { |
302 | 0 | sniffedType.Truncate(); |
303 | 0 | return NS_OK; |
304 | 0 | } |
305 | 0 | |
306 | 0 | // Now we need to potentially decompress data served with |
307 | 0 | // Content-Encoding: gzip |
308 | 0 | nsresult rv = ConvertEncodedData(request, data, length); |
309 | 0 | if (NS_FAILED(rv)) |
310 | 0 | return rv; |
311 | 0 | |
312 | 0 | // We cap the number of bytes to scan at MAX_BYTES to prevent picking up |
313 | 0 | // false positives by accidentally reading document content, e.g. a "how to |
314 | 0 | // make a feed" page. |
315 | 0 | const char* testData; |
316 | 0 | if (mDecodedData.IsEmpty()) { |
317 | 0 | testData = (const char*)data; |
318 | 0 | length = std::min(length, MAX_BYTES); |
319 | 0 | } else { |
320 | 0 | testData = mDecodedData.get(); |
321 | 0 | length = std::min(mDecodedData.Length(), MAX_BYTES); |
322 | 0 | } |
323 | 0 |
|
324 | 0 | // The strategy here is based on that described in: |
325 | 0 | // http://blogs.msdn.com/rssteam/articles/PublishersGuide.aspx |
326 | 0 | // for interoperarbility purposes. |
327 | 0 |
|
328 | 0 | // Thus begins the actual sniffing. |
329 | 0 | nsDependentCSubstring dataString((const char*)testData, length); |
330 | 0 |
|
331 | 0 | bool isFeed = false; |
332 | 0 |
|
333 | 0 | // RSS 0.91/0.92/2.0 |
334 | 0 | isFeed = ContainsTopLevelSubstring(dataString, "<rss"); |
335 | 0 |
|
336 | 0 | // Atom 1.0 |
337 | 0 | if (!isFeed) |
338 | 0 | isFeed = ContainsTopLevelSubstring(dataString, "<feed"); |
339 | 0 |
|
340 | 0 | // RSS 1.0 |
341 | 0 | if (!isFeed) { |
342 | 0 | bool foundNS_RDF = FindInReadable(NS_LITERAL_CSTRING(NS_RDF), dataString); |
343 | 0 | bool foundNS_RSS = FindInReadable(NS_LITERAL_CSTRING(NS_RSS), dataString); |
344 | 0 | isFeed = ContainsTopLevelSubstring(dataString, "<rdf:RDF") && |
345 | 0 | foundNS_RDF && foundNS_RSS; |
346 | 0 | } |
347 | 0 |
|
348 | 0 | // If we sniffed a feed, coerce our internal type |
349 | 0 | if (isFeed && !HasAttachmentDisposition(channel)) |
350 | 0 | sniffedType.AssignLiteral(TYPE_MAYBE_FEED); |
351 | 0 | else |
352 | 0 | sniffedType.Truncate(); |
353 | 0 | return NS_OK; |
354 | 0 | } |
355 | | |
356 | | NS_IMETHODIMP |
357 | | nsFeedSniffer::OnStartRequest(nsIRequest* request, nsISupports* context) |
358 | 0 | { |
359 | 0 | return NS_OK; |
360 | 0 | } |
361 | | |
362 | | nsresult |
363 | | nsFeedSniffer::AppendSegmentToString(nsIInputStream* inputStream, |
364 | | void* closure, |
365 | | const char* rawSegment, |
366 | | uint32_t toOffset, |
367 | | uint32_t count, |
368 | | uint32_t* writeCount) |
369 | 0 | { |
370 | 0 | nsCString* decodedData = static_cast<nsCString*>(closure); |
371 | 0 | decodedData->Append(rawSegment, count); |
372 | 0 | *writeCount = count; |
373 | 0 | return NS_OK; |
374 | 0 | } |
375 | | |
376 | | NS_IMETHODIMP |
377 | | nsFeedSniffer::OnDataAvailable(nsIRequest* request, nsISupports* context, |
378 | | nsIInputStream* stream, uint64_t offset, |
379 | | uint32_t count) |
380 | 0 | { |
381 | 0 | uint32_t read; |
382 | 0 | return stream->ReadSegments(AppendSegmentToString, &mDecodedData, count, |
383 | 0 | &read); |
384 | 0 | } |
385 | | |
386 | | NS_IMETHODIMP |
387 | | nsFeedSniffer::OnStopRequest(nsIRequest* request, nsISupports* context, |
388 | | nsresult status) |
389 | 0 | { |
390 | 0 | return NS_OK; |
391 | 0 | } |