/src/mozilla-central/netwerk/streamconv/converters/nsUnknownDecoder.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ |
2 | | /* This Source Code Form is subject to the terms of the Mozilla Public |
3 | | * License, v. 2.0. If a copy of the MPL was not distributed with this |
4 | | * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ |
5 | | |
6 | | #include "nsUnknownDecoder.h" |
7 | | #include "nsIPipe.h" |
8 | | #include "nsIInputStream.h" |
9 | | #include "nsIOutputStream.h" |
10 | | #include "nsMimeTypes.h" |
11 | | #include "nsIPrefService.h" |
12 | | #include "nsIPrefBranch.h" |
13 | | |
14 | | #include "nsCRT.h" |
15 | | |
16 | | #include "nsIMIMEService.h" |
17 | | |
18 | | #include "nsIDivertableChannel.h" |
19 | | #include "nsIViewSourceChannel.h" |
20 | | #include "nsIHttpChannel.h" |
21 | | #include "nsIForcePendingChannel.h" |
22 | | #include "nsIEncodedChannel.h" |
23 | | #include "nsIURI.h" |
24 | | #include "nsStringStream.h" |
25 | | #include "nsNetCID.h" |
26 | | #include "nsNetUtil.h" |
27 | | |
28 | | #include <algorithm> |
29 | | |
// Size, in bytes, of the sniffer buffer allocated in OnStartRequest; also the
// cap applied to how much decoded data the sniffers inspect.
30 | 0 | #define MAX_BUFFER_SIZE 512u
31 | | |
// nsISupports plumbing for the helper listener: it exposes nsIStreamListener
// and nsIRequestObserver.
32 | | NS_IMPL_ISUPPORTS(nsUnknownDecoder::ConvertedStreamListener,
33 | | nsIStreamListener,
34 | | nsIRequestObserver)
35 | | |
36 | | nsUnknownDecoder::ConvertedStreamListener:: |
37 | | ConvertedStreamListener(nsUnknownDecoder *aDecoder) |
38 | 0 | { |
39 | 0 | mDecoder = aDecoder; |
40 | 0 | } |
41 | | |
42 | | nsresult |
43 | | nsUnknownDecoder::ConvertedStreamListener:: |
44 | | AppendDataToString(nsIInputStream* inputStream, |
45 | | void* closure, |
46 | | const char* rawSegment, |
47 | | uint32_t toOffset, |
48 | | uint32_t count, |
49 | | uint32_t* writeCount) |
50 | 0 | { |
51 | 0 | nsCString* decodedData = static_cast<nsCString*>(closure); |
52 | 0 | decodedData->Append(rawSegment, count); |
53 | 0 | *writeCount = count; |
54 | 0 | return NS_OK; |
55 | 0 | } |
56 | | |
57 | | NS_IMETHODIMP |
58 | | nsUnknownDecoder::ConvertedStreamListener::OnStartRequest(nsIRequest* request, |
59 | | nsISupports* context) |
60 | 0 | { |
61 | 0 | return NS_OK; |
62 | 0 | } |
63 | | |
64 | | NS_IMETHODIMP |
65 | | nsUnknownDecoder::ConvertedStreamListener:: |
66 | | OnDataAvailable(nsIRequest* request, |
67 | | nsISupports* context, |
68 | | nsIInputStream* stream, |
69 | | uint64_t offset, |
70 | | uint32_t count) |
71 | 0 | { |
72 | 0 | uint32_t read; |
73 | 0 | nsAutoCString decodedData; |
74 | 0 | { |
75 | 0 | MutexAutoLock lock(mDecoder->mMutex); |
76 | 0 | decodedData = mDecoder->mDecodedData; |
77 | 0 | } |
78 | 0 | nsresult rv = stream->ReadSegments(AppendDataToString, &decodedData, count, |
79 | 0 | &read); |
80 | 0 | if (NS_FAILED(rv)) { |
81 | 0 | return rv; |
82 | 0 | } |
83 | 0 | MutexAutoLock lock(mDecoder->mMutex); |
84 | 0 | mDecoder->mDecodedData = decodedData; |
85 | 0 | return NS_OK; |
86 | 0 | } |
87 | | |
88 | | NS_IMETHODIMP |
89 | | nsUnknownDecoder::ConvertedStreamListener::OnStopRequest(nsIRequest* request, |
90 | | nsISupports* context, |
91 | | nsresult status) |
92 | 0 | { |
93 | 0 | return NS_OK; |
94 | 0 | } |
95 | | |
96 | | nsUnknownDecoder::nsUnknownDecoder() |
97 | | : mBuffer(nullptr) |
98 | | , mBufferLen(0) |
99 | | , mRequireHTMLsuffix(false) |
100 | | , mMutex("nsUnknownDecoder") |
101 | | , mDecodedData("") |
102 | 0 | { |
103 | 0 | nsCOMPtr<nsIPrefBranch> prefs = do_GetService(NS_PREFSERVICE_CONTRACTID); |
104 | 0 | if (prefs) { |
105 | 0 | bool val; |
106 | 0 | if (NS_SUCCEEDED(prefs->GetBoolPref("security.requireHTMLsuffix", &val))) |
107 | 0 | mRequireHTMLsuffix = val; |
108 | 0 | } |
109 | 0 | } |
110 | | |
111 | | nsUnknownDecoder::~nsUnknownDecoder() |
112 | 0 | { |
113 | 0 | if (mBuffer) { |
114 | 0 | delete [] mBuffer; |
115 | 0 | mBuffer = nullptr; |
116 | 0 | } |
117 | 0 | } |
118 | | |
119 | | // ---- |
120 | | // |
121 | | // nsISupports implementation... |
122 | | // |
123 | | // ---- |
124 | | |
// Reference counting plus the QueryInterface table for nsUnknownDecoder.
// The object answers for the stream-converter, stream-listener,
// request-observer, content-sniffer and thread-retargetable-listener
// interfaces; nsISupports is resolved through the nsIStreamListener base.
125 | | NS_IMPL_ADDREF(nsUnknownDecoder)
126 | | NS_IMPL_RELEASE(nsUnknownDecoder)
127 | |
128 | 0 | NS_INTERFACE_MAP_BEGIN(nsUnknownDecoder)
129 | 0 | NS_INTERFACE_MAP_ENTRY(nsIStreamConverter)
130 | 0 | NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
131 | 0 | NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
132 | 0 | NS_INTERFACE_MAP_ENTRY(nsIContentSniffer)
133 | 0 | NS_INTERFACE_MAP_ENTRY(nsIThreadRetargetableStreamListener)
134 | 0 | NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIStreamListener)
135 | 0 | NS_INTERFACE_MAP_END
136 | | |
137 | | |
138 | | // ---- |
139 | | // |
140 | | // nsIStreamConverter methods... |
141 | | // |
142 | | // ---- |
143 | | |
144 | | NS_IMETHODIMP |
145 | | nsUnknownDecoder::Convert(nsIInputStream *aFromStream, |
146 | | const char *aFromType, |
147 | | const char *aToType, |
148 | | nsISupports *aCtxt, |
149 | | nsIInputStream **aResultStream) |
150 | 0 | { |
151 | 0 | return NS_ERROR_NOT_IMPLEMENTED; |
152 | 0 | } |
153 | | |
154 | | NS_IMETHODIMP |
155 | | nsUnknownDecoder::AsyncConvertData(const char *aFromType, |
156 | | const char *aToType, |
157 | | nsIStreamListener *aListener, |
158 | | nsISupports *aCtxt) |
159 | 0 | { |
160 | 0 | NS_ASSERTION(aListener && aFromType && aToType, |
161 | 0 | "null pointer passed into multi mixed converter"); |
162 | 0 | // hook up our final listener. this guy gets the various On*() calls we want to throw |
163 | 0 | // at him. |
164 | 0 | // |
165 | 0 |
|
166 | 0 | MutexAutoLock lock(mMutex); |
167 | 0 | mNextListener = aListener; |
168 | 0 | return (aListener) ? NS_OK : NS_ERROR_FAILURE; |
169 | 0 | } |
170 | | |
171 | | // ---- |
172 | | // |
173 | | // nsIStreamListener methods... |
174 | | // |
175 | | // ---- |
176 | | |
177 | | NS_IMETHODIMP |
178 | | nsUnknownDecoder::OnDataAvailable(nsIRequest* request, |
179 | | nsISupports *aCtxt, |
180 | | nsIInputStream *aStream, |
181 | | uint64_t aSourceOffset, |
182 | | uint32_t aCount) |
183 | 0 | { |
184 | 0 | nsresult rv = NS_OK; |
185 | 0 |
|
186 | 0 | bool contentTypeEmpty; |
187 | 0 | { |
188 | 0 | MutexAutoLock lock(mMutex); |
189 | 0 | if (!mNextListener) return NS_ERROR_FAILURE; |
190 | 0 | |
191 | 0 | contentTypeEmpty = mContentType.IsEmpty(); |
192 | 0 | } |
193 | 0 |
|
194 | 0 | if (contentTypeEmpty) { |
195 | 0 | uint32_t count, len; |
196 | 0 |
|
197 | 0 | // If the buffer has not been allocated by now, just fail... |
198 | 0 | if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY; |
199 | 0 | |
200 | 0 | // |
201 | 0 | // Determine how much of the stream should be read to fill up the |
202 | 0 | // sniffer buffer... |
203 | 0 | // |
204 | 0 | if (mBufferLen + aCount >= MAX_BUFFER_SIZE) { |
205 | 0 | count = MAX_BUFFER_SIZE - mBufferLen; |
206 | 0 | } else { |
207 | 0 | count = aCount; |
208 | 0 | } |
209 | 0 |
|
210 | 0 | // Read the data into the buffer... |
211 | 0 | rv = aStream->Read((mBuffer+mBufferLen), count, &len); |
212 | 0 | if (NS_FAILED(rv)) return rv; |
213 | 0 | |
214 | 0 | mBufferLen += len; |
215 | 0 | aCount -= len; |
216 | 0 |
|
217 | 0 | if (aCount) { |
218 | 0 | // |
219 | 0 | // Adjust the source offset... The call to FireListenerNotifications(...) |
220 | 0 | // will make the first OnDataAvailable(...) call with an offset of 0. |
221 | 0 | // So, this offset needs to be adjusted to reflect that... |
222 | 0 | // |
223 | 0 | aSourceOffset += mBufferLen; |
224 | 0 |
|
225 | 0 | DetermineContentType(request); |
226 | 0 |
|
227 | 0 | rv = FireListenerNotifications(request, aCtxt); |
228 | 0 | } |
229 | 0 | } |
230 | 0 |
|
231 | 0 | // Must not fire ODA again if it failed once |
232 | 0 | if (aCount && NS_SUCCEEDED(rv)) { |
233 | | #ifdef DEBUG |
234 | | { |
235 | | MutexAutoLock lock(mMutex); |
236 | | NS_ASSERTION(!mContentType.IsEmpty(), |
237 | | "Content type should be known by now."); |
238 | | } |
239 | | #endif |
240 | |
|
241 | 0 | nsCOMPtr<nsIDivertableChannel> divertable = do_QueryInterface(request); |
242 | 0 | if (divertable) { |
243 | 0 | bool diverting; |
244 | 0 | divertable->GetDivertingToParent(&diverting); |
245 | 0 | if (diverting) { |
246 | 0 | // The channel is diverted to the parent do not send any more data here. |
247 | 0 | return rv; |
248 | 0 | } |
249 | 0 | } |
250 | 0 | |
251 | 0 | nsCOMPtr<nsIStreamListener> listener; |
252 | 0 | { |
253 | 0 | MutexAutoLock lock(mMutex); |
254 | 0 | listener = mNextListener; |
255 | 0 | } |
256 | 0 | rv = listener->OnDataAvailable(request, aCtxt, aStream, |
257 | 0 | aSourceOffset, aCount); |
258 | 0 | } |
259 | 0 |
|
260 | 0 | return rv; |
261 | 0 | } |
262 | | |
263 | | // ---- |
264 | | // |
265 | | // nsIRequestObserver methods... |
266 | | // |
267 | | // ---- |
268 | | |
269 | | NS_IMETHODIMP |
270 | | nsUnknownDecoder::OnStartRequest(nsIRequest* request, nsISupports *aCtxt) |
271 | 0 | { |
272 | 0 | nsresult rv = NS_OK; |
273 | 0 |
|
274 | 0 | { |
275 | 0 | MutexAutoLock lock(mMutex); |
276 | 0 | if (!mNextListener) return NS_ERROR_FAILURE; |
277 | 0 | } |
278 | 0 | |
279 | 0 | // Allocate the sniffer buffer... |
280 | 0 | if (NS_SUCCEEDED(rv) && !mBuffer) { |
281 | 0 | mBuffer = new char[MAX_BUFFER_SIZE]; |
282 | 0 |
|
283 | 0 | if (!mBuffer) { |
284 | 0 | rv = NS_ERROR_OUT_OF_MEMORY; |
285 | 0 | } |
286 | 0 | } |
287 | 0 |
|
288 | 0 | nsCOMPtr<nsIDivertableChannel> divertable = do_QueryInterface(request); |
289 | 0 | if (divertable) { |
290 | 0 | divertable->UnknownDecoderInvolvedKeepData(); |
291 | 0 | } |
292 | 0 |
|
293 | 0 | // Do not pass the OnStartRequest on to the next listener (yet)... |
294 | 0 | return rv; |
295 | 0 | } |
296 | | |
297 | | NS_IMETHODIMP |
298 | | nsUnknownDecoder::OnStopRequest(nsIRequest* request, nsISupports *aCtxt, |
299 | | nsresult aStatus) |
300 | 0 | { |
301 | 0 | nsresult rv = NS_OK; |
302 | 0 |
|
303 | 0 | bool contentTypeEmpty; |
304 | 0 | { |
305 | 0 | MutexAutoLock lock(mMutex); |
306 | 0 | if (!mNextListener) return NS_ERROR_FAILURE; |
307 | 0 | |
308 | 0 | contentTypeEmpty = mContentType.IsEmpty(); |
309 | 0 | } |
310 | 0 |
|
311 | 0 | // |
312 | 0 | // The total amount of data is less than the size of the sniffer buffer. |
313 | 0 | // Analyze the buffer now... |
314 | 0 | // |
315 | 0 | if (contentTypeEmpty) { |
316 | 0 | DetermineContentType(request); |
317 | 0 |
|
318 | 0 | // Make sure channel listeners see channel as pending while we call |
319 | 0 | // OnStartRequest/OnDataAvailable, even though the underlying channel |
320 | 0 | // has already hit OnStopRequest. |
321 | 0 | nsCOMPtr<nsIForcePendingChannel> forcePendingChannel = do_QueryInterface(request); |
322 | 0 | if (forcePendingChannel) { |
323 | 0 | forcePendingChannel->ForcePending(true); |
324 | 0 | } |
325 | 0 |
|
326 | 0 | rv = FireListenerNotifications(request, aCtxt); |
327 | 0 |
|
328 | 0 | if (NS_FAILED(rv)) { |
329 | 0 | aStatus = rv; |
330 | 0 | } |
331 | 0 |
|
332 | 0 | // now we need to set pending state to false before calling OnStopRequest |
333 | 0 | if (forcePendingChannel) { |
334 | 0 | forcePendingChannel->ForcePending(false); |
335 | 0 | } |
336 | 0 | } |
337 | 0 |
|
338 | 0 | nsCOMPtr<nsIStreamListener> listener; |
339 | 0 | { |
340 | 0 | MutexAutoLock lock(mMutex); |
341 | 0 | listener = mNextListener; |
342 | 0 | mNextListener = nullptr; |
343 | 0 | } |
344 | 0 | rv = listener->OnStopRequest(request, aCtxt, aStatus); |
345 | 0 |
|
346 | 0 | return rv; |
347 | 0 | } |
348 | | |
349 | | // ---- |
350 | | // |
351 | | // nsIContentSniffer methods... |
352 | | // |
353 | | // ---- |
354 | | NS_IMETHODIMP |
355 | | nsUnknownDecoder::GetMIMETypeFromContent(nsIRequest* aRequest, |
356 | | const uint8_t* aData, |
357 | | uint32_t aLength, |
358 | | nsACString& type) |
359 | 0 | { |
360 | 0 | // This is only used by sniffer, therefore we do not need to lock anything |
361 | 0 | // here. |
362 | 0 |
|
363 | 0 | mBuffer = const_cast<char*>(reinterpret_cast<const char*>(aData)); |
364 | 0 | mBufferLen = aLength; |
365 | 0 | DetermineContentType(aRequest); |
366 | 0 | mBuffer = nullptr; |
367 | 0 | mBufferLen = 0; |
368 | 0 | type.Assign(mContentType); |
369 | 0 | mContentType.Truncate(); |
370 | 0 | return type.IsEmpty() ? NS_ERROR_NOT_AVAILABLE : NS_OK; |
371 | 0 | } |
372 | | |
373 | | |
374 | | // Actual sniffing code |
375 | | |
376 | | bool nsUnknownDecoder::AllowSniffing(nsIRequest* aRequest) |
377 | 0 | { |
378 | 0 | if (!mRequireHTMLsuffix) { |
379 | 0 | return true; |
380 | 0 | } |
381 | 0 | |
382 | 0 | nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest); |
383 | 0 | if (!channel) { |
384 | 0 | NS_ERROR("QI failed"); |
385 | 0 | return false; |
386 | 0 | } |
387 | 0 |
|
388 | 0 | nsCOMPtr<nsIURI> uri; |
389 | 0 | if (NS_FAILED(channel->GetURI(getter_AddRefs(uri))) || !uri) { |
390 | 0 | return false; |
391 | 0 | } |
392 | 0 | |
393 | 0 | bool isLocalFile = false; |
394 | 0 | if (NS_FAILED(uri->SchemeIs("file", &isLocalFile)) || isLocalFile) { |
395 | 0 | return false; |
396 | 0 | } |
397 | 0 | |
398 | 0 | return true; |
399 | 0 | } |
400 | | |
401 | | /** |
402 | | * This is the array of sniffer entries that depend on "magic numbers" |
403 | | * in the file. Each entry has either a type associated with it (set |
404 | | * these with the SNIFFER_ENTRY macro) or a function to be executed |
405 | | * (set these with the SNIFFER_ENTRY_WITH_FUNC macro). The function |
406 | | * should take a single nsIRequest* and returns bool -- true if |
407 | | * it sets mContentType, false otherwise |
408 | | */ |
// Entries are tried in array order by DetermineContentType(); the first entry
// whose byte prefix matches the start of the data is used.
409 | | nsUnknownDecoder::nsSnifferEntry nsUnknownDecoder::sSnifferEntries[] = {
410 | | SNIFFER_ENTRY("%PDF-", APPLICATION_PDF),
411 | |
412 | | SNIFFER_ENTRY("%!PS-Adobe-", APPLICATION_POSTSCRIPT),
413 | |
414 | | // Files that start with mailbox delimiters let's provisionally call
415 | | // text/plain
416 | | SNIFFER_ENTRY("From", TEXT_PLAIN),
417 | | SNIFFER_ENTRY(">From", TEXT_PLAIN),
418 | |
419 | | // If the buffer begins with "#!" or "%!" then it is a script of
420 | | // some sort... "Scripts" can include arbitrary data to be passed
421 | | // to an interpreter, so we need to decide whether we can call this
422 | | // text or whether it's data.
// NOTE(review): only the "#!" prefix has an entry in this table; there is no
// generic "%!" entry here.
423 | | SNIFFER_ENTRY_WITH_FUNC("#!", &nsUnknownDecoder::LastDitchSniff),
424 | |
425 | | // XXXbz should (and can) we also include the various ways that <?xml can
426 | | // appear as UTF-16 and such? See http://www.w3.org/TR/REC-xml#sec-guessing
427 | | SNIFFER_ENTRY_WITH_FUNC("<?xml", &nsUnknownDecoder::SniffForXML)
428 | | };
429 | |
// Number of elements in sSnifferEntries, derived from the array's size.
430 | | uint32_t nsUnknownDecoder::sSnifferEntryNum =
431 | | sizeof(nsUnknownDecoder::sSnifferEntries) /
432 | | sizeof(nsUnknownDecoder::nsSnifferEntry);
433 | | |
434 | | void nsUnknownDecoder::DetermineContentType(nsIRequest* aRequest) |
435 | 0 | { |
436 | 0 | { |
437 | 0 | MutexAutoLock lock(mMutex); |
438 | 0 | NS_ASSERTION(mContentType.IsEmpty(), "Content type is already known."); |
439 | 0 | if (!mContentType.IsEmpty()) return; |
440 | 0 | } |
441 | 0 | |
442 | 0 | const char* testData = mBuffer; |
443 | 0 | uint32_t testDataLen = mBufferLen; |
444 | 0 | // Check if data are compressed. |
445 | 0 | nsCOMPtr<nsIHttpChannel> channel(do_QueryInterface(aRequest)); |
446 | 0 | nsAutoCString decodedData; |
447 | 0 |
|
448 | 0 | if (channel) { |
449 | 0 | // ConvertEncodedData is always called only on a single thread for each |
450 | 0 | // instance of an object. |
451 | 0 | nsresult rv = ConvertEncodedData(aRequest, mBuffer, mBufferLen); |
452 | 0 | if (NS_SUCCEEDED(rv)) { |
453 | 0 | MutexAutoLock lock(mMutex); |
454 | 0 | decodedData = mDecodedData; |
455 | 0 | } |
456 | 0 | if (!decodedData.IsEmpty()) { |
457 | 0 | testData = decodedData.get(); |
458 | 0 | testDataLen = std::min(decodedData.Length(), MAX_BUFFER_SIZE); |
459 | 0 | } |
460 | 0 | } |
461 | 0 |
|
462 | 0 | // First, run through all the types we can detect reliably based on |
463 | 0 | // magic numbers |
464 | 0 | uint32_t i; |
465 | 0 | for (i = 0; i < sSnifferEntryNum; ++i) { |
466 | 0 | if (testDataLen >= sSnifferEntries[i].mByteLen && // enough data |
467 | 0 | memcmp(testData, sSnifferEntries[i].mBytes, sSnifferEntries[i].mByteLen) == 0) { // and type matches |
468 | 0 | NS_ASSERTION(sSnifferEntries[i].mMimeType || |
469 | 0 | sSnifferEntries[i].mContentTypeSniffer, |
470 | 0 | "Must have either a type string or a function to set the type"); |
471 | 0 | NS_ASSERTION(!sSnifferEntries[i].mMimeType || |
472 | 0 | !sSnifferEntries[i].mContentTypeSniffer, |
473 | 0 | "Both a type string and a type sniffing function set;" |
474 | 0 | " using type string"); |
475 | 0 | if (sSnifferEntries[i].mMimeType) { |
476 | 0 | MutexAutoLock lock(mMutex); |
477 | 0 | mContentType = sSnifferEntries[i].mMimeType; |
478 | 0 | NS_ASSERTION(!mContentType.IsEmpty(), |
479 | 0 | "Content type should be known by now."); |
480 | 0 | return; |
481 | 0 | } |
482 | 0 | if ((this->*(sSnifferEntries[i].mContentTypeSniffer))(aRequest)) { |
483 | | #ifdef DEBUG |
484 | | MutexAutoLock lock(mMutex); |
485 | | NS_ASSERTION(!mContentType.IsEmpty(), |
486 | | "Content type should be known by now."); |
487 | | #endif |
488 | | return; |
489 | 0 | } |
490 | 0 | } |
491 | 0 | } |
492 | 0 |
|
493 | 0 | nsAutoCString sniffedType; |
494 | 0 | NS_SniffContent(NS_DATA_SNIFFER_CATEGORY, aRequest, |
495 | 0 | (const uint8_t*)testData, testDataLen, sniffedType); |
496 | 0 | { |
497 | 0 | MutexAutoLock lock(mMutex); |
498 | 0 | mContentType = sniffedType; |
499 | 0 | if (!mContentType.IsEmpty()) { |
500 | 0 | return; |
501 | 0 | } |
502 | 0 | } |
503 | 0 | |
504 | 0 | if (SniffForHTML(aRequest)) { |
505 | | #ifdef DEBUG |
506 | | MutexAutoLock lock(mMutex); |
507 | | NS_ASSERTION(!mContentType.IsEmpty(), |
508 | | "Content type should be known by now."); |
509 | | #endif |
510 | | return; |
511 | 0 | } |
512 | 0 |
|
513 | 0 | // We don't know what this is yet. Before we just give up, try |
514 | 0 | // the URI from the request. |
515 | 0 | if (SniffURI(aRequest)) { |
516 | | #ifdef DEBUG |
517 | | MutexAutoLock lock(mMutex); |
518 | | NS_ASSERTION(!mContentType.IsEmpty(), |
519 | | "Content type should be known by now."); |
520 | | #endif |
521 | | return; |
522 | 0 | } |
523 | 0 |
|
524 | 0 | LastDitchSniff(aRequest); |
525 | | #ifdef DEBUG |
526 | | MutexAutoLock lock(mMutex); |
527 | | NS_ASSERTION(!mContentType.IsEmpty(), |
528 | | "Content type should be known by now."); |
529 | | #endif |
530 | | } |
531 | | |
532 | | bool nsUnknownDecoder::SniffForHTML(nsIRequest* aRequest) |
533 | 0 | { |
534 | 0 | /* |
535 | 0 | * To prevent a possible attack, we will not consider this to be |
536 | 0 | * html content if it comes from the local file system and our prefs |
537 | 0 | * are set right |
538 | 0 | */ |
539 | 0 | if (!AllowSniffing(aRequest)) { |
540 | 0 | return false; |
541 | 0 | } |
542 | 0 | |
543 | 0 | MutexAutoLock lock(mMutex); |
544 | 0 |
|
545 | 0 | // Now look for HTML. |
546 | 0 | const char* str; |
547 | 0 | const char* end; |
548 | 0 | if (mDecodedData.IsEmpty()) { |
549 | 0 | str = mBuffer; |
550 | 0 | end = mBuffer + mBufferLen; |
551 | 0 | } else { |
552 | 0 | str = mDecodedData.get(); |
553 | 0 | end = mDecodedData.get() + std::min(mDecodedData.Length(), |
554 | 0 | MAX_BUFFER_SIZE); |
555 | 0 | } |
556 | 0 |
|
557 | 0 | // skip leading whitespace |
558 | 0 | while (str != end && nsCRT::IsAsciiSpace(*str)) { |
559 | 0 | ++str; |
560 | 0 | } |
561 | 0 |
|
562 | 0 | // did we find something like a start tag? |
563 | 0 | if (str == end || *str != '<' || ++str == end) { |
564 | 0 | return false; |
565 | 0 | } |
566 | 0 | |
567 | 0 | // If we seem to be SGML or XML and we got down here, just pretend we're HTML |
568 | 0 | if (*str == '!' || *str == '?') { |
569 | 0 | mContentType = TEXT_HTML; |
570 | 0 | return true; |
571 | 0 | } |
572 | 0 |
|
573 | 0 | uint32_t bufSize = end - str; |
574 | 0 | // We use sizeof(_tagstr) below because that's the length of _tagstr |
575 | 0 | // with the one char " " or ">" appended. |
576 | 0 | #define MATCHES_TAG(_tagstr) \ |
577 | 0 | (bufSize >= sizeof(_tagstr) && \ |
578 | 0 | (PL_strncasecmp(str, _tagstr " ", sizeof(_tagstr)) == 0 || \ |
579 | 0 | PL_strncasecmp(str, _tagstr ">", sizeof(_tagstr)) == 0)) |
580 | 0 |
|
581 | 0 | if (MATCHES_TAG("html") || |
582 | 0 | MATCHES_TAG("frameset") || |
583 | 0 | MATCHES_TAG("body") || |
584 | 0 | MATCHES_TAG("head") || |
585 | 0 | MATCHES_TAG("script") || |
586 | 0 | MATCHES_TAG("iframe") || |
587 | 0 | MATCHES_TAG("a") || |
588 | 0 | MATCHES_TAG("img") || |
589 | 0 | MATCHES_TAG("table") || |
590 | 0 | MATCHES_TAG("title") || |
591 | 0 | MATCHES_TAG("link") || |
592 | 0 | MATCHES_TAG("base") || |
593 | 0 | MATCHES_TAG("style") || |
594 | 0 | MATCHES_TAG("div") || |
595 | 0 | MATCHES_TAG("p") || |
596 | 0 | MATCHES_TAG("font") || |
597 | 0 | MATCHES_TAG("applet") || |
598 | 0 | MATCHES_TAG("meta") || |
599 | 0 | MATCHES_TAG("center") || |
600 | 0 | MATCHES_TAG("form") || |
601 | 0 | MATCHES_TAG("isindex") || |
602 | 0 | MATCHES_TAG("h1") || |
603 | 0 | MATCHES_TAG("h2") || |
604 | 0 | MATCHES_TAG("h3") || |
605 | 0 | MATCHES_TAG("h4") || |
606 | 0 | MATCHES_TAG("h5") || |
607 | 0 | MATCHES_TAG("h6") || |
608 | 0 | MATCHES_TAG("b") || |
609 | 0 | MATCHES_TAG("pre")) { |
610 | 0 |
|
611 | 0 | mContentType = TEXT_HTML; |
612 | 0 | return true; |
613 | 0 | } |
614 | 0 |
|
615 | 0 | #undef MATCHES_TAG |
616 | 0 |
|
617 | 0 | return false; |
618 | 0 | } |
619 | | |
620 | | bool nsUnknownDecoder::SniffForXML(nsIRequest* aRequest) |
621 | 0 | { |
622 | 0 | // Just like HTML, this should be able to be shut off. |
623 | 0 | if (!AllowSniffing(aRequest)) { |
624 | 0 | return false; |
625 | 0 | } |
626 | 0 | |
627 | 0 | // First see whether we can glean anything from the uri... |
628 | 0 | if (!SniffURI(aRequest)) { |
629 | 0 | // Oh well; just generic XML will have to do |
630 | 0 | MutexAutoLock lock(mMutex); |
631 | 0 | mContentType = TEXT_XML; |
632 | 0 | } |
633 | 0 |
|
634 | 0 | return true; |
635 | 0 | } |
636 | | |
637 | | bool nsUnknownDecoder::SniffURI(nsIRequest* aRequest) |
638 | 0 | { |
639 | 0 | nsCOMPtr<nsIMIMEService> mimeService(do_GetService("@mozilla.org/mime;1")); |
640 | 0 | if (mimeService) { |
641 | 0 | nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest); |
642 | 0 | if (channel) { |
643 | 0 | nsCOMPtr<nsIURI> uri; |
644 | 0 | nsresult result = channel->GetURI(getter_AddRefs(uri)); |
645 | 0 | if (NS_SUCCEEDED(result) && uri) { |
646 | 0 | nsAutoCString type; |
647 | 0 | result = mimeService->GetTypeFromURI(uri, type); |
648 | 0 | if (NS_SUCCEEDED(result)) { |
649 | 0 | MutexAutoLock lock(mMutex); |
650 | 0 | mContentType = type; |
651 | 0 | return true; |
652 | 0 | } |
653 | 0 | } |
654 | 0 | } |
655 | 0 | } |
656 | 0 | |
657 | 0 | return false; |
658 | 0 | } |
659 | | |
660 | | // This macro is based on RFC 2046 Section 4.1.2. Treat any char 0-31 |
661 | | // except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by |
662 | | // encodings like Shift_JIS) as non-text |
// NOTE: the argument is evaluated more than once, so only pass expressions
// without side effects.
663 | | #define IS_TEXT_CHAR(ch) \
664 | 0 | (((unsigned char)(ch)) > 31 || (9 <= (ch) && (ch) <= 13) || (ch) == 27)
665 | | |
666 | | bool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest) |
667 | 0 | { |
668 | 0 | // All we can do now is try to guess whether this is text/plain or |
669 | 0 | // application/octet-stream |
670 | 0 |
|
671 | 0 | MutexAutoLock lock(mMutex); |
672 | 0 |
|
673 | 0 | const char* testData; |
674 | 0 | uint32_t testDataLen; |
675 | 0 | if (mDecodedData.IsEmpty()) { |
676 | 0 | testData = mBuffer; |
677 | 0 | testDataLen = mBufferLen; |
678 | 0 | } else { |
679 | 0 | testData = mDecodedData.get(); |
680 | 0 | testDataLen = std::min(mDecodedData.Length(), MAX_BUFFER_SIZE); |
681 | 0 | } |
682 | 0 |
|
683 | 0 | // First, check for a BOM. If we see one, assume this is text/plain |
684 | 0 | // in whatever encoding. If there is a BOM _and_ text we will |
685 | 0 | // always have at least 4 bytes in the buffer (since the 2-byte BOMs |
686 | 0 | // are for 2-byte encodings and the UTF-8 BOM is 3 bytes). |
687 | 0 | if (testDataLen >= 4) { |
688 | 0 | const unsigned char* buf = (const unsigned char*)testData; |
689 | 0 | if ((buf[0] == 0xFE && buf[1] == 0xFF) || // UTF-16, Big Endian |
690 | 0 | (buf[0] == 0xFF && buf[1] == 0xFE) || // UTF-16 or UCS-4, Little Endian |
691 | 0 | (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) || // UTF-8 |
692 | 0 | (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF)) { // UCS-4, Big Endian |
693 | 0 |
|
694 | 0 | mContentType = TEXT_PLAIN; |
695 | 0 | return true; |
696 | 0 | } |
697 | 0 | } |
698 | 0 |
|
699 | 0 | // Now see whether the buffer has any non-text chars. If not, then let's |
700 | 0 | // just call it text/plain... |
701 | 0 | // |
702 | 0 | uint32_t i; |
703 | 0 | for (i = 0; i < testDataLen && IS_TEXT_CHAR(testData[i]); i++) { |
704 | 0 | } |
705 | 0 |
|
706 | 0 | if (i == testDataLen) { |
707 | 0 | mContentType = TEXT_PLAIN; |
708 | 0 | } |
709 | 0 | else { |
710 | 0 | mContentType = APPLICATION_OCTET_STREAM; |
711 | 0 | } |
712 | 0 |
|
713 | 0 | return true; |
714 | 0 | } |
715 | | |
716 | | |
717 | | nsresult nsUnknownDecoder::FireListenerNotifications(nsIRequest* request, |
718 | | nsISupports *aCtxt) |
719 | 0 | { |
720 | 0 | nsresult rv = NS_OK; |
721 | 0 |
|
722 | 0 | nsCOMPtr<nsIStreamListener> listener; |
723 | 0 | nsAutoCString contentType; |
724 | 0 | { |
725 | 0 | MutexAutoLock lock(mMutex); |
726 | 0 | if (!mNextListener) return NS_ERROR_FAILURE; |
727 | 0 | |
728 | 0 | listener = mNextListener; |
729 | 0 | contentType = mContentType; |
730 | 0 | } |
731 | 0 |
|
732 | 0 | if (!contentType.IsEmpty()) { |
733 | 0 | nsCOMPtr<nsIViewSourceChannel> viewSourceChannel = |
734 | 0 | do_QueryInterface(request); |
735 | 0 | if (viewSourceChannel) { |
736 | 0 | rv = viewSourceChannel->SetOriginalContentType(contentType); |
737 | 0 | } else { |
738 | 0 | nsCOMPtr<nsIChannel> channel = do_QueryInterface(request, &rv); |
739 | 0 | if (NS_SUCCEEDED(rv)) { |
740 | 0 | // Set the new content type on the channel... |
741 | 0 | rv = channel->SetContentType(contentType); |
742 | 0 | } |
743 | 0 | } |
744 | 0 |
|
745 | 0 | NS_ASSERTION(NS_SUCCEEDED(rv), "Unable to set content type on channel!"); |
746 | 0 |
|
747 | 0 | if (NS_FAILED(rv)) { |
748 | 0 | // Cancel the request to make sure it has the correct status if |
749 | 0 | // mNextListener looks at it. |
750 | 0 | request->Cancel(rv); |
751 | 0 | listener->OnStartRequest(request, aCtxt); |
752 | 0 |
|
753 | 0 | nsCOMPtr<nsIDivertableChannel> divertable = do_QueryInterface(request); |
754 | 0 | if (divertable) { |
755 | 0 | rv = divertable->UnknownDecoderInvolvedOnStartRequestCalled(); |
756 | 0 | } |
757 | 0 |
|
758 | 0 | return rv; |
759 | 0 | } |
760 | 0 | } |
761 | 0 |
|
762 | 0 | // Fire the OnStartRequest(...) |
763 | 0 | rv = listener->OnStartRequest(request, aCtxt); |
764 | 0 |
|
765 | 0 | nsCOMPtr<nsIDivertableChannel> divertable = do_QueryInterface(request); |
766 | 0 | if (divertable) { |
767 | 0 | rv = divertable->UnknownDecoderInvolvedOnStartRequestCalled(); |
768 | 0 | bool diverting; |
769 | 0 | divertable->GetDivertingToParent(&diverting); |
770 | 0 | if (diverting) { |
771 | 0 | // The channel is diverted to the parent do not send any more data here. |
772 | 0 | return rv; |
773 | 0 | } |
774 | 0 | } |
775 | 0 | |
776 | 0 | if (NS_SUCCEEDED(rv)) { |
777 | 0 | // install stream converter if required |
778 | 0 | nsCOMPtr<nsIEncodedChannel> encodedChannel = do_QueryInterface(request); |
779 | 0 | if (encodedChannel) { |
780 | 0 | nsCOMPtr<nsIStreamListener> listenerNew; |
781 | 0 | rv = encodedChannel->DoApplyContentConversions(listener, getter_AddRefs(listenerNew), aCtxt); |
782 | 0 | if (NS_SUCCEEDED(rv) && listenerNew) { |
783 | 0 | MutexAutoLock lock(mMutex); |
784 | 0 | mNextListener = listenerNew; |
785 | 0 | listener = listenerNew; |
786 | 0 | } |
787 | 0 | } |
788 | 0 | } |
789 | 0 |
|
790 | 0 | if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY; |
791 | 0 | |
792 | 0 | // If the request was canceled, then we need to treat that equivalently |
793 | 0 | // to an error returned by OnStartRequest. |
794 | 0 | if (NS_SUCCEEDED(rv)) |
795 | 0 | request->GetStatus(&rv); |
796 | 0 |
|
797 | 0 | // Fire the first OnDataAvailable for the data that was read from the |
798 | 0 | // stream into the sniffer buffer... |
799 | 0 | if (NS_SUCCEEDED(rv) && (mBufferLen > 0)) { |
800 | 0 | uint32_t len = 0; |
801 | 0 | nsCOMPtr<nsIInputStream> in; |
802 | 0 | nsCOMPtr<nsIOutputStream> out; |
803 | 0 |
|
804 | 0 | // Create a pipe and fill it with the data from the sniffer buffer. |
805 | 0 | rv = NS_NewPipe(getter_AddRefs(in), getter_AddRefs(out), |
806 | 0 | MAX_BUFFER_SIZE, MAX_BUFFER_SIZE); |
807 | 0 |
|
808 | 0 | if (NS_SUCCEEDED(rv)) { |
809 | 0 | rv = out->Write(mBuffer, mBufferLen, &len); |
810 | 0 | if (NS_SUCCEEDED(rv)) { |
811 | 0 | if (len == mBufferLen) { |
812 | 0 | rv = listener->OnDataAvailable(request, aCtxt, in, 0, len); |
813 | 0 | } else { |
814 | 0 | NS_ERROR("Unable to write all the data into the pipe."); |
815 | 0 | rv = NS_ERROR_FAILURE; |
816 | 0 | } |
817 | 0 | } |
818 | 0 | } |
819 | 0 | } |
820 | 0 |
|
821 | 0 | delete [] mBuffer; |
822 | 0 | mBuffer = nullptr; |
823 | 0 | mBufferLen = 0; |
824 | 0 |
|
825 | 0 | return rv; |
826 | 0 | } |
827 | | |
828 | | |
829 | | nsresult |
830 | | nsUnknownDecoder::ConvertEncodedData(nsIRequest* request, |
831 | | const char* data, |
832 | | uint32_t length) |
833 | 0 | { |
834 | 0 | nsresult rv = NS_OK; |
835 | 0 |
|
836 | 0 | { |
837 | 0 | MutexAutoLock lock(mMutex); |
838 | 0 | mDecodedData = ""; |
839 | 0 | } |
840 | 0 | nsCOMPtr<nsIEncodedChannel> encodedChannel(do_QueryInterface(request)); |
841 | 0 | if (encodedChannel) { |
842 | 0 |
|
843 | 0 | RefPtr<ConvertedStreamListener> strListener = |
844 | 0 | new ConvertedStreamListener(this); |
845 | 0 |
|
846 | 0 | nsCOMPtr<nsIStreamListener> listener; |
847 | 0 | rv = encodedChannel->DoApplyContentConversions(strListener, |
848 | 0 | getter_AddRefs(listener), |
849 | 0 | nullptr); |
850 | 0 |
|
851 | 0 | if (NS_FAILED(rv)) { |
852 | 0 | return rv; |
853 | 0 | } |
854 | 0 | |
855 | 0 | if (listener) { |
856 | 0 | listener->OnStartRequest(request, nullptr); |
857 | 0 |
|
858 | 0 | if (length) { |
859 | 0 | nsCOMPtr<nsIStringInputStream> rawStream = |
860 | 0 | do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID); |
861 | 0 | if (!rawStream) |
862 | 0 | return NS_ERROR_FAILURE; |
863 | 0 | |
864 | 0 | rv = rawStream->SetData((const char*)data, length); |
865 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
866 | 0 |
|
867 | 0 | rv = listener->OnDataAvailable(request, nullptr, rawStream, 0, |
868 | 0 | length); |
869 | 0 | NS_ENSURE_SUCCESS(rv, rv); |
870 | 0 | } |
871 | 0 |
|
872 | 0 | listener->OnStopRequest(request, nullptr, NS_OK); |
873 | 0 | } |
874 | 0 | } |
875 | 0 | return rv; |
876 | 0 | } |
877 | | |
878 | | // |
879 | | // nsIThreadRetargetableStreamListener methods |
880 | | // |
881 | | NS_IMETHODIMP |
882 | | nsUnknownDecoder::CheckListenerChain() |
883 | 0 | { |
884 | 0 | nsCOMPtr<nsIThreadRetargetableStreamListener> listener; |
885 | 0 | { |
886 | 0 | MutexAutoLock lock(mMutex); |
887 | 0 | listener = do_QueryInterface(mNextListener); |
888 | 0 | } |
889 | 0 | if (!listener) { |
890 | 0 | return NS_ERROR_NO_INTERFACE; |
891 | 0 | } |
892 | 0 | |
893 | 0 | return listener->CheckListenerChain(); |
894 | 0 | } |
895 | | |
896 | | void |
897 | | nsBinaryDetector::DetermineContentType(nsIRequest* aRequest) |
898 | 0 | { |
899 | 0 | nsCOMPtr<nsIHttpChannel> httpChannel = do_QueryInterface(aRequest); |
900 | 0 | if (!httpChannel) { |
901 | 0 | return; |
902 | 0 | } |
903 | 0 | |
904 | 0 | // It's an HTTP channel. Check for the text/plain mess |
905 | 0 | nsAutoCString contentTypeHdr; |
906 | 0 | Unused << httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Type"), |
907 | 0 | contentTypeHdr); |
908 | 0 | nsAutoCString contentType; |
909 | 0 | httpChannel->GetContentType(contentType); |
910 | 0 |
|
911 | 0 | // Make sure to do a case-sensitive exact match comparison here. Apache |
912 | 0 | // 1.x just sends text/plain for "unknown", while Apache 2.x sends |
913 | 0 | // text/plain with a ISO-8859-1 charset. Debian's Apache version, just to |
914 | 0 | // be different, sends text/plain with iso-8859-1 charset. For extra fun, |
915 | 0 | // FC7, RHEL4, and Ubuntu Feisty send charset=UTF-8. Don't do general |
916 | 0 | // case-insensitive comparison, since we really want to apply this crap as |
917 | 0 | // rarely as we can. |
918 | 0 | if (!contentType.EqualsLiteral("text/plain") || |
919 | 0 | (!contentTypeHdr.EqualsLiteral("text/plain") && |
920 | 0 | !contentTypeHdr.EqualsLiteral("text/plain; charset=ISO-8859-1") && |
921 | 0 | !contentTypeHdr.EqualsLiteral("text/plain; charset=iso-8859-1") && |
922 | 0 | !contentTypeHdr.EqualsLiteral("text/plain; charset=UTF-8"))) { |
923 | 0 | return; |
924 | 0 | } |
925 | 0 | |
926 | 0 | // Check whether we have content-encoding. If we do, don't try to |
927 | 0 | // detect the type. |
928 | 0 | // XXXbz we could improve this by doing a local decompress if we |
929 | 0 | // wanted, I'm sure. |
930 | 0 | nsAutoCString contentEncoding; |
931 | 0 | Unused << httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Encoding"), |
932 | 0 | contentEncoding); |
933 | 0 | if (!contentEncoding.IsEmpty()) { |
934 | 0 | return; |
935 | 0 | } |
936 | 0 | |
937 | 0 | LastDitchSniff(aRequest); |
938 | 0 | MutexAutoLock lock(mMutex); |
939 | 0 | if (mContentType.EqualsLiteral(APPLICATION_OCTET_STREAM)) { |
940 | 0 | // We want to guess at it instead |
941 | 0 | mContentType = APPLICATION_GUESS_FROM_EXT; |
942 | 0 | } else { |
943 | 0 | // Let the text/plain type we already have be, so that other content |
944 | 0 | // sniffers can also get a shot at this data. |
945 | 0 | mContentType.Truncate(); |
946 | 0 | } |
947 | 0 | } |