Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/netwerk/streamconv/converters/nsUnknownDecoder.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2
/* This Source Code Form is subject to the terms of the Mozilla Public
3
 * License, v. 2.0. If a copy of the MPL was not distributed with this
4
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5
6
#include "nsUnknownDecoder.h"
7
#include "nsIPipe.h"
8
#include "nsIInputStream.h"
9
#include "nsIOutputStream.h"
10
#include "nsMimeTypes.h"
11
#include "nsIPrefService.h"
12
#include "nsIPrefBranch.h"
13
14
#include "nsCRT.h"
15
16
#include "nsIMIMEService.h"
17
18
#include "nsIDivertableChannel.h"
19
#include "nsIViewSourceChannel.h"
20
#include "nsIHttpChannel.h"
21
#include "nsIForcePendingChannel.h"
22
#include "nsIEncodedChannel.h"
23
#include "nsIURI.h"
24
#include "nsStringStream.h"
25
#include "nsNetCID.h"
26
#include "nsNetUtil.h"
27
28
#include <algorithm>
29
30
0
// Size, in bytes, of the sniffing buffer (mBuffer) allocated in
// OnStartRequest.  It is also the cap applied to the amount of decoded data
// that the sniffing routines examine.
#define MAX_BUFFER_SIZE 512u
31
32
// nsISupports implementation for the helper listener; it is declared to
// expose nsIStreamListener and nsIRequestObserver.
NS_IMPL_ISUPPORTS(nsUnknownDecoder::ConvertedStreamListener,
33
                  nsIStreamListener,
34
                  nsIRequestObserver)
35
36
// Creates a listener bound to |aDecoder|; OnDataAvailable appends the data it
// receives to that decoder's mDecodedData.
nsUnknownDecoder::ConvertedStreamListener::ConvertedStreamListener(
    nsUnknownDecoder* aDecoder)
  : mDecoder(aDecoder)
{
}
41
42
// Segment-writer callback handed to nsIInputStream::ReadSegments by
// OnDataAvailable.
//
// |closure| is the nsCString that collects the data; |rawSegment| / |count|
// describe the bytes to append.  The whole segment is always reported as
// consumed through |writeCount|.  |inputStream| and |toOffset| are unused.
nsresult
nsUnknownDecoder::ConvertedStreamListener::AppendDataToString(
    nsIInputStream* inputStream,
    void* closure,
    const char* rawSegment,
    uint32_t toOffset,
    uint32_t count,
    uint32_t* writeCount)
{
  auto* target = static_cast<nsCString*>(closure);
  target->Append(rawSegment, count);

  *writeCount = count;
  return NS_OK;
}
56
57
// Start notification for the converted stream.  There is nothing to set up,
// so this always reports success.
NS_IMETHODIMP
nsUnknownDecoder::ConvertedStreamListener::OnStartRequest(
    nsIRequest* request,
    nsISupports* context)
{
  return NS_OK;
}
63
64
// Appends up to |count| bytes from |stream| to the owning decoder's
// mDecodedData.
//
// The decoder's mutex is only held while copying the string in and out; the
// stream itself is read into a local copy without the lock held.  Returns the
// ReadSegments failure code if reading fails (in which case mDecodedData is
// left untouched), NS_OK otherwise.
NS_IMETHODIMP
nsUnknownDecoder::ConvertedStreamListener::OnDataAvailable(
    nsIRequest* request,
    nsISupports* context,
    nsIInputStream* stream,
    uint64_t offset,
    uint32_t count)
{
  // Start from what has been decoded so far.
  nsAutoCString accumulated;
  {
    MutexAutoLock lock(mDecoder->mMutex);
    accumulated = mDecoder->mDecodedData;
  }

  uint32_t bytesRead;
  const nsresult readResult =
    stream->ReadSegments(AppendDataToString, &accumulated, count, &bytesRead);
  if (NS_FAILED(readResult)) {
    return readResult;
  }

  // Publish the extended string back to the decoder.
  MutexAutoLock lock(mDecoder->mMutex);
  mDecoder->mDecodedData = accumulated;
  return NS_OK;
}
87
88
// Stop notification for the converted stream.  No cleanup is required, so
// this always reports success regardless of |status|.
NS_IMETHODIMP
nsUnknownDecoder::ConvertedStreamListener::OnStopRequest(
    nsIRequest* request,
    nsISupports* context,
    nsresult status)
{
  return NS_OK;
}
95
96
// Initializes an empty decoder (no buffer, no content type) and caches the
// "security.requireHTMLsuffix" boolean pref in mRequireHTMLsuffix.  The
// member keeps its default of false when the pref service or the pref itself
// is unavailable.
nsUnknownDecoder::nsUnknownDecoder()
  : mBuffer(nullptr)
  , mBufferLen(0)
  , mRequireHTMLsuffix(false)
  , mMutex("nsUnknownDecoder")
  , mDecodedData("")
{
  nsCOMPtr<nsIPrefBranch> prefBranch = do_GetService(NS_PREFSERVICE_CONTRACTID);
  if (!prefBranch) {
    return;
  }

  bool requireSuffix;
  if (NS_SUCCEEDED(prefBranch->GetBoolPref("security.requireHTMLsuffix",
                                            &requireSuffix))) {
    mRequireHTMLsuffix = requireSuffix;
  }
}
110
111
// Releases the sniffing buffer if one is still allocated.
nsUnknownDecoder::~nsUnknownDecoder()
{
  // delete [] on a null pointer is a no-op, so no explicit guard is needed.
  delete [] mBuffer;
  mBuffer = nullptr;
}
118
119
// ----
120
//
121
// nsISupports implementation...
122
//
123
// ----
124
125
// Reference counting for nsUnknownDecoder.
NS_IMPL_ADDREF(nsUnknownDecoder)
126
NS_IMPL_RELEASE(nsUnknownDecoder)
127
128
0
// Interfaces reported by QueryInterface.  The nsISupports entry names
// nsIStreamListener as the path to use, presumably because nsISupports is
// reachable through more than one of the listed interfaces.
NS_INTERFACE_MAP_BEGIN(nsUnknownDecoder)
129
0
   NS_INTERFACE_MAP_ENTRY(nsIStreamConverter)
130
0
   NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
131
0
   NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
132
0
   NS_INTERFACE_MAP_ENTRY(nsIContentSniffer)
133
0
   NS_INTERFACE_MAP_ENTRY(nsIThreadRetargetableStreamListener)
134
0
   NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIStreamListener)
135
0
NS_INTERFACE_MAP_END
136
137
138
// ----
139
//
140
// nsIStreamConverter methods...
141
//
142
// ----
143
144
// Synchronous conversion is not provided by this converter; callers always
// get NS_ERROR_NOT_IMPLEMENTED and no argument is touched.
NS_IMETHODIMP
nsUnknownDecoder::Convert(nsIInputStream* aFromStream,
                          const char* aFromType,
                          const char* aToType,
                          nsISupports* aCtxt,
                          nsIInputStream** aResultStream)
{
  return NS_ERROR_NOT_IMPLEMENTED;
}
153
154
// Records |aListener| as the downstream listener that will receive the
// On*() notifications once the content type has been determined.
//
// Returns NS_ERROR_FAILURE when |aListener| is null (mNextListener is still
// overwritten with null in that case), NS_OK otherwise.  The type strings and
// |aCtxt| are only checked by the debug assertion.
NS_IMETHODIMP
nsUnknownDecoder::AsyncConvertData(const char* aFromType,
                                   const char* aToType,
                                   nsIStreamListener* aListener,
                                   nsISupports* aCtxt)
{
  NS_ASSERTION(aListener && aFromType && aToType,
               "null pointer passed into multi mixed converter");

  MutexAutoLock lock(mMutex);
  mNextListener = aListener;
  if (!aListener) {
    return NS_ERROR_FAILURE;
  }
  return NS_OK;
}
170
171
// ----
172
//
173
// nsIStreamListener methods...
174
//
175
// ----
176
177
// Receives data from the channel.
//
// While the content type is still unknown, incoming bytes are copied into the
// sniffing buffer (at most MAX_BUFFER_SIZE in total).  As soon as more data
// has arrived than was buffered, the content type is determined and the
// buffered bytes are replayed to the next listener through
// FireListenerNotifications.  Whatever remains in |aStream| is then forwarded
// directly to the next listener, unless the channel is being diverted to the
// parent.
//
// Returns NS_ERROR_FAILURE if no next listener is set,
// NS_ERROR_OUT_OF_MEMORY if the sniffing buffer is missing, or the result of
// the stream read / listener notifications.
NS_IMETHODIMP
nsUnknownDecoder::OnDataAvailable(nsIRequest* request,
                                  nsISupports *aCtxt,
                                  nsIInputStream *aStream,
                                  uint64_t aSourceOffset,
                                  uint32_t aCount)
{
  nsresult rv = NS_OK;

  bool contentTypeEmpty;
  {
    MutexAutoLock lock(mMutex);
    if (!mNextListener) return NS_ERROR_FAILURE;

    contentTypeEmpty = mContentType.IsEmpty();
  }

  if (contentTypeEmpty) {
    // If the buffer has not been allocated by now, just fail...
    if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY;

    //
    // Determine how much of the stream should be read to fill up the
    // sniffer buffer...
    //
    // Clamp against the remaining capacity rather than testing
    // "mBufferLen + aCount", which can wrap around for very large aCount
    // values and would then let the read run past the end of the buffer.
    const uint32_t count =
      std::min<uint32_t>(aCount, MAX_BUFFER_SIZE - mBufferLen);

    // Read the data into the buffer...
    uint32_t len = 0;
    rv = aStream->Read((mBuffer+mBufferLen), count, &len);
    if (NS_FAILED(rv)) return rv;

    mBufferLen += len;
    aCount     -= len;

    if (aCount) {
      //
      // Adjust the source offset...  The call to FireListenerNotifications(...)
      // will make the first OnDataAvailable(...) call with an offset of 0.
      // So, this offset needs to be adjusted to reflect that...
      //
      aSourceOffset += mBufferLen;

      DetermineContentType(request);

      rv = FireListenerNotifications(request, aCtxt);
    }
  }

  // Must not fire ODA again if it failed once
  if (aCount && NS_SUCCEEDED(rv)) {
#ifdef DEBUG
    {
      MutexAutoLock lock(mMutex);
      NS_ASSERTION(!mContentType.IsEmpty(),
                   "Content type should be known by now.");
    }
#endif

    nsCOMPtr<nsIDivertableChannel> divertable = do_QueryInterface(request);
    if (divertable) {
      // Initialized so that a failing getter cannot leave us branching on an
      // indeterminate value.
      bool diverting = false;
      divertable->GetDivertingToParent(&diverting);
      if (diverting) {
        // The channel is diverted to the parent; do not send any more data
        // here.
        return rv;
      }
    }

    // Take a reference under the lock, then call out without holding it.
    nsCOMPtr<nsIStreamListener> listener;
    {
      MutexAutoLock lock(mMutex);
      listener = mNextListener;
    }
    rv = listener->OnDataAvailable(request, aCtxt, aStream,
                                   aSourceOffset, aCount);
  }

  return rv;
}
262
263
// ----
264
//
265
// nsIRequestObserver methods...
266
//
267
// ----
268
269
// Prepares the decoder for an incoming request.
//
// Allocates the MAX_BUFFER_SIZE sniffing buffer if it does not exist yet and,
// for divertable channels, calls UnknownDecoderInvolvedKeepData().  The start
// notification is deliberately NOT forwarded to the next listener here; that
// happens later, from FireListenerNotifications.
//
// Returns NS_ERROR_FAILURE if no next listener is set and
// NS_ERROR_OUT_OF_MEMORY if the buffer could not be allocated.
NS_IMETHODIMP
nsUnknownDecoder::OnStartRequest(nsIRequest* request, nsISupports *aCtxt)
{
  {
    MutexAutoLock lock(mMutex);
    if (!mNextListener) {
      return NS_ERROR_FAILURE;
    }
  }

  nsresult status = NS_OK;

  // Allocate the sniffer buffer...
  if (!mBuffer) {
    mBuffer = new char[MAX_BUFFER_SIZE];
    if (!mBuffer) {
      status = NS_ERROR_OUT_OF_MEMORY;
    }
  }

  nsCOMPtr<nsIDivertableChannel> divertable = do_QueryInterface(request);
  if (divertable) {
    divertable->UnknownDecoderInvolvedKeepData();
  }

  // Do not pass the OnStartRequest on to the next listener (yet)...
  return status;
}
296
297
// Handles the end of the request.
//
// If the content type is still unknown at this point, it is determined now
// from whatever was buffered and the pending start/data notifications are
// fired.  While those fire, the channel (if it supports
// nsIForcePendingChannel) is forced to report itself as pending.  A failure
// of those notifications replaces |aStatus|.
//
// Finally the next listener is detached from this decoder and receives
// OnStopRequest; its result is returned.  Returns NS_ERROR_FAILURE if no
// next listener is set on entry.
NS_IMETHODIMP
nsUnknownDecoder::OnStopRequest(nsIRequest* request, nsISupports *aCtxt,
                                nsresult aStatus)
{
  bool typeStillUnknown;
  {
    MutexAutoLock lock(mMutex);
    if (!mNextListener) {
      return NS_ERROR_FAILURE;
    }
    typeStillUnknown = mContentType.IsEmpty();
  }

  if (typeStillUnknown) {
    // The total amount of data is less than the size of the sniffer buffer.
    // Analyze the buffer now...
    DetermineContentType(request);

    // Make sure channel listeners see channel as pending while we call
    // OnStartRequest/OnDataAvailable, even though the underlying channel
    // has already hit OnStopRequest.
    nsCOMPtr<nsIForcePendingChannel> forcePendingChannel = do_QueryInterface(request);
    if (forcePendingChannel) {
      forcePendingChannel->ForcePending(true);
    }

    const nsresult fireResult = FireListenerNotifications(request, aCtxt);
    if (NS_FAILED(fireResult)) {
      aStatus = fireResult;
    }

    // The pending state must be cleared again before OnStopRequest is sent.
    if (forcePendingChannel) {
      forcePendingChannel->ForcePending(false);
    }
  }

  // Detach the listener under the lock; notify it without the lock held.
  nsCOMPtr<nsIStreamListener> listener;
  {
    MutexAutoLock lock(mMutex);
    listener = mNextListener;
    mNextListener = nullptr;
  }

  return listener->OnStopRequest(request, aCtxt, aStatus);
}
348
349
// ----
350
//
351
// nsIContentSniffer methods...
352
//
353
// ----
354
// nsIContentSniffer entry point: sniffs |aLength| bytes at |aData| and
// returns the detected type in |type|.
//
// The caller's data is temporarily installed as the sniffing buffer and the
// regular DetermineContentType logic is run on it.  Afterwards the buffer
// pointer, the buffer length and mContentType are all reset, so no state is
// kept between calls.  Returns NS_ERROR_NOT_AVAILABLE when no type was found.
NS_IMETHODIMP
nsUnknownDecoder::GetMIMETypeFromContent(nsIRequest* aRequest,
                                         const uint8_t* aData,
                                         uint32_t aLength,
                                         nsACString& type)
{
  // This is only used by sniffer, therefore we do not need to lock anything
  // here.

  // Borrow the caller's bytes for the duration of the sniff.
  mBuffer = const_cast<char*>(reinterpret_cast<const char*>(aData));
  mBufferLen = aLength;

  DetermineContentType(aRequest);

  // Drop the borrowed pointer again; it is not owned by this object.
  mBuffer = nullptr;
  mBufferLen = 0;

  type.Assign(mContentType);
  mContentType.Truncate();

  if (type.IsEmpty()) {
    return NS_ERROR_NOT_AVAILABLE;
  }
  return NS_OK;
}
372
373
374
// Actual sniffing code
375
376
// Decides whether HTML/XML sniffing may be applied to |aRequest|.
//
// Sniffing is always allowed when mRequireHTMLsuffix is false.  Otherwise it
// is allowed only if the request is a channel whose URI can be obtained and
// whose scheme is not "file"; any failure along the way denies sniffing.
bool nsUnknownDecoder::AllowSniffing(nsIRequest* aRequest)
{
  if (!mRequireHTMLsuffix) {
    return true;
  }

  nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
  if (!channel) {
    NS_ERROR("QI failed");
    return false;
  }

  nsCOMPtr<nsIURI> uri;
  const nsresult uriResult = channel->GetURI(getter_AddRefs(uri));
  if (NS_FAILED(uriResult) || !uri) {
    return false;
  }

  bool isFileScheme = false;
  const nsresult schemeResult = uri->SchemeIs("file", &isFileScheme);
  return NS_SUCCEEDED(schemeResult) && !isFileScheme;
}
400
401
/**
402
 * This is the array of sniffer entries that depend on "magic numbers"
403
 * in the file.  Each entry has either a type associated with it (set
404
 * these with the SNIFFER_ENTRY macro) or a function to be executed
405
 * (set these with the SNIFFER_ENTRY_WITH_FUNC macro).  The function
406
 * should take a single nsIRequest* and returns bool -- true if
407
 * it sets mContentType, false otherwise
408
 */
409
// Entries are tried in array order by DetermineContentType: an entry with a
// MIME type ends the search as soon as its prefix matches, while an entry
// with a function ends it only if the function returns true.
nsUnknownDecoder::nsSnifferEntry nsUnknownDecoder::sSnifferEntries[] = {
410
  SNIFFER_ENTRY("%PDF-", APPLICATION_PDF),
411
412
  SNIFFER_ENTRY("%!PS-Adobe-", APPLICATION_POSTSCRIPT),
413
414
  // Files that start with mailbox delimiters are provisionally treated as
415
  // text/plain
416
  SNIFFER_ENTRY("From", TEXT_PLAIN),
417
  SNIFFER_ENTRY(">From", TEXT_PLAIN),
418
419
  // If the buffer begins with "#!" then it is a script of
420
  // some sort...  "Scripts" can include arbitrary data to be passed
421
  // to an interpreter, so we need to decide whether we can call this
422
  // text or whether it's data.
423
  SNIFFER_ENTRY_WITH_FUNC("#!", &nsUnknownDecoder::LastDitchSniff),
424
425
  // XXXbz should (and can) we also include the various ways that <?xml can
426
  // appear as UTF-16 and such?  See http://www.w3.org/TR/REC-xml#sec-guessing
427
  SNIFFER_ENTRY_WITH_FUNC("<?xml", &nsUnknownDecoder::SniffForXML)
428
};
429
430
uint32_t nsUnknownDecoder::sSnifferEntryNum =
431
  sizeof(nsUnknownDecoder::sSnifferEntries) /
432
    sizeof(nsUnknownDecoder::nsSnifferEntry);
433
434
// Works out mContentType for the buffered data.  Does nothing if a content
// type is already set.
//
// Steps, in order, stopping at the first one that yields a type:
//   1. For HTTP channels, try to decode the buffer through the channel's
//      content conversions; if that produced data, the magic-number and
//      external-sniffer steps look at (at most MAX_BUFFER_SIZE bytes of) the
//      decoded data instead of the raw buffer.
//   2. Magic-number prefixes from sSnifferEntries.
//   3. Sniffers registered under NS_DATA_SNIFFER_CATEGORY.
//   4. SniffForHTML, then SniffURI.
//   5. LastDitchSniff, which always sets a type.
void nsUnknownDecoder::DetermineContentType(nsIRequest* aRequest)
{
  {
    MutexAutoLock lock(mMutex);
    NS_ASSERTION(mContentType.IsEmpty(), "Content type is already known.");
    if (!mContentType.IsEmpty()) {
      return;
    }
  }

  const char* testData = mBuffer;
  uint32_t testDataLen = mBufferLen;

  // Check if data are compressed.
  nsCOMPtr<nsIHttpChannel> channel(do_QueryInterface(aRequest));
  nsAutoCString decodedData;

  if (channel) {
    // ConvertEncodedData is always called only on a single thread for each
    // instance of an object.
    nsresult rv = ConvertEncodedData(aRequest, mBuffer, mBufferLen);
    if (NS_SUCCEEDED(rv)) {
      MutexAutoLock lock(mMutex);
      decodedData = mDecodedData;
    }
    if (!decodedData.IsEmpty()) {
      testData = decodedData.get();
      testDataLen = std::min(decodedData.Length(), MAX_BUFFER_SIZE);
    }
  }

  // First, run through all the types we can detect reliably based on
  // magic numbers
  for (uint32_t i = 0; i < sSnifferEntryNum; ++i) {
    const nsSnifferEntry& entry = sSnifferEntries[i];

    const bool prefixMatches =
      testDataLen >= entry.mByteLen &&                        // enough data
      memcmp(testData, entry.mBytes, entry.mByteLen) == 0;    // and type matches
    if (!prefixMatches) {
      continue;
    }

    NS_ASSERTION(sSnifferEntries[i].mMimeType ||
                 sSnifferEntries[i].mContentTypeSniffer,
                 "Must have either a type string or a function to set the type");
    NS_ASSERTION(!sSnifferEntries[i].mMimeType ||
                 !sSnifferEntries[i].mContentTypeSniffer,
                 "Both a type string and a type sniffing function set;"
                 " using type string");

    if (entry.mMimeType) {
      MutexAutoLock lock(mMutex);
      mContentType = entry.mMimeType;
      NS_ASSERTION(!mContentType.IsEmpty(),
                   "Content type should be known by now.");
      return;
    }

    if ((this->*(entry.mContentTypeSniffer))(aRequest)) {
#ifdef DEBUG
      MutexAutoLock lock(mMutex);
      NS_ASSERTION(!mContentType.IsEmpty(),
                   "Content type should be known by now.");
#endif
      return;
    }
  }

  // Next, give the registered data sniffers a chance.
  nsAutoCString sniffedType;
  NS_SniffContent(NS_DATA_SNIFFER_CATEGORY, aRequest,
                  reinterpret_cast<const uint8_t*>(testData), testDataLen,
                  sniffedType);
  {
    MutexAutoLock lock(mMutex);
    mContentType = sniffedType;
    if (!mContentType.IsEmpty()) {
      return;
    }
  }

  // Look for HTML; if that fails we don't know what this is yet, so before
  // giving up try the URI from the request.
  if (SniffForHTML(aRequest) || SniffURI(aRequest)) {
#ifdef DEBUG
    MutexAutoLock lock(mMutex);
    NS_ASSERTION(!mContentType.IsEmpty(),
                 "Content type should be known by now.");
#endif
    return;
  }

  LastDitchSniff(aRequest);
#ifdef DEBUG
  MutexAutoLock lock(mMutex);
  NS_ASSERTION(!mContentType.IsEmpty(),
               "Content type should be known by now.");
#endif
}
531
532
// Checks whether the sniffed data looks like HTML and, if so, sets
// mContentType to TEXT_HTML and returns true.
//
// The data examined is the decoded data when there is any (capped at
// MAX_BUFFER_SIZE bytes), otherwise the raw buffer.  After skipping leading
// ASCII whitespace the data must start with '<' followed by '!', '?', or one
// of a fixed list of tag names (compared case-insensitively) that is itself
// followed by a space or '>'.
bool nsUnknownDecoder::SniffForHTML(nsIRequest* aRequest)
{
  /*
   * To prevent a possible attack, we will not consider this to be
   * html content if it comes from the local file system and our prefs
   * are set right
   */
  if (!AllowSniffing(aRequest)) {
    return false;
  }

  MutexAutoLock lock(mMutex);

  // Select the byte range to inspect.
  const char* cursor;
  const char* limit;
  if (mDecodedData.IsEmpty()) {
    cursor = mBuffer;
    limit = mBuffer + mBufferLen;
  } else {
    cursor = mDecodedData.get();
    limit = mDecodedData.get() + std::min(mDecodedData.Length(),
                                          MAX_BUFFER_SIZE);
  }

  // skip leading whitespace
  while (cursor != limit && nsCRT::IsAsciiSpace(*cursor)) {
    ++cursor;
  }

  // did we find something like a start tag?
  if (cursor == limit || *cursor != '<') {
    return false;
  }
  ++cursor;
  if (cursor == limit) {
    return false;
  }

  // If we seem to be SGML or XML and we got down here, just pretend we're HTML
  if (*cursor == '!' || *cursor == '?') {
    mContentType = TEXT_HTML;
    return true;
  }

  uint32_t remaining = limit - cursor;
  // We use sizeof(_tagstr) below because that's the length of _tagstr
  // with the one char " " or ">" appended.
#define MATCHES_TAG(_tagstr)                                              \
  (remaining >= sizeof(_tagstr) &&                                        \
   (PL_strncasecmp(cursor, _tagstr " ", sizeof(_tagstr)) == 0 ||          \
    PL_strncasecmp(cursor, _tagstr ">", sizeof(_tagstr)) == 0))

  const bool startsWithKnownTag =
      MATCHES_TAG("html")     ||
      MATCHES_TAG("frameset") ||
      MATCHES_TAG("body")     ||
      MATCHES_TAG("head")     ||
      MATCHES_TAG("script")   ||
      MATCHES_TAG("iframe")   ||
      MATCHES_TAG("a")        ||
      MATCHES_TAG("img")      ||
      MATCHES_TAG("table")    ||
      MATCHES_TAG("title")    ||
      MATCHES_TAG("link")     ||
      MATCHES_TAG("base")     ||
      MATCHES_TAG("style")    ||
      MATCHES_TAG("div")      ||
      MATCHES_TAG("p")        ||
      MATCHES_TAG("font")     ||
      MATCHES_TAG("applet")   ||
      MATCHES_TAG("meta")     ||
      MATCHES_TAG("center")   ||
      MATCHES_TAG("form")     ||
      MATCHES_TAG("isindex")  ||
      MATCHES_TAG("h1")       ||
      MATCHES_TAG("h2")       ||
      MATCHES_TAG("h3")       ||
      MATCHES_TAG("h4")       ||
      MATCHES_TAG("h5")       ||
      MATCHES_TAG("h6")       ||
      MATCHES_TAG("b")        ||
      MATCHES_TAG("pre");

#undef MATCHES_TAG

  if (!startsWithKnownTag) {
    return false;
  }

  mContentType = TEXT_HTML;
  return true;
}
619
620
// Sniffer used for data starting with "<?xml".
//
// Returns false (leaving mContentType alone) when sniffing is not allowed for
// this request.  Otherwise the type derived from the URI is used when SniffURI
// succeeds, with TEXT_XML as the fallback, and true is returned.
bool nsUnknownDecoder::SniffForXML(nsIRequest* aRequest)
{
  // Just like HTML, this should be able to be shut off.
  if (!AllowSniffing(aRequest)) {
    return false;
  }

  // First see whether we can glean anything from the uri...
  if (SniffURI(aRequest)) {
    return true;
  }

  // Oh well; just generic XML will have to do
  MutexAutoLock lock(mMutex);
  mContentType = TEXT_XML;
  return true;
}
636
637
// Tries to derive the content type from the request's URI via the MIME
// service ("@mozilla.org/mime;1").
//
// On success mContentType is set to the type reported by GetTypeFromURI and
// true is returned.  Returns false, without touching mContentType, if the
// MIME service, the channel or the URI is unavailable or the lookup fails.
bool nsUnknownDecoder::SniffURI(nsIRequest* aRequest)
{
  nsCOMPtr<nsIMIMEService> mimeService(do_GetService("@mozilla.org/mime;1"));
  if (!mimeService) {
    return false;
  }

  nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
  if (!channel) {
    return false;
  }

  nsCOMPtr<nsIURI> uri;
  nsresult result = channel->GetURI(getter_AddRefs(uri));
  if (NS_FAILED(result) || !uri) {
    return false;
  }

  nsAutoCString type;
  result = mimeService->GetTypeFromURI(uri, type);
  if (NS_FAILED(result)) {
    return false;
  }

  MutexAutoLock lock(mMutex);
  mContentType = type;
  return true;
}
659
660
// This macro is based on RFC 2046 Section 4.1.2.  Treat any char 0-31
661
// except the 9-13 range (\t, \n, \v, \f, \r) and char 27 (used by
662
// encodings like Shift_JIS) as non-text.  Because the first test compares
// the value as an unsigned char, every byte above 31 -- including bytes with
// the high bit set -- counts as text.
663
#define IS_TEXT_CHAR(ch)                                     \
664
0
  (((unsigned char)(ch)) > 31 || (9 <= (ch) && (ch) <= 13) || (ch) == 27)
665
666
// Final fallback: classifies the data as TEXT_PLAIN or
// APPLICATION_OCTET_STREAM.  Always sets mContentType and returns true.
//
// The data examined is the decoded data when there is any (capped at
// MAX_BUFFER_SIZE bytes), otherwise the raw buffer.  Data that starts with a
// recognised byte-order mark, or that consists only of characters accepted by
// IS_TEXT_CHAR, is text/plain; everything else is application/octet-stream.
bool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest)
{
  // All we can do now is try to guess whether this is text/plain or
  // application/octet-stream

  MutexAutoLock lock(mMutex);

  const char* testData;
  uint32_t testDataLen;
  if (mDecodedData.IsEmpty()) {
    testData = mBuffer;
    testDataLen = mBufferLen;
  } else {
    testData = mDecodedData.get();
    testDataLen = std::min(mDecodedData.Length(), MAX_BUFFER_SIZE);
  }

  // First, check for a BOM.  If we see one, assume this is text/plain
  // in whatever encoding.  If there is a BOM _and_ text we will
  // always have at least 4 bytes in the buffer (since the 2-byte BOMs
  // are for 2-byte encodings and the UTF-8 BOM is 3 bytes).
  if (testDataLen >= 4) {
    const unsigned char* buf = reinterpret_cast<const unsigned char*>(testData);

    const bool utf16BigEndian = buf[0] == 0xFE && buf[1] == 0xFF;
    // UTF-16 or UCS-4, Little Endian
    const bool littleEndian = buf[0] == 0xFF && buf[1] == 0xFE;
    const bool utf8 = buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF;
    const bool ucs4BigEndian =
      buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF;

    if (utf16BigEndian || littleEndian || utf8 || ucs4BigEndian) {
      mContentType = TEXT_PLAIN;
      return true;
    }
  }

  // Now see whether the buffer has any non-text chars.  If not, then let's
  // just call it text/plain...
  const bool onlyTextChars =
    std::all_of(testData, testData + testDataLen,
                [](char ch) { return IS_TEXT_CHAR(ch); });

  if (onlyTextChars) {
    mContentType = TEXT_PLAIN;
  } else {
    mContentType = APPLICATION_OCTET_STREAM;
  }

  return true;
}
715
716
717
// Delivers the deferred notifications to the next listener once the content
// type has been determined.
//
// In order:
//   1. Sets the sniffed content type on the channel (or the original content
//      type on a view-source channel).  If that fails, the request is
//      cancelled, the listener still gets OnStartRequest, and the function
//      returns early.
//   2. Calls the listener's OnStartRequest and tells a divertable channel
//      that this has happened.  If the channel is diverting to the parent,
//      nothing further is sent.
//   3. Lets an encoded channel install its content converters in front of
//      the listener; the resulting listener replaces mNextListener.
//   4. Replays the buffered bytes through a pipe as the first
//      OnDataAvailable call (offset 0), unless the request has failed.
//   5. Frees the sniffing buffer.
//
// Returns NS_ERROR_FAILURE if no next listener is set and
// NS_ERROR_OUT_OF_MEMORY if the sniffing buffer is missing.
nsresult nsUnknownDecoder::FireListenerNotifications(nsIRequest* request,
                                                     nsISupports *aCtxt)
{
  nsresult rv = NS_OK;

  nsCOMPtr<nsIStreamListener> listener;
  nsAutoCString contentType;
  {
    MutexAutoLock lock(mMutex);
    if (!mNextListener) return NS_ERROR_FAILURE;

    listener = mNextListener;
    contentType = mContentType;
  }

  if (!contentType.IsEmpty()) {
    nsCOMPtr<nsIViewSourceChannel> viewSourceChannel =
      do_QueryInterface(request);
    if (viewSourceChannel) {
      rv = viewSourceChannel->SetOriginalContentType(contentType);
    } else {
      nsCOMPtr<nsIChannel> channel = do_QueryInterface(request, &rv);
      if (NS_SUCCEEDED(rv)) {
        // Set the new content type on the channel...
        rv = channel->SetContentType(contentType);
      }
    }

    NS_ASSERTION(NS_SUCCEEDED(rv), "Unable to set content type on channel!");

    if (NS_FAILED(rv)) {
      // Cancel the request to make sure it has the correct status if
      // mNextListener looks at it.
      request->Cancel(rv);
      listener->OnStartRequest(request, aCtxt);

      nsCOMPtr<nsIDivertableChannel> divertable = do_QueryInterface(request);
      if (divertable) {
        rv = divertable->UnknownDecoderInvolvedOnStartRequestCalled();
      }

      return rv;
    }
  }

  // Fire the OnStartRequest(...)
  rv = listener->OnStartRequest(request, aCtxt);

  nsCOMPtr<nsIDivertableChannel> divertable = do_QueryInterface(request);
  if (divertable) {
    // NOTE(review): this assignment replaces the result of OnStartRequest
    // above, so a failure reported by the listener is not propagated for
    // divertable channels -- confirm that this is intended.
    rv = divertable->UnknownDecoderInvolvedOnStartRequestCalled();

    // Initialized so that a failing getter cannot leave us branching on an
    // indeterminate value.
    bool diverting = false;
    divertable->GetDivertingToParent(&diverting);
    if (diverting) {
      // The channel is diverted to the parent; do not send any more data
      // here.
      return rv;
    }
  }

  if (NS_SUCCEEDED(rv)) {
    // install stream converter if required
    nsCOMPtr<nsIEncodedChannel> encodedChannel = do_QueryInterface(request);
    if (encodedChannel) {
      nsCOMPtr<nsIStreamListener> listenerNew;
      rv = encodedChannel->DoApplyContentConversions(listener, getter_AddRefs(listenerNew), aCtxt);
      if (NS_SUCCEEDED(rv) && listenerNew) {
        MutexAutoLock lock(mMutex);
        mNextListener = listenerNew;
        listener = listenerNew;
      }
    }
  }

  if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY;

  // If the request was canceled, then we need to treat that equivalently
  // to an error returned by OnStartRequest.
  if (NS_SUCCEEDED(rv))
    request->GetStatus(&rv);

  // Fire the first OnDataAvailable for the data that was read from the
  // stream into the sniffer buffer...
  if (NS_SUCCEEDED(rv) && (mBufferLen > 0)) {
    uint32_t len = 0;
    nsCOMPtr<nsIInputStream> in;
    nsCOMPtr<nsIOutputStream> out;

    // Create a pipe and fill it with the data from the sniffer buffer.
    rv = NS_NewPipe(getter_AddRefs(in), getter_AddRefs(out),
                    MAX_BUFFER_SIZE, MAX_BUFFER_SIZE);

    if (NS_SUCCEEDED(rv)) {
      rv = out->Write(mBuffer, mBufferLen, &len);
      if (NS_SUCCEEDED(rv)) {
        if (len == mBufferLen) {
          rv = listener->OnDataAvailable(request, aCtxt, in, 0, len);
        } else {
          NS_ERROR("Unable to write all the data into the pipe.");
          rv = NS_ERROR_FAILURE;
        }
      }
    }
  }

  // The buffered data has been handed over (or dropped); release the buffer.
  delete [] mBuffer;
  mBuffer = nullptr;
  mBufferLen = 0;

  return rv;
}
827
828
829
// Runs |length| bytes at |data| through the channel's content conversions
// and collects the output in mDecodedData.
//
// mDecodedData is cleared first.  If |request| is not an encoded channel, or
// the channel supplies no converter listener, nothing else happens.
// Otherwise the converter chain, terminated by a ConvertedStreamListener, is
// driven with a start notification, the data (when |length| is non-zero) and
// a stop notification.
//
// Returns the failure from installing the converters, creating or filling
// the input stream, or delivering the data; on those early exits the stop
// notification is not sent.
nsresult
nsUnknownDecoder::ConvertEncodedData(nsIRequest* request,
                                     const char* data,
                                     uint32_t length)
{
  {
    MutexAutoLock lock(mMutex);
    mDecodedData = "";
  }

  nsCOMPtr<nsIEncodedChannel> encodedChannel(do_QueryInterface(request));
  if (!encodedChannel) {
    return NS_OK;
  }

  RefPtr<ConvertedStreamListener> strListener =
    new ConvertedStreamListener(this);

  nsCOMPtr<nsIStreamListener> listener;
  nsresult rv = encodedChannel->DoApplyContentConversions(strListener,
                                                          getter_AddRefs(listener),
                                                          nullptr);
  if (NS_FAILED(rv) || !listener) {
    return rv;
  }

  listener->OnStartRequest(request, nullptr);

  if (length) {
    nsCOMPtr<nsIStringInputStream> rawStream =
      do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID);
    if (!rawStream) {
      return NS_ERROR_FAILURE;
    }

    rv = rawStream->SetData(data, length);
    NS_ENSURE_SUCCESS(rv, rv);

    rv = listener->OnDataAvailable(request, nullptr, rawStream, 0,
                                   length);
    NS_ENSURE_SUCCESS(rv, rv);
  }

  listener->OnStopRequest(request, nullptr, NS_OK);
  return rv;
}
877
878
//
879
// nsIThreadRetargetableStreamListener methods
880
//
881
// Forwards the retargeting check to the next listener.  Returns
// NS_ERROR_NO_INTERFACE when there is no next listener or it does not
// implement nsIThreadRetargetableStreamListener.
NS_IMETHODIMP
nsUnknownDecoder::CheckListenerChain()
{
  nsCOMPtr<nsIThreadRetargetableStreamListener> retargetable;
  {
    MutexAutoLock lock(mMutex);
    retargetable = do_QueryInterface(mNextListener);
  }

  if (retargetable) {
    return retargetable->CheckListenerChain();
  }
  return NS_ERROR_NO_INTERFACE;
}
895
896
void
897
nsBinaryDetector::DetermineContentType(nsIRequest* aRequest)
898
0
{
899
0
  nsCOMPtr<nsIHttpChannel> httpChannel = do_QueryInterface(aRequest);
900
0
  if (!httpChannel) {
901
0
    return;
902
0
  }
903
0
904
0
  // It's an HTTP channel.  Check for the text/plain mess
905
0
  nsAutoCString contentTypeHdr;
906
0
  Unused << httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Type"),
907
0
                                           contentTypeHdr);
908
0
  nsAutoCString contentType;
909
0
  httpChannel->GetContentType(contentType);
910
0
911
0
  // Make sure to do a case-sensitive exact match comparison here.  Apache
912
0
  // 1.x just sends text/plain for "unknown", while Apache 2.x sends
913
0
  // text/plain with a ISO-8859-1 charset.  Debian's Apache version, just to
914
0
  // be different, sends text/plain with iso-8859-1 charset.  For extra fun,
915
0
  // FC7, RHEL4, and Ubuntu Feisty send charset=UTF-8.  Don't do general
916
0
  // case-insensitive comparison, since we really want to apply this crap as
917
0
  // rarely as we can.
918
0
  if (!contentType.EqualsLiteral("text/plain") ||
919
0
      (!contentTypeHdr.EqualsLiteral("text/plain") &&
920
0
       !contentTypeHdr.EqualsLiteral("text/plain; charset=ISO-8859-1") &&
921
0
       !contentTypeHdr.EqualsLiteral("text/plain; charset=iso-8859-1") &&
922
0
       !contentTypeHdr.EqualsLiteral("text/plain; charset=UTF-8"))) {
923
0
    return;
924
0
  }
925
0
926
0
  // Check whether we have content-encoding.  If we do, don't try to
927
0
  // detect the type.
928
0
  // XXXbz we could improve this by doing a local decompress if we
929
0
  // wanted, I'm sure.
930
0
  nsAutoCString contentEncoding;
931
0
  Unused << httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Encoding"),
932
0
                                           contentEncoding);
933
0
  if (!contentEncoding.IsEmpty()) {
934
0
    return;
935
0
  }
936
0
937
0
  LastDitchSniff(aRequest);
938
0
  MutexAutoLock lock(mMutex);
939
0
  if (mContentType.EqualsLiteral(APPLICATION_OCTET_STREAM)) {
940
0
    // We want to guess at it instead
941
0
    mContentType = APPLICATION_GUESS_FROM_EXT;
942
0
  } else {
943
0
    // Let the text/plain type we already have be, so that other content
944
0
    // sniffers can also get a shot at this data.
945
0
    mContentType.Truncate();
946
0
  }
947
0
}