Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : #include "nsUnknownDecoder.h"
7 : #include "nsIPipe.h"
8 : #include "nsIInputStream.h"
9 : #include "nsIOutputStream.h"
10 : #include "nsMimeTypes.h"
11 : #include "nsIPrefService.h"
12 : #include "nsIPrefBranch.h"
13 :
14 : #include "nsCRT.h"
15 :
16 : #include "nsIMIMEService.h"
17 :
18 : #include "nsIDivertableChannel.h"
19 : #include "nsIViewSourceChannel.h"
20 : #include "nsIHttpChannel.h"
21 : #include "nsIForcePendingChannel.h"
22 : #include "nsIEncodedChannel.h"
23 : #include "nsIURI.h"
24 : #include "nsStringStream.h"
25 : #include "nsNetCID.h"
26 : #include "nsNetUtil.h"
27 :
28 : #include <algorithm>
29 :
// Capacity, in bytes, of the sniffer buffer (mBuffer). It is also the upper
// bound on how much decoded data the sniffing routines inspect.
#define MAX_BUFFER_SIZE 512u
31 :
32 0 : NS_IMPL_ISUPPORTS(nsUnknownDecoder::ConvertedStreamListener,
33 : nsIStreamListener,
34 : nsIRequestObserver)
35 :
36 0 : nsUnknownDecoder::ConvertedStreamListener::
37 0 : ConvertedStreamListener(nsUnknownDecoder *aDecoder)
38 : {
39 0 : mDecoder = aDecoder;
40 0 : }
41 :
42 0 : nsUnknownDecoder::ConvertedStreamListener::~ConvertedStreamListener()
43 : {
44 0 : }
45 :
46 : nsresult
47 0 : nsUnknownDecoder::ConvertedStreamListener::
48 : AppendDataToString(nsIInputStream* inputStream,
49 : void* closure,
50 : const char* rawSegment,
51 : uint32_t toOffset,
52 : uint32_t count,
53 : uint32_t* writeCount)
54 : {
55 0 : nsCString* decodedData = static_cast<nsCString*>(closure);
56 0 : decodedData->Append(rawSegment, count);
57 0 : *writeCount = count;
58 0 : return NS_OK;
59 : }
60 :
61 : NS_IMETHODIMP
62 0 : nsUnknownDecoder::ConvertedStreamListener::OnStartRequest(nsIRequest* request,
63 : nsISupports* context)
64 : {
65 0 : return NS_OK;
66 : }
67 :
68 : NS_IMETHODIMP
69 0 : nsUnknownDecoder::ConvertedStreamListener::
70 : OnDataAvailable(nsIRequest* request,
71 : nsISupports* context,
72 : nsIInputStream* stream,
73 : uint64_t offset,
74 : uint32_t count)
75 : {
76 : uint32_t read;
77 0 : nsAutoCString decodedData;
78 : nsresult rv = stream->ReadSegments(AppendDataToString, &decodedData, count,
79 0 : &read);
80 0 : if (NS_FAILED(rv)) {
81 0 : return rv;
82 : }
83 0 : MutexAutoLock lock(mDecoder->mMutex);
84 0 : mDecoder->mDecodedData = decodedData;
85 0 : return NS_OK;
86 : }
87 :
88 : NS_IMETHODIMP
89 0 : nsUnknownDecoder::ConvertedStreamListener::OnStopRequest(nsIRequest* request,
90 : nsISupports* context,
91 : nsresult status)
92 : {
93 0 : return NS_OK;
94 : }
95 :
96 1 : nsUnknownDecoder::nsUnknownDecoder()
97 : : mBuffer(nullptr)
98 : , mBufferLen(0)
99 : , mRequireHTMLsuffix(false)
100 : , mMutex("nsUnknownDecoder")
101 1 : , mDecodedData("")
102 : {
103 2 : nsCOMPtr<nsIPrefBranch> prefs = do_GetService(NS_PREFSERVICE_CONTRACTID);
104 1 : if (prefs) {
105 : bool val;
106 1 : if (NS_SUCCEEDED(prefs->GetBoolPref("security.requireHTMLsuffix", &val)))
107 0 : mRequireHTMLsuffix = val;
108 : }
109 1 : }
110 :
111 0 : nsUnknownDecoder::~nsUnknownDecoder()
112 : {
113 0 : if (mBuffer) {
114 0 : delete [] mBuffer;
115 0 : mBuffer = nullptr;
116 : }
117 0 : }
118 :
119 : // ----
120 : //
121 : // nsISupports implementation...
122 : //
123 : // ----
124 :
125 8 : NS_IMPL_ADDREF(nsUnknownDecoder)
126 6 : NS_IMPL_RELEASE(nsUnknownDecoder)
127 :
128 5 : NS_INTERFACE_MAP_BEGIN(nsUnknownDecoder)
129 5 : NS_INTERFACE_MAP_ENTRY(nsIStreamConverter)
130 5 : NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
131 5 : NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
132 5 : NS_INTERFACE_MAP_ENTRY(nsIContentSniffer)
133 2 : NS_INTERFACE_MAP_ENTRY(nsIThreadRetargetableStreamListener)
134 2 : NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIStreamListener)
135 0 : NS_INTERFACE_MAP_END
136 :
137 :
138 : // ----
139 : //
140 : // nsIStreamConverter methods...
141 : //
142 : // ----
143 :
144 : NS_IMETHODIMP
145 0 : nsUnknownDecoder::Convert(nsIInputStream *aFromStream,
146 : const char *aFromType,
147 : const char *aToType,
148 : nsISupports *aCtxt,
149 : nsIInputStream **aResultStream)
150 : {
151 0 : return NS_ERROR_NOT_IMPLEMENTED;
152 : }
153 :
154 : NS_IMETHODIMP
155 0 : nsUnknownDecoder::AsyncConvertData(const char *aFromType,
156 : const char *aToType,
157 : nsIStreamListener *aListener,
158 : nsISupports *aCtxt)
159 : {
160 0 : NS_ASSERTION(aListener && aFromType && aToType,
161 : "null pointer passed into multi mixed converter");
162 : // hook up our final listener. this guy gets the various On*() calls we want to throw
163 : // at him.
164 : //
165 :
166 0 : MutexAutoLock lock(mMutex);
167 0 : mNextListener = aListener;
168 0 : return (aListener) ? NS_OK : NS_ERROR_FAILURE;
169 : }
170 :
171 : // ----
172 : //
173 : // nsIStreamListener methods...
174 : //
175 : // ----
176 :
177 : NS_IMETHODIMP
178 0 : nsUnknownDecoder::OnDataAvailable(nsIRequest* request,
179 : nsISupports *aCtxt,
180 : nsIInputStream *aStream,
181 : uint64_t aSourceOffset,
182 : uint32_t aCount)
183 : {
184 0 : nsresult rv = NS_OK;
185 :
186 : bool contentTypeEmpty;
187 : {
188 0 : MutexAutoLock lock(mMutex);
189 0 : if (!mNextListener) return NS_ERROR_FAILURE;
190 :
191 0 : contentTypeEmpty = mContentType.IsEmpty();
192 : }
193 :
194 0 : if (contentTypeEmpty) {
195 : uint32_t count, len;
196 :
197 : // If the buffer has not been allocated by now, just fail...
198 0 : if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY;
199 :
200 : //
201 : // Determine how much of the stream should be read to fill up the
202 : // sniffer buffer...
203 : //
204 0 : if (mBufferLen + aCount >= MAX_BUFFER_SIZE) {
205 0 : count = MAX_BUFFER_SIZE - mBufferLen;
206 : } else {
207 0 : count = aCount;
208 : }
209 :
210 : // Read the data into the buffer...
211 0 : rv = aStream->Read((mBuffer+mBufferLen), count, &len);
212 0 : if (NS_FAILED(rv)) return rv;
213 :
214 0 : mBufferLen += len;
215 0 : aCount -= len;
216 :
217 0 : if (aCount) {
218 : //
219 : // Adjust the source offset... The call to FireListenerNotifications(...)
220 : // will make the first OnDataAvailable(...) call with an offset of 0.
221 : // So, this offset needs to be adjusted to reflect that...
222 : //
223 0 : aSourceOffset += mBufferLen;
224 :
225 0 : DetermineContentType(request);
226 :
227 0 : rv = FireListenerNotifications(request, aCtxt);
228 : }
229 : }
230 :
231 : // Must not fire ODA again if it failed once
232 0 : if (aCount && NS_SUCCEEDED(rv)) {
233 : #ifdef DEBUG
234 : {
235 0 : MutexAutoLock lock(mMutex);
236 0 : NS_ASSERTION(!mContentType.IsEmpty(),
237 : "Content type should be known by now.");
238 : }
239 : #endif
240 :
241 0 : nsCOMPtr<nsIDivertableChannel> divertable = do_QueryInterface(request);
242 0 : if (divertable) {
243 : bool diverting;
244 0 : divertable->GetDivertingToParent(&diverting);
245 0 : if (diverting) {
246 : // The channel is diverted to the parent do not send any more data here.
247 0 : return rv;
248 : }
249 : }
250 :
251 0 : nsCOMPtr<nsIStreamListener> listener;
252 : {
253 0 : MutexAutoLock lock(mMutex);
254 0 : listener = mNextListener;
255 : }
256 0 : rv = listener->OnDataAvailable(request, aCtxt, aStream,
257 0 : aSourceOffset, aCount);
258 : }
259 :
260 0 : return rv;
261 : }
262 :
263 : // ----
264 : //
265 : // nsIRequestObserver methods...
266 : //
267 : // ----
268 :
269 : NS_IMETHODIMP
270 0 : nsUnknownDecoder::OnStartRequest(nsIRequest* request, nsISupports *aCtxt)
271 : {
272 0 : nsresult rv = NS_OK;
273 :
274 : {
275 0 : MutexAutoLock lock(mMutex);
276 0 : if (!mNextListener) return NS_ERROR_FAILURE;
277 : }
278 :
279 : // Allocate the sniffer buffer...
280 0 : if (NS_SUCCEEDED(rv) && !mBuffer) {
281 0 : mBuffer = new char[MAX_BUFFER_SIZE];
282 :
283 0 : if (!mBuffer) {
284 0 : rv = NS_ERROR_OUT_OF_MEMORY;
285 : }
286 : }
287 :
288 0 : nsCOMPtr<nsIDivertableChannel> divertable = do_QueryInterface(request);
289 0 : if (divertable) {
290 0 : divertable->UnknownDecoderInvolvedKeepData();
291 : }
292 :
293 : // Do not pass the OnStartRequest on to the next listener (yet)...
294 0 : return rv;
295 : }
296 :
297 : NS_IMETHODIMP
298 0 : nsUnknownDecoder::OnStopRequest(nsIRequest* request, nsISupports *aCtxt,
299 : nsresult aStatus)
300 : {
301 0 : nsresult rv = NS_OK;
302 :
303 : bool contentTypeEmpty;
304 : {
305 0 : MutexAutoLock lock(mMutex);
306 0 : if (!mNextListener) return NS_ERROR_FAILURE;
307 :
308 0 : contentTypeEmpty = mContentType.IsEmpty();
309 : }
310 :
311 : //
312 : // The total amount of data is less than the size of the sniffer buffer.
313 : // Analyze the buffer now...
314 : //
315 0 : if (contentTypeEmpty) {
316 0 : DetermineContentType(request);
317 :
318 : // Make sure channel listeners see channel as pending while we call
319 : // OnStartRequest/OnDataAvailable, even though the underlying channel
320 : // has already hit OnStopRequest.
321 0 : nsCOMPtr<nsIForcePendingChannel> forcePendingChannel = do_QueryInterface(request);
322 0 : if (forcePendingChannel) {
323 0 : forcePendingChannel->ForcePending(true);
324 : }
325 :
326 0 : rv = FireListenerNotifications(request, aCtxt);
327 :
328 0 : if (NS_FAILED(rv)) {
329 0 : aStatus = rv;
330 : }
331 :
332 : // now we need to set pending state to false before calling OnStopRequest
333 0 : if (forcePendingChannel) {
334 0 : forcePendingChannel->ForcePending(false);
335 : }
336 : }
337 :
338 0 : nsCOMPtr<nsIStreamListener> listener;
339 : {
340 0 : MutexAutoLock lock(mMutex);
341 0 : listener = mNextListener;
342 0 : mNextListener = nullptr;
343 : }
344 0 : rv = listener->OnStopRequest(request, aCtxt, aStatus);
345 :
346 0 : return rv;
347 : }
348 :
349 : // ----
350 : //
351 : // nsIContentSniffer methods...
352 : //
353 : // ----
354 : NS_IMETHODIMP
355 3 : nsUnknownDecoder::GetMIMETypeFromContent(nsIRequest* aRequest,
356 : const uint8_t* aData,
357 : uint32_t aLength,
358 : nsACString& type)
359 : {
360 : // This is only used by sniffer, therefore we do not need to lock anything
361 : // here.
362 :
363 3 : mBuffer = const_cast<char*>(reinterpret_cast<const char*>(aData));
364 3 : mBufferLen = aLength;
365 3 : DetermineContentType(aRequest);
366 3 : mBuffer = nullptr;
367 3 : mBufferLen = 0;
368 3 : type.Assign(mContentType);
369 3 : mContentType.Truncate();
370 3 : return type.IsEmpty() ? NS_ERROR_NOT_AVAILABLE : NS_OK;
371 : }
372 :
373 :
374 : // Actual sniffing code
375 :
376 0 : bool nsUnknownDecoder::AllowSniffing(nsIRequest* aRequest)
377 : {
378 0 : if (!mRequireHTMLsuffix) {
379 0 : return true;
380 : }
381 :
382 0 : nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
383 0 : if (!channel) {
384 0 : NS_ERROR("QI failed");
385 0 : return false;
386 : }
387 :
388 0 : nsCOMPtr<nsIURI> uri;
389 0 : if (NS_FAILED(channel->GetURI(getter_AddRefs(uri))) || !uri) {
390 0 : return false;
391 : }
392 :
393 0 : bool isLocalFile = false;
394 0 : if (NS_FAILED(uri->SchemeIs("file", &isLocalFile)) || isLocalFile) {
395 0 : return false;
396 : }
397 :
398 0 : return true;
399 : }
400 :
401 : /**
402 : * This is the array of sniffer entries that depend on "magic numbers"
403 : * in the file. Each entry has either a type associated with it (set
404 : * these with the SNIFFER_ENTRY macro) or a function to be executed
405 : * (set these with the SNIFFER_ENTRY_WITH_FUNC macro). The function
406 : * should take a single nsIRequest* and returns bool -- true if
407 : * it sets mContentType, false otherwise
408 : */
409 : nsUnknownDecoder::nsSnifferEntry nsUnknownDecoder::sSnifferEntries[] = {
410 : SNIFFER_ENTRY("%PDF-", APPLICATION_PDF),
411 :
412 : SNIFFER_ENTRY("%!PS-Adobe-", APPLICATION_POSTSCRIPT),
413 :
414 : // Files that start with mailbox delimiters let's provisionally call
415 : // text/plain
416 : SNIFFER_ENTRY("From", TEXT_PLAIN),
417 : SNIFFER_ENTRY(">From", TEXT_PLAIN),
418 :
419 : // If the buffer begins with "#!" or "%!" then it is a script of
420 : // some sort... "Scripts" can include arbitrary data to be passed
421 : // to an interpreter, so we need to decide whether we can call this
422 : // text or whether it's data.
423 : SNIFFER_ENTRY_WITH_FUNC("#!", &nsUnknownDecoder::LastDitchSniff),
424 :
425 : // XXXbz should (and can) we also include the various ways that <?xml can
426 : // appear as UTF-16 and such? See http://www.w3.org/TR/REC-xml#sec-guessing
427 : SNIFFER_ENTRY_WITH_FUNC("<?xml", &nsUnknownDecoder::SniffForXML)
428 : };
429 :
430 : uint32_t nsUnknownDecoder::sSnifferEntryNum =
431 : sizeof(nsUnknownDecoder::sSnifferEntries) /
432 : sizeof(nsUnknownDecoder::nsSnifferEntry);
433 :
434 0 : void nsUnknownDecoder::DetermineContentType(nsIRequest* aRequest)
435 : {
436 : {
437 0 : MutexAutoLock lock(mMutex);
438 0 : NS_ASSERTION(mContentType.IsEmpty(), "Content type is already known.");
439 0 : if (!mContentType.IsEmpty()) return;
440 : }
441 :
442 0 : const char* testData = mBuffer;
443 0 : uint32_t testDataLen = mBufferLen;
444 : // Check if data are compressed.
445 0 : nsCOMPtr<nsIHttpChannel> channel(do_QueryInterface(aRequest));
446 0 : nsAutoCString decodedData;
447 :
448 0 : if (channel) {
449 : // ConvertEncodedData is always called only on a single thread for each
450 : // instance of an object.
451 0 : nsresult rv = ConvertEncodedData(aRequest, mBuffer, mBufferLen);
452 0 : if (NS_SUCCEEDED(rv)) {
453 0 : MutexAutoLock lock(mMutex);
454 0 : decodedData = mDecodedData;
455 : }
456 0 : if (!decodedData.IsEmpty()) {
457 0 : testData = decodedData.get();
458 0 : testDataLen = std::min(decodedData.Length(), MAX_BUFFER_SIZE);
459 : }
460 : }
461 :
462 : // First, run through all the types we can detect reliably based on
463 : // magic numbers
464 : uint32_t i;
465 0 : for (i = 0; i < sSnifferEntryNum; ++i) {
466 0 : if (testDataLen >= sSnifferEntries[i].mByteLen && // enough data
467 0 : memcmp(testData, sSnifferEntries[i].mBytes, sSnifferEntries[i].mByteLen) == 0) { // and type matches
468 0 : NS_ASSERTION(sSnifferEntries[i].mMimeType ||
469 : sSnifferEntries[i].mContentTypeSniffer,
470 : "Must have either a type string or a function to set the type");
471 0 : NS_ASSERTION(!sSnifferEntries[i].mMimeType ||
472 : !sSnifferEntries[i].mContentTypeSniffer,
473 : "Both a type string and a type sniffing function set;"
474 : " using type string");
475 0 : if (sSnifferEntries[i].mMimeType) {
476 0 : MutexAutoLock lock(mMutex);
477 0 : mContentType = sSnifferEntries[i].mMimeType;
478 0 : NS_ASSERTION(!mContentType.IsEmpty(),
479 : "Content type should be known by now.");
480 0 : return;
481 : }
482 0 : if ((this->*(sSnifferEntries[i].mContentTypeSniffer))(aRequest)) {
483 : #ifdef DEBUG
484 0 : MutexAutoLock lock(mMutex);
485 0 : NS_ASSERTION(!mContentType.IsEmpty(),
486 : "Content type should be known by now.");
487 : #endif
488 0 : return;
489 : }
490 : }
491 : }
492 :
493 0 : nsAutoCString sniffedType;
494 : NS_SniffContent(NS_DATA_SNIFFER_CATEGORY, aRequest,
495 0 : (const uint8_t*)testData, testDataLen, sniffedType);
496 : {
497 0 : MutexAutoLock lock(mMutex);
498 0 : mContentType = sniffedType;
499 0 : if (!mContentType.IsEmpty()) {
500 0 : return;
501 : }
502 : }
503 :
504 0 : if (SniffForHTML(aRequest)) {
505 : #ifdef DEBUG
506 0 : MutexAutoLock lock(mMutex);
507 0 : NS_ASSERTION(!mContentType.IsEmpty(),
508 : "Content type should be known by now.");
509 : #endif
510 0 : return;
511 : }
512 :
513 : // We don't know what this is yet. Before we just give up, try
514 : // the URI from the request.
515 0 : if (SniffURI(aRequest)) {
516 : #ifdef DEBUG
517 0 : MutexAutoLock lock(mMutex);
518 0 : NS_ASSERTION(!mContentType.IsEmpty(),
519 : "Content type should be known by now.");
520 : #endif
521 0 : return;
522 : }
523 :
524 0 : LastDitchSniff(aRequest);
525 : #ifdef DEBUG
526 0 : MutexAutoLock lock(mMutex);
527 0 : NS_ASSERTION(!mContentType.IsEmpty(),
528 : "Content type should be known by now.");
529 : #endif
530 : }
531 :
532 0 : bool nsUnknownDecoder::SniffForHTML(nsIRequest* aRequest)
533 : {
534 : /*
535 : * To prevent a possible attack, we will not consider this to be
536 : * html content if it comes from the local file system and our prefs
537 : * are set right
538 : */
539 0 : if (!AllowSniffing(aRequest)) {
540 0 : return false;
541 : }
542 :
543 0 : MutexAutoLock lock(mMutex);
544 :
545 : // Now look for HTML.
546 : const char* str;
547 : const char* end;
548 0 : if (mDecodedData.IsEmpty()) {
549 0 : str = mBuffer;
550 0 : end = mBuffer + mBufferLen;
551 : } else {
552 0 : str = mDecodedData.get();
553 0 : end = mDecodedData.get() + std::min(mDecodedData.Length(),
554 0 : MAX_BUFFER_SIZE);
555 : }
556 :
557 : // skip leading whitespace
558 0 : while (str != end && nsCRT::IsAsciiSpace(*str)) {
559 0 : ++str;
560 : }
561 :
562 : // did we find something like a start tag?
563 0 : if (str == end || *str != '<' || ++str == end) {
564 0 : return false;
565 : }
566 :
567 : // If we seem to be SGML or XML and we got down here, just pretend we're HTML
568 0 : if (*str == '!' || *str == '?') {
569 0 : mContentType = TEXT_HTML;
570 0 : return true;
571 : }
572 :
573 0 : uint32_t bufSize = end - str;
574 : // We use sizeof(_tagstr) below because that's the length of _tagstr
575 : // with the one char " " or ">" appended.
576 : #define MATCHES_TAG(_tagstr) \
577 : (bufSize >= sizeof(_tagstr) && \
578 : (PL_strncasecmp(str, _tagstr " ", sizeof(_tagstr)) == 0 || \
579 : PL_strncasecmp(str, _tagstr ">", sizeof(_tagstr)) == 0))
580 :
581 0 : if (MATCHES_TAG("html") ||
582 0 : MATCHES_TAG("frameset") ||
583 0 : MATCHES_TAG("body") ||
584 0 : MATCHES_TAG("head") ||
585 0 : MATCHES_TAG("script") ||
586 0 : MATCHES_TAG("iframe") ||
587 0 : MATCHES_TAG("a") ||
588 0 : MATCHES_TAG("img") ||
589 0 : MATCHES_TAG("table") ||
590 0 : MATCHES_TAG("title") ||
591 0 : MATCHES_TAG("link") ||
592 0 : MATCHES_TAG("base") ||
593 0 : MATCHES_TAG("style") ||
594 0 : MATCHES_TAG("div") ||
595 0 : MATCHES_TAG("p") ||
596 0 : MATCHES_TAG("font") ||
597 0 : MATCHES_TAG("applet") ||
598 0 : MATCHES_TAG("meta") ||
599 0 : MATCHES_TAG("center") ||
600 0 : MATCHES_TAG("form") ||
601 0 : MATCHES_TAG("isindex") ||
602 0 : MATCHES_TAG("h1") ||
603 0 : MATCHES_TAG("h2") ||
604 0 : MATCHES_TAG("h3") ||
605 0 : MATCHES_TAG("h4") ||
606 0 : MATCHES_TAG("h5") ||
607 0 : MATCHES_TAG("h6") ||
608 0 : MATCHES_TAG("b") ||
609 0 : MATCHES_TAG("pre")) {
610 :
611 0 : mContentType = TEXT_HTML;
612 0 : return true;
613 : }
614 :
615 : #undef MATCHES_TAG
616 :
617 0 : return false;
618 : }
619 :
620 0 : bool nsUnknownDecoder::SniffForXML(nsIRequest* aRequest)
621 : {
622 : // Just like HTML, this should be able to be shut off.
623 0 : if (!AllowSniffing(aRequest)) {
624 0 : return false;
625 : }
626 :
627 : // First see whether we can glean anything from the uri...
628 0 : if (!SniffURI(aRequest)) {
629 : // Oh well; just generic XML will have to do
630 0 : MutexAutoLock lock(mMutex);
631 0 : mContentType = TEXT_XML;
632 : }
633 :
634 0 : return true;
635 : }
636 :
637 0 : bool nsUnknownDecoder::SniffURI(nsIRequest* aRequest)
638 : {
639 0 : nsCOMPtr<nsIMIMEService> mimeService(do_GetService("@mozilla.org/mime;1"));
640 0 : if (mimeService) {
641 0 : nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
642 0 : if (channel) {
643 0 : nsCOMPtr<nsIURI> uri;
644 0 : nsresult result = channel->GetURI(getter_AddRefs(uri));
645 0 : if (NS_SUCCEEDED(result) && uri) {
646 0 : nsAutoCString type;
647 0 : result = mimeService->GetTypeFromURI(uri, type);
648 0 : if (NS_SUCCEEDED(result)) {
649 0 : MutexAutoLock lock(mMutex);
650 0 : mContentType = type;
651 0 : return true;
652 : }
653 : }
654 : }
655 : }
656 :
657 0 : return false;
658 : }
659 :
// Text-character test based on RFC 2046 Section 4.1.2. Characters 0-31 count
// as non-text, with two exceptions: the 9-13 range (\t, \n, \v, \f, \r) and
// char 27 (used by encodings like Shift_JIS). Everything above 31 is text.
// Note that the argument is evaluated more than once.
#define IS_TEXT_CHAR(ch)                                     \
  (31 < ((unsigned char)(ch)) || ((ch) >= 9 && 13 >= (ch)) || 27 == (ch))
665 :
666 0 : bool nsUnknownDecoder::LastDitchSniff(nsIRequest* aRequest)
667 : {
668 : // All we can do now is try to guess whether this is text/plain or
669 : // application/octet-stream
670 :
671 0 : MutexAutoLock lock(mMutex);
672 :
673 : const char* testData;
674 : uint32_t testDataLen;
675 0 : if (mDecodedData.IsEmpty()) {
676 0 : testData = mBuffer;
677 0 : testDataLen = mBufferLen;
678 : } else {
679 0 : testData = mDecodedData.get();
680 0 : testDataLen = std::min(mDecodedData.Length(), MAX_BUFFER_SIZE);
681 : }
682 :
683 : // First, check for a BOM. If we see one, assume this is text/plain
684 : // in whatever encoding. If there is a BOM _and_ text we will
685 : // always have at least 4 bytes in the buffer (since the 2-byte BOMs
686 : // are for 2-byte encodings and the UTF-8 BOM is 3 bytes).
687 0 : if (testDataLen >= 4) {
688 0 : const unsigned char* buf = (const unsigned char*)testData;
689 0 : if ((buf[0] == 0xFE && buf[1] == 0xFF) || // UTF-16, Big Endian
690 0 : (buf[0] == 0xFF && buf[1] == 0xFE) || // UTF-16 or UCS-4, Little Endian
691 0 : (buf[0] == 0xEF && buf[1] == 0xBB && buf[2] == 0xBF) || // UTF-8
692 0 : (buf[0] == 0 && buf[1] == 0 && buf[2] == 0xFE && buf[3] == 0xFF)) { // UCS-4, Big Endian
693 :
694 0 : mContentType = TEXT_PLAIN;
695 0 : return true;
696 : }
697 : }
698 :
699 : // Now see whether the buffer has any non-text chars. If not, then let's
700 : // just call it text/plain...
701 : //
702 : uint32_t i;
703 0 : for (i = 0; i < testDataLen && IS_TEXT_CHAR(testData[i]); i++) {
704 0 : continue;
705 : }
706 :
707 0 : if (i == testDataLen) {
708 0 : mContentType = TEXT_PLAIN;
709 : }
710 : else {
711 0 : mContentType = APPLICATION_OCTET_STREAM;
712 : }
713 :
714 0 : return true;
715 : }
716 :
717 :
718 0 : nsresult nsUnknownDecoder::FireListenerNotifications(nsIRequest* request,
719 : nsISupports *aCtxt)
720 : {
721 0 : nsresult rv = NS_OK;
722 :
723 0 : nsCOMPtr<nsIStreamListener> listener;
724 0 : nsAutoCString contentType;
725 : {
726 0 : MutexAutoLock lock(mMutex);
727 0 : if (!mNextListener) return NS_ERROR_FAILURE;
728 :
729 0 : listener = mNextListener;
730 0 : contentType = mContentType;
731 : }
732 :
733 0 : if (!contentType.IsEmpty()) {
734 : nsCOMPtr<nsIViewSourceChannel> viewSourceChannel =
735 0 : do_QueryInterface(request);
736 0 : if (viewSourceChannel) {
737 0 : rv = viewSourceChannel->SetOriginalContentType(contentType);
738 : } else {
739 0 : nsCOMPtr<nsIChannel> channel = do_QueryInterface(request, &rv);
740 0 : if (NS_SUCCEEDED(rv)) {
741 : // Set the new content type on the channel...
742 0 : rv = channel->SetContentType(contentType);
743 : }
744 : }
745 :
746 0 : NS_ASSERTION(NS_SUCCEEDED(rv), "Unable to set content type on channel!");
747 :
748 0 : if (NS_FAILED(rv)) {
749 : // Cancel the request to make sure it has the correct status if
750 : // mNextListener looks at it.
751 0 : request->Cancel(rv);
752 0 : listener->OnStartRequest(request, aCtxt);
753 :
754 0 : nsCOMPtr<nsIDivertableChannel> divertable = do_QueryInterface(request);
755 0 : if (divertable) {
756 0 : rv = divertable->UnknownDecoderInvolvedOnStartRequestCalled();
757 : }
758 :
759 0 : return rv;
760 : }
761 : }
762 :
763 : // Fire the OnStartRequest(...)
764 0 : rv = listener->OnStartRequest(request, aCtxt);
765 :
766 0 : nsCOMPtr<nsIDivertableChannel> divertable = do_QueryInterface(request);
767 0 : if (divertable) {
768 0 : rv = divertable->UnknownDecoderInvolvedOnStartRequestCalled();
769 : bool diverting;
770 0 : divertable->GetDivertingToParent(&diverting);
771 0 : if (diverting) {
772 : // The channel is diverted to the parent do not send any more data here.
773 0 : return rv;
774 : }
775 : }
776 :
777 0 : if (NS_SUCCEEDED(rv)) {
778 : // install stream converter if required
779 0 : nsCOMPtr<nsIEncodedChannel> encodedChannel = do_QueryInterface(request);
780 0 : if (encodedChannel) {
781 0 : nsCOMPtr<nsIStreamListener> listenerNew;
782 0 : rv = encodedChannel->DoApplyContentConversions(listener, getter_AddRefs(listenerNew), aCtxt);
783 0 : if (NS_SUCCEEDED(rv) && listenerNew) {
784 0 : MutexAutoLock lock(mMutex);
785 0 : mNextListener = listenerNew;
786 0 : listener = listenerNew;
787 : }
788 : }
789 : }
790 :
791 0 : if (!mBuffer) return NS_ERROR_OUT_OF_MEMORY;
792 :
793 : // If the request was canceled, then we need to treat that equivalently
794 : // to an error returned by OnStartRequest.
795 0 : if (NS_SUCCEEDED(rv))
796 0 : request->GetStatus(&rv);
797 :
798 : // Fire the first OnDataAvailable for the data that was read from the
799 : // stream into the sniffer buffer...
800 0 : if (NS_SUCCEEDED(rv) && (mBufferLen > 0)) {
801 0 : uint32_t len = 0;
802 0 : nsCOMPtr<nsIInputStream> in;
803 0 : nsCOMPtr<nsIOutputStream> out;
804 :
805 : // Create a pipe and fill it with the data from the sniffer buffer.
806 0 : rv = NS_NewPipe(getter_AddRefs(in), getter_AddRefs(out),
807 : MAX_BUFFER_SIZE, MAX_BUFFER_SIZE);
808 :
809 0 : if (NS_SUCCEEDED(rv)) {
810 0 : rv = out->Write(mBuffer, mBufferLen, &len);
811 0 : if (NS_SUCCEEDED(rv)) {
812 0 : if (len == mBufferLen) {
813 0 : rv = listener->OnDataAvailable(request, aCtxt, in, 0, len);
814 : } else {
815 0 : NS_ERROR("Unable to write all the data into the pipe.");
816 0 : rv = NS_ERROR_FAILURE;
817 : }
818 : }
819 : }
820 : }
821 :
822 0 : delete [] mBuffer;
823 0 : mBuffer = nullptr;
824 0 : mBufferLen = 0;
825 :
826 0 : return rv;
827 : }
828 :
829 :
830 : nsresult
831 0 : nsUnknownDecoder::ConvertEncodedData(nsIRequest* request,
832 : const char* data,
833 : uint32_t length)
834 : {
835 0 : nsresult rv = NS_OK;
836 :
837 : {
838 0 : MutexAutoLock lock(mMutex);
839 0 : mDecodedData = "";
840 : }
841 0 : nsCOMPtr<nsIEncodedChannel> encodedChannel(do_QueryInterface(request));
842 0 : if (encodedChannel) {
843 :
844 : RefPtr<ConvertedStreamListener> strListener =
845 0 : new ConvertedStreamListener(this);
846 :
847 0 : nsCOMPtr<nsIStreamListener> listener;
848 0 : rv = encodedChannel->DoApplyContentConversions(strListener,
849 0 : getter_AddRefs(listener),
850 0 : nullptr);
851 :
852 0 : if (NS_FAILED(rv)) {
853 0 : return rv;
854 : }
855 :
856 0 : if (listener) {
857 0 : listener->OnStartRequest(request, nullptr);
858 :
859 : nsCOMPtr<nsIStringInputStream> rawStream =
860 0 : do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID);
861 0 : if (!rawStream)
862 0 : return NS_ERROR_FAILURE;
863 :
864 0 : rv = rawStream->SetData((const char*)data, length);
865 0 : NS_ENSURE_SUCCESS(rv, rv);
866 :
867 0 : rv = listener->OnDataAvailable(request, nullptr, rawStream, 0,
868 0 : length);
869 0 : NS_ENSURE_SUCCESS(rv, rv);
870 :
871 0 : listener->OnStopRequest(request, nullptr, NS_OK);
872 : }
873 : }
874 0 : return rv;
875 : }
876 :
877 : //
878 : // nsIThreadRetargetableStreamListener methods
879 : //
880 : NS_IMETHODIMP
881 0 : nsUnknownDecoder::CheckListenerChain()
882 : {
883 0 : nsCOMPtr<nsIThreadRetargetableStreamListener> listener;
884 : {
885 0 : MutexAutoLock lock(mMutex);
886 0 : listener = do_QueryInterface(mNextListener);
887 : }
888 0 : if (!listener) {
889 0 : return NS_ERROR_NO_INTERFACE;
890 : }
891 :
892 0 : return listener->CheckListenerChain();
893 : }
894 :
895 : void
896 3 : nsBinaryDetector::DetermineContentType(nsIRequest* aRequest)
897 : {
898 3 : nsCOMPtr<nsIHttpChannel> httpChannel = do_QueryInterface(aRequest);
899 3 : if (!httpChannel) {
900 2 : return;
901 : }
902 :
903 : // It's an HTTP channel. Check for the text/plain mess
904 1 : nsAutoCString contentTypeHdr;
905 3 : Unused << httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Type"),
906 2 : contentTypeHdr);
907 1 : nsAutoCString contentType;
908 1 : httpChannel->GetContentType(contentType);
909 :
910 : // Make sure to do a case-sensitive exact match comparison here. Apache
911 : // 1.x just sends text/plain for "unknown", while Apache 2.x sends
912 : // text/plain with a ISO-8859-1 charset. Debian's Apache version, just to
913 : // be different, sends text/plain with iso-8859-1 charset. For extra fun,
914 : // FC7, RHEL4, and Ubuntu Feisty send charset=UTF-8. Don't do general
915 : // case-insensitive comparison, since we really want to apply this crap as
916 : // rarely as we can.
917 2 : if (!contentType.EqualsLiteral("text/plain") ||
918 0 : (!contentTypeHdr.EqualsLiteral("text/plain") &&
919 0 : !contentTypeHdr.EqualsLiteral("text/plain; charset=ISO-8859-1") &&
920 0 : !contentTypeHdr.EqualsLiteral("text/plain; charset=iso-8859-1") &&
921 0 : !contentTypeHdr.EqualsLiteral("text/plain; charset=UTF-8"))) {
922 1 : return;
923 : }
924 :
925 : // Check whether we have content-encoding. If we do, don't try to
926 : // detect the type.
927 : // XXXbz we could improve this by doing a local decompress if we
928 : // wanted, I'm sure.
929 0 : nsAutoCString contentEncoding;
930 0 : Unused << httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Encoding"),
931 0 : contentEncoding);
932 0 : if (!contentEncoding.IsEmpty()) {
933 0 : return;
934 : }
935 :
936 0 : LastDitchSniff(aRequest);
937 0 : MutexAutoLock lock(mMutex);
938 0 : if (mContentType.Equals(APPLICATION_OCTET_STREAM)) {
939 : // We want to guess at it instead
940 0 : mContentType = APPLICATION_GUESS_FROM_EXT;
941 : } else {
942 : // Let the text/plain type we already have be, so that other content
943 : // sniffers can also get a shot at this data.
944 0 : mContentType.Truncate();
945 : }
946 : }
|