LCOV - code coverage report
Current view: top level - browser/components/feeds - nsFeedSniffer.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 59 134 44.0 %
Date: 2017-07-14 16:53:18 Functions: 6 13 46.2 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       5             : 
       6             : #include "nsFeedSniffer.h"
       7             : 
       8             : #include "mozilla/Unused.h"
       9             : 
      10             : #include "nsNetCID.h"
      11             : #include "nsXPCOM.h"
      12             : #include "nsCOMPtr.h"
      13             : #include "nsStringStream.h"
      14             : 
      15             : #include "nsBrowserCompsCID.h"
      16             : 
      17             : #include "nsICategoryManager.h"
      18             : #include "nsIServiceManager.h"
      19             : #include "nsComponentManagerUtils.h"
      20             : #include "nsServiceManagerUtils.h"
      21             : 
      22             : #include "nsIStreamConverterService.h"
      23             : #include "nsIStreamConverter.h"
      24             : 
      25             : #include "nsIStreamListener.h"
      26             : 
      27             : #include "nsIHttpChannel.h"
      28             : #include "nsIMIMEHeaderParam.h"
      29             : 
      30             : #include "nsMimeTypes.h"
      31             : #include "nsIURI.h"
      32             : #include <algorithm>
      33             : 
      34             : #define TYPE_ATOM "application/atom+xml"
      35             : #define TYPE_RSS "application/rss+xml"
      36             : #define TYPE_MAYBE_FEED "application/vnd.mozilla.maybe.feed"
      37             : 
      38             : #define NS_RDF "http://www.w3.org/1999/02/22-rdf-syntax-ns#"
      39             : #define NS_RSS "http://purl.org/rss/1.0/"
      40             : 
      41             : #define MAX_BYTES 512u
      42             : 
      43          19 : NS_IMPL_ISUPPORTS(nsFeedSniffer, // nsISupports plumbing for nsFeedSniffer; the interfaces it exposes follow
      44             :                   nsIContentSniffer, // sniffing entry point: GetMIMETypeFromContent
      45             :                   nsIStreamListener, // OnDataAvailable, which collects converter output into mDecodedData
      46             :                   nsIRequestObserver) // OnStartRequest / OnStopRequest (both return NS_OK without doing work)
      47             : 
      48             : nsresult
      49           1 : nsFeedSniffer::ConvertEncodedData(nsIRequest* request,
      50             :                                   const uint8_t* data,
      51             :                                   uint32_t length)
      52             : {
      53           1 :   nsresult rv = NS_OK;
      54             : 
      55           1 :  mDecodedData = "";
      56           2 :  nsCOMPtr<nsIHttpChannel> httpChannel(do_QueryInterface(request));
      57           1 :   if (!httpChannel)
      58           0 :     return NS_ERROR_NO_INTERFACE;
      59             : 
      60           2 :   nsAutoCString contentEncoding;
      61           3 :   mozilla::Unused << httpChannel->GetResponseHeader(NS_LITERAL_CSTRING("Content-Encoding"),
      62           2 :                                                     contentEncoding);
      63           1 :   if (!contentEncoding.IsEmpty()) {
      64           0 :     nsCOMPtr<nsIStreamConverterService> converterService(do_GetService(NS_STREAMCONVERTERSERVICE_CONTRACTID));
      65           0 :     if (converterService) {
      66           0 :       ToLowerCase(contentEncoding);
      67             : 
      68           0 :       nsCOMPtr<nsIStreamListener> converter;
      69           0 :       rv = converterService->AsyncConvertData(contentEncoding.get(),
      70             :                                               "uncompressed", this, nullptr,
      71           0 :                                               getter_AddRefs(converter));
      72           0 :       NS_ENSURE_SUCCESS(rv, rv);
      73             : 
      74           0 :       converter->OnStartRequest(request, nullptr);
      75             : 
      76             :       nsCOMPtr<nsIStringInputStream> rawStream =
      77           0 :         do_CreateInstance(NS_STRINGINPUTSTREAM_CONTRACTID);
      78           0 :       if (!rawStream)
      79           0 :         return NS_ERROR_FAILURE;
      80             : 
      81           0 :       rv = rawStream->SetData((const char*)data, length);
      82           0 :       NS_ENSURE_SUCCESS(rv, rv);
      83             : 
      84           0 :       rv = converter->OnDataAvailable(request, nullptr, rawStream, 0, length);
      85           0 :       NS_ENSURE_SUCCESS(rv, rv);
      86             : 
      87           0 :       converter->OnStopRequest(request, nullptr, NS_OK);
      88             :     }
      89             :   }
      90           1 :   return rv;
      91             : }
      92             : 
      93             : template<int N>
      94             : static bool
      95             : StringBeginsWithLowercaseLiteral(nsAString& aString,
      96             :                                  const char (&aSubstring)[N])
      97             : {
      98             :   return StringHead(aString, N).LowerCaseEqualsLiteral(aSubstring);
      99             : }
     100             : 
     101             : bool
     102           0 : HasAttachmentDisposition(nsIHttpChannel* httpChannel)
     103             : {
     104           0 :   if (!httpChannel)
     105           0 :     return false;
     106             : 
     107             :   uint32_t disp;
     108           0 :   nsresult rv = httpChannel->GetContentDisposition(&disp);
     109             : 
     110           0 :   if (NS_SUCCEEDED(rv) && disp == nsIChannel::DISPOSITION_ATTACHMENT)
     111           0 :     return true;
     112             : 
     113           0 :   return false;
     114             : }
     115             : 
     116             : /**
     117             :  * @return the first occurrence of a character within a string buffer,
     118             :  *         or nullptr if not found
     119             :  */
     120             : static const char*
     121           0 : FindChar(char c, const char *begin, const char *end)
     122             : {
     123           0 :   for (; begin < end; ++begin) {
     124           0 :     if (*begin == c)
     125           0 :       return begin;
     126             :   }
     127           0 :   return nullptr;
     128             : }
     129             : 
     130             : /**
     131             :  *
     132             :  * Determine if a substring is the "documentElement" in the document.
     133             :  *
     134             :  * All of our sniffed substrings: <rss, <feed, <rdf:RDF must be the "document"
     135             :  * element within the XML DOM, i.e. the root container element. Otherwise,
     136             :  * it's possible that someone embedded one of these tags inside a document of
     137             :  * another type, e.g. a HTML document, and we don't want to show the preview
     138             :  * page if the document isn't actually a feed.
     139             :  *
     140             :  * @param   start
     141             :  *          The beginning of the data being sniffed
     142             :  * @param   end
     143             :  *          The end of the data being sniffed, right before the substring that
     144             :  *          was found.
     145             :  * @returns true if the found substring is the documentElement, false
     146             :  *          otherwise.
     147             :  */
     148             : static bool
     149           0 : IsDocumentElement(const char *start, const char* end)
     150             : {
     151             :   // For every tag in the buffer, check to see if it's a PI, Doctype or
     152             :   // comment, our desired substring or something invalid.
     153           0 :   while ( (start = FindChar('<', start, end)) ) {
     154           0 :     ++start;
     155           0 :     if (start >= end)
     156           0 :       return false;
     157             : 
     158             :     // Check to see if the character following the '<' is either '?' or '!'
     159             :     // (processing instruction or doctype or comment)... these are valid nodes
     160             :     // to have in the prologue.
     161           0 :     if (*start != '?' && *start != '!')
     162           0 :       return false;
     163             : 
     164             :     // Now advance the iterator until the '>' (We do this because we don't want
     165             :     // to sniff indicator substrings that are embedded within other nodes, e.g.
     166             :     // comments: <!-- <rdf:RDF .. > -->
     167           0 :     start = FindChar('>', start, end);
     168           0 :     if (!start)
     169           0 :       return false;
     170             : 
     171           0 :     ++start;
     172             :   }
     173           0 :   return true;
     174             : }
     175             : 
     176             : /**
     177             :  * Determines whether or not a string exists as the root element in an XML data
     178             :  * string buffer.
     179             :  * @param   dataString
     180             :  *          The data being sniffed
     181             :  * @param   substring
     182             :  *          The substring being tested for existence and root-ness.
     183             :  * @returns true if the substring exists and is the documentElement, false
     184             :  *          otherwise.
     185             :  */
     186             : static bool
     187           3 : ContainsTopLevelSubstring(nsACString& dataString, const char *substring)
     188             : {
     189           3 :   nsACString::const_iterator start, end;
     190           3 :   dataString.BeginReading(start);
     191           3 :   dataString.EndReading(end);
     192             : 
     193           3 :   if (!FindInReadable(nsCString(substring), start, end)){
     194           3 :     return false;
     195             :   }
     196             : 
     197           0 :   auto offset = start.get() - dataString.Data();
     198             : 
     199           0 :   const char *begin = dataString.BeginReading();
     200             : 
     201             :   // Only do the validation when we find the substring.
     202           0 :   return IsDocumentElement(begin, begin + offset);
     203             : }
     204             : 
     205             : NS_IMETHODIMP
     206           3 : nsFeedSniffer::GetMIMETypeFromContent(nsIRequest* request,
     207             :                                       const uint8_t* data,
     208             :                                       uint32_t length,
     209             :                                       nsACString& sniffedType)
     210             : {
     211           6 :   nsCOMPtr<nsIHttpChannel> channel(do_QueryInterface(request));
     212           3 :   if (!channel)
     213           2 :     return NS_ERROR_NO_INTERFACE;
     214             : 
     215             :   // Check that this is a GET request, since you can't subscribe to a POST...
     216           2 :   nsAutoCString method;
     217           1 :   mozilla::Unused << channel->GetRequestMethod(method);
     218           1 :   if (!method.EqualsLiteral("GET")) {
     219           0 :     sniffedType.Truncate();
     220           0 :     return NS_OK;
     221             :   }
     222             : 
     223             :   // We need to find out if this is a load of a view-source document. In this
     224             :   // case we do not want to override the content type, since the source display
     225             :   // does not need to be converted from feed format to XUL. More importantly,
     226             :   // we don't want to change the content type from something
     227             :   // nsContentDLF::CreateInstance knows about (e.g. application/xml, text/html
     228             :   // etc) to something that only the application fe knows about (maybe.feed)
     229             :   // thus deactivating syntax highlighting.
     230           2 :   nsCOMPtr<nsIURI> originalURI;
     231           1 :   channel->GetOriginalURI(getter_AddRefs(originalURI));
     232             : 
     233           2 :   nsAutoCString scheme;
     234           1 :   originalURI->GetScheme(scheme);
     235           1 :   if (scheme.EqualsLiteral("view-source")) {
     236           0 :     sniffedType.Truncate();
     237           0 :     return NS_OK;
     238             :   }
     239             : 
     240             :   // Check the Content-Type to see if it is set correctly. If it is set to
     241             :   // something specific that we think is a reliable indication of a feed, don't
     242             :   // bother sniffing since we assume the site maintainer knows what they're
     243             :   // doing.
     244           2 :   nsAutoCString contentType;
     245           1 :   channel->GetContentType(contentType);
     246           2 :   bool noSniff = contentType.EqualsLiteral(TYPE_RSS) ||
     247           2 :                    contentType.EqualsLiteral(TYPE_ATOM);
     248             : 
     249             :   // Check to see if this was a feed request from the location bar or from
     250             :   // the feed: protocol. This is also a reliable indication.
     251             :   // The value of the header doesn't matter.
     252           1 :   if (!noSniff) {
     253           2 :     nsAutoCString sniffHeader;
     254             :     nsresult foundHeader =
     255           4 :       channel->GetRequestHeader(NS_LITERAL_CSTRING("X-Moz-Is-Feed"),
     256           3 :                                 sniffHeader);
     257           1 :     noSniff = NS_SUCCEEDED(foundHeader);
     258             :   }
     259             : 
     260           1 :   if (noSniff) {
     261             :     // check for an attachment after we have a likely feed.
     262           0 :     if(HasAttachmentDisposition(channel)) {
     263           0 :       sniffedType.Truncate();
     264           0 :       return NS_OK;
     265             :     }
     266             : 
     267             :     // set the feed header as a response header, since we have good metadata
     268             :     // telling us that the feed is supposed to be RSS or Atom
     269             :     mozilla::DebugOnly<nsresult> rv =
     270           0 :       channel->SetResponseHeader(NS_LITERAL_CSTRING("X-Moz-Is-Feed"),
     271           0 :                                  NS_LITERAL_CSTRING("1"), false);
     272           0 :     MOZ_ASSERT(NS_SUCCEEDED(rv));
     273           0 :     sniffedType.AssignLiteral(TYPE_MAYBE_FEED);
     274           0 :     return NS_OK;
     275             :   }
     276             : 
     277             :   // Don't sniff arbitrary types.  Limit sniffing to situations that
     278             :   // we think can reasonably arise.
     279           2 :   if (!contentType.EqualsLiteral(TEXT_HTML) &&
     280           1 :       !contentType.EqualsLiteral(APPLICATION_OCTET_STREAM) &&
     281             :       // Same criterion as XMLHttpRequest.  Should we be checking for "+xml"
     282             :       // and check for text/xml and application/xml by hand instead?
     283           0 :       contentType.Find("xml") == -1) {
     284           0 :     sniffedType.Truncate();
     285           0 :     return NS_OK;
     286             :   }
     287             : 
     288             :   // Now we need to potentially decompress data served with
     289             :   // Content-Encoding: gzip
     290           1 :   nsresult rv = ConvertEncodedData(request, data, length);
     291           1 :   if (NS_FAILED(rv))
     292           0 :     return rv;
     293             : 
     294             :   // We cap the number of bytes to scan at MAX_BYTES to prevent picking up
     295             :   // false positives by accidentally reading document content, e.g. a "how to
     296             :   // make a feed" page.
     297             :   const char* testData;
     298           1 :   if (mDecodedData.IsEmpty()) {
     299           1 :     testData = (const char*)data;
     300           1 :     length = std::min(length, MAX_BYTES);
     301             :   } else {
     302           0 :     testData = mDecodedData.get();
     303           0 :     length = std::min(mDecodedData.Length(), MAX_BYTES);
     304             :   }
     305             : 
     306             :   // The strategy here is based on that described in:
     307             :   // http://blogs.msdn.com/rssteam/articles/PublishersGuide.aspx
     308             :   // for interoperarbility purposes.
     309             : 
     310             :   // Thus begins the actual sniffing.
     311           2 :   nsDependentCSubstring dataString((const char*)testData, length);
     312             : 
     313           1 :   bool isFeed = false;
     314             : 
     315             :   // RSS 0.91/0.92/2.0
     316           1 :   isFeed = ContainsTopLevelSubstring(dataString, "<rss");
     317             : 
     318             :   // Atom 1.0
     319           1 :   if (!isFeed)
     320           1 :     isFeed = ContainsTopLevelSubstring(dataString, "<feed");
     321             : 
     322             :   // RSS 1.0
     323           1 :   if (!isFeed) {
     324           1 :     bool foundNS_RDF = FindInReadable(NS_LITERAL_CSTRING(NS_RDF), dataString);
     325           1 :     bool foundNS_RSS = FindInReadable(NS_LITERAL_CSTRING(NS_RSS), dataString);
     326           2 :     isFeed = ContainsTopLevelSubstring(dataString, "<rdf:RDF") &&
     327           1 :       foundNS_RDF && foundNS_RSS;
     328             :   }
     329             : 
     330             :   // If we sniffed a feed, coerce our internal type
     331           1 :   if (isFeed && !HasAttachmentDisposition(channel))
     332           0 :     sniffedType.AssignLiteral(TYPE_MAYBE_FEED);
     333             :   else
     334           1 :     sniffedType.Truncate();
     335           1 :   return NS_OK;
     336             : }
     337             : 
     338             : NS_IMETHODIMP
     339           0 : nsFeedSniffer::OnStartRequest(nsIRequest* request, nsISupports* context)
     340             : { // Start-of-request notification; neither argument is used.
     341           0 :   return NS_OK; // Nothing to set up: ConvertEncodedData resets mDecodedData itself.
     342             : }
     343             : 
     344             : nsresult
     345           0 : nsFeedSniffer::AppendSegmentToString(nsIInputStream* inputStream,
     346             :                                      void* closure,
     347             :                                      const char* rawSegment,
     348             :                                      uint32_t toOffset,
     349             :                                      uint32_t count,
     350             :                                      uint32_t* writeCount)
     351             : {
     352           0 :   nsCString* decodedData = static_cast<nsCString*>(closure);
     353           0 :   decodedData->Append(rawSegment, count);
     354           0 :   *writeCount = count;
     355           0 :   return NS_OK;
     356             : }
     357             : 
     358             : NS_IMETHODIMP
     359           0 : nsFeedSniffer::OnDataAvailable(nsIRequest* request, nsISupports* context,
     360             :                                nsIInputStream* stream, uint64_t offset,
     361             :                                uint32_t count)
     362             : {
     363             :   uint32_t read;
     364           0 :   return stream->ReadSegments(AppendSegmentToString, &mDecodedData, count,
     365           0 :                               &read);
     366             : }
     367             : 
     368             : NS_IMETHODIMP
     369           0 : nsFeedSniffer::OnStopRequest(nsIRequest* request, nsISupports* context,
     370             :                              nsresult status)
     371             : { // End-of-request notification; request, context and status are all ignored.
     372           0 :   return NS_OK; // Nothing to finalize: data was already appended to mDecodedData in OnDataAvailable.
     373             : }

Generated by: LCOV version 1.13