LCOV - output.info - parser/htmlparser/nsScanner.cpp

LCOV - code coverage report

Current view:	top level - parser/htmlparser - nsScanner.cpp (source / functions)		Hit	Total	Coverage
Test:	output.info	Lines:	86	149	57.7 %
Date:	2017-07-14 16:53:18	Functions:	11	19	57.9 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* vim: set ts=2 sw=2 et tw=78: */
       3             : /* This Source Code Form is subject to the terms of the Mozilla Public
       4             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       5             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       6             : 
       7             : //#define __INCREMENTAL 1
       8             : 
       9             : #include "nsScanner.h"
      10             : 
      11             : #include "mozilla/Attributes.h"
      12             : #include "mozilla/DebugOnly.h"
      13             : #include "mozilla/Encoding.h"
      14             : #include "nsDebug.h"
      15             : #include "nsReadableUtils.h"
      16             : #include "nsIInputStream.h"
      17             : #include "nsIFile.h"
      18             : #include "nsUTF8Utils.h" // for LossyConvertEncoding
      19             : #include "nsCRT.h"
      20             : #include "nsParser.h"
      21             : #include "nsCharsetSource.h"
      22             : // Stores the terminator list and precomputes mFilter, the mask of bits that no terminator has set.
      23           0 : nsReadEndCondition::nsReadEndCondition(const char16_t* aTerminateChars) :
      24           0 :   mChars(aTerminateChars), mFilter(char16_t(~0)) // All bits set
      25             : {
      26             :   // Build filter that will be used to filter out characters with
      27             :   // bits that none of the terminal chars have. This works very well
      28             :   // because terminal chars often have only the last 4-6 bits set and
      29             :   // normal ascii letters have bit 7 (0x40) set. Other letters have even
      30             :   // higher bits set.
      31             :   
      32             :   // Calculate filter: clear every bit that occurs in any terminal char.
      33           0 :   const char16_t *current = aTerminateChars;
      34           0 :   char16_t terminalChar = *current; // dereferenced unchecked: aTerminateChars must not be null
      35           0 :   while (terminalChar) { // the list ends at its first 0 code unit
      36           0 :     mFilter &= ~terminalChar;
      37           0 :     ++current;
      38           0 :     terminalChar = *current;
      39             :   }
      40           0 : }
      41             : 
      42             : /**
      43             :  *  Use this constructor if you want i/o to be based on
      44             :  *  a single string you hand in during construction.
      45             :  *  This short cut was added for Javascript.
      46             :  *
      47             :  *  @update  gess 5/12/98
      48             :  *  @param   anHTMLString the text handed to AppendToBuffer as the scanner's data
      49             :  *  The scanner starts non-incremental, with no decoder and an uninitialized charset source.
      50             :  */
      51           0 : nsScanner::nsScanner(const nsAString& anHTMLString)
      52             : {
      53           0 :   MOZ_COUNT_CTOR(nsScanner);
      54             : 
      55           0 :   mSlidingBuffer = nullptr;
      56           0 :   if (AppendToBuffer(anHTMLString)) {
      57           0 :     mSlidingBuffer->BeginReading(mCurrentPosition);
      58             :   } else {
      59             :     /* XXX append failed: zero the iterator so that current == end; see hack below, re: bug 182067 */
      60           0 :     memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
      61           0 :     mEndPosition = mCurrentPosition;
      62             :   }
      63           0 :   mMarkPosition = mCurrentPosition;
      64           0 :   mIncremental = false;
      65           0 :   mUnicodeDecoder = nullptr;
      66           0 :   mCharsetSource = kCharsetUninitialized;
      67           0 : }
      68             : 
      69             : /**
      70             :  *  Use this constructor if you want i/o to be based on strings
      71             :  *  the scanner receives. Even with an empty filename, you can still
      72             :  *  provide data via Append(). aCreateStream must be false (asserted).
      73             :  */
      74          23 : nsScanner::nsScanner(nsString& aFilename, bool aCreateStream)
      75          23 :   : mFilename(aFilename)
      76             : {
      77          23 :   MOZ_COUNT_CTOR(nsScanner);
      78          23 :   NS_ASSERTION(!aCreateStream, "This is always true.");
      79             : 
      80          23 :   mSlidingBuffer = nullptr;
      81             : 
      82             :   // XXX This is a big hack.  We need to initialize the iterators to something.
      83             :   // What matters is that mCurrentPosition == mEndPosition, so that our methods
      84             :   // believe that we are at EOF (see bug 182067).  We null out mCurrentPosition
      85             :   // so that we have some hope of catching null pointer dereferences associated
      86             :   // with this hack. --darin
      87          23 :   memset(&mCurrentPosition, 0, sizeof(mCurrentPosition));
      88          23 :   mMarkPosition = mCurrentPosition;
      89          23 :   mEndPosition = mCurrentPosition;
      90             : 
      91          23 :   mIncremental = true;
      92             : 
      93          23 :   mUnicodeDecoder = nullptr;
      94          23 :   mCharsetSource = kCharsetUninitialized;
      95             :   // XML defaults to UTF-8 and about:blank is UTF-8, too. This call also creates the initial decoder.
      96          23 :   SetDocumentCharset(UTF_8_ENCODING, kCharsetFromDocTypeDefault);
      97          23 : }
      98             : // Adopts aEncoding as the document charset unless aSource ranks below mCharsetSource; always returns NS_OK.
      99          45 : nsresult nsScanner::SetDocumentCharset(NotNull<const Encoding*> aEncoding,
     100             :                                        int32_t aSource)
     101             : {
     102          45 :   if (aSource < mCharsetSource) // priority is lower than the current one
     103           0 :     return NS_OK;
     104             : 
     105          45 :   mCharsetSource = aSource; // recorded even when the charset name turns out to be unchanged
     106          90 :   nsCString charsetName;
     107          45 :   aEncoding->Name(charsetName);
     108          45 :   if (!mCharset.IsEmpty() && charsetName.Equals(mCharset)) {
     109          22 :     return NS_OK; // no difference, don't change it
     110             :   }
     111             : 
     112             :   // different (or no charset set yet), need to change it and replace the decoder
     113             : 
     114          23 :   mCharset.Assign(charsetName);
     115             : 
     116          23 :   mUnicodeDecoder = aEncoding->NewDecoderWithBOMRemoval();
     117             : 
     118          23 :   return NS_OK;
     119             : }
     120             : 
     121             : 
     122             : /**
     123             :  *  Destructor.
     124             :  *
     125             :  *  @update  gess 3/25/98
     126             :  *  Deletes the sliding buffer (harmless when it is still null) and runs
     127             :  *  MOZ_COUNT_DTOR, the counterpart of the constructors' MOZ_COUNT_CTOR.
     128             :  */
     129          44 : nsScanner::~nsScanner() {
     130             : 
     131          22 :   delete mSlidingBuffer;
     132             : 
     133          22 :   MOZ_COUNT_DTOR(nsScanner);
     134          22 : }
     135             : 
     136             : /**
     137             :  *  Resets current offset position of input stream to marked position.
     138             :  *  This allows us to back up to this point if the need should arise,
     139             :  *  such as when tokenization gets interrupted.
     140             :  *  NOTE: IT IS REALLY BAD FORM TO CALL RELEASE WITHOUT CALLING MARK FIRST!
     141             :  *
     142             :  *  @update  gess 5/12/98
     143             :  *  No parameters, no return value.
     144             :  *  Does nothing when mSlidingBuffer is null.
     145             :  */
     146          45 : void nsScanner::RewindToMark(void){
     147          45 :   if (mSlidingBuffer) {
     148          44 :     mCurrentPosition = mMarkPosition;
     149             :   }
     150          45 : }
     151             : 
     152             : 
     153             : /**
     154             :  *  Records current offset position in input stream. This allows us
     155             :  *  to back up to this point if the need should arise, such as when
     156             :  *  tokenization gets interrupted.
     157             :  *
     158             :  *  @update  gess 7/29/98
     159             :  *  Side effect: DiscardPrefix is called with the current position.
     160             :  *  @return  distance from the old buffer start to the current position (0 without a buffer)
     161             :  */
     162          89 : int32_t nsScanner::Mark() {
     163          89 :   int32_t distance = 0;
     164          89 :   if (mSlidingBuffer) {
     165          88 :     nsScannerIterator oldStart;
     166          88 :     mSlidingBuffer->BeginReading(oldStart);
     167             : 
     168          88 :     distance = Distance(oldStart, mCurrentPosition);
     169             : 
     170          88 :     mSlidingBuffer->DiscardPrefix(mCurrentPosition);
     171          88 :     mSlidingBuffer->BeginReading(mCurrentPosition); // re-fetch the iterator after the discard
     172          88 :     mMarkPosition = mCurrentPosition;
     173             :   }
     174             : 
     175          89 :   return distance;
     176             : }
     177             : 
     178             : /** 
     179             :  * Insert data to our underlying input buffer as
     180             :  * if it were read from an input stream.
     181             :  *
     182             :  * @update  harishd 01/12/99
     183             :  * @return  error code 
     184             :  */
     185           0 : bool nsScanner::UngetReadable(const nsAString& aBuffer) {
     186           0 :   if (!mSlidingBuffer) {
     187           0 :     return false;
     188             :   }
     189             : 
     190           0 :   mSlidingBuffer->UngetReadable(aBuffer,mCurrentPosition);
     191           0 :   mSlidingBuffer->BeginReading(mCurrentPosition); // Insertion invalidated our iterators
     192           0 :   mSlidingBuffer->EndReading(mEndPosition);
     193             :  
     194           0 :   return true;
     195             : }
     196             : 
     197             : /**
     198             :  * Append data to our underlying input buffer as
     199             :  * if it were read from an input stream.
     200             :  *
     201             :  * @update  gess 4/3/98
     202             :  * @return  NS_OK, or NS_ERROR_OUT_OF_MEMORY when AppendToBuffer reports failure
     203             :  */
     204           0 : nsresult nsScanner::Append(const nsAString& aBuffer) {
     205           0 :   if (!AppendToBuffer(aBuffer))
     206           0 :     return NS_ERROR_OUT_OF_MEMORY;
     207           0 :   return NS_OK;
     208             : }
     209             : 
     210             : /**
     211             :  *  Decode aLen bytes from aBuffer to UTF-16 with mUnicodeDecoder and
     212             :  *  append the result to the scanner's buffer.
     213             :  *  @update  gess 5/21/98
     214             :  *  @param   aBuffer raw bytes to decode; aLen is their count
     215             :  *  @return  NS_OK, NS_ERROR_OUT_OF_MEMORY, or NS_ERROR_FAILURE when there is no decoder
     216             :  */
     217          22 : nsresult nsScanner::Append(const char* aBuffer, uint32_t aLen)
     218             : {
     219          22 :   nsresult res = NS_OK;
     220          22 :   if (mUnicodeDecoder) {
     221          22 :     CheckedInt<size_t> needed = mUnicodeDecoder->MaxUTF16BufferLength(aLen);
     222          22 :     if (!needed.isValid()) {
     223           0 :       return NS_ERROR_OUT_OF_MEMORY;
     224             :     }
     225          22 :     CheckedInt<uint32_t> allocLen(1); // null terminator due to legacy sadness
     226          22 :     allocLen += needed.value();
     227          22 :     if (!allocLen.isValid()) {
     228           0 :       return NS_ERROR_OUT_OF_MEMORY;
     229             :     }
     230             :     nsScannerString::Buffer* buffer =
     231          22 :       nsScannerString::AllocBuffer(allocLen.value());
     232          22 :     NS_ENSURE_TRUE(buffer,NS_ERROR_OUT_OF_MEMORY);
     233          22 :     char16_t *unichars = buffer->DataStart();
     234             : 
     235             :     uint32_t result;
     236             :     size_t read;
     237             :     size_t written;
     238          44 :     Tie(result, read, written) =
     239          44 :       mUnicodeDecoder->DecodeToUTF16WithoutReplacement(
     240             :         AsBytes(MakeSpan(aBuffer, aLen)),
     241             :         MakeSpan(unichars, needed.value()),
     242          22 :         false); // Retain bug about failure to handle EOF
     243          22 :     MOZ_ASSERT(result != kOutputFull);
     244          22 :     MOZ_ASSERT(read <= aLen);
     245          22 :     MOZ_ASSERT(written <= needed.value());
     246          22 :     if (result != kInputEmpty) {
     247             :       // Since about:blank is empty, this line runs only for XML. Use a
     248             :       // character that's illegal in XML instead of U+FFFD in order to make
     249             :       // expat flag the error. There is no need to loop and convert more, since
     250             :       // expat will stop here anyway.
     251           0 :       unichars[written++] = 0xFFFF; // NOTE(review): takes the extra slot from allocLen if written == needed -- confirm
     252             :     }
     253          22 :     buffer->SetDataLength(written);
     254             :     // Don't propagate return code of unicode decoder
     255             :     // since it doesn't reflect on our success or failure
     256             :     // - Ref. bug 87110
     257          22 :     res = NS_OK; 
     258          22 :     if (!AppendToBuffer(buffer))
     259           0 :       res = NS_ERROR_OUT_OF_MEMORY;
     260             :   }
     261             :   else {
     262           0 :     NS_WARNING("No decoder found.");
     263           0 :     res = NS_ERROR_FAILURE;
     264             :   }
     265             : 
     266          22 :   return res;
     267             : }
     268             : 
     269             : /**
     270             :  *  retrieve next char from scanner's internal input stream
     271             :  *
     272             :  *  @update  gess 3/25/98
     273             :  *  @param   aChar receives the char at the current position, which then advances; set to 0 at EOF
     274             :  *  @return  NS_OK, or NS_ERROR_HTMLPARSER_EOF when there is no buffer or current == end
     275             :  */
     276           0 : nsresult nsScanner::GetChar(char16_t& aChar) {
     277           0 :   if (!mSlidingBuffer || mCurrentPosition == mEndPosition) {
     278           0 :     aChar = 0;
     279           0 :     return NS_ERROR_HTMLPARSER_EOF;
     280             :   }
     281             : 
     282           0 :   aChar = *mCurrentPosition++;
     283             : 
     284           0 :   return NS_OK;
     285             : }
     286             : // Rebinds aSubstring to mSlidingBuffer with aStart and aEnd; mSlidingBuffer is dereferenced without a null check.
     287           0 : void nsScanner::BindSubstring(nsScannerSubstring& aSubstring, const nsScannerIterator& aStart, const nsScannerIterator& aEnd)
     288             : {
     289           0 :   aSubstring.Rebind(*mSlidingBuffer, aStart, aEnd);
     290           0 : }
     291             : // Copies the scanner's current position (mCurrentPosition) into aPosition.
     292          44 : void nsScanner::CurrentPosition(nsScannerIterator& aPosition)
     293             : {
     294          44 :   aPosition = mCurrentPosition;
     295          44 : }
     296             : // Copies the scanner's end position (mEndPosition) into aPosition.
     297          88 : void nsScanner::EndReading(nsScannerIterator& aPosition)
     298             : {
     299          88 :   aPosition = mEndPosition;
     300          88 : }
     301             : // Moves the current position to aPosition; does nothing when there is no sliding buffer.
     302          44 : void nsScanner::SetPosition(nsScannerIterator& aPosition, bool aTerminate)
     303             : {
     304          44 :   if (mSlidingBuffer) {
     305          44 :     mCurrentPosition = aPosition;
     306          44 :     if (aTerminate && (mCurrentPosition == mEndPosition)) { // terminating exactly at the end position
     307          44 :       mMarkPosition = mCurrentPosition;
     308          44 :       mSlidingBuffer->DiscardPrefix(mCurrentPosition);
     309             :     }
     310             :   }
     311          44 : }
     312             : // Adds aBuf to the sliding buffer, creating that buffer on first use, and refreshes the cached iterators.
     313          22 : bool nsScanner::AppendToBuffer(nsScannerString::Buffer* aBuf)
     314             : {
     315          22 :   if (!mSlidingBuffer) {
     316          22 :     mSlidingBuffer = new nsScannerString(aBuf);
     317          22 :     if (!mSlidingBuffer)
     318           0 :       return false; // the only path that reports failure
     319          22 :     mSlidingBuffer->BeginReading(mCurrentPosition);
     320          22 :     mMarkPosition = mCurrentPosition;
     321          22 :     mSlidingBuffer->EndReading(mEndPosition);
     322             :   }
     323             :   else {
     324           0 :     mSlidingBuffer->AppendBuffer(aBuf);
     325           0 :     if (mCurrentPosition == mEndPosition) { // was at the old end: take the current position from BeginReading
     326           0 :       mSlidingBuffer->BeginReading(mCurrentPosition);
     327             :     }
     328           0 :     mSlidingBuffer->EndReading(mEndPosition);
     329             :   }
     330             : 
     331          22 :   return true;
     332             : }
     333             : 
     334             : /**
     335             :  *  Call this to copy the data between the current position and the end
     336             :  *  position, i.e. what the tokenization process has not yet consumed.
     337             :  *  Without a sliding buffer, aCopyBuffer is truncated and true is returned.
     338             :  *  @update  gess 5/12/98
     339             :  *  @param   aCopyBuffer is where the scanner buffer will be copied to
     340             :  *  @return  true if OK or false on OOM (the result of CopyUnicodeTo)
     341             :  */
     342           0 : bool nsScanner::CopyUnusedData(nsString& aCopyBuffer) {
     343           0 :   if (!mSlidingBuffer) {
     344           0 :     aCopyBuffer.Truncate();
     345           0 :     return true;
     346             :   }
     347             : 
     348           0 :   nsScannerIterator start, end;
     349           0 :   start = mCurrentPosition;
     350           0 :   end = mEndPosition;
     351             : 
     352           0 :   return CopyUnicodeTo(start, end, aCopyBuffer);
     353             : }
     354             : 
     355             : /**
     356             :  *  Retrieve the name of the file that the scanner is reading from.
     357             :  *  In some cases, it's just a given name, because the scanner isn't
     358             :  *  really reading from a file.
     359             :  *
     360             :  *  @update  gess 5/12/98
     361             :  *  @return  non-const reference to the stored name (mFilename)
     362             :  */
     363          67 : nsString& nsScanner::GetFilename(void) {
     364          67 :   return mFilename;
     365             : }
     366             : 
     367             : /**
     368             :  *  Conduct self test. Actually, selftesting for this class
     369             :  *  occurs in the parser selftest.
     370             :  *
     371             :  *  @update  gess 3/25/98
     372             :  *  This function is therefore empty, even when _DEBUG is defined.
     373             :  *  No parameters, no return value.
     374             :  */
     375             : 
     376           0 : void nsScanner::SelfTest(void) {
     377             : #ifdef _DEBUG
     378             : #endif
     379           0 : }

Generated by: LCOV version 1.13