LCOV - code coverage report
Current view: top level - parser/html - nsHtml5Tokenizer.h (source / functions) Hit Total Coverage
Test: output.info Lines: 28 61 45.9 %
Date: 2017-07-14 16:53:18 Functions: 8 16 50.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * Copyright (c) 2005-2007 Henri Sivonen
       3             :  * Copyright (c) 2007-2015 Mozilla Foundation
       4             :  * Portions of comments Copyright 2004-2010 Apple Computer, Inc., Mozilla
       5             :  * Foundation, and Opera Software ASA.
       6             :  *
       7             :  * Permission is hereby granted, free of charge, to any person obtaining a
       8             :  * copy of this software and associated documentation files (the "Software"),
       9             :  * to deal in the Software without restriction, including without limitation
      10             :  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      11             :  * and/or sell copies of the Software, and to permit persons to whom the
      12             :  * Software is furnished to do so, subject to the following conditions:
      13             :  *
      14             :  * The above copyright notice and this permission notice shall be included in
      15             :  * all copies or substantial portions of the Software.
      16             :  *
      17             :  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      18             :  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      19             :  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
      20             :  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      21             :  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
      22             :  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
      23             :  * DEALINGS IN THE SOFTWARE.
      24             :  */
      25             : 
      26             : /*
      27             :  * THIS IS A GENERATED FILE. PLEASE DO NOT EDIT.
      28             :  * Please edit Tokenizer.java instead and regenerate.
      29             :  */
      30             : 
      31             : #ifndef nsHtml5Tokenizer_h
      32             : #define nsHtml5Tokenizer_h
      33             : 
      34             : #include "nsIAtom.h"
      35             : #include "nsHtml5AtomTable.h"
      36             : #include "nsHtml5String.h"
      37             : #include "nsIContent.h"
      38             : #include "nsTraceRefcnt.h"
      39             : #include "jArray.h"
      40             : #include "nsHtml5DocumentMode.h"
      41             : #include "nsHtml5ArrayCopy.h"
      42             : #include "nsHtml5NamedCharacters.h"
      43             : #include "nsHtml5NamedCharactersAccel.h"
      44             : #include "nsGkAtoms.h"
      45             : #include "nsAHtml5TreeBuilderState.h"
      46             : #include "nsHtml5Macros.h"
      47             : #include "nsHtml5Highlighter.h"
      48             : #include "nsHtml5TokenizerLoopPolicies.h"
      49             : 
      50             : class nsHtml5StreamParser;
      51             : 
      52             : class nsHtml5AttributeName;
      53             : class nsHtml5ElementName;
      54             : class nsHtml5TreeBuilder;
      55             : class nsHtml5MetaScanner;
      56             : class nsHtml5UTF16Buffer;
      57             : class nsHtml5StateSnapshot;
      58             : class nsHtml5Portability;
      59             : 
      60             : 
      61             : class nsHtml5Tokenizer
      62             : {
      63             :   private:
      64             :     static const int32_t DATA_AND_RCDATA_MASK = ~1;
      65             : 
      66             :   public:
      67             :     static const int32_t DATA = 0;
      68             : 
      69             :     static const int32_t RCDATA = 1;
      70             : 
      71             :     static const int32_t SCRIPT_DATA = 2;
      72             : 
      73             :     static const int32_t RAWTEXT = 3;
      74             : 
      75             :     static const int32_t SCRIPT_DATA_ESCAPED = 4;
      76             : 
      77             :     static const int32_t ATTRIBUTE_VALUE_DOUBLE_QUOTED = 5;
      78             : 
      79             :     static const int32_t ATTRIBUTE_VALUE_SINGLE_QUOTED = 6;
      80             : 
      81             :     static const int32_t ATTRIBUTE_VALUE_UNQUOTED = 7;
      82             : 
      83             :     static const int32_t PLAINTEXT = 8;
      84             : 
      85             :     static const int32_t TAG_OPEN = 9;
      86             : 
      87             :     static const int32_t CLOSE_TAG_OPEN = 10;
      88             : 
      89             :     static const int32_t TAG_NAME = 11;
      90             : 
      91             :     static const int32_t BEFORE_ATTRIBUTE_NAME = 12;
      92             : 
      93             :     static const int32_t ATTRIBUTE_NAME = 13;
      94             : 
      95             :     static const int32_t AFTER_ATTRIBUTE_NAME = 14;
      96             : 
      97             :     static const int32_t BEFORE_ATTRIBUTE_VALUE = 15;
      98             : 
      99             :     static const int32_t AFTER_ATTRIBUTE_VALUE_QUOTED = 16;
     100             : 
     101             :     static const int32_t BOGUS_COMMENT = 17;
     102             : 
     103             :     static const int32_t MARKUP_DECLARATION_OPEN = 18;
     104             : 
     105             :     static const int32_t DOCTYPE = 19;
     106             : 
     107             :     static const int32_t BEFORE_DOCTYPE_NAME = 20;
     108             : 
     109             :     static const int32_t DOCTYPE_NAME = 21;
     110             : 
     111             :     static const int32_t AFTER_DOCTYPE_NAME = 22;
     112             : 
     113             :     static const int32_t BEFORE_DOCTYPE_PUBLIC_IDENTIFIER = 23;
     114             : 
     115             :     static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED = 24;
     116             : 
     117             :     static const int32_t DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED = 25;
     118             : 
     119             :     static const int32_t AFTER_DOCTYPE_PUBLIC_IDENTIFIER = 26;
     120             : 
     121             :     static const int32_t BEFORE_DOCTYPE_SYSTEM_IDENTIFIER = 27;
     122             : 
     123             :     static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED = 28;
     124             : 
     125             :     static const int32_t DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED = 29;
     126             : 
     127             :     static const int32_t AFTER_DOCTYPE_SYSTEM_IDENTIFIER = 30;
     128             : 
     129             :     static const int32_t BOGUS_DOCTYPE = 31;
     130             : 
     131             :     static const int32_t COMMENT_START = 32;
     132             : 
     133             :     static const int32_t COMMENT_START_DASH = 33;
     134             : 
     135             :     static const int32_t COMMENT = 34;
     136             : 
     137             :     static const int32_t COMMENT_END_DASH = 35;
     138             : 
     139             :     static const int32_t COMMENT_END = 36;
     140             : 
     141             :     static const int32_t COMMENT_END_BANG = 37;
     142             : 
     143             :     static const int32_t NON_DATA_END_TAG_NAME = 38;
     144             : 
     145             :     static const int32_t MARKUP_DECLARATION_HYPHEN = 39;
     146             : 
     147             :     static const int32_t MARKUP_DECLARATION_OCTYPE = 40;
     148             : 
     149             :     static const int32_t DOCTYPE_UBLIC = 41;
     150             : 
     151             :     static const int32_t DOCTYPE_YSTEM = 42;
     152             : 
     153             :     static const int32_t AFTER_DOCTYPE_PUBLIC_KEYWORD = 43;
     154             : 
     155             :     static const int32_t BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS = 44;
     156             : 
     157             :     static const int32_t AFTER_DOCTYPE_SYSTEM_KEYWORD = 45;
     158             : 
     159             :     static const int32_t CONSUME_CHARACTER_REFERENCE = 46;
     160             : 
     161             :     static const int32_t CONSUME_NCR = 47;
     162             : 
     163             :     static const int32_t CHARACTER_REFERENCE_TAIL = 48;
     164             : 
     165             :     static const int32_t HEX_NCR_LOOP = 49;
     166             : 
     167             :     static const int32_t DECIMAL_NRC_LOOP = 50;
     168             : 
     169             :     static const int32_t HANDLE_NCR_VALUE = 51;
     170             : 
     171             :     static const int32_t HANDLE_NCR_VALUE_RECONSUME = 52;
     172             : 
     173             :     static const int32_t CHARACTER_REFERENCE_HILO_LOOKUP = 53;
     174             : 
     175             :     static const int32_t SELF_CLOSING_START_TAG = 54;
     176             : 
     177             :     static const int32_t CDATA_START = 55;
     178             : 
     179             :     static const int32_t CDATA_SECTION = 56;
     180             : 
     181             :     static const int32_t CDATA_RSQB = 57;
     182             : 
     183             :     static const int32_t CDATA_RSQB_RSQB = 58;
     184             : 
     185             :     static const int32_t SCRIPT_DATA_LESS_THAN_SIGN = 59;
     186             : 
     187             :     static const int32_t SCRIPT_DATA_ESCAPE_START = 60;
     188             : 
     189             :     static const int32_t SCRIPT_DATA_ESCAPE_START_DASH = 61;
     190             : 
     191             :     static const int32_t SCRIPT_DATA_ESCAPED_DASH = 62;
     192             : 
     193             :     static const int32_t SCRIPT_DATA_ESCAPED_DASH_DASH = 63;
     194             : 
     195             :     static const int32_t BOGUS_COMMENT_HYPHEN = 64;
     196             : 
     197             :     static const int32_t RAWTEXT_RCDATA_LESS_THAN_SIGN = 65;
     198             : 
     199             :     static const int32_t SCRIPT_DATA_ESCAPED_LESS_THAN_SIGN = 66;
     200             : 
     201             :     static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_START = 67;
     202             : 
     203             :     static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED = 68;
     204             : 
     205             :     static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_LESS_THAN_SIGN = 69;
     206             : 
     207             :     static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH = 70;
     208             : 
     209             :     static const int32_t SCRIPT_DATA_DOUBLE_ESCAPED_DASH_DASH = 71;
     210             : 
     211             :     static const int32_t SCRIPT_DATA_DOUBLE_ESCAPE_END = 72;
     212             : 
     213             :     static const int32_t PROCESSING_INSTRUCTION = 73;
     214             : 
     215             :     static const int32_t PROCESSING_INSTRUCTION_QUESTION_MARK = 74;
     216             : 
     217             :   private:
     218             :     static const int32_t LEAD_OFFSET = (0xD800 - (0x10000 >> 10));
     219             : 
     220             :     static char16_t LT_GT[];
     221             :     static char16_t LT_SOLIDUS[];
     222             :     static char16_t RSQB_RSQB[];
     223             :     static char16_t REPLACEMENT_CHARACTER[];
     224             :     static char16_t LF[];
     225             :     static char16_t CDATA_LSQB[];
     226             :     static char16_t OCTYPE[];
     227             :     static char16_t UBLIC[];
     228             :     static char16_t YSTEM[];
     229             :     static staticJArray<char16_t,int32_t> TITLE_ARR;
     230             :     static staticJArray<char16_t,int32_t> SCRIPT_ARR;
     231             :     static staticJArray<char16_t,int32_t> STYLE_ARR;
     232             :     static staticJArray<char16_t,int32_t> PLAINTEXT_ARR;
     233             :     static staticJArray<char16_t,int32_t> XMP_ARR;
     234             :     static staticJArray<char16_t,int32_t> TEXTAREA_ARR;
     235             :     static staticJArray<char16_t,int32_t> IFRAME_ARR;
     236             :     static staticJArray<char16_t,int32_t> NOEMBED_ARR;
     237             :     static staticJArray<char16_t,int32_t> NOSCRIPT_ARR;
     238             :     static staticJArray<char16_t,int32_t> NOFRAMES_ARR;
     239             :   protected:
     240             :     nsHtml5TreeBuilder* tokenHandler;
     241             :     nsHtml5StreamParser* encodingDeclarationHandler;
     242             :     bool lastCR;
     243             :     int32_t stateSave;
     244             :   private:
     245             :     int32_t returnStateSave;
     246             :   protected:
     247             :     int32_t index;
     248             :   private:
     249             :     bool forceQuirks;
     250             :     char16_t additional;
     251             :     int32_t entCol;
     252             :     int32_t firstCharKey;
     253             :     int32_t lo;
     254             :     int32_t hi;
     255             :     int32_t candidate;
     256             :     int32_t charRefBufMark;
     257             :   protected:
     258             :     int32_t value;
     259             :   private:
     260             :     bool seenDigits;
     261             :   protected:
     262             :     int32_t cstart;
     263             :   private:
     264             :     nsHtml5String publicId;
     265             :     nsHtml5String systemId;
     266             :     autoJArray<char16_t,int32_t> strBuf;
     267             :     int32_t strBufLen;
     268             :     autoJArray<char16_t,int32_t> charRefBuf;
     269             :     int32_t charRefBufLen;
     270             :     autoJArray<char16_t,int32_t> bmpChar;
     271             :     autoJArray<char16_t,int32_t> astralChar;
     272             :   protected:
     273             :     nsHtml5ElementName* endTagExpectation;
     274             :   private:
     275             :     jArray<char16_t,int32_t> endTagExpectationAsArray;
     276             :   protected:
     277             :     bool endTag;
     278             :   private:
     279             :     bool containsHyphen;
     280             :     nsHtml5ElementName* tagName;
     281             :     nsHtml5ElementName* nonInternedTagName;
     282             :   protected:
     283             :     nsHtml5AttributeName* attributeName;
     284             :   private:
     285             :     nsHtml5AttributeName* nonInternedAttributeName;
     286             :     nsIAtom* doctypeName;
     287             :     nsHtml5String publicIdentifier;
     288             :     nsHtml5String systemIdentifier;
     289             :     nsHtml5HtmlAttributes* attributes;
     290             :     bool newAttributesEachTime;
     291             :     bool shouldSuspend;
     292             :   protected:
     293             :     bool confident;
     294             :   private:
     295             :     int32_t line;
     296             :     int32_t attributeLine;
     297             :     nsHtml5AtomTable* interner;
     298             :     bool viewingXmlSource;
     299             :   public:
     300             :     nsHtml5Tokenizer(nsHtml5TreeBuilder* tokenHandler, bool viewingXmlSource);
     301             :     void setInterner(nsHtml5AtomTable* interner);
     302             :     void initLocation(nsHtml5String newPublicId, nsHtml5String newSystemId);
     303             :     bool isViewingXmlSource();
     304             :     void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsIAtom* endTagExpectation);
     305             :     void setStateAndEndTagExpectation(int32_t specialTokenizerState, nsHtml5ElementName* endTagExpectation);
     306             :   private:
     307             :     void endTagExpectationToArray();
     308             :   public:
     309             :     void setLineNumber(int32_t line);
     310          10 :     inline int32_t getLineNumber() // Accessor: returns the current value of the `line` member; no state is modified.
     311             :     {
     312          10 :       return line;
     313             :     }
     314             : 
     315             :     nsHtml5HtmlAttributes* emptyAttributes();
     316             :   private:
     317           0 :     inline void appendCharRefBuf(char16_t c) // Appends the single code unit `c` to charRefBuf and advances charRefBufLen by one.
     318             :     {
     319           0 :       MOZ_RELEASE_ASSERT(charRefBufLen < charRefBuf.length, "Attempted to overrun charRefBuf!"); // Guard against writing past the end; this function has no growth path of its own.
     320           0 :       charRefBuf[charRefBufLen++] = c; // Store at the current length, then post-increment the length.
     321           0 :     }
     322             : 
     323             :     void emitOrAppendCharRefBuf(int32_t returnState);
     324          82 :     inline void clearStrBufAfterUse() // Marks strBuf as empty by zeroing strBufLen; the buffer's contents are left as they are.
     325             :     {
     326          82 :       strBufLen = 0;
     327          82 :     }
     328             : 
     329          64 :     inline void clearStrBufBeforeUse() // Prepares strBuf for a new use: checks it was already emptied, then zeroes strBufLen.
     330             :     {
     331          64 :       MOZ_ASSERT(!strBufLen, "strBufLen not reset after previous use!"); // Expects the previous user to have reset the length already.
     332          64 :       strBufLen = 0; // Reset unconditionally -- presumably so builds where MOZ_ASSERT compiles out still start empty (TODO confirm).
     333          64 :     }
     334             : 
     335           5 :     inline void clearStrBufAfterOneHyphen() // Empties strBuf in the case where it is expected to hold exactly one '-'.
     336             :     {
     337           5 :       MOZ_ASSERT(strBufLen == 1, "strBufLen length not one!"); // Precondition: exactly one code unit is buffered...
     338           5 :       MOZ_ASSERT(strBuf[0] == '-', "strBuf does not start with a hyphen!"); // ...and that code unit is a hyphen.
     339           5 :       strBufLen = 0; // Discard the buffered hyphen by resetting the length only.
     340           5 :     }
     341             : 
     342        1144 :     inline void appendStrBuf(char16_t c) // Appends the single code unit `c` to strBuf and advances strBufLen by one.
     343             :     {
     344        1144 :       MOZ_ASSERT(strBufLen < strBuf.length, "Previous buffer length insufficient."); // Callers are expected to have left room already.
     345        1144 :       if (MOZ_UNLIKELY(strBufLen == strBuf.length)) { // Fallback for an exactly-full buffer; NOTE(review): only reachable when the assert above does not abort first.
     346           0 :         if (MOZ_UNLIKELY(!EnsureBufferSpace(1))) { // Request room for one more unit; EnsureBufferSpace is not declared in this view (presumably from the H supplement -- confirm).
     347           0 :           MOZ_CRASH("Unable to recover from buffer reallocation failure"); // No recovery path: a falsy result from EnsureBufferSpace is treated as fatal.
     348             :         }
     349             :       }
     350        1144 :       strBuf[strBufLen++] = c; // Store at the current length, then post-increment the length.
     351        1144 :     }
     352             : 
     353             :   protected:
     354             :     nsHtml5String strBufToString();
     355             :   private:
     356             :     void strBufToDoctypeName();
     357             :     void emitStrBuf();
     358           0 :     inline void appendSecondHyphenToBogusComment() // Appends a literal '-' to strBuf via appendStrBuf(char16_t).
     359             :     {
     360           0 :       appendStrBuf('-');
     361           0 :     }
     362             : 
     363           0 :     inline void adjustDoubleHyphenAndAppendToStrBufAndErr(char16_t c) // Calls errConsecutiveHyphens(), then appends `c` to strBuf.
     364             :     {
     365           0 :       errConsecutiveHyphens(); // Declared outside this view; presumably reports the consecutive-hyphen parse error (confirm).
     366           0 :       appendStrBuf(c); // NOTE(review): despite "adjust" in the name, `c` is appended unchanged here.
     367           0 :     }
     368             : 
     369             :     void appendStrBuf(char16_t* buffer, int32_t offset, int32_t length);
     370           0 :     inline void appendCharRefBufToStrBuf() // Hands the buffered charRefBuf content to strBuf, then marks charRefBuf empty.
     371             :     {
     372           0 :       appendStrBuf(charRefBuf, 0, charRefBufLen); // Uses the (buffer, offset, length) overload with offset 0 and the current charRefBuf length.
     373           0 :       charRefBufLen = 0; // Empty charRefBuf by resetting its length only.
     374           0 :     }
     375             : 
     376             :     void emitComment(int32_t provisionalHyphens, int32_t pos);
     377             :   protected:
     378             :     void flushChars(char16_t* buf, int32_t pos);
     379             :   private:
     380             :     void strBufToElementNameString();
     381             :     int32_t emitCurrentTagToken(bool selfClosing, int32_t pos);
     382             :     void attributeNameComplete();
     383             :     void addAttributeWithoutValue();
     384             :     void addAttributeWithValue();
     385             :   public:
     386             :     void start();
     387             :     bool tokenizeBuffer(nsHtml5UTF16Buffer* buffer);
     388             :   private:
     389             :     template<class P> int32_t stateLoop(int32_t state, char16_t c, int32_t pos, char16_t* buf, bool reconsume, int32_t returnState, int32_t endPos);
     390             :     void initDoctypeFields();
     391           0 :     inline void adjustDoubleHyphenAndAppendToStrBufCarriageReturn() // Carriage-return variant: does CR bookkeeping, then the hyphen-error append of '\n'.
     392             :     {
     393           0 :       silentCarriageReturn(); // Increments `line` and sets lastCR.
     394           0 :       adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); // Note that '\n', not '\r', is what gets appended to strBuf.
     395           0 :     }
     396             : 
     397           0 :     inline void adjustDoubleHyphenAndAppendToStrBufLineFeed() // Line-feed variant: does LF bookkeeping, then the hyphen-error append of '\n'.
     398             :     {
     399           0 :       silentLineFeed(); // Increments `line`.
     400           0 :       adjustDoubleHyphenAndAppendToStrBufAndErr('\n'); // Calls errConsecutiveHyphens() and appends '\n' to strBuf.
     401           0 :     }
     402             : 
     403           4 :     inline void appendStrBufLineFeed() // Counts a new line and appends '\n' to strBuf.
     404             :     {
     405           4 :       silentLineFeed(); // Increments `line`.
     406           4 :       appendStrBuf('\n');
     407           4 :     }
     408             : 
     409           0 :     inline void appendStrBufCarriageReturn() // Counts a new line for a carriage return and appends '\n' to strBuf.
     410             :     {
     411           0 :       silentCarriageReturn(); // Increments `line` and sets lastCR.
     412           0 :       appendStrBuf('\n'); // The carriage return is stored in strBuf as '\n'.
     413           0 :     }
     414             : 
     415             :   protected:
     416           0 :     inline void silentCarriageReturn() // Line bookkeeping for a carriage return; nothing is emitted or buffered here.
     417             :     {
     418           0 :       ++line; // Advance the line counter.
     419           0 :       lastCR = true; // Record that a CR was just seen; the consumer of this flag is outside this view.
     420           0 :     }
     421             : 
     422          81 :     inline void silentLineFeed() // Line bookkeeping for a line feed; nothing is emitted or buffered here.
     423             :     {
     424          81 :       ++line; // Advance the line counter.
     425          81 :     }
     426             : 
     427             :   private:
     428             :     void emitCarriageReturn(char16_t* buf, int32_t pos);
     429             :     void emitReplacementCharacter(char16_t* buf, int32_t pos);
     430             :     void emitPlaintextReplacementCharacter(char16_t* buf, int32_t pos);
     431             :     void setAdditionalAndRememberAmpersandLocation(char16_t add);
     432             :     void bogusDoctype();
     433             :     void bogusDoctypeWithoutQuirks();
     434             :     void handleNcrValue(int32_t returnState);
     435             :   public:
     436             :     void eof();
     437             :   private:
     438             :     void emitDoctypeToken(int32_t pos);
     439             :   protected:
     440        3106 :     inline char16_t checkChar(char16_t* buf, int32_t pos) // Returns the code unit at buf[pos].
     441             :     {
     442        3106 :       return buf[pos]; // Plain indexed read: no bounds or validity check is performed here.
     443             :     }
     444             : 
     445             :   public:
     446             :     bool internalEncodingDeclaration(nsHtml5String internalCharset);
     447             :   private:
     448             :     void emitOrAppendTwo(const char16_t* val, int32_t returnState);
     449             :     void emitOrAppendOne(const char16_t* val, int32_t returnState);
     450             :   public:
     451             :     void end();
     452             :     void requestSuspension();
     453             :     bool isInDataState();
     454             :     void resetToDataState();
     455             :     void loadState(nsHtml5Tokenizer* other);
     456             :     void initializeWithoutStarting();
     457             :     void setEncodingDeclarationHandler(nsHtml5StreamParser* encodingDeclarationHandler);
     458             :     ~nsHtml5Tokenizer();
     459             :     static void initializeStatics();
     460             :     static void releaseStatics();
     461             : 
     462             : #include "nsHtml5TokenizerHSupplement.h"
     463             : };
     464             : 
     465             : #endif
     466             : 

Generated by: LCOV version 1.13