LCOV - code coverage report
Current view: top level - layout/style - nsCSSScanner.h (source / functions) Hit Total Coverage
Test: output.info Lines: 25 32 78.1 %
Date: 2017-07-14 16:53:18 Functions: 13 19 68.4 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* This Source Code Form is subject to the terms of the Mozilla Public
       3             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       4             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       5             : 
       6             : /* tokenization of CSS style sheets */
       7             : 
       8             : #ifndef nsCSSScanner_h___
       9             : #define nsCSSScanner_h___
      10             : 
      11             : #include "nsString.h"
      12             : 
      13             : namespace mozilla {
      14             : namespace css {
      15             : class ErrorReporter;
      16             : } // namespace css
      17             : } // namespace mozilla
      18             : 
      19             : // Token types; in close but not perfect correspondence to the token
      20             : // categorization in section 4.1.1 of CSS2.1.  (The deviations are all
      21             : // the fault of css3-selectors, which has requirements that can only be
      22             : // met by changing the generic tokenization.)  The trailing comment on
      23             : // each enumerator line illustrates the lexical form of that token type.
      24             : 
      25             : enum nsCSSTokenType {
      26             :   // White space of any kind.  No value fields are used.  Note that
      27             :   // comments do *not* count as white space; comments separate tokens
      28             :   // but are not themselves tokens.
      29             :   eCSSToken_Whitespace,     // (white space)
      30             :   // A comment.  Only returned when comments are not excluded (see nsCSSScannerExclude).
      31             :   eCSSToken_Comment,        // /*...*/
      32             : 
      33             :   // Identifier-like tokens.  mIdent is the text of the identifier.
      34             :   // The difference between ID and Hash is: if the text after the #
      35             :   // would have been a valid Ident if the # hadn't been there, the
      36             :   // scanner produces an ID token.  Otherwise it produces a Hash token.
      37             :   // (This distinction is required by css3-selectors.)
      38             :   eCSSToken_Ident,          // word
      39             :   eCSSToken_Function,       // word(
      40             :   eCSSToken_AtKeyword,      // @word
      41             :   eCSSToken_ID,             // #word
      42             :   eCSSToken_Hash,           // #0word
      43             : 
      44             :   // Numeric tokens.  mNumber is the floating-point value of the
      45             :   // number, and mHasSign indicates whether there was an explicit sign
      46             :   // (+ or -) in front of the number.  If mIntegerValid is true, the
      47             :   // number had the lexical form of an integer, and mInteger is its
      48             :   // integer value.  Lexically integer values outside the range of a
      49             :   // 32-bit signed number are clamped to the maximum values; mNumber
      50             :   // will indicate a 'truer' value in that case.  Percentage tokens
      51             :   // are always considered not to be integers, even if their numeric
      52             :   // value is integral (100% => mNumber = 1.0).  For Dimension
      53             :   // tokens, mIdent holds the text of the unit.
      54             :   eCSSToken_Number,         // 1 -5 +2e3 3.14159 7.297352e-3
      55             :   eCSSToken_Dimension,      // 24px 8.5in
      56             :   eCSSToken_Percentage,     // 85% 1280.4%
      57             : 
      58             :   // String-like tokens.  In all cases, mIdent holds the text
      59             :   // belonging to the string, and mSymbol holds the delimiter
      60             :   // character, which may be ', ", or zero (only for unquoted URLs).
      61             :   // Bad_String and Bad_URL tokens are emitted when the closing
      62             :   // delimiter or parenthesis was missing.
      63             :   eCSSToken_String,         // 'foo bar' "foo bar"
      64             :   eCSSToken_Bad_String,     // 'foo bar
      65             :   eCSSToken_URL,            // url(foobar) url("foo bar")
      66             :   eCSSToken_Bad_URL,        // url(foo
      67             : 
      68             :   // Any one-character symbol.  mSymbol holds the character.
      69             :   eCSSToken_Symbol,         // . ; { } ! *
      70             : 
      71             :   // Match operators.  These are single tokens rather than pairs of
      72             :   // Symbol tokens because css3-selectors forbids the presence of
      73             :   // comments between the two characters.  No value fields are used;
      74             :   // the token type indicates which operator.
      75             :   eCSSToken_Includes,       // ~=
      76             :   eCSSToken_Dashmatch,      // |=
      77             :   eCSSToken_Beginsmatch,    // ^=
      78             :   eCSSToken_Endsmatch,      // $=
      79             :   eCSSToken_Containsmatch,  // *=
      80             : 
      81             :   // Unicode-range token: currently used only in @font-face.
      82             :   // The lexical rule for this token includes several forms that are
      83             :   // semantically invalid.  Therefore, mIdent always holds the
      84             :   // complete original text of the token (so we can print it
      85             :   // accurately in diagnostics), and mIntegerValid is true iff the
      86             :   // token is semantically valid.  In that case, mInteger holds the
      87             :   // lowest value included in the range, and mInteger2 holds the
      88             :   // highest value included in the range.
      89             :   eCSSToken_URange,         // U+007e U+01?? U+2000-206F
      90             : 
      91             :   // HTML comment delimiters, ignored as a unit when they appear at
      92             :   // the top level of a style sheet, for compatibility with websites
      93             :   // written to accommodate pre-CSS browsers.  This token type
      94             :   // subsumes the css2.1 CDO and CDC tokens, which are always treated
      95             :   // the same by the parser.  mIdent holds the text of the token, for
      96             :   // diagnostics.
      97             :   eCSSToken_HTMLComment,    // <!-- -->
      98             : };
      99             : 
     100             : // Classification of tokens used to determine whether a "/**/" string must be
     101             : // inserted between pasted-together token streams when serializing.  We include
     102             : // values corresponding to eCSSToken_Dashmatch and eCSSToken_Containsmatch,
     103             : // as css-syntax does not treat these as whole tokens, but we will still
     104             : // need to insert a "/**/" string between a '|' delim and a '|=' dashmatch
     105             : // and between a '/' delim and a '*=' containsmatch.
     106             : //
     107             : // https://drafts.csswg.org/css-syntax/#serialization
     108             : enum nsCSSTokenSerializationType {
     109             :   eCSSTokenSerialization_Nothing,
     110             :   eCSSTokenSerialization_Whitespace,
     111             :   eCSSTokenSerialization_AtKeyword_or_Hash,
     112             :   eCSSTokenSerialization_Number,
     113             :   eCSSTokenSerialization_Dimension,
     114             :   eCSSTokenSerialization_Percentage,
     115             :   eCSSTokenSerialization_URange,
     116             :   eCSSTokenSerialization_URL_or_BadURL,
     117             :   eCSSTokenSerialization_Function,
     118             :   eCSSTokenSerialization_Ident,
     119             :   eCSSTokenSerialization_CDC,                 // '-->' (CDC in css-syntax)
     120             :   eCSSTokenSerialization_DashMatch,           // '|=' (cf. eCSSToken_Dashmatch)
     121             :   eCSSTokenSerialization_ContainsMatch,       // '*=' (cf. eCSSToken_Containsmatch)
     122             :   eCSSTokenSerialization_Symbol_Hash,         // '#'
     123             :   eCSSTokenSerialization_Symbol_At,           // '@'
     124             :   eCSSTokenSerialization_Symbol_Dot_or_Plus,  // '.', '+'
     125             :   eCSSTokenSerialization_Symbol_Minus,        // '-'
     126             :   eCSSTokenSerialization_Symbol_OpenParen,    // '('
     127             :   eCSSTokenSerialization_Symbol_Question,     // '?'
     128             :   eCSSTokenSerialization_Symbol_Assorted,     // '$', '^', '~'
     129             :   eCSSTokenSerialization_Symbol_Equals,       // '='
     130             :   eCSSTokenSerialization_Symbol_Bar,          // '|'
     131             :   eCSSTokenSerialization_Symbol_Slash,        // '/'
     132             :   eCSSTokenSerialization_Symbol_Asterisk,     // '*'
     133             :   eCSSTokenSerialization_Other                // anything else
     134             : };
     135             : 
     136             : // A single token returned from the scanner.  mType is always
     137             : // meaningful; comments above describe which other fields are
     138             : // meaningful for which token types.
     139       15174 : struct nsCSSToken {
     140             :   nsAutoString    mIdent;         // text payload: identifier, string/URL body, unit, or original token text
     141             :   float           mNumber;        // floating-point value of a numeric token
     142             :   int32_t         mInteger;       // integer value when mIntegerValid; lowest value of a valid URange
     143             :   int32_t         mInteger2;      // highest value of a valid URange
     144             :   nsCSSTokenType  mType;          // always meaningful; determines which other fields apply
     145             :   char16_t       mSymbol;         // Symbol character, or the delimiter of a string-like token (zero if none)
     146             :   bool            mIntegerValid;  // number was lexically an integer / URange is semantically valid
     147             :   bool            mHasSign;       // numeric token had an explicit + or - sign
     148             : 
     149        7462 :   nsCSSToken()  // default state: a Whitespace token with every value field zero/false
     150        7462 :     : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace),
     151        7462 :       mSymbol('\0'), mIntegerValid(false), mHasSign(false)
     152        7462 :   {}
     153             : 
     154      114057 :   bool IsSymbol(char16_t aSymbol) const {  // true iff this is a Symbol token holding exactly aSymbol
     155      114057 :     return mType == eCSSToken_Symbol && mSymbol == aSymbol;
     156             :   }
     157             : 
     158             :   void AppendToString(nsString& aBuffer) const;  // defined elsewhere; presumably appends this token's text to aBuffer
     159             : };
     160             : 
     161             : // Represents an nsCSSScanner's saved position in the input buffer.
     162             : class nsCSSScannerPosition {
     163             :   friend class nsCSSScanner;  // gives the scanner access to the private fields below
     164             : public:
     165        7458 :   nsCSSScannerPosition() : mInitialized(false) { }  // only mInitialized is set; the other fields are left unset
     166             : 
     167         606 :   uint32_t LineNumber() {  // saved mLineNumber; asserts that the position has been initialized
     168         606 :     MOZ_ASSERT(mInitialized);
     169         606 :     return mLineNumber;
     170             :   }
     171             : 
     172         606 :   uint32_t LineOffset() {  // saved mLineOffset; asserts that the position has been initialized
     173         606 :     MOZ_ASSERT(mInitialized);
     174         606 :     return mLineOffset;
     175             :   }
     176             : 
     177             : private:
     178             :   // NOTE(review): these share names with nsCSSScanner members; presumably
     179             :   // copied to/from them by SavePosition/RestoreSavedPosition -- confirm.
     180             :   uint32_t mLineOffset;
     181             :   uint32_t mTokenLineNumber;
     182             :   uint32_t mTokenLineOffset;
     183             :   uint32_t mTokenOffset;
     184             :   bool mInitialized;  // false until filled in; checked by the accessors above
     185             : };
     186             : 
     187             : enum nsCSSScannerExclude {  // passed as aSkip to nsCSSScanner::Next() to select which tokens are returned
     188             :   // Return all tokens, including whitespace and comments.
     189             :   eCSSScannerExclude_None,
     190             :   // Return whitespace tokens but never comment tokens.
     191             :   eCSSScannerExclude_Comments,
     192             :   // Return neither whitespace tokens nor comment tokens.
     193             :   eCSSScannerExclude_WhitespaceAndComments
     194             : };
     195             : 
     196             : // nsCSSScanner tokenizes an input stream using the CSS2.1 forward
     197             : // compatible tokenization rules.  Used internally by nsCSSParser;
     198             : // not available for use by other code.
     199             : class nsCSSScanner {
     200             :   public:
     201             :   // |aLineNumber == 1| is the beginning of a file; use |aLineNumber == 0|
     202             :   // when the line number is unknown.  The scanner does not take
     203             :   // ownership of |aBuffer|, so the caller must be sure to keep it
     204             :   // alive for the lifetime of the scanner.
     205             :   nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
     206             :   ~nsCSSScanner();
     207             : 
     208        2380 :   void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) {  // stores the raw pointer in mReporter
     209        2380 :     mReporter = aReporter;
     210        2380 :   }
     211             : 
     212             :   // Reset or check whether a BAD_URL or BAD_STRING token has been seen.
     213           0 :   void ClearSeenBadToken() { mSeenBadToken = false; }
     214           0 :   bool SeenBadToken() const { return mSeenBadToken; }
     215             : 
     216             :   // Reset or check whether a "var(" FUNCTION token has been seen.
     217        7458 :   void ClearSeenVariableReference() { mSeenVariableReference = false; }
     218        7458 :   bool SeenVariableReference() const { return mSeenVariableReference; }
     219             : 
     220             :   // Get the 1-based line number of the last character of
     221             :   // the most recently processed token.
     222       21980 :   uint32_t GetLineNumber() const { return mTokenLineNumber; }
     223             : 
     224             :   // Get the 0-based column number of the first character of
     225             :   // the most recently processed token.
     226       21980 :   uint32_t GetColumnNumber() const
     227       21980 :   { return mTokenOffset - mTokenLineOffset; }
     228             : 
     229           0 :   uint32_t GetTokenOffset() const  // returns mTokenOffset
     230           0 :   { return mTokenOffset; }
     231             : 
     232           0 :   uint32_t GetTokenEndOffset() const  // returns mOffset, the current scan position
     233           0 :   { return mOffset; }
     234             : 
     235             :   // Get the text of the line containing the first character of
     236             :   // the most recently processed token.
     237             :   nsDependentSubstring GetCurrentLine() const;
     238             : 
     239             :   // Get the next token.  Return false on EOF.  aTokenResult is filled
     240             :   // in with the data for the token.  aSkip controls whether
     241             :   // whitespace and/or comment tokens are ever returned.
     242             :   bool Next(nsCSSToken& aTokenResult, nsCSSScannerExclude aSkip);
     243             : 
     244             :   // Get the body of a URL token (everything after the 'url(').
     245             :   // This is exposed for use by nsCSSParser::ParseMozDocumentRule,
     246             :   // which, for historical reasons, must make additional function
     247             :   // tokens behave like url().  Please do not add new uses to the
     248             :   // parser.
     249             :   void NextURL(nsCSSToken& aTokenResult);
     250             : 
     251             :   // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
     252             :   // because "2n-1" is a single DIMENSION token, and "n-1" is a single
     253             :   // IDENT token, but the :nth() selector syntax wants to interpret
     254             :   // them the same as "2n -1" and "n -1" respectively.  Please do not
     255             :   // add new uses to the parser.
     256             :   //
     257             :   // Note: this function may not be used to back up over a line boundary.
     258             :   void Backup(uint32_t n);
     259             : 
     260             :   // Starts recording the input stream from the current position.
     261             :   void StartRecording();
     262             : 
     263             :   // Abandons recording of the input stream.
     264             :   void StopRecording();
     265             : 
     266             :   // Stops recording of the input stream and appends the recorded
     267             :   // input to aBuffer.
     268             :   void StopRecording(nsString& aBuffer);
     269             : 
     270             :   // Returns the length of the current recording.
     271             :   uint32_t RecordingLength() const;
     272             : 
     273             : #ifdef DEBUG
     274             :   bool IsRecording() const;
     275             : #endif
     276             : 
     277             :   // Stores the current scanner offset into the specified object.
     278             :   void SavePosition(nsCSSScannerPosition& aState);
     279             : 
     280             :   // Resets the scanner offset to a position saved by SavePosition.
     281             :   void RestoreSavedPosition(const nsCSSScannerPosition& aState);
     282             : 
     283             :   enum EOFCharacters {  // bit flags: each enumerator below occupies a distinct bit
     284             :     eEOFCharacters_None =                    0x0000,
     285             : 
     286             :     // to handle \<EOF> inside strings
     287             :     eEOFCharacters_DropBackslash =           0x0001,
     288             : 
     289             :     // to handle \<EOF> outside strings
     290             :     eEOFCharacters_ReplacementChar =         0x0002,
     291             : 
     292             :     // to close comments
     293             :     eEOFCharacters_Asterisk =                0x0004,
     294             :     eEOFCharacters_Slash =                   0x0008,
     295             : 
     296             :     // to close double-quoted strings
     297             :     eEOFCharacters_DoubleQuote =             0x0010,
     298             : 
     299             :     // to close single-quoted strings
     300             :     eEOFCharacters_SingleQuote =             0x0020,
     301             : 
     302             :     // to close URLs
     303             :     eEOFCharacters_CloseParen =              0x0040,
     304             :   };
     305             : 
     306             :   // Appends to aString the characters named by aEOFCharacters: those the input
     307             :   // stream would need so that its last token does not rely on special EOF handling.
     308             :   //
     309             :   // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored.
     310             :   static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
     311             :                                          nsAString& aString);
     312             : 
     313         614 :   EOFCharacters GetEOFCharacters() const {  // returns mEOFCharacters; DEBUG builds validate it first
     314             : #ifdef DEBUG
     315         614 :     AssertEOFCharactersValid(mEOFCharacters);
     316             : #endif
     317         614 :     return mEOFCharacters;
     318             :   }
     319             : 
     320             : #ifdef DEBUG
     321             :   static void AssertEOFCharactersValid(uint32_t c);
     322             : #endif
     323             : 
     324             : protected:
     325             :   int32_t Peek(uint32_t n = 0);
     326             :   void Advance(uint32_t n = 1);
     327             :   void AdvanceLine();
     328             : 
     329             :   void SkipWhitespace();
     330             :   void SkipComment();
     331             : 
     332             :   bool GatherEscape(nsString& aOutput, bool aInString);
     333             :   bool GatherText(uint8_t aClass, nsString& aIdent);
     334             : 
     335             :   bool ScanIdent(nsCSSToken& aResult);
     336             :   bool ScanAtKeyword(nsCSSToken& aResult);
     337             :   bool ScanHash(nsCSSToken& aResult);
     338             :   bool ScanNumber(nsCSSToken& aResult);
     339             :   bool ScanString(nsCSSToken& aResult);
     340             :   bool ScanURange(nsCSSToken& aResult);
     341             : 
     342             :   void SetEOFCharacters(uint32_t aEOFCharacters);
     343             :   void AddEOFCharacters(uint32_t aEOFCharacters);
     344             : 
     345             :   const char16_t *mBuffer;  // input text; presumably aBuffer's data, not owned (see constructor) -- confirm
     346             :   uint32_t mOffset;  // current scan position; reported by GetTokenEndOffset()
     347             :   uint32_t mCount;  // presumably the length of mBuffer -- confirm
     348             : 
     349             :   uint32_t mLineNumber;
     350             :   uint32_t mLineOffset;
     351             : 
     352             :   uint32_t mTokenLineNumber;  // reported by GetLineNumber()
     353             :   uint32_t mTokenLineOffset;  // subtracted from mTokenOffset by GetColumnNumber()
     354             :   uint32_t mTokenOffset;  // reported by GetTokenOffset()
     355             : 
     356             :   uint32_t mRecordStartOffset;
     357             :   EOFCharacters mEOFCharacters;  // reported by GetEOFCharacters()
     358             : 
     359             :   mozilla::css::ErrorReporter *mReporter;  // set by SetErrorReporter()
     360             : 
     361             :   bool mRecording;
     362             :   bool mSeenBadToken;  // see ClearSeenBadToken() / SeenBadToken()
     363             :   bool mSeenVariableReference;  // see ClearSeenVariableReference() / SeenVariableReference()
     364             : };
     365             : 
     366             : // Token for the grid-template-areas micro-syntax
     367             : // http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas
     368           0 : struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken {  // filled in by nsCSSGridTemplateAreaScanner::Next()
     369             :   nsAutoString mName;  // Empty for a null cell, non-empty for a named cell.
     370             :   bool isTrash;  // True for a trash token; mName is ignored in that case.  Has no initializer here.
     371             : };
     372             : 
     373             : // Scanner for the grid-template-areas micro-syntax
     374             : class nsCSSGridTemplateAreaScanner {
     375             : public:
     376             :   explicit nsCSSGridTemplateAreaScanner(const nsAString& aBuffer);  // aBuffer: the text to tokenize
     377             : 
     378             :   // Get the next token.  Return false on EOF.
     379             :   // aTokenResult is filled in with the data for the token.
     380             :   bool Next(nsCSSGridTemplateAreaToken& aTokenResult);
     381             : 
     382             : private:
     383             :   const char16_t *mBuffer;  // presumably aBuffer's character data (raw pointer) -- confirm lifetime with the .cpp
     384             :   uint32_t mOffset;  // presumably the current read position in mBuffer -- confirm
     385             :   uint32_t mCount;  // presumably the length of mBuffer -- confirm
     386             : };
     387             : 
     388             : #endif /* nsCSSScanner_h___ */

Generated by: LCOV version 1.13