Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 : /* tokenization of CSS style sheets */
7 :
8 : #ifndef nsCSSScanner_h___
9 : #define nsCSSScanner_h___
10 :
11 : #include "nsString.h"
12 :
13 : namespace mozilla {
14 : namespace css {
15 : class ErrorReporter;
16 : } // namespace css
17 : } // namespace mozilla
18 :
19 : // Token types; in close but not perfect correspondence to the token
20 : // categorization in section 4.1.1 of CSS2.1. (The deviations are all
21 : // the fault of css3-selectors, which has requirements that can only be
22 : // met by changing the generic tokenization.) The trailing comment on
23 : // each enumerator illustrates the lexical form of that token.
24 :
25 : enum nsCSSTokenType {
26 : // White space of any kind. No value fields are used. Note that
27 : // comments do *not* count as white space; comments separate tokens
28 : // but are not themselves tokens.
29 : eCSSToken_Whitespace, // (white space; nothing visible to show)
30 : // A comment. NOTE(review): the note above says comments are not themselves tokens, yet this type exists; presumably it is only returned when comments are not excluded (see nsCSSScannerExclude) -- confirm.
31 : eCSSToken_Comment, // /*...*/
32 :
33 : // Identifier-like tokens. mIdent is the text of the identifier.
34 : // The difference between ID and Hash is: if the text after the #
35 : // would have been a valid Ident if the # hadn't been there, the
36 : // scanner produces an ID token. Otherwise it produces a Hash token.
37 : // (This distinction is required by css3-selectors.)
38 : eCSSToken_Ident, // word
39 : eCSSToken_Function, // word(
40 : eCSSToken_AtKeyword, // @word
41 : eCSSToken_ID, // #word
42 : eCSSToken_Hash, // #0word
43 :
44 : // Numeric tokens. mNumber is the floating-point value of the
45 : // number, and mHasSign indicates whether there was an explicit sign
46 : // (+ or -) in front of the number. If mIntegerValid is true, the
47 : // number had the lexical form of an integer, and mInteger is its
48 : // integer value. Lexically integer values outside the range of a
49 : // 32-bit signed number are clamped to the maximum values; mNumber
50 : // will indicate a 'truer' value in that case. Percentage tokens
51 : // are always considered not to be integers, even if their numeric
52 : // value is integral (100% => mNumber = 1.0). For Dimension
53 : // tokens, mIdent holds the text of the unit.
54 : eCSSToken_Number, // 1 -5 +2e3 3.14159 7.297352e-3
55 : eCSSToken_Dimension, // 24px 8.5in
56 : eCSSToken_Percentage, // 85% 1280.4%
57 :
58 : // String-like tokens. In all cases, mIdent holds the text
59 : // belonging to the string, and mSymbol holds the delimiter
60 : // character, which may be ', ", or zero (only for unquoted URLs).
61 : // Bad_String and Bad_URL tokens are emitted when the closing
62 : // delimiter or parenthesis was missing.
63 : eCSSToken_String, // 'foo bar' "foo bar"
64 : eCSSToken_Bad_String, // 'foo bar
65 : eCSSToken_URL, // url(foobar) url("foo bar")
66 : eCSSToken_Bad_URL, // url(foo
67 :
68 : // Any one-character symbol. mSymbol holds the character.
69 : eCSSToken_Symbol, // . ; { } ! *
70 :
71 : // Match operators. These are single tokens rather than pairs of
72 : // Symbol tokens because css3-selectors forbids the presence of
73 : // comments between the two characters. No value fields are used;
74 : // the token type indicates which operator.
75 : eCSSToken_Includes, // ~=
76 : eCSSToken_Dashmatch, // |=
77 : eCSSToken_Beginsmatch, // ^=
78 : eCSSToken_Endsmatch, // $=
79 : eCSSToken_Containsmatch, // *=
80 :
81 : // Unicode-range token: currently used only in @font-face.
82 : // The lexical rule for this token includes several forms that are
83 : // semantically invalid. Therefore, mIdent always holds the
84 : // complete original text of the token (so we can print it
85 : // accurately in diagnostics), and mIntegerValid is true iff the
86 : // token is semantically valid. In that case, mInteger holds the
87 : // lowest value included in the range, and mInteger2 holds the
88 : // highest value included in the range.
89 : eCSSToken_URange, // U+007e U+01?? U+2000-206F
90 :
91 : // HTML comment delimiters, ignored as a unit when they appear at
92 : // the top level of a style sheet, for compatibility with websites
93 : // that were written to also work in pre-CSS browsers. This token type
94 : // subsumes the css2.1 CDO and CDC tokens, which are always treated
95 : // the same by the parser. mIdent holds the text of the token, for
96 : // diagnostics.
97 : eCSSToken_HTMLComment, // <!-- -->
98 : };
99 :
100 : // Classification of tokens used to determine if a "/**/" string must be
101 : // inserted if pasting token streams together when serializing. We include
102 : // values corresponding to eCSSToken_Dashmatch and eCSSToken_Containsmatch,
103 : // as css-syntax does not treat these as whole tokens, but we will still
104 : // need to insert a "/**/" string between a '|' delim and a '|=' dashmatch
105 : // and between a '/' delim and a '*=' containsmatch.
106 : //
107 : // https://drafts.csswg.org/css-syntax/#serialization
108 : enum nsCSSTokenSerializationType { // NOTE(review): enumerators without a specific comment are presumably named after the nsCSSTokenType token(s) they classify -- confirm against the code that assigns them.
109 : eCSSTokenSerialization_Nothing, // NOTE(review): presumably means "no adjacent token" -- confirm
110 : eCSSTokenSerialization_Whitespace,
111 : eCSSTokenSerialization_AtKeyword_or_Hash,
112 : eCSSTokenSerialization_Number,
113 : eCSSTokenSerialization_Dimension,
114 : eCSSTokenSerialization_Percentage,
115 : eCSSTokenSerialization_URange,
116 : eCSSTokenSerialization_URL_or_BadURL,
117 : eCSSTokenSerialization_Function,
118 : eCSSTokenSerialization_Ident,
119 : eCSSTokenSerialization_CDC, // NOTE(review): presumably the '-->' HTML comment delimiter -- confirm
120 : eCSSTokenSerialization_DashMatch, // '|=' (corresponds to eCSSToken_Dashmatch)
121 : eCSSTokenSerialization_ContainsMatch, // '*=' (corresponds to eCSSToken_Containsmatch)
122 : eCSSTokenSerialization_Symbol_Hash, // '#'
123 : eCSSTokenSerialization_Symbol_At, // '@'
124 : eCSSTokenSerialization_Symbol_Dot_or_Plus, // '.', '+'
125 : eCSSTokenSerialization_Symbol_Minus, // '-'
126 : eCSSTokenSerialization_Symbol_OpenParen, // '('
127 : eCSSTokenSerialization_Symbol_Question, // '?'
128 : eCSSTokenSerialization_Symbol_Assorted, // '$', '^', '~'
129 : eCSSTokenSerialization_Symbol_Equals, // '='
130 : eCSSTokenSerialization_Symbol_Bar, // '|'
131 : eCSSTokenSerialization_Symbol_Slash, // '/'
132 : eCSSTokenSerialization_Symbol_Asterisk, // '*'
133 : eCSSTokenSerialization_Other // anything else
134 : };
135 :
136 : // A single token returned from the scanner. mType is always
137 : // meaningful; comments above describe which other fields are
138 : // meaningful for which token types.
139 15174 : struct nsCSSToken {
140 : nsAutoString mIdent; // Text payload: identifier text, Dimension unit, string/URL contents, or the original text of a URange/HTMLComment token.
141 : float mNumber; // Floating-point value of a numeric token.
142 : int32_t mInteger; // Integer value when mIntegerValid is true; for a valid URange, the lowest value in the range.
143 : int32_t mInteger2; // For a valid URange, the highest value in the range.
144 : nsCSSTokenType mType; // Always meaningful; determines which other fields apply.
145 : char16_t mSymbol; // The character of a Symbol token, or the delimiter of a string-like token (zero for unquoted URLs).
146 : bool mIntegerValid; // Numeric tokens: the number had the lexical form of an integer. URange: the token is semantically valid.
147 : bool mHasSign; // Numeric tokens: an explicit + or - preceded the number.
148 : // Builds a Whitespace token with every numeric/flag field zeroed; mIdent is default-constructed.
149 7462 : nsCSSToken()
150 7462 : : mNumber(0), mInteger(0), mInteger2(0), mType(eCSSToken_Whitespace),
151 7462 : mSymbol('\0'), mIntegerValid(false), mHasSign(false)
152 7462 : {}
153 : // Returns true only when this is a Symbol token whose character equals aSymbol.
154 114057 : bool IsSymbol(char16_t aSymbol) const {
155 114057 : return mType == eCSSToken_Symbol && mSymbol == aSymbol;
156 : }
157 : // Declared here, defined elsewhere. NOTE(review): presumably appends this token's textual form to aBuffer -- confirm.
158 : void AppendToString(nsString& aBuffer) const;
159 : };
160 :
161 : // Represents an nsCSSScanner's saved position in the input buffer.
162 : class nsCSSScannerPosition {
163 : friend class nsCSSScanner; // Gives nsCSSScanner direct access to the private fields below.
164 : public:
165 7458 : nsCSSScannerPosition() : mInitialized(false) { } // Only mInitialized is set here; the other fields are left uninitialized by this constructor.
166 : // Returns the stored line number after asserting (MOZ_ASSERT) that the position is initialized. NOTE(review): not declared const, so it cannot be called through a const reference.
167 606 : uint32_t LineNumber() {
168 606 : MOZ_ASSERT(mInitialized);
169 606 : return mLineNumber;
170 : }
171 : // Returns the stored line offset after asserting (MOZ_ASSERT) that the position is initialized. NOTE(review): not declared const either.
172 606 : uint32_t LineOffset() {
173 606 : MOZ_ASSERT(mInitialized);
174 606 : return mLineOffset;
175 : }
176 : // NOTE(review): the uint32_t fields below share their names with nsCSSScanner members; presumably each holds a snapshot of the corresponding member -- confirm against SavePosition/RestoreSavedPosition.
177 : private:
178 : uint32_t mOffset;
179 : uint32_t mLineNumber; // Returned by LineNumber().
180 : uint32_t mLineOffset; // Returned by LineOffset().
181 : uint32_t mTokenLineNumber;
182 : uint32_t mTokenLineOffset;
183 : uint32_t mTokenOffset;
184 : bool mInitialized; // False after construction; presumably set to true once a position has been saved -- confirm.
185 : };
186 :
187 : enum nsCSSScannerExclude { // Passed as the aSkip argument of nsCSSScanner::Next to choose which token kinds are never returned.
188 : // Exclude nothing: return all tokens, including whitespace and comments.
189 : eCSSScannerExclude_None,
190 : // Return whitespace tokens but never comment tokens.
191 : eCSSScannerExclude_Comments,
192 : // Return neither whitespace tokens nor comment tokens.
193 : eCSSScannerExclude_WhitespaceAndComments
194 : };
195 :
196 : // nsCSSScanner tokenizes an input stream using the CSS2.1 forward
197 : // compatible tokenization rules. Used internally by nsCSSParser;
198 : // not available for use by other code.
199 : class nsCSSScanner {
200 : public:
201 : // |aLineNumber == 1| is the beginning of a file, use |aLineNumber == 0|
202 : // when the line number is unknown. The scanner does not take
203 : // ownership of |aBuffer|, so the caller must be sure to keep it
204 : // alive for the lifetime of the scanner.
205 : nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber);
206 : ~nsCSSScanner();
207 : // Stores aReporter in mReporter; only the raw pointer is kept (no ownership handling is visible here).
208 2380 : void SetErrorReporter(mozilla::css::ErrorReporter* aReporter) {
209 2380 : mReporter = aReporter;
210 2380 : }
211 :
212 : // Reset or check whether a BAD_URL or BAD_STRING token has been seen.
213 0 : void ClearSeenBadToken() { mSeenBadToken = false; }
214 0 : bool SeenBadToken() const { return mSeenBadToken; }
215 :
216 : // Reset or check whether a "var(" FUNCTION token has been seen.
217 7458 : void ClearSeenVariableReference() { mSeenVariableReference = false; }
218 7458 : bool SeenVariableReference() const { return mSeenVariableReference; }
219 : // NOTE(review): GetColumnNumber and GetCurrentLine below are documented in terms of the token's *first* character; confirm that "last character" is really intended for GetLineNumber.
220 : // Get the 1-based line number of the last character of
221 : // the most recently processed token.
222 21980 : uint32_t GetLineNumber() const { return mTokenLineNumber; }
223 :
224 : // Get the 0-based column number of the first character of
225 : // the most recently processed token (computed as mTokenOffset - mTokenLineOffset).
226 21980 : uint32_t GetColumnNumber() const
227 21980 : { return mTokenOffset - mTokenLineOffset; }
228 : // Returns mTokenOffset. NOTE(review): presumably the buffer offset at which the most recent token starts -- confirm.
229 0 : uint32_t GetTokenOffset() const
230 0 : { return mTokenOffset; }
231 : // Returns mOffset. NOTE(review): presumably the buffer offset just past the most recent token -- confirm.
232 0 : uint32_t GetTokenEndOffset() const
233 0 : { return mOffset; }
234 :
235 : // Get the text of the line containing the first character of
236 : // the most recently processed token.
237 : nsDependentSubstring GetCurrentLine() const;
238 :
239 : // Get the next token. Return false on EOF. aTokenResult is filled
240 : // in with the data for the token. aSkip controls whether
241 : // whitespace and/or comment tokens are ever returned.
242 : bool Next(nsCSSToken& aTokenResult, nsCSSScannerExclude aSkip);
243 :
244 : // Get the body of an URL token (everything after the 'url(').
245 : // This is exposed for use by nsCSSParser::ParseMozDocumentRule,
246 : // which, for historical reasons, must make additional function
247 : // tokens behave like url(). Please do not add new uses to the
248 : // parser.
249 : void NextURL(nsCSSToken& aTokenResult);
250 :
251 : // This is exposed for use by nsCSSParser::ParsePseudoClassWithNthPairArg,
252 : // because "2n-1" is a single DIMENSION token, and "n-1" is a single
253 : // IDENT token, but the :nth() selector syntax wants to interpret
254 : // them the same as "2n -1" and "n -1" respectively. Please do not
255 : // add new uses to the parser.
256 : //
257 : // Note: this function may not be used to back up over a line boundary.
258 : void Backup(uint32_t n);
259 :
260 : // Starts recording the input stream from the current position.
261 : void StartRecording();
262 :
263 : // Abandons recording of the input stream.
264 : void StopRecording();
265 :
266 : // Stops recording of the input stream and appends the recorded
267 : // input to aBuffer.
268 : void StopRecording(nsString& aBuffer);
269 :
270 : // Returns the length of the current recording.
271 : uint32_t RecordingLength() const;
272 :
273 : #ifdef DEBUG
274 : bool IsRecording() const; // Only declared in DEBUG builds.
275 : #endif
276 :
277 : // Stores the current scanner offset into the specified object.
278 : void SavePosition(nsCSSScannerPosition& aState);
279 :
280 : // Resets the scanner offset to a position saved by SavePosition.
281 : void RestoreSavedPosition(const nsCSSScannerPosition& aState);
282 :
283 : enum EOFCharacters { // Each non-zero value is a distinct bit, so several can be combined in one value.
284 : eEOFCharacters_None = 0x0000,
285 :
286 : // to handle \<EOF> inside strings
287 : eEOFCharacters_DropBackslash = 0x0001,
288 :
289 : // to handle \<EOF> outside strings
290 : eEOFCharacters_ReplacementChar = 0x0002,
291 :
292 : // to close comments
293 : eEOFCharacters_Asterisk = 0x0004,
294 : eEOFCharacters_Slash = 0x0008,
295 :
296 : // to close double-quoted strings
297 : eEOFCharacters_DoubleQuote = 0x0010,
298 :
299 : // to close single-quoted strings
300 : eEOFCharacters_SingleQuote = 0x0020,
301 :
302 : // to close URLs
303 : eEOFCharacters_CloseParen = 0x0040,
304 : };
305 :
306 : // Appends to aString whatever characters (selected by aEOFCharacters) are
307 : // needed to make the last token not rely on special EOF handling behavior.
308 : //
309 : // If eEOFCharacters_DropBackslash is in aEOFCharacters, it is ignored.
310 : static void AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
311 : nsAString& aString);
312 : // Returns mEOFCharacters; in DEBUG builds the value is first passed to AssertEOFCharactersValid.
313 614 : EOFCharacters GetEOFCharacters() const {
314 : #ifdef DEBUG
315 614 : AssertEOFCharactersValid(mEOFCharacters);
316 : #endif
317 614 : return mEOFCharacters;
318 : }
319 :
320 : #ifdef DEBUG
321 : static void AssertEOFCharactersValid(uint32_t c); // Only declared in DEBUG builds; defined elsewhere.
322 : #endif
323 :
324 : protected: // Scanning helpers and state. The helpers are only declared here; the notes on them are hedged because their definitions are not visible.
325 : int32_t Peek(uint32_t n = 0); // NOTE(review): presumably looks ahead n characters without consuming; the signed return presumably allows an end-of-input sentinel -- confirm.
326 : void Advance(uint32_t n = 1);
327 : void AdvanceLine();
328 :
329 : void SkipWhitespace();
330 : void SkipComment();
331 :
332 : bool GatherEscape(nsString& aOutput, bool aInString);
333 : bool GatherText(uint8_t aClass, nsString& aIdent);
334 : // NOTE(review): each Scan* helper presumably tries to read one token of the named kind into aResult -- confirm.
335 : bool ScanIdent(nsCSSToken& aResult);
336 : bool ScanAtKeyword(nsCSSToken& aResult);
337 : bool ScanHash(nsCSSToken& aResult);
338 : bool ScanNumber(nsCSSToken& aResult);
339 : bool ScanString(nsCSSToken& aResult);
340 : bool ScanURange(nsCSSToken& aResult);
341 :
342 : void SetEOFCharacters(uint32_t aEOFCharacters);
343 : void AddEOFCharacters(uint32_t aEOFCharacters);
344 :
345 : const char16_t *mBuffer; // NOTE(review): presumably points into the caller-owned aBuffer described on the constructor -- confirm.
346 : uint32_t mOffset; // Returned by GetTokenEndOffset().
347 : uint32_t mCount; // NOTE(review): presumably the number of characters in mBuffer -- confirm.
348 :
349 : uint32_t mLineNumber;
350 : uint32_t mLineOffset;
351 :
352 : uint32_t mTokenLineNumber; // Returned by GetLineNumber().
353 : uint32_t mTokenLineOffset; // Subtracted from mTokenOffset by GetColumnNumber().
354 : uint32_t mTokenOffset; // Returned by GetTokenOffset().
355 :
356 : uint32_t mRecordStartOffset; // NOTE(review): presumably the offset at which StartRecording() was called -- confirm.
357 : EOFCharacters mEOFCharacters; // Returned by GetEOFCharacters().
358 :
359 : mozilla::css::ErrorReporter *mReporter; // Set by SetErrorReporter().
360 :
361 : bool mRecording; // NOTE(review): presumably true between StartRecording() and StopRecording() -- confirm.
362 : bool mSeenBadToken; // Cleared by ClearSeenBadToken(); read by SeenBadToken().
363 : bool mSeenVariableReference; // Cleared by ClearSeenVariableReference(); read by SeenVariableReference().
364 : };
365 :
366 : // Token for the grid-template-areas micro-syntax
367 : // http://dev.w3.org/csswg/css-grid/#propdef-grid-template-areas
368 0 : struct MOZ_STACK_CLASS nsCSSGridTemplateAreaToken {
369 : nsAutoString mName; // Empty for a null cell, non-empty for a named cell
370 : bool isTrash; // True for a trash token, mName is ignored in this case. NOTE(review): unlike mName, this member lacks the 'm' prefix used elsewhere in this file.
371 : };
372 :
373 : // Scanner for the grid-template-areas micro-syntax
374 : class nsCSSGridTemplateAreaScanner {
375 : public:
376 : explicit nsCSSGridTemplateAreaScanner(const nsAString& aBuffer); // NOTE(review): no lifetime requirement for aBuffer is stated here; nsCSSScanner requires its buffer to outlive the scanner -- presumably the same applies, confirm.
377 :
378 : // Get the next token. Return false on EOF.
379 : // aTokenResult is filled in with the data for the token.
380 : bool Next(nsCSSGridTemplateAreaToken& aTokenResult);
381 :
382 : private:
383 : const char16_t *mBuffer; // NOTE(review): presumably the start of the text being scanned -- confirm.
384 : uint32_t mOffset; // NOTE(review): presumably the current read offset into mBuffer -- confirm.
385 : uint32_t mCount; // NOTE(review): presumably the number of characters in mBuffer -- confirm.
386 : };
387 :
388 : #endif /* nsCSSScanner_h___ */
|