Line data Source code
1 : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 : /* This Source Code Form is subject to the terms of the Mozilla Public
3 : * License, v. 2.0. If a copy of the MPL was not distributed with this
4 : * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
5 :
6 :
7 : /* tokenization of CSS style sheets */
8 :
9 : #include "nsCSSScanner.h"
10 : #include "nsStyleUtil.h"
11 : #include "nsISupportsImpl.h"
12 : #include "mozilla/ArrayUtils.h"
13 : #include "mozilla/css/ErrorReporter.h"
14 : #include "mozilla/Likely.h"
15 : #include <algorithm>
16 :
17 : using namespace mozilla;
18 :
19 : /* Character class tables and related helper functions. */
20 :
21 : static const uint8_t IS_HEX_DIGIT = 0x01;
22 : static const uint8_t IS_IDSTART = 0x02;
23 : static const uint8_t IS_IDCHAR = 0x04;
24 : static const uint8_t IS_URL_CHAR = 0x08;
25 : static const uint8_t IS_HSPACE = 0x10;
26 : static const uint8_t IS_VSPACE = 0x20;
27 : static const uint8_t IS_SPACE = IS_HSPACE|IS_VSPACE;
28 : static const uint8_t IS_STRING = 0x40;
29 :
30 : #define H IS_HSPACE
31 : #define V IS_VSPACE
32 : #define I IS_IDCHAR
33 : #define J IS_IDSTART
34 : #define U IS_URL_CHAR
35 : #define S IS_STRING
36 : #define X IS_HEX_DIGIT
37 :
38 : #define SH S|H
39 : #define SU S|U
40 : #define SUI S|U|I
41 : #define SUIJ S|U|I|J
42 : #define SUIX S|U|I|X
43 : #define SUIJX S|U|I|J|X
44 :
45 : static const uint8_t gLexTable[] = {
46 : // 00 01 02 03 04 05 06 07
47 : 0, S, S, S, S, S, S, S,
48 : // 08 TAB LF 0B FF CR 0E 0F
49 : S, SH, V, S, V, V, S, S,
50 : // 10 11 12 13 14 15 16 17
51 : S, S, S, S, S, S, S, S,
52 : // 18 19 1A 1B 1C 1D 1E 1F
53 : S, S, S, S, S, S, S, S,
54 : //SPC ! " # $ % & '
55 : SH, SU, 0, SU, SU, SU, SU, 0,
56 : // ( ) * + , - . /
57 : S, S, SU, SU, SU, SUI, SU, SU,
58 : // 0 1 2 3 4 5 6 7
59 : SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX, SUIX,
60 : // 8 9 : ; < = > ?
61 : SUIX, SUIX, SU, SU, SU, SU, SU, SU,
62 : // @ A B C D E F G
63 : SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
64 : // H I J K L M N O
65 : SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
66 : // P Q R S T U V W
67 : SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
68 : // X Y Z [ \ ] ^ _
69 : SUIJ, SUIJ, SUIJ, SU, J, SU, SU, SUIJ,
70 : // ` a b c d e f g
71 : SU,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX,SUIJX, SUIJ,
72 : // h i j k l m n o
73 : SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
74 : // p q r s t u v w
75 : SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ, SUIJ,
76 : // x y z { | } ~ 7F
77 : SUIJ, SUIJ, SUIJ, SU, SU, SU, SU, S,
78 : };
79 :
80 : static_assert(MOZ_ARRAY_LENGTH(gLexTable) == 128,
81 : "gLexTable expected to cover all 128 ASCII characters");
82 :
83 : #undef I
84 : #undef J
85 : #undef U
86 : #undef S
87 : #undef X
88 : #undef SH
89 : #undef SU
90 : #undef SUI
91 : #undef SUIJ
92 : #undef SUIX
93 : #undef SUIJX
94 :
95 : /**
96 : * True if 'ch' is in character class 'cls', which should be one of
97 : * the constants above or some combination of them. All characters
98 : * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
99 : */
100 : static inline bool
101 570830 : IsOpenCharClass(int32_t ch, uint8_t cls) {
102 570830 : return ch >= 0 && (ch >= 128 || (gLexTable[ch] & cls) != 0);
103 : }
104 :
105 : /**
106 : * True if 'ch' is in character class 'cls', which should be one of
107 : * the constants above or some combination of them. No characters
108 : * above U+007F are considered to be in 'cls'. EOF is never in 'cls'.
109 : */
110 : static inline bool
111 638488 : IsClosedCharClass(int32_t ch, uint8_t cls) {
112 638488 : return uint32_t(ch) < 128 && (gLexTable[ch] & cls) != 0;
113 : }
114 :
115 : /**
116 : * True if 'ch' is CSS whitespace, i.e. any of the ASCII characters
117 : * TAB, LF, FF, CR, or SPC.
118 : */
119 : static inline bool
120 241770 : IsWhitespace(int32_t ch) {
121 241770 : return IsClosedCharClass(ch, IS_SPACE);
122 : }
123 :
124 : /**
125 : * True if 'ch' is horizontal whitespace, i.e. TAB or SPC.
126 : */
127 : static inline bool
128 1442 : IsHorzSpace(int32_t ch) {
129 1442 : return IsClosedCharClass(ch, IS_HSPACE);
130 : }
131 :
132 : /**
133 : * True if 'ch' is vertical whitespace, i.e. LF, FF, or CR. Vertical
134 : * whitespace requires special handling when consumed, see AdvanceLine.
135 : */
136 : static inline bool
137 388615 : IsVertSpace(int32_t ch) {
138 388615 : return IsClosedCharClass(ch, IS_VSPACE);
139 : }
140 :
141 : /**
142 : * True if 'ch' is a character that can appear in the middle of an identifier.
143 : * This includes U+0000 since it is handled as U+FFFD, but for purposes of
144 : * GatherText it should not be included in IsOpenCharClass.
145 : */
146 : static inline bool
147 2386 : IsIdentChar(int32_t ch) {
148 2386 : return IsOpenCharClass(ch, IS_IDCHAR) || ch == 0;
149 : }
150 :
151 : /**
152 : * True if 'ch' is a character that by itself begins an identifier.
153 : * This includes U+0000 since it is handled as U+FFFD, but for purposes of
154 : * GatherText it should not be included in IsOpenCharClass.
155 : * (This is a subset of IsIdentChar.)
156 : */
157 : static inline bool
158 99781 : IsIdentStart(int32_t ch) {
159 99781 : return IsOpenCharClass(ch, IS_IDSTART) || ch == 0;
160 : }
161 :
162 : /**
163 : * True if the two-character sequence aFirstChar+aSecondChar begins an
164 : * identifier.
165 : */
166 : static inline bool
167 10048 : StartsIdent(int32_t aFirstChar, int32_t aSecondChar)
168 : {
169 16304 : return IsIdentStart(aFirstChar) ||
170 10050 : (aFirstChar == '-' && (aSecondChar == '-' || IsIdentStart(aSecondChar)));
171 : }
172 :
/**
 * True if 'ch' is an ASCII decimal digit ('0' through '9').
 */
static inline bool
IsDigit(int32_t ch)
{
  if (ch < '0') {
    return false;
  }
  return ch <= '9';
}
180 :
181 : /**
182 : * True if 'ch' is a hexadecimal digit.
183 : */
184 : static inline bool
185 6661 : IsHexDigit(int32_t ch) {
186 6661 : return IsClosedCharClass(ch, IS_HEX_DIGIT);
187 : }
188 :
/**
 * Return the numeric value of 'ch', which the caller guarantees is a
 * decimal digit.
 */
static inline uint32_t
DecimalDigitValue(int32_t ch)
{
  const int32_t distanceFromZero = ch - '0';
  return distanceFromZero;
}
197 :
198 : /**
199 : * Assuming that 'ch' is a hexadecimal digit, return its numeric value.
200 : */
201 : static inline uint32_t
202 5218 : HexDigitValue(int32_t ch)
203 : {
204 5218 : if (IsDigit(ch)) {
205 3978 : return DecimalDigitValue(ch);
206 : } else {
207 : // Note: c&7 just keeps the low three bits which causes
208 : // upper and lower case alphabetics to both yield their
209 : // "relative to 10" value for computing the hex value.
210 1240 : return (ch & 0x7) + 9;
211 : }
212 : }
213 :
214 : /**
215 : * If 'ch' can be the first character of a two-character match operator
216 : * token, return the token type code for that token, otherwise return
217 : * eCSSToken_Symbol to indicate that it can't.
218 : */
219 : static inline nsCSSTokenType
220 43978 : MatchOperatorType(int32_t ch)
221 : {
222 43978 : switch (ch) {
223 109 : case '~': return eCSSToken_Includes;
224 204 : case '|': return eCSSToken_Dashmatch;
225 10 : case '^': return eCSSToken_Beginsmatch;
226 0 : case '$': return eCSSToken_Endsmatch;
227 563 : case '*': return eCSSToken_Containsmatch;
228 43092 : default: return eCSSToken_Symbol;
229 : }
230 : }
231 :
232 : /* Out-of-line nsCSSToken methods. */
233 :
234 : /**
235 : * Append the textual representation of |this| to |aBuffer|.
236 : */
237 : void
238 66 : nsCSSToken::AppendToString(nsString& aBuffer) const
239 : {
240 66 : switch (mType) {
241 : case eCSSToken_Ident:
242 22 : nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
243 22 : break;
244 :
245 : case eCSSToken_AtKeyword:
246 0 : aBuffer.Append('@');
247 0 : nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
248 0 : break;
249 :
250 : case eCSSToken_ID:
251 : case eCSSToken_Hash:
252 0 : aBuffer.Append('#');
253 0 : nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
254 0 : break;
255 :
256 : case eCSSToken_Function:
257 44 : nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
258 44 : aBuffer.Append('(');
259 44 : break;
260 :
261 : case eCSSToken_URL:
262 : case eCSSToken_Bad_URL:
263 0 : aBuffer.AppendLiteral("url(");
264 0 : if (mSymbol != char16_t(0)) {
265 0 : nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
266 : } else {
267 0 : aBuffer.Append(mIdent);
268 : }
269 0 : if (mType == eCSSToken_URL) {
270 0 : aBuffer.Append(char16_t(')'));
271 : }
272 0 : break;
273 :
274 : case eCSSToken_Number:
275 0 : if (mIntegerValid) {
276 0 : aBuffer.AppendInt(mInteger, 10);
277 : } else {
278 0 : aBuffer.AppendFloat(mNumber);
279 : }
280 0 : break;
281 :
282 : case eCSSToken_Percentage:
283 0 : aBuffer.AppendFloat(mNumber * 100.0f);
284 0 : aBuffer.Append(char16_t('%'));
285 0 : break;
286 :
287 : case eCSSToken_Dimension:
288 0 : if (mIntegerValid) {
289 0 : aBuffer.AppendInt(mInteger, 10);
290 : } else {
291 0 : aBuffer.AppendFloat(mNumber);
292 : }
293 0 : nsStyleUtil::AppendEscapedCSSIdent(mIdent, aBuffer);
294 0 : break;
295 :
296 : case eCSSToken_Bad_String:
297 0 : nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
298 : // remove the trailing quote character
299 0 : aBuffer.Truncate(aBuffer.Length() - 1);
300 0 : break;
301 :
302 : case eCSSToken_String:
303 0 : nsStyleUtil::AppendEscapedCSSString(mIdent, aBuffer, mSymbol);
304 0 : break;
305 :
306 : case eCSSToken_Symbol:
307 0 : aBuffer.Append(mSymbol);
308 0 : break;
309 :
310 : case eCSSToken_Whitespace:
311 0 : aBuffer.Append(' ');
312 0 : break;
313 :
314 : case eCSSToken_HTMLComment:
315 : case eCSSToken_URange:
316 0 : aBuffer.Append(mIdent);
317 0 : break;
318 :
319 : case eCSSToken_Includes:
320 0 : aBuffer.AppendLiteral("~=");
321 0 : break;
322 : case eCSSToken_Dashmatch:
323 0 : aBuffer.AppendLiteral("|=");
324 0 : break;
325 : case eCSSToken_Beginsmatch:
326 0 : aBuffer.AppendLiteral("^=");
327 0 : break;
328 : case eCSSToken_Endsmatch:
329 0 : aBuffer.AppendLiteral("$=");
330 0 : break;
331 : case eCSSToken_Containsmatch:
332 0 : aBuffer.AppendLiteral("*=");
333 0 : break;
334 :
335 : default:
336 0 : NS_ERROR("invalid token type");
337 0 : break;
338 : }
339 66 : }
340 :
341 : /* nsCSSScanner methods. */
342 :
343 2380 : nsCSSScanner::nsCSSScanner(const nsAString& aBuffer, uint32_t aLineNumber)
344 2380 : : mBuffer(aBuffer.BeginReading())
345 : , mOffset(0)
346 2380 : , mCount(aBuffer.Length())
347 : , mLineNumber(aLineNumber)
348 : , mLineOffset(0)
349 : , mTokenLineNumber(aLineNumber)
350 : , mTokenLineOffset(0)
351 : , mTokenOffset(0)
352 : , mRecordStartOffset(0)
353 : , mEOFCharacters(eEOFCharacters_None)
354 : , mReporter(nullptr)
355 : , mRecording(false)
356 : , mSeenBadToken(false)
357 4760 : , mSeenVariableReference(false)
358 : {
359 2380 : MOZ_COUNT_CTOR(nsCSSScanner);
360 2380 : }
361 :
362 4760 : nsCSSScanner::~nsCSSScanner()
363 : {
364 2380 : MOZ_COUNT_DTOR(nsCSSScanner);
365 2380 : }
366 :
367 : void
368 3434 : nsCSSScanner::StartRecording()
369 : {
370 3434 : MOZ_ASSERT(!mRecording, "already started recording");
371 3434 : mRecording = true;
372 3434 : mRecordStartOffset = mOffset;
373 3434 : }
374 :
375 : void
376 1024 : nsCSSScanner::StopRecording()
377 : {
378 1024 : MOZ_ASSERT(mRecording, "haven't started recording");
379 1024 : mRecording = false;
380 1024 : }
381 :
382 : void
383 2410 : nsCSSScanner::StopRecording(nsString& aBuffer)
384 : {
385 2410 : MOZ_ASSERT(mRecording, "haven't started recording");
386 2410 : mRecording = false;
387 2410 : aBuffer.Append(mBuffer + mRecordStartOffset,
388 4820 : mOffset - mRecordStartOffset);
389 2410 : }
390 :
391 : uint32_t
392 5454 : nsCSSScanner::RecordingLength() const
393 : {
394 5454 : MOZ_ASSERT(mRecording, "haven't started recording");
395 5454 : return mOffset - mRecordStartOffset;
396 : }
397 :
#ifdef DEBUG
/**
 * Debug-only accessor: true between StartRecording() and either
 * StopRecording() overload.
 */
bool
nsCSSScanner::IsRecording() const
{
  const bool recording = mRecording;
  return recording;
}
#endif
405 :
406 : nsDependentSubstring
407 64 : nsCSSScanner::GetCurrentLine() const
408 : {
409 64 : uint32_t end = mTokenOffset;
410 3132 : while (end < mCount && !IsVertSpace(mBuffer[end])) {
411 1534 : end++;
412 : }
413 64 : return nsDependentSubstring(mBuffer + mTokenLineOffset,
414 128 : mBuffer + end);
415 : }
416 :
417 : /**
418 : * Return the raw UTF-16 code unit at position |mOffset + n| within
419 : * the read buffer. If that is beyond the end of the buffer, returns
420 : * -1 to indicate end of input.
421 : */
422 : inline int32_t
423 487855 : nsCSSScanner::Peek(uint32_t n)
424 : {
425 487855 : if (mOffset + n >= mCount) {
426 6560 : return -1;
427 : }
428 481295 : return mBuffer[mOffset + n];
429 : }
430 :
431 : /**
432 : * Advance |mOffset| over |n| code units. Advance(0) is a no-op.
433 : * If |n| is greater than the distance to end of input, will silently
434 : * stop at the end. May not be used to advance over a line boundary;
435 : * AdvanceLine() must be used instead.
436 : */
437 : inline void
438 425316 : nsCSSScanner::Advance(uint32_t n)
439 : {
440 : #ifdef DEBUG
441 638578 : while (mOffset < mCount && n > 0) {
442 213262 : MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset]),
443 : "may not Advance() over a line boundary");
444 213262 : mOffset++;
445 213262 : n--;
446 : }
447 : #else
448 : if (mOffset + n >= mCount || mOffset + n < mOffset)
449 : mOffset = mCount;
450 : else
451 : mOffset += n;
452 : #endif
453 212054 : }
454 :
455 : /**
456 : * Advance |mOffset| over a line boundary.
457 : */
458 : void
459 20035 : nsCSSScanner::AdvanceLine()
460 : {
461 20035 : MOZ_ASSERT(IsVertSpace(mBuffer[mOffset]),
462 : "may not AdvanceLine() over a horizontal character");
463 : // Advance over \r\n as a unit.
464 20035 : if (mBuffer[mOffset] == '\r' && mOffset + 1 < mCount &&
465 0 : mBuffer[mOffset+1] == '\n')
466 0 : mOffset += 2;
467 : else
468 20035 : mOffset += 1;
469 : // 0 is a magical line number meaning that we don't know (i.e., script)
470 20035 : if (mLineNumber != 0)
471 20035 : mLineNumber++;
472 20035 : mLineOffset = mOffset;
473 20035 : }
474 :
475 : /**
476 : * Back up |mOffset| over |n| code units. Backup(0) is a no-op.
477 : * If |n| is greater than the distance to beginning of input, will
478 : * silently stop at the beginning. May not be used to back up over a
479 : * line boundary.
480 : */
481 : void
482 0 : nsCSSScanner::Backup(uint32_t n)
483 : {
484 : #ifdef DEBUG
485 0 : while (mOffset > 0 && n > 0) {
486 0 : MOZ_ASSERT(!IsVertSpace(mBuffer[mOffset-1]),
487 : "may not Backup() over a line boundary");
488 0 : mOffset--;
489 0 : n--;
490 : }
491 : #else
492 : if (mOffset < n)
493 : mOffset = 0;
494 : else
495 : mOffset -= n;
496 : #endif
497 0 : }
498 :
499 : void
500 7458 : nsCSSScanner::SavePosition(nsCSSScannerPosition& aState)
501 : {
502 7458 : aState.mOffset = mOffset;
503 7458 : aState.mLineNumber = mLineNumber;
504 7458 : aState.mLineOffset = mLineOffset;
505 7458 : aState.mTokenLineNumber = mTokenLineNumber;
506 7458 : aState.mTokenLineOffset = mTokenLineOffset;
507 7458 : aState.mTokenOffset = mTokenOffset;
508 7458 : aState.mInitialized = true;
509 7458 : }
510 :
511 : void
512 258 : nsCSSScanner::RestoreSavedPosition(const nsCSSScannerPosition& aState)
513 : {
514 258 : MOZ_ASSERT(aState.mInitialized, "have not saved state");
515 258 : if (aState.mInitialized) {
516 258 : mOffset = aState.mOffset;
517 258 : mLineNumber = aState.mLineNumber;
518 258 : mLineOffset = aState.mLineOffset;
519 258 : mTokenLineNumber = aState.mTokenLineNumber;
520 258 : mTokenLineOffset = aState.mTokenLineOffset;
521 258 : mTokenOffset = aState.mTokenOffset;
522 : }
523 258 : }
524 :
525 : /**
526 : * Skip over a sequence of whitespace characters (vertical or
527 : * horizontal) starting at the current read position.
528 : */
529 : void
530 106930 : nsCSSScanner::SkipWhitespace()
531 : {
532 : for (;;) {
533 106930 : int32_t ch = Peek();
534 106930 : if (!IsWhitespace(ch)) { // EOF counts as non-whitespace
535 43589 : break;
536 : }
537 63341 : if (IsVertSpace(ch)) {
538 19238 : AdvanceLine();
539 : } else {
540 44103 : Advance();
541 : }
542 63341 : }
543 43589 : }
544 :
545 : /**
546 : * Skip over one CSS comment starting at the current read position.
547 : */
548 : void
549 1090 : nsCSSScanner::SkipComment()
550 : {
551 1090 : MOZ_ASSERT(Peek() == '/' && Peek(1) == '*', "should not have been called");
552 1090 : Advance(2);
553 : for (;;) {
554 90021 : int32_t ch = Peek();
555 90021 : if (ch < 0) {
556 0 : if (mReporter)
557 0 : mReporter->ReportUnexpectedEOF("PECommentEOF");
558 0 : SetEOFCharacters(eEOFCharacters_Asterisk | eEOFCharacters_Slash);
559 0 : return;
560 : }
561 90021 : if (ch == '*') {
562 2527 : Advance();
563 2527 : ch = Peek();
564 2527 : if (ch < 0) {
565 0 : if (mReporter)
566 0 : mReporter->ReportUnexpectedEOF("PECommentEOF");
567 0 : SetEOFCharacters(eEOFCharacters_Slash);
568 0 : return;
569 : }
570 2527 : if (ch == '/') {
571 1090 : Advance();
572 1090 : return;
573 : }
574 87494 : } else if (IsVertSpace(ch)) {
575 797 : AdvanceLine();
576 : } else {
577 86697 : Advance();
578 : }
579 88931 : }
580 : }
581 :
582 : /**
583 : * If there is a valid escape sequence starting at the current read
584 : * position, consume it, decode it, append the result to |aOutput|,
585 : * and return true. Otherwise, consume nothing, leave |aOutput|
586 : * unmodified, and return false. If |aInString| is true, accept the
587 : * additional form of escape sequence allowed within string-like tokens.
588 : */
589 : bool
590 1443 : nsCSSScanner::GatherEscape(nsString& aOutput, bool aInString)
591 : {
592 1443 : MOZ_ASSERT(Peek() == '\\', "should not have been called");
593 1443 : int32_t ch = Peek(1);
594 1443 : if (ch < 0) {
595 : // If we are in a string (or a url() containing a string), we want to drop
596 : // the backslash on the floor. Otherwise, we want to treat it as a U+FFFD
597 : // character.
598 0 : Advance();
599 0 : if (aInString) {
600 0 : SetEOFCharacters(eEOFCharacters_DropBackslash);
601 : } else {
602 0 : aOutput.Append(UCS2_REPLACEMENT_CHAR);
603 0 : SetEOFCharacters(eEOFCharacters_ReplacementChar);
604 : }
605 0 : return true;
606 : }
607 1443 : if (IsVertSpace(ch)) {
608 0 : if (aInString) {
609 : // In strings (and in url() containing a string), escaped
610 : // newlines are completely removed, to allow splitting over
611 : // multiple lines.
612 0 : Advance();
613 0 : AdvanceLine();
614 0 : return true;
615 : }
616 : // Outside of strings, backslash followed by a newline is not an escape.
617 0 : return false;
618 : }
619 :
620 1443 : if (!IsHexDigit(ch)) {
621 : // "Any character (except a hexadecimal digit, linefeed, carriage
622 : // return, or form feed) can be escaped with a backslash to remove
623 : // its special meaning." -- CSS2.1 section 4.1.3
624 1 : Advance(2);
625 1 : if (ch == 0) {
626 0 : aOutput.Append(UCS2_REPLACEMENT_CHAR);
627 : } else {
628 1 : aOutput.Append(ch);
629 : }
630 1 : return true;
631 : }
632 :
633 : // "[at most six hexadecimal digits following a backslash] stand
634 : // for the ISO 10646 character with that number, which must not be
635 : // zero. (It is undefined in CSS 2.1 what happens if a style sheet
636 : // does contain a character with Unicode codepoint zero.)"
637 : // -- CSS2.1 section 4.1.3
638 :
639 : // At this point we know we have \ followed by at least one
640 : // hexadecimal digit, therefore the escape sequence is valid and we
641 : // can go ahead and consume the backslash.
642 1442 : Advance();
643 1442 : uint32_t val = 0;
644 1442 : int i = 0;
645 5218 : do {
646 5218 : val = val * 16 + HexDigitValue(ch);
647 5218 : i++;
648 5218 : Advance();
649 5218 : ch = Peek();
650 5218 : } while (i < 6 && IsHexDigit(ch));
651 :
652 : // "Interpret the hex digits as a hexadecimal number. If this number is zero,
653 : // or is greater than the maximum allowed codepoint, return U+FFFD
654 : // REPLACEMENT CHARACTER" -- CSS Syntax Level 3
655 1442 : if (MOZ_UNLIKELY(val == 0)) {
656 0 : aOutput.Append(UCS2_REPLACEMENT_CHAR);
657 : } else {
658 1442 : AppendUCS4ToUTF16(ENSURE_VALID_CHAR(val), aOutput);
659 : }
660 :
661 : // Consume exactly one whitespace character after a
662 : // hexadecimal escape sequence.
663 1442 : if (IsVertSpace(ch)) {
664 0 : AdvanceLine();
665 1442 : } else if (IsHorzSpace(ch)) {
666 1076 : Advance();
667 : }
668 1442 : return true;
669 : }
670 :
671 : /**
672 : * Consume a run of "text" beginning with the current read position,
673 : * consisting of characters in the class |aClass| (which must be a
674 : * suitable argument to IsOpenCharClass) plus escape sequences.
675 : * Append the text to |aText|, after decoding escape sequences.
676 : *
677 : * Returns true if at least one character was appended to |aText|,
678 : * false otherwise.
679 : */
680 : bool
681 39210 : nsCSSScanner::GatherText(uint8_t aClass, nsString& aText)
682 : {
683 : // This is all of the character classes currently used with
684 : // GatherText. If you have a need to use this function with a
685 : // different class, go ahead and add it.
686 39210 : MOZ_ASSERT(aClass == IS_STRING ||
687 : aClass == IS_IDCHAR ||
688 : aClass == IS_URL_CHAR,
689 : "possibly-inappropriate character class");
690 :
691 39210 : uint32_t start = mOffset;
692 39210 : bool inString = aClass == IS_STRING;
693 :
694 : for (;;) {
695 : // Consume runs of unescaped characters in one go.
696 40653 : uint32_t n = mOffset;
697 819163 : while (n < mCount && IsOpenCharClass(mBuffer[n], aClass)) {
698 389255 : n++;
699 : }
700 40653 : if (n > mOffset) {
701 37743 : aText.Append(&mBuffer[mOffset], n - mOffset);
702 37743 : mOffset = n;
703 : }
704 40653 : if (n == mCount) {
705 949 : break;
706 : }
707 :
708 39704 : int32_t ch = Peek();
709 39704 : MOZ_ASSERT(!IsOpenCharClass(ch, aClass),
710 : "should not have exited the inner loop");
711 39704 : if (ch == 0) {
712 0 : Advance();
713 0 : aText.Append(UCS2_REPLACEMENT_CHAR);
714 0 : continue;
715 : }
716 :
717 39704 : if (ch != '\\') {
718 38261 : break;
719 : }
720 1443 : if (!GatherEscape(aText, inString)) {
721 0 : break;
722 : }
723 1443 : }
724 :
725 39210 : return mOffset > start;
726 : }
727 :
728 : /**
729 : * Scan an Ident token. This also handles Function and URL tokens,
730 : * both of which begin indistinguishably from an identifier. It can
731 : * produce a Symbol token when an apparent identifier actually led
732 : * into an invalid escape sequence.
733 : */
734 : bool
735 30613 : nsCSSScanner::ScanIdent(nsCSSToken& aToken)
736 : {
737 30613 : if (MOZ_UNLIKELY(!GatherText(IS_IDCHAR, aToken.mIdent))) {
738 0 : MOZ_ASSERT(Peek() == '\\',
739 : "unexpected IsIdentStart character that did not begin an ident");
740 0 : aToken.mSymbol = Peek();
741 0 : Advance();
742 0 : return true;
743 : }
744 :
745 30613 : if (MOZ_LIKELY(Peek() != '(')) {
746 26255 : aToken.mType = eCSSToken_Ident;
747 26255 : return true;
748 : }
749 :
750 4358 : Advance();
751 4358 : aToken.mType = eCSSToken_Function;
752 4358 : if (aToken.mIdent.LowerCaseEqualsLiteral("url")) {
753 752 : NextURL(aToken);
754 3606 : } else if (aToken.mIdent.LowerCaseEqualsLiteral("var")) {
755 1603 : mSeenVariableReference = true;
756 : }
757 4358 : return true;
758 : }
759 :
760 : /**
761 : * Scan an AtKeyword token. Also handles production of Symbol when
762 : * an '@' is not followed by an identifier.
763 : */
764 : bool
765 286 : nsCSSScanner::ScanAtKeyword(nsCSSToken& aToken)
766 : {
767 286 : MOZ_ASSERT(Peek() == '@', "should not have been called");
768 :
769 : // Fall back for when '@' isn't followed by an identifier.
770 286 : aToken.mSymbol = '@';
771 286 : Advance();
772 :
773 286 : int32_t ch = Peek();
774 286 : if (StartsIdent(ch, Peek(1))) {
775 286 : if (GatherText(IS_IDCHAR, aToken.mIdent)) {
776 286 : aToken.mType = eCSSToken_AtKeyword;
777 : }
778 : }
779 286 : return true;
780 : }
781 :
782 : /**
783 : * Scan a Hash token. Handles the distinction between eCSSToken_ID
784 : * and eCSSToken_Hash, and handles production of Symbol when a '#'
785 : * is not followed by identifier characters.
786 : */
787 : bool
788 2386 : nsCSSScanner::ScanHash(nsCSSToken& aToken)
789 : {
790 2386 : MOZ_ASSERT(Peek() == '#', "should not have been called");
791 :
792 : // Fall back for when '#' isn't followed by identifier characters.
793 2386 : aToken.mSymbol = '#';
794 2386 : Advance();
795 :
796 2386 : int32_t ch = Peek();
797 2386 : if (IsIdentChar(ch) || ch == '\\') {
798 : nsCSSTokenType type =
799 2386 : StartsIdent(ch, Peek(1)) ? eCSSToken_ID : eCSSToken_Hash;
800 2386 : aToken.mIdent.SetLength(0);
801 2386 : if (GatherText(IS_IDCHAR, aToken.mIdent)) {
802 2386 : aToken.mType = type;
803 : }
804 : }
805 :
806 2386 : return true;
807 : }
808 :
809 : /**
810 : * Scan a Number, Percentage, or Dimension token (all of which begin
811 : * like a Number). Can produce a Symbol when a '.' is not followed by
812 : * digits, or when '+' or '-' are not followed by either a digit or a
813 : * '.' and then a digit. Can also produce a HTMLComment when it
814 : * encounters '-->'.
815 : */
816 : bool
817 7607 : nsCSSScanner::ScanNumber(nsCSSToken& aToken)
818 : {
819 7607 : int32_t c = Peek();
820 : #ifdef DEBUG
821 : {
822 7607 : int32_t c2 = Peek(1);
823 7607 : int32_t c3 = Peek(2);
824 7607 : MOZ_ASSERT(IsDigit(c) ||
825 : (IsDigit(c2) && (c == '.' || c == '+' || c == '-')) ||
826 : (IsDigit(c3) && (c == '+' || c == '-') && c2 == '.'),
827 : "should not have been called");
828 : }
829 : #endif
830 :
831 : // Sign of the mantissa (-1 or 1).
832 7607 : int32_t sign = c == '-' ? -1 : 1;
833 : // Absolute value of the integer part of the mantissa. This is a double so
834 : // we don't run into overflow issues for consumers that only care about our
835 : // floating-point value while still being able to express the full int32_t
836 : // range for consumers who want integers.
837 7607 : double intPart = 0;
838 : // Fractional part of the mantissa. This is a double so that when we convert
839 : // to float at the end we'll end up rounding to nearest float instead of
840 : // truncating down (as we would if fracPart were a float and we just
841 : // effectively lost the last several digits).
842 7607 : double fracPart = 0;
843 : // Absolute value of the power of 10 that we should multiply by (only
844 : // relevant for numbers in scientific notation). Has to be a signed integer,
845 : // because multiplication of signed by unsigned converts the unsigned to
846 : // signed, so if we plan to actually multiply by expSign...
847 7607 : int32_t exponent = 0;
848 : // Sign of the exponent.
849 7607 : int32_t expSign = 1;
850 :
851 7607 : aToken.mHasSign = (c == '+' || c == '-');
852 7607 : if (aToken.mHasSign) {
853 302 : Advance();
854 302 : c = Peek();
855 : }
856 :
857 7607 : bool gotDot = (c == '.');
858 :
859 7607 : if (!gotDot) {
860 : // Scan the integer part of the mantissa.
861 7298 : MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
862 10866 : do {
863 10866 : intPart = 10*intPart + DecimalDigitValue(c);
864 10866 : Advance();
865 10866 : c = Peek();
866 : } while (IsDigit(c));
867 :
868 7298 : gotDot = (c == '.') && IsDigit(Peek(1));
869 : }
870 :
871 7607 : if (gotDot) {
872 : // Scan the fractional part of the mantissa.
873 581 : Advance();
874 581 : c = Peek();
875 581 : MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
876 : // Power of ten by which we need to divide our next digit
877 581 : double divisor = 10;
878 791 : do {
879 791 : fracPart += DecimalDigitValue(c) / divisor;
880 791 : divisor *= 10;
881 791 : Advance();
882 791 : c = Peek();
883 : } while (IsDigit(c));
884 : }
885 :
886 7607 : bool gotE = false;
887 7607 : if (c == 'e' || c == 'E') {
888 298 : int32_t expSignChar = Peek(1);
889 298 : int32_t nextChar = Peek(2);
890 298 : if (IsDigit(expSignChar) ||
891 298 : ((expSignChar == '-' || expSignChar == '+') && IsDigit(nextChar))) {
892 0 : gotE = true;
893 0 : if (expSignChar == '-') {
894 0 : expSign = -1;
895 : }
896 0 : Advance(); // consumes the E
897 0 : if (expSignChar == '-' || expSignChar == '+') {
898 0 : Advance();
899 0 : c = nextChar;
900 : } else {
901 0 : c = expSignChar;
902 : }
903 0 : MOZ_ASSERT(IsDigit(c), "should have been excluded by logic above");
904 0 : do {
905 0 : exponent = 10*exponent + DecimalDigitValue(c);
906 0 : Advance();
907 0 : c = Peek();
908 : } while (IsDigit(c));
909 : }
910 : }
911 :
912 7607 : nsCSSTokenType type = eCSSToken_Number;
913 :
914 : // Set mIntegerValid for all cases (except %, below) because we need
915 : // it for the "2n" in :nth-child(2n).
916 7607 : aToken.mIntegerValid = false;
917 :
918 : // Time to reassemble our number.
919 : // Do all the math in double precision so it's truncated only once.
920 7607 : double value = sign * (intPart + fracPart);
921 7607 : if (gotE) {
922 : // Avoid multiplication of 0 by Infinity.
923 0 : if (value != 0.0) {
924 : // Explicitly cast expSign*exponent to double to avoid issues with
925 : // overloaded pow() on Windows.
926 0 : value *= pow(10.0, double(expSign * exponent));
927 : }
928 7607 : } else if (!gotDot) {
929 : // Clamp values outside of integer range.
930 7026 : if (sign > 0) {
931 6735 : aToken.mInteger = int32_t(std::min(intPart, double(INT32_MAX)));
932 : } else {
933 291 : aToken.mInteger = int32_t(std::max(-intPart, double(INT32_MIN)));
934 : }
935 7026 : aToken.mIntegerValid = true;
936 : }
937 :
938 7607 : nsString& ident = aToken.mIdent;
939 :
940 : // Check for Dimension and Percentage tokens.
941 7607 : if (c >= 0) {
942 7376 : if (StartsIdent(c, Peek(1))) {
943 3788 : if (GatherText(IS_IDCHAR, ident)) {
944 3788 : type = eCSSToken_Dimension;
945 : }
946 3588 : } else if (c == '%') {
947 758 : Advance();
948 758 : type = eCSSToken_Percentage;
949 758 : value = value / 100.0f;
950 758 : aToken.mIntegerValid = false;
951 : }
952 : }
953 7607 : MOZ_ASSERT(!IsNaN(value), "The value should not be NaN");
954 7607 : aToken.mNumber = value;
955 7607 : aToken.mType = type;
956 7607 : return true;
957 : }
958 :
959 : /**
960 : * Scan a string constant ('foo' or "foo"). Will always produce
961 : * either a String or a Bad_String token; the latter occurs when the
962 : * close quote is missing. Always returns true (for convenience in Next()).
963 : */
964 : bool
965 1855 : nsCSSScanner::ScanString(nsCSSToken& aToken)
966 : {
967 1855 : int32_t aStop = Peek();
968 1855 : MOZ_ASSERT(aStop == '"' || aStop == '\'', "should not have been called");
969 1855 : aToken.mType = eCSSToken_String;
970 1855 : aToken.mSymbol = char16_t(aStop); // Remember how it's quoted.
971 1855 : Advance();
972 :
973 : for (;;) {
974 1895 : GatherText(IS_STRING, aToken.mIdent);
975 :
976 1895 : int32_t ch = Peek();
977 1895 : if (ch == -1) {
978 0 : AddEOFCharacters(aStop == '"' ? eEOFCharacters_DoubleQuote :
979 0 : eEOFCharacters_SingleQuote);
980 0 : break; // EOF ends a string token with no error.
981 : }
982 1895 : if (ch == aStop) {
983 1855 : Advance();
984 1855 : break;
985 : }
986 : // Both " and ' are excluded from IS_STRING.
987 40 : if (ch == '"' || ch == '\'') {
988 40 : aToken.mIdent.Append(ch);
989 40 : Advance();
990 40 : continue;
991 : }
992 :
993 0 : mSeenBadToken = true;
994 0 : aToken.mType = eCSSToken_Bad_String;
995 0 : if (mReporter)
996 0 : mReporter->ReportUnexpected("SEUnterminatedString", aToken);
997 0 : break;
998 40 : }
999 1855 : return true;
1000 : }
1001 :
1002 : /**
1003 : * Scan a unicode-range token. These match the regular expression
1004 : *
1005 : * u\+[0-9a-f?]{1,6}(-[0-9a-f]{1,6})?
1006 : *
1007 : * However, some such tokens are "invalid". There are three valid forms:
1008 : *
1009 : * u+[0-9a-f]{x} 1 <= x <= 6
1010 : * u+[0-9a-f]{x}\?{y} 1 <= x+y <= 6
1011 : * u+[0-9a-f]{x}-[0-9a-f]{y} 1 <= x <= 6, 1 <= y <= 6
1012 : *
1013 : * All unicode-range tokens have their text recorded in mIdent; every token
1014 : * is also decoded into mInteger (low end) and mInteger2 (high end), and mIntegerValid says whether the form was valid.
1015 : * Note that this does not validate the numeric range, only the syntactic
1016 : * form. Each '?' counts as digit 0 in mInteger and as digit F in mInteger2. Always returns true.
1017 : */
1018 : bool
1019 0 : nsCSSScanner::ScanURange(nsCSSToken& aResult)
1020 : {
1021 0 : int32_t intro1 = Peek();
1022 0 : int32_t intro2 = Peek(1);
1023 0 : int32_t ch = Peek(2); // first character after the "u+" introducer
1024 :
1025 0 : MOZ_ASSERT((intro1 == 'u' || intro1 == 'U') &&
1026 : intro2 == '+' &&
1027 : (IsHexDigit(ch) || ch == '?'),
1028 : "should not have been called");
1029 :
1030 0 : aResult.mIdent.Append(intro1);
1031 0 : aResult.mIdent.Append(intro2);
1032 0 : Advance(2); // skip "u+"; |ch| (peeked above at offset 2) is now the current character
1033 :
1034 0 : bool valid = true;
1035 0 : bool haveQues = false;
1036 0 : uint32_t low = 0;
1037 0 : uint32_t high = 0;
1038 0 : int i = 0; // number of digit/'?' characters consumed in the current run
1039 :
1040 0 : do {
1041 0 : aResult.mIdent.Append(ch);
1042 0 : if (IsHexDigit(ch)) {
1043 0 : if (haveQues) {
1044 0 : valid = false; // All question marks should be at the end.
1045 : }
1046 0 : low = low*16 + HexDigitValue(ch);
1047 0 : high = high*16 + HexDigitValue(ch);
1048 : } else {
1049 0 : haveQues = true; // '?' is a wildcard digit: 0 for the low bound, F for the high bound
1050 0 : low = low*16 + 0x0;
1051 0 : high = high*16 + 0xF;
1052 : }
1053 :
1054 0 : i++;
1055 0 : Advance();
1056 0 : ch = Peek();
1057 0 : } while (i < 6 && (IsHexDigit(ch) || ch == '?')); // at most six characters in this run
1058 :
1059 0 : if (ch == '-' && IsHexDigit(Peek(1))) { // explicit "low-high" form; a '-' not followed by a hex digit is left for the next token
1060 0 : if (haveQues) {
1061 0 : valid = false; // '?' wildcards cannot be combined with an explicit upper bound
1062 : }
1063 :
1064 0 : aResult.mIdent.Append(ch);
1065 0 : Advance();
1066 0 : ch = Peek();
1067 0 : high = 0; // discard the value accumulated above; the upper bound comes from the second number
1068 0 : i = 0;
1069 0 : do {
1070 0 : aResult.mIdent.Append(ch);
1071 0 : high = high*16 + HexDigitValue(ch);
1072 :
1073 0 : i++;
1074 0 : Advance();
1075 0 : ch = Peek();
1076 0 : } while (i < 6 && IsHexDigit(ch)); // again at most six digits
1077 : }
1078 :
1079 0 : aResult.mInteger = low;
1080 0 : aResult.mInteger2 = high;
1081 0 : aResult.mIntegerValid = valid;
1082 0 : aResult.mType = eCSSToken_URange;
1083 0 : return true;
1084 : }
1085 : // Debug-only sanity check: asserts that |c| is one of the combinations of EOFCharacters bits enumerated below.
1086 : #ifdef DEBUG
1087 : /* static */ void
1088 614 : nsCSSScanner::AssertEOFCharactersValid(uint32_t c)
1089 : {
1090 614 : MOZ_ASSERT(c == eEOFCharacters_None ||
1091 : c == eEOFCharacters_ReplacementChar ||
1092 : c == eEOFCharacters_Slash ||
1093 : c == (eEOFCharacters_Asterisk |
1094 : eEOFCharacters_Slash) ||
1095 : c == eEOFCharacters_DoubleQuote ||
1096 : c == eEOFCharacters_SingleQuote ||
1097 : c == (eEOFCharacters_DropBackslash |
1098 : eEOFCharacters_DoubleQuote) ||
1099 : c == (eEOFCharacters_DropBackslash |
1100 : eEOFCharacters_SingleQuote) ||
1101 : c == eEOFCharacters_CloseParen ||
1102 : c == (eEOFCharacters_ReplacementChar |
1103 : eEOFCharacters_CloseParen) ||
1104 : c == (eEOFCharacters_DoubleQuote |
1105 : eEOFCharacters_CloseParen) ||
1106 : c == (eEOFCharacters_SingleQuote |
1107 : eEOFCharacters_CloseParen) ||
1108 : c == (eEOFCharacters_DropBackslash |
1109 : eEOFCharacters_DoubleQuote |
1110 : eEOFCharacters_CloseParen) ||
1111 : c == (eEOFCharacters_DropBackslash |
1112 : eEOFCharacters_SingleQuote |
1113 : eEOFCharacters_CloseParen),
1114 : "invalid EOFCharacters value");
1115 614 : }
1116 : #endif
1117 : // Overwrite mEOFCharacters with aEOFCharacters (converted to the EOFCharacters type); bits set earlier are discarded.
1118 : void
1119 0 : nsCSSScanner::SetEOFCharacters(uint32_t aEOFCharacters)
1120 : {
1121 0 : mEOFCharacters = EOFCharacters(aEOFCharacters);
1122 0 : }
1123 : // OR the bits of aEOFCharacters into mEOFCharacters, keeping any bits that are already set.
1124 : void
1125 0 : nsCSSScanner::AddEOFCharacters(uint32_t aEOFCharacters)
1126 : {
1127 0 : mEOFCharacters = EOFCharacters(mEOFCharacters | aEOFCharacters);
1128 0 : }
1129 : // Bit-to-character table walked by AppendImpliedEOFCharacters: entry i is the character for bit i of the EOFCharacters value once its lowest bit has been shifted out; the trailing 0 terminates the table.
1130 : static const char16_t kImpliedEOFCharacters[] = {
1131 : UCS2_REPLACEMENT_CHAR, '*', '/', '"', '\'', ')', 0
1132 : };
1133 : // Append to aResult one character per set bit of aEOFCharacters (lowest bit skipped), in kImpliedEOFCharacters order.
1134 : /* static */ void
1135 141 : nsCSSScanner::AppendImpliedEOFCharacters(EOFCharacters aEOFCharacters,
1136 : nsAString& aResult)
1137 : {
1138 : // First, ignore eEOFCharacters_DropBackslash.
1139 141 : uint32_t c = aEOFCharacters >> 1; // shifts out the lowest bit, so bit 0 of |c| now lines up with table entry 0
1140 :
1141 : // All of the remaining EOFCharacters bits represent appended characters,
1142 : // and the bits are in the order that they need appending.
1143 141 : for (const char16_t* p = kImpliedEOFCharacters; *p && c; p++, c >>= 1) { // stops at the table's 0 terminator or once no set bits remain
1144 0 : if (c & 1) {
1145 0 : aResult.Append(*p);
1146 : }
1147 : }
1148 :
1149 141 : MOZ_ASSERT(c == 0, "too many bits in mEOFCharacters"); // bits still set here have no entry in the table
1150 141 : }
1151 :
1152 : /**
1153 : * Consume the part of an URL token after the initial 'url('. Caller
1154 : * is assumed to have consumed 'url(' already. Will always produce
1155 : * either an URL or a Bad_URL token; the URL text is placed in aToken.mIdent. EOF in place of the closing ')' still yields an URL token.
1156 : *
1157 : * Exposed for use by nsCSSParser::ParseMozDocumentRule, which applies
1158 : * the special lexical rules for URL tokens in a nonstandard context.
1159 : */
1160 : void
1161 754 : nsCSSScanner::NextURL(nsCSSToken& aToken)
1162 : {
1163 754 : SkipWhitespace(); // whitespace between 'url(' and the URL itself
1164 :
1165 : // aToken.mIdent may be "url" at this point; clear that out
1166 754 : aToken.mIdent.Truncate();
1167 :
1168 754 : int32_t ch = Peek();
1169 : // Do we have a string?
1170 1266 : if (ch == '"' || ch == '\'') {
1171 512 : ScanString(aToken);
1172 512 : if (MOZ_UNLIKELY(aToken.mType == eCSSToken_Bad_String)) {
1173 0 : aToken.mType = eCSSToken_Bad_URL; // an unterminated quoted URL is reported as a bad URL rather than a bad string
1174 0 : return;
1175 : }
1176 512 : MOZ_ASSERT(aToken.mType == eCSSToken_String, "unexpected token type");
1177 :
1178 : } else {
1179 : // Otherwise, this is the start of a non-quoted url (which may be empty).
1180 242 : aToken.mSymbol = char16_t(0); // no quote character for an unquoted URL
1181 242 : GatherText(IS_URL_CHAR, aToken.mIdent);
1182 : }
1183 :
1184 : // Consume trailing whitespace and then look for a close parenthesis.
1185 754 : SkipWhitespace();
1186 754 : ch = Peek();
1187 : // ch can be less than zero indicating EOF
1188 754 : if (MOZ_LIKELY(ch < 0 || ch == ')')) {
1189 754 : Advance(); // NOTE(review): also reached when ch < 0 (EOF); presumably Advance() is harmless there -- confirm
1190 754 : aToken.mType = eCSSToken_URL;
1191 1508 : if (ch < 0) {
1192 0 : AddEOFCharacters(eEOFCharacters_CloseParen); // record that a ')' was implied by end of input
1193 : }
1194 : } else {
1195 0 : mSeenBadToken = true; // something other than ')' or EOF follows the URL; that character is not consumed here
1196 0 : aToken.mType = eCSSToken_Bad_URL;
1197 : }
1198 : }
1199 :
1200 : /**
1201 : * Primary scanner entry point. Consume one token and fill in
1202 : * |aToken| accordingly. Whitespace is returned as a token unless |aSkip| is
1203 : * eCSSScannerExclude_WhitespaceAndComments; comments are returned as tokens
1204 : * only when |aSkip| is eCSSScannerExclude_None. Whatever is not returned is skipped.
1205 : *
1206 : * Returns true if it successfully consumed a token, false if EOF has
1207 : * been reached. Will always advance the current read position by at
1208 : * least one character unless called when already at EOF.
1209 : */
1210 : bool
1211 100541 : nsCSSScanner::Next(nsCSSToken& aToken, nsCSSScannerExclude aSkip)
1212 : {
1213 : int32_t ch;
1214 :
1215 : // do this here so we don't have to do it in dozens of other places
1216 100541 : aToken.mIdent.Truncate();
1217 100541 : aToken.mType = eCSSToken_Symbol; // default type; the final "symbol" case below relies on it
1218 :
1219 : for (;;) {
1220 : // Consume any number of comments, and possibly also whitespace tokens,
1221 : // in between other tokens.
1222 169139 : mTokenOffset = mOffset; // re-recorded on every pass so the saved position is where the returned token starts
1223 134840 : mTokenLineOffset = mLineOffset;
1224 134840 : mTokenLineNumber = mLineNumber;
1225 :
1226 134840 : ch = Peek();
1227 134840 : if (IsWhitespace(ch)) {
1228 42081 : SkipWhitespace();
1229 42081 : if (aSkip != eCSSScannerExclude_WhitespaceAndComments) {
1230 8872 : aToken.mType = eCSSToken_Whitespace;
1231 8872 : return true;
1232 : }
1233 33209 : continue; // start again at the beginning
1234 : }
1235 92759 : if (ch == '/' && Peek(1) == '*') {
1236 1090 : SkipComment();
1237 1090 : if (aSkip == eCSSScannerExclude_None) {
1238 0 : aToken.mType = eCSSToken_Comment;
1239 0 : return true;
1240 : }
1241 1090 : continue; // start again at the beginning
1242 : }
1243 91669 : break; // |ch| is neither whitespace nor the start of a comment
1244 : }
1245 :
1246 : // EOF
1247 91669 : if (ch < 0) {
1248 5456 : return false;
1249 : }
1250 :
1251 : // 'u' could be UNICODE-RANGE or an identifier-family token
1252 86213 : if (ch == 'u' || ch == 'U') {
1253 974 : int32_t c2 = Peek(1);
1254 974 : int32_t c3 = Peek(2);
1255 974 : if (c2 == '+' && (IsHexDigit(c3) || c3 == '?')) {
1256 0 : return ScanURange(aToken);
1257 : }
1258 974 : return ScanIdent(aToken); // not "u+<hex digit or ?>": scan as an identifier-family token
1259 : }
1260 :
1261 : // identifier family
1262 85239 : if (IsIdentStart(ch)) {
1263 25547 : return ScanIdent(aToken);
1264 : }
1265 :
1266 : // number family
1267 59692 : if (IsDigit(ch)) {
1268 6997 : return ScanNumber(aToken);
1269 : }
1270 :
1271 52695 : if (ch == '.' && IsDigit(Peek(1))) {
1272 308 : return ScanNumber(aToken);
1273 : }
1274 :
1275 52387 : if (ch == '+') { // a '+' that does not introduce a number falls through to the later cases
1276 296 : int32_t c2 = Peek(1);
1277 296 : if (IsDigit(c2) || (c2 == '.' && IsDigit(Peek(2)))) {
1278 0 : return ScanNumber(aToken);
1279 : }
1280 : }
1281 :
1282 : // '-' can start an identifier-family token, a number-family token,
1283 : // or an HTML-comment
1284 52387 : if (ch == '-') {
1285 4492 : int32_t c2 = Peek(1);
1286 4492 : int32_t c3 = Peek(2);
1287 4492 : if (IsIdentStart(c2) || (c2 == '-' && c3 != '>')) { // "-" + identifier start, or "--" unless it is the "-->" comment close
1288 4092 : return ScanIdent(aToken);
1289 : }
1290 400 : if (IsDigit(c2) || (c2 == '.' && IsDigit(c3))) {
1291 302 : return ScanNumber(aToken);
1292 : }
1293 98 : if (c2 == '-' && c3 == '>') {
1294 0 : Advance(3);
1295 0 : aToken.mType = eCSSToken_HTMLComment;
1296 0 : aToken.mIdent.AssignLiteral("-->");
1297 0 : return true;
1298 : }
1299 : }
1300 :
1301 : // the other HTML-comment token
1302 47993 : if (ch == '<' && Peek(1) == '!' && Peek(2) == '-' && Peek(3) == '-') {
1303 0 : Advance(4);
1304 0 : aToken.mType = eCSSToken_HTMLComment;
1305 0 : aToken.mIdent.AssignLiteral("<!--");
1306 0 : return true;
1307 : }
1308 :
1309 : // AT_KEYWORD
1310 47993 : if (ch == '@') {
1311 286 : return ScanAtKeyword(aToken);
1312 : }
1313 :
1314 : // HASH
1315 47707 : if (ch == '#') {
1316 2386 : return ScanHash(aToken);
1317 : }
1318 :
1319 : // STRING
1320 45321 : if (ch == '"' || ch == '\'') {
1321 1343 : return ScanString(aToken);
1322 : }
1323 :
1324 : // Match operators: ~= |= ^= $= *=
1325 43978 : nsCSSTokenType opType = MatchOperatorType(ch);
1326 43978 : if (opType != eCSSToken_Symbol && Peek(1) == '=') { // presumably MatchOperatorType yields eCSSToken_Symbol when |ch| cannot begin a match operator -- defined elsewhere
1327 117 : aToken.mType = opType;
1328 117 : Advance(2);
1329 117 : return true;
1330 : }
1331 :
1332 : // Otherwise, a symbol (DELIM).
1333 43861 : aToken.mSymbol = ch; // mType still holds the eCSSToken_Symbol default assigned on entry
1334 43861 : Advance();
1335 43861 : return true;
1336 : }
1337 :
1338 : /* nsCSSGridTemplateAreaScanner methods. */
1339 : // Set up a scanner over aBuffer: keeps aBuffer.BeginReading() and aBuffer.Length() and starts at offset 0. NOTE(review): mBuffer presumably points into aBuffer's storage, so aBuffer must outlive the scanner -- confirm.
1340 0 : nsCSSGridTemplateAreaScanner::nsCSSGridTemplateAreaScanner(const nsAString& aBuffer)
1341 0 : : mBuffer(aBuffer.BeginReading())
1342 : , mOffset(0)
1343 0 : , mCount(aBuffer.Length())
1344 : {
1345 0 : }
1346 : // Read the next token into aTokenResult. Returns false once only whitespace (or nothing) remains in the buffer, true otherwise.
1347 : bool
1348 0 : nsCSSGridTemplateAreaScanner::Next(nsCSSGridTemplateAreaToken& aTokenResult)
1349 : {
1350 : int32_t ch;
1351 : // Skip whitespace
1352 0 : do {
1353 0 : if (mOffset >= mCount) {
1354 0 : return false; // ran out of input before finding a non-whitespace character
1355 : }
1356 0 : ch = mBuffer[mOffset];
1357 0 : mOffset++;
1358 : } while (IsWhitespace(ch));
1359 :
1360 0 : if (IsOpenCharClass(ch, IS_IDCHAR)) {
1361 : // Named cell token
1362 0 : uint32_t start = mOffset - 1; // offset of |ch|
1363 0 : while (mOffset < mCount && IsOpenCharClass(mBuffer[mOffset], IS_IDCHAR)) {
1364 0 : mOffset++;
1365 : }
1366 0 : aTokenResult.mName.Assign(&mBuffer[start], mOffset - start); // the whole run starting at |ch| becomes the cell name
1367 0 : aTokenResult.isTrash = false;
1368 0 : } else if (ch == '.') {
1369 : // Null cell token
1370 : // Skip any other '.'
1371 0 : while (mOffset < mCount && mBuffer[mOffset] == '.') {
1372 0 : mOffset++;
1373 : }
1374 0 : aTokenResult.mName.Truncate(); // a null cell is reported with an empty name
1375 0 : aTokenResult.isTrash = false;
1376 : } else {
1377 : // Trash token
1378 0 : aTokenResult.isTrash = true; // only the single character |ch| was consumed; mName is left unchanged
1379 : }
1380 0 : return true;
1381 : }
|