LCOV - code coverage report
Current view: top level - js/src/irregexp - RegExpParser.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 433 1063 40.7 %
Date: 2017-07-14 16:53:18 Functions: 38 83 45.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
       2             :  * vim: set ts=8 sts=4 et sw=4 tw=99: */
       3             : 
       4             : // Copyright 2012 the V8 project authors. All rights reserved.
       5             : // Redistribution and use in source and binary forms, with or without
       6             : // modification, are permitted provided that the following conditions are
       7             : // met:
       8             : //
       9             : //     * Redistributions of source code must retain the above copyright
      10             : //       notice, this list of conditions and the following disclaimer.
      11             : //     * Redistributions in binary form must reproduce the above
      12             : //       copyright notice, this list of conditions and the following
      13             : //       disclaimer in the documentation and/or other materials provided
      14             : //       with the distribution.
      15             : //     * Neither the name of Google Inc. nor the names of its
      16             : //       contributors may be used to endorse or promote products derived
      17             : //       from this software without specific prior written permission.
      18             : //
      19             : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      20             : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      21             : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      22             : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
      23             : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
      24             : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
      25             : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      26             : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      27             : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      28             : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
      29             : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      30             : 
      31             : #include "irregexp/RegExpParser.h"
      32             : 
      33             : #include "mozilla/ArrayUtils.h"
      34             : #include "mozilla/Move.h"
      35             : 
      36             : #include "frontend/TokenStream.h"
      37             : #include "irregexp/RegExpCharacters.h"
      38             : #include "vm/ErrorReporting.h"
      39             : #include "vm/StringBuffer.h"
      40             : 
      41             : using namespace js;
      42             : using namespace js::irregexp;
      43             : 
      44             : using mozilla::Move;
      45             : using mozilla::PointerRangeSize;
      46             : 
      47             : // ----------------------------------------------------------------------------
      48             : // RegExpBuilder
      49             : 
      50        1121 : RegExpBuilder::RegExpBuilder(LifoAlloc* alloc)
      51             :   : alloc(alloc),
      52             :     pending_empty_(false),
      53             :     characters_(nullptr)
      54             : #ifdef DEBUG
      55        1121 :   , last_added_(ADD_NONE)
      56             : #endif
      57        1121 : {}
      58             : 
      59             : void
      60        3717 : RegExpBuilder::FlushCharacters()
      61             : {
      62        3717 :     pending_empty_ = false;
      63        3717 :     if (characters_ != nullptr) {
      64        1134 :         RegExpTree* atom = alloc->newInfallible<RegExpAtom>(characters_);
      65        1134 :         characters_ = nullptr;
      66        1134 :         text_.Add(alloc, atom);
      67             : #ifdef DEBUG
      68        1134 :         last_added_ = ADD_ATOM;
      69             : #endif
      70             :     }
      71        3717 : }
      72             : 
      73             : void
      74        2991 : RegExpBuilder::FlushText()
      75             : {
      76        2991 :     FlushCharacters();
      77        2991 :     int num_text = text_.length();
      78        2991 :     if (num_text == 0)
      79        1670 :         return;
      80        1321 :     if (num_text == 1) {
      81        1285 :         terms_.Add(alloc, text_.last());
      82             :     } else {
      83          36 :         RegExpText* text = alloc->newInfallible<RegExpText>(alloc);
      84         111 :         for (int i = 0; i < num_text; i++)
      85          75 :             text_.Get(i)->AppendToText(text);
      86          36 :         terms_.Add(alloc, text);
      87             :     }
      88        1321 :     text_.Clear();
      89             : }
      90             : 
      91             : void
      92        5130 : RegExpBuilder::AddCharacter(char16_t c)
      93             : {
      94        5130 :     pending_empty_ = false;
      95        5130 :     if (characters_ == nullptr)
      96        1195 :         characters_ = alloc->newInfallible<CharacterVector>(*alloc);
      97        5130 :     characters_->append(c);
      98             : #ifdef DEBUG
      99        5130 :     last_added_ = ADD_CHAR;
     100             : #endif
     101        5130 : }
     102             : 
     103             : void
     104           0 : RegExpBuilder::AddEmpty()
     105             : {
     106           0 :     pending_empty_ = true;
     107           0 : }
     108             : 
     109             : void
     110        1088 : RegExpBuilder::AddAtom(RegExpTree* term)
     111             : {
     112        1088 :     if (term->IsEmpty()) {
     113           0 :         AddEmpty();
     114           0 :         return;
     115             :     }
     116        1088 :     if (term->IsTextElement()) {
     117         726 :         FlushCharacters();
     118         726 :         text_.Add(alloc, term);
     119             :     } else {
     120         362 :         FlushText();
     121         362 :         terms_.Add(alloc, term);
     122             :     }
     123             : #ifdef DEBUG
     124        1088 :     last_added_ = ADD_ATOM;
     125             : #endif
     126             : }
     127             : 
     128             : void
     129         489 : RegExpBuilder::AddAssertion(RegExpTree* assert)
     130             : {
     131         489 :     FlushText();
     132         489 :     terms_.Add(alloc, assert);
     133             : #ifdef DEBUG
     134         489 :     last_added_ = ADD_ASSERT;
     135             : #endif
     136         489 : }
     137             : 
     138             : void
     139         430 : RegExpBuilder::NewAlternative()
     140             : {
     141         430 :     FlushTerms();
     142         430 : }
     143             : 
     144             : void
     145        1551 : RegExpBuilder::FlushTerms()
     146             : {
     147        1551 :     FlushText();
     148        1551 :     int num_terms = terms_.length();
     149             :     RegExpTree* alternative;
     150        1551 :     if (num_terms == 0)
     151           2 :         alternative = RegExpEmpty::GetInstance();
     152        1549 :     else if (num_terms == 1)
     153         988 :         alternative = terms_.last();
     154             :     else
     155         561 :         alternative = alloc->newInfallible<RegExpAlternative>(terms_.GetList(alloc));
     156        1551 :     alternatives_.Add(alloc, alternative);
     157        1551 :     terms_.Clear();
     158             : #ifdef DEBUG
     159        1551 :     last_added_ = ADD_NONE;
     160             : #endif
     161        1551 : }
     162             : 
     163             : RegExpTree*
     164        1121 : RegExpBuilder::ToRegExp()
     165             : {
     166        1121 :     FlushTerms();
     167        1121 :     int num_alternatives = alternatives_.length();
     168        1121 :     if (num_alternatives == 0) {
     169           0 :         return RegExpEmpty::GetInstance();
     170             :     }
     171        1121 :     if (num_alternatives == 1) {
     172         957 :         return alternatives_.last();
     173             :     }
     174         164 :     return alloc->newInfallible<RegExpDisjunction>(alternatives_.GetList(alloc));
     175             : }
     176             : 
     177             : void
     178         652 : RegExpBuilder::AddQuantifierToAtom(int min, int max,
     179             :                                    RegExpQuantifier::QuantifierType quantifier_type)
     180             : {
     181         652 :     if (pending_empty_) {
     182           0 :         pending_empty_ = false;
     183           0 :         return;
     184             :     }
     185             :     RegExpTree* atom;
     186         652 :     if (characters_ != nullptr) {
     187          61 :         MOZ_ASSERT(last_added_ == ADD_CHAR);
     188             :         // Last atom was character.
     189          61 :         CharacterVector* char_vector = characters_;
     190          61 :         int num_chars = char_vector->length();
     191          61 :         if (num_chars > 1) {
     192          28 :             CharacterVector* prefix = alloc->newInfallible<CharacterVector>(*alloc);
     193          28 :             prefix->append(char_vector->begin(), num_chars - 1);
     194          28 :             text_.Add(alloc, alloc->newInfallible<RegExpAtom>(prefix));
     195          28 :             char_vector = alloc->newInfallible<CharacterVector>(*alloc);
     196          28 :             char_vector->append((*characters_)[num_chars - 1]);
     197             :         }
     198          61 :         characters_ = nullptr;
     199          61 :         atom = alloc->newInfallible<RegExpAtom>(char_vector);
     200          61 :         FlushText();
     201         591 :     } else if (text_.length() > 0) {
     202         528 :         MOZ_ASSERT(last_added_ == ADD_ATOM);
     203         528 :         atom = text_.RemoveLast();
     204         528 :         FlushText();
     205          63 :     } else if (terms_.length() > 0) {
     206          63 :         MOZ_ASSERT(last_added_ == ADD_ATOM);
     207          63 :         atom = terms_.RemoveLast();
     208          63 :         if (atom->max_match() == 0) {
     209             :             // Guaranteed to only match an empty string.
     210             : #ifdef DEBUG
     211           0 :             last_added_ = ADD_TERM;
     212             : #endif
     213           0 :             if (min == 0)
     214           0 :                 return;
     215           0 :             terms_.Add(alloc, atom);
     216           0 :             return;
     217             :         }
     218             :     } else {
     219             :         // Only call immediately after adding an atom or character!
     220           0 :         MOZ_CRASH("Bad call");
     221             :     }
     222         652 :     terms_.Add(alloc, alloc->newInfallible<RegExpQuantifier>(min, max, quantifier_type, atom));
     223             : #ifdef DEBUG
     224         652 :     last_added_ = ADD_TERM;
     225             : #endif
     226             : }
     227             : 
     228             : // ----------------------------------------------------------------------------
     229             : // RegExpParser
     230             : 
     231             : template <typename CharT>
     232         757 : RegExpParser<CharT>::RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
     233             :                                   const CharT* chars, const CharT* end, bool multiline_mode,
     234             :                                   bool unicode, bool ignore_case)
     235             :   : ts(ts),
     236             :     alloc(alloc),
     237             :     captures_(nullptr),
     238             :     start_(chars),
     239         757 :     next_pos_(start_),
     240             :     end_(end),
     241             :     current_(kEndMarker),
     242             :     capture_count_(0),
     243             :     has_more_(true),
     244             :     multiline_(multiline_mode),
     245             :     unicode_(unicode),
     246             :     ignore_case_(ignore_case),
     247             :     simple_(false),
     248             :     contains_anchor_(false),
     249        1514 :     is_scanned_for_captures_(false)
     250             : {
     251         757 :     Advance();
     252         757 : }
     253             : 
     254             : template <typename CharT>
     255             : void
     256           0 : RegExpParser<CharT>::SyntaxError(unsigned errorNumber, ...)
     257             : {
     258           0 :     ErrorMetadata err;
     259             : 
     260           0 :     ts.fillExcludingContext(&err, ts.currentToken().pos.begin);
     261             : 
     262             :     // For most error reporting, the line of context derives from the token
     263             :     // stream.  So when location information doesn't come from the token
     264             :     // stream, we can't give a line of context.  But here the "line of context"
     265             :     // can be (and is) derived from the pattern text, so we can provide it no
     266             :     // matter if the location is derived from the caller.
     267           0 :     size_t offset = PointerRangeSize(start_, next_pos_ - 1);
     268           0 :     size_t end = PointerRangeSize(start_, end_);
     269             : 
     270             :     const CharT* windowStart = (offset > ErrorMetadata::lineOfContextRadius)
     271           0 :                                ? start_ + (offset - ErrorMetadata::lineOfContextRadius)
     272           0 :                                : start_;
     273             : 
     274           0 :     const CharT* windowEnd = (end - offset > ErrorMetadata::lineOfContextRadius)
     275           0 :                              ? start_ + offset + ErrorMetadata::lineOfContextRadius
     276           0 :                              : end_;
     277             : 
     278           0 :     size_t windowLength = PointerRangeSize(windowStart, windowEnd);
     279           0 :     MOZ_ASSERT(windowLength <= ErrorMetadata::lineOfContextRadius * 2);
     280             : 
     281             :     // Create the windowed string, not including the potential line
     282             :     // terminator.
     283           0 :     StringBuffer windowBuf(ts.context());
     284           0 :     if (!windowBuf.append(windowStart, windowEnd))
     285           0 :         return;
     286             : 
     287             :     // The line of context must be null-terminated, and StringBuffer doesn't
     288             :     // make that happen unless we force it to.
     289           0 :     if (!windowBuf.append('\0'))
     290           0 :         return;
     291             : 
     292           0 :     err.lineOfContext.reset(windowBuf.stealChars());
     293           0 :     if (!err.lineOfContext)
     294           0 :         return;
     295             : 
     296           0 :     err.lineLength = windowLength;
     297           0 :     err.tokenOffset = offset - (windowStart - start_);
     298             : 
     299             :     va_list args;
     300           0 :     va_start(args, errorNumber);
     301             : 
     302           0 :     ReportCompileError(ts.context(), Move(err), nullptr, JSREPORT_ERROR, errorNumber, args);
     303             : 
     304           0 :     va_end(args);
     305             : }
     306             : 
     307             : template <typename CharT>
     308             : RegExpTree*
     309           0 : RegExpParser<CharT>::ReportError(unsigned errorNumber, const char* param /* = nullptr */)
     310             : {
     311           0 :     gc::AutoSuppressGC suppressGC(ts.context());
     312           0 :     SyntaxError(errorNumber, param);
     313           0 :     return nullptr;
     314             : }
     315             : 
     316             : template <typename CharT>
     317             : void
     318       12344 : RegExpParser<CharT>::Advance()
     319             : {
     320       12344 :     if (next_pos_ < end_) {
     321       11587 :         current_ = *next_pos_;
     322       11587 :         next_pos_++;
     323             :     } else {
     324         757 :         current_ = kEndMarker;
     325         757 :         has_more_ = false;
     326             :     }
     327       12344 : }
     328             : 
     329             : // Returns the value (0 .. 15) of a hexadecimal character c.
     330             : // If c is not a legal hexadecimal character, returns a value < 0.
     331             : inline int
     332         638 : HexValue(uint32_t c)
     333             : {
     334         638 :     c -= '0';
     335         638 :     if (static_cast<unsigned>(c) <= 9) return c;
     336         239 :     c = (c | 0x20) - ('a' - '0');  // detect 0x11..0x16 and 0x31..0x36.
     337         239 :     if (static_cast<unsigned>(c) <= 5) return c + 10;
     338           0 :     return -1;
     339             : }
     340             : 
     341             : template <typename CharT>
     342             : widechar
     343           6 : RegExpParser<CharT>::ParseOctalLiteral()
     344             : {
     345           6 :     MOZ_ASSERT('0' <= current() && current() <= '7');
     346             :     // For compatibility with some other browsers (not all), we parse
     347             :     // up to three octal digits with a value below 256.
     348           6 :     widechar value = current() - '0';
     349           6 :     Advance();
     350           6 :     if ('0' <= current() && current() <= '7') {
     351           0 :         value = value * 8 + current() - '0';
     352           0 :         Advance();
     353           0 :         if (value < 32 && '0' <= current() && current() <= '7') {
     354           0 :             value = value * 8 + current() - '0';
     355           0 :             Advance();
     356             :         }
     357             :     }
     358           6 :     return value;
     359             : }
     360             : 
     361             : template <typename CharT>
     362             : bool
     363         167 : RegExpParser<CharT>::ParseHexEscape(int length, widechar* value)
     364             : {
     365         167 :     const CharT* start = position();
     366         167 :     uint32_t val = 0;
     367         167 :     bool done = false;
     368         805 :     for (int i = 0; !done; i++) {
     369         638 :         widechar c = current();
     370         638 :         int d = HexValue(c);
     371         638 :         if (d < 0) {
     372           0 :             Reset(start);
     373           0 :             return false;
     374             :         }
     375         638 :         val = val * 16 + d;
     376         638 :         Advance();
     377         638 :         if (i == length - 1) {
     378         167 :             done = true;
     379             :         }
     380             :     }
     381         167 :     *value = val;
     382         167 :     return true;
     383             : }
     384             : 
     385             : template <typename CharT>
     386             : bool
     387           0 : RegExpParser<CharT>::ParseBracedHexEscape(widechar* value)
     388             : {
     389           0 :     MOZ_ASSERT(current() == '{');
     390           0 :     Advance();
     391             : 
     392           0 :     bool first = true;
     393           0 :     uint32_t code = 0;
     394           0 :     while (true) {
     395           0 :         widechar c = current();
     396           0 :         if (c == kEndMarker) {
     397           0 :             ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
     398           0 :             return false;
     399             :         }
     400           0 :         if (c == '}') {
     401           0 :             if (first) {
     402           0 :                 ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
     403           0 :                 return false;
     404             :             }
     405           0 :             Advance();
     406           0 :             break;
     407             :         }
     408             : 
     409           0 :         int d = HexValue(c);
     410           0 :         if (d < 0) {
     411           0 :             ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
     412           0 :             return false;
     413             :         }
     414           0 :         code = (code << 4) | d;
     415           0 :         if (code > unicode::NonBMPMax) {
     416           0 :             ReportError(JSMSG_UNICODE_OVERFLOW, "regular expression");
     417           0 :             return false;
     418             :         }
     419           0 :         Advance();
     420           0 :         first = false;
     421             :     }
     422             : 
     423           0 :     *value = code;
     424           0 :     return true;
     425             : }
     426             : 
     427             : template <typename CharT>
     428             : bool
     429           0 : RegExpParser<CharT>::ParseTrailSurrogate(widechar* value)
     430             : {
     431           0 :     if (current() != '\\')
     432           0 :         return false;
     433             : 
     434           0 :     const CharT* start = position();
     435           0 :     Advance();
     436           0 :     if (current() != 'u') {
     437           0 :         Reset(start);
     438           0 :         return false;
     439             :     }
     440           0 :     Advance();
     441           0 :     if (!ParseHexEscape(4, value)) {
     442           0 :         Reset(start);
     443           0 :         return false;
     444             :     }
     445           0 :     if (!unicode::IsTrailSurrogate(*value)) {
     446           0 :         Reset(start);
     447           0 :         return false;
     448             :     }
     449           0 :     return true;
     450             : }
     451             : 
     452             : template <typename CharT>
     453             : bool
     454           0 : RegExpParser<CharT>::ParseRawSurrogatePair(char16_t* lead, char16_t* trail)
     455             : {
     456           0 :     widechar c1 = current();
     457           0 :     if (!unicode::IsLeadSurrogate(c1))
     458           0 :         return false;
     459             : 
     460           0 :     const CharT* start = position();
     461           0 :     Advance();
     462           0 :     widechar c2 = current();
     463           0 :     if (!unicode::IsTrailSurrogate(c2)) {
     464           0 :         Reset(start);
     465           0 :         return false;
     466             :     }
     467           0 :     Advance();
     468           0 :     *lead = c1;
     469           0 :     *trail = c2;
     470           0 :     return true;
     471             : }
     472             : 
     473             : static inline RegExpTree*
     474           0 : RangeAtom(LifoAlloc* alloc, char16_t from, char16_t to)
     475             : {
     476           0 :     CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
     477           0 :     ranges->append(CharacterRange::Range(from, to));
     478           0 :     return alloc->newInfallible<RegExpCharacterClass>(ranges, false);
     479             : }
     480             : 
     481             : static inline RegExpTree*
     482           0 : NegativeLookahead(LifoAlloc* alloc, char16_t from, char16_t to)
     483             : {
     484           0 :     return alloc->newInfallible<RegExpLookahead>(RangeAtom(alloc, from, to), false, 0, 0);
     485             : }
     486             : 
     487             : static bool
     488           0 : IsSyntaxCharacter(widechar c)
     489             : {
     490           0 :   switch (c) {
     491             :     case '^':
     492             :     case '$':
     493             :     case '\\':
     494             :     case '.':
     495             :     case '*':
     496             :     case '+':
     497             :     case '?':
     498             :     case '(':
     499             :     case ')':
     500             :     case '[':
     501             :     case ']':
     502             :     case '{':
     503             :     case '}':
     504             :     case '|':
     505             :     case '/':
     506           0 :       return true;
     507             :     default:
     508           0 :       return false;
     509             :   }
     510             : }
     511             : 
     512             : inline bool
     513         324 : IsInRange(int value, int lower_limit, int higher_limit)
     514             : {
     515         324 :     MOZ_ASSERT(lower_limit <= higher_limit);
     516         324 :     return static_cast<unsigned int>(value - lower_limit) <=
     517         324 :            static_cast<unsigned int>(higher_limit - lower_limit);
     518             : }
     519             : 
     520             : inline bool
     521         324 : IsDecimalDigit(widechar c)
     522             : {
     523             :     // ECMA-262, 3rd, 7.8.3 (p 16)
     524         324 :     return IsInRange(c, '0', '9');
     525             : }
     526             : 
     527             : #ifdef DEBUG
     528             : // Currently only used in an assert.kASSERT.
     529             : static bool
     530         362 : IsSpecialClassEscape(widechar c)
     531             : {
     532         362 :   switch (c) {
     533             :     case 'd': case 'D':
     534             :     case 's': case 'S':
     535             :     case 'w': case 'W':
     536           0 :       return true;
     537             :     default:
     538         362 :       return false;
     539             :   }
     540             : }
     541             : #endif
     542             : 
     543             : template <typename CharT>
     544             : bool
     545         362 : RegExpParser<CharT>::ParseClassCharacterEscape(widechar* code)
     546             : {
     547         362 :     MOZ_ASSERT(current() == '\\');
     548         362 :     MOZ_ASSERT(has_next() && !IsSpecialClassEscape(Next()));
     549         362 :     Advance();
     550         362 :     switch (current()) {
     551             :       case 'b':
     552           0 :         Advance();
     553           0 :         *code = '\b';
     554           0 :         return true;
     555             :       // ControlEscape :: one of
     556             :       //   f n r t v
     557             :       case 'f':
     558           4 :         Advance();
     559           4 :         *code = '\f';
     560           4 :         return true;
     561             :       case 'n':
     562           9 :         Advance();
     563           9 :         *code = '\n';
     564           9 :         return true;
     565             :       case 'r':
     566           9 :         Advance();
     567           9 :         *code = '\r';
     568           9 :         return true;
     569             :       case 't':
     570           7 :         Advance();
     571           7 :         *code = '\t';
     572           7 :         return true;
     573             :       case 'v':
     574           0 :         Advance();
     575           0 :         *code = '\v';
     576           0 :         return true;
     577             :       case 'c': {
     578           0 :         widechar controlLetter = Next();
     579           0 :         widechar letter = controlLetter & ~('A' ^ 'a');
     580             :         // For compatibility with JSC, inside a character class
     581             :         // we also accept digits and underscore as control characters,
     582             :         // but only in non-unicode mode
     583           0 :         if ((!unicode_ &&
     584           0 :              ((controlLetter >= '0' && controlLetter <= '9') ||
     585           0 :               controlLetter == '_')) ||
     586           0 :             (letter >= 'A' && letter <= 'Z'))
     587             :         {
     588           0 :             Advance(2);
     589             :             // Control letters mapped to ASCII control characters in the range
     590             :             // 0x00-0x1f.
     591           0 :             *code = controlLetter & 0x1f;
     592           0 :             return true;
     593             :         }
     594           0 :         if (unicode_) {
     595           0 :             ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
     596           0 :             return false;
     597             :         }
     598             :         // We match JSC in reading the backslash as a literal
     599             :         // character instead of as starting an escape.
     600           0 :         *code = '\\';
     601           0 :         return true;
     602             :       }
     603             :       case '0':
     604           4 :         if (unicode_) {
     605           0 :             Advance();
     606           0 :             if (IsDecimalDigit(current()))
     607           0 :                 return ReportError(JSMSG_INVALID_DECIMAL_ESCAPE);
     608           0 :             *code = 0;
     609           0 :             return true;
     610             :         }
     611             :         MOZ_FALLTHROUGH;
     612             :       case '1': case '2': case '3': case '4': case '5': case '6': case '7':
     613           4 :         if (unicode_) {
     614           0 :             ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
     615           0 :             return false;
     616             :         }
     617             :         // For compatibility, outside of unicode mode, we interpret a decimal
     618             :         // escape that isn't a back reference (and therefore either \0 or not
     619             :         // valid according to the specification) as a 1..3 digit octal
     620             :         // character code.
     621           4 :         *code = ParseOctalLiteral();
     622           4 :         return true;
     623             :       case 'x': {
     624           9 :         Advance();
     625             :         widechar value;
     626           9 :         if (ParseHexEscape(2, &value)) {
     627           9 :             *code = value;
     628           9 :             return true;
     629             :         }
     630           0 :         if (unicode_) {
     631           0 :             ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
     632           0 :             return false;
     633             :         }
     634             :         // If \x is not followed by a two-digit hexadecimal, treat it
     635             :         // as an identity escape in non-unicode mode.
     636           0 :         *code = 'x';
     637           0 :         return true;
     638             :       }
     639             :       case 'u': {
     640         147 :         Advance();
     641             :         widechar value;
     642         147 :         if (unicode_) {
     643           0 :             if (current() == '{') {
     644           0 :                 if (!ParseBracedHexEscape(&value))
     645           0 :                     return false;
     646           0 :                 *code = value;
     647           0 :                 return true;
     648             :             }
     649           0 :             if (ParseHexEscape(4, &value)) {
     650           0 :                 if (unicode::IsLeadSurrogate(value)) {
     651             :                     widechar trail;
     652           0 :                     if (ParseTrailSurrogate(&trail)) {
     653           0 :                         *code = unicode::UTF16Decode(value, trail);
     654           0 :                         return true;
     655             :                     }
     656             :                 }
     657           0 :                 *code = value;
     658           0 :                 return true;
     659             :             }
     660           0 :             ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
     661           0 :             return false;
     662             :         }
     663         147 :         if (ParseHexEscape(4, &value)) {
     664         147 :             *code = value;
     665         147 :             return true;
     666             :         }
     667             :         // If \u is not followed by a four-digit or braced hexadecimal, treat it
     668             :         // as an identity escape.
     669           0 :         *code = 'u';
     670           0 :         return true;
     671             :       }
     672             :       default: {
     673             :         // Extended identity escape (non-unicode only). We accept any character
     674             :         // that hasn't been matched by a more specific case, not just the subset
     675             :         // required by the ECMAScript specification.
     676         173 :         widechar result = current();
     677         173 :         if (unicode_ && result != '-' && !IsSyntaxCharacter(result)) {
     678           0 :             ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
     679           0 :             return false;
     680             :         }
     681         173 :         Advance();
     682         173 :         *code = result;
     683         173 :         return true;
     684             :       }
     685             :     }
     686             :     return true;
     687             : }
     688             : 
     689             : class WideCharRange
     690             : {
     691             :   public:
     692             :     WideCharRange()
     693             :       : from_(0), to_(0)
     694             :     {}
     695             : 
     696           0 :     WideCharRange(widechar from, widechar to)
     697           0 :       : from_(from), to_(to)
     698           0 :     {}
     699             : 
     700           0 :     static inline WideCharRange Singleton(widechar value) {
     701           0 :         return WideCharRange(value, value);
     702             :     }
     703           0 :     static inline WideCharRange Range(widechar from, widechar to) {
     704           0 :         MOZ_ASSERT(from <= to);
     705           0 :         return WideCharRange(from, to);
     706             :     }
     707             : 
     708           0 :     bool Contains(widechar i) const { return from_ <= i && i <= to_; }
     709           0 :     widechar from() const { return from_; }
     710           0 :     widechar to() const { return to_; }
     711             : 
     712             :   private:
     713             :     widechar from_;
     714             :     widechar to_;
     715             : };
     716             : 
     717             : typedef InfallibleVector<WideCharRange, 1> WideCharRangeVector;
     718             : 
     719             : static inline CharacterRange
     720           0 : LeadSurrogateRange()
     721             : {
     722           0 :     return CharacterRange::Range(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax);
     723             : }
     724             : 
     725             : static inline CharacterRange
     726           0 : TrailSurrogateRange()
     727             : {
     728           0 :     return CharacterRange::Range(unicode::TrailSurrogateMin, unicode::TrailSurrogateMax);
     729             : }
     730             : 
     731             : static inline WideCharRange
     732           0 : NonBMPRange()
     733             : {
     734           0 :     return WideCharRange::Range(unicode::NonBMPMin, unicode::NonBMPMax);
     735             : }
     736             : 
     737             : static const char16_t kNoCharClass = 0;
     738             : 
     739             : // Adds a character or pre-defined character class to character ranges.
     740             : // If char_class is not kInvalidClass, it's interpreted as a class
     741             : // escape (i.e., 's' means whitespace, from '\s').
     742             : static inline void
     743         615 : AddCharOrEscape(LifoAlloc* alloc,
     744             :                 CharacterRangeVector* ranges,
     745             :                 char16_t char_class,
     746             :                 widechar c)
     747             : {
     748         615 :     if (char_class != kNoCharClass)
     749          37 :         CharacterRange::AddClassEscape(alloc, char_class, ranges);
     750             :     else
     751         578 :         ranges->append(CharacterRange::Singleton(c));
     752         615 : }
     753             : 
     754             : static inline void
     755           0 : AddCharOrEscapeUnicode(LifoAlloc* alloc,
     756             :                        CharacterRangeVector* ranges,
     757             :                        CharacterRangeVector* lead_ranges,
     758             :                        CharacterRangeVector* trail_ranges,
     759             :                        WideCharRangeVector* wide_ranges,
     760             :                        char16_t char_class,
     761             :                        widechar c,
     762             :                        bool ignore_case)
     763             : {
     764           0 :     if (char_class != kNoCharClass) {
     765           0 :         CharacterRange::AddClassEscapeUnicode(alloc, char_class, ranges, ignore_case);
     766           0 :         switch (char_class) {
     767             :           case 'S':
     768             :           case 'W':
     769             :           case 'D':
     770           0 :             lead_ranges->append(LeadSurrogateRange());
     771           0 :             trail_ranges->append(TrailSurrogateRange());
     772           0 :             wide_ranges->append(NonBMPRange());
     773           0 :             break;
     774             :           case '.':
     775           0 :             MOZ_CRASH("Bad char_class!");
     776             :         }
     777           0 :         return;
     778             :     }
     779             : 
     780           0 :     if (unicode::IsLeadSurrogate(c))
     781           0 :         lead_ranges->append(CharacterRange::Singleton(c));
     782           0 :     else if (unicode::IsTrailSurrogate(c))
     783           0 :         trail_ranges->append(CharacterRange::Singleton(c));
     784           0 :     else if (c >= unicode::NonBMPMin)
     785           0 :         wide_ranges->append(WideCharRange::Singleton(c));
     786             :     else
     787           0 :         ranges->append(CharacterRange::Singleton(c));
     788             : }
     789             : 
     790             : static inline void
     791           0 : AddUnicodeRange(LifoAlloc* alloc,
     792             :                 CharacterRangeVector* ranges,
     793             :                 CharacterRangeVector* lead_ranges,
     794             :                 CharacterRangeVector* trail_ranges,
     795             :                 WideCharRangeVector* wide_ranges,
     796             :                 widechar first,
     797             :                 widechar next)
     798             : {
     799           0 :     MOZ_ASSERT(first <= next);
     800           0 :     if (first < unicode::LeadSurrogateMin) {
     801           0 :         if (next < unicode::LeadSurrogateMin) {
     802           0 :             ranges->append(CharacterRange::Range(first, next));
     803           0 :             return;
     804             :         }
     805           0 :         ranges->append(CharacterRange::Range(first, unicode::LeadSurrogateMin - 1));
     806           0 :         first = unicode::LeadSurrogateMin;
     807             :     }
     808           0 :     if (first <= unicode::LeadSurrogateMax) {
     809           0 :         if (next <= unicode::LeadSurrogateMax) {
     810           0 :             lead_ranges->append(CharacterRange::Range(first, next));
     811           0 :             return;
     812             :         }
     813           0 :         lead_ranges->append(CharacterRange::Range(first, unicode::LeadSurrogateMax));
     814           0 :         first = unicode::LeadSurrogateMax + 1;
     815             :     }
     816             :     MOZ_ASSERT(unicode::LeadSurrogateMax + 1 == unicode::TrailSurrogateMin);
     817           0 :     if (first <= unicode::TrailSurrogateMax) {
     818           0 :         if (next <= unicode::TrailSurrogateMax) {
     819           0 :             trail_ranges->append(CharacterRange::Range(first, next));
     820           0 :             return;
     821             :         }
     822           0 :         trail_ranges->append(CharacterRange::Range(first, unicode::TrailSurrogateMax));
     823           0 :         first = unicode::TrailSurrogateMax + 1;
     824             :     }
     825           0 :     if (first <= unicode::UTF16Max) {
     826           0 :         if (next <= unicode::UTF16Max) {
     827           0 :             ranges->append(CharacterRange::Range(first, next));
     828           0 :             return;
     829             :         }
     830           0 :         ranges->append(CharacterRange::Range(first, unicode::UTF16Max));
     831           0 :         first = unicode::NonBMPMin;
     832             :     }
     833             :     MOZ_ASSERT(unicode::UTF16Max + 1 == unicode::NonBMPMin);
     834           0 :     wide_ranges->append(WideCharRange::Range(first, next));
     835             : }
     836             : 
     837             : // Negate a vector of ranges by subtracting its ranges from a range
     838             : // encompassing the full range of possible values.
     839             : template <typename RangeType>
     840             : static inline void
     841           0 : NegateUnicodeRanges(LifoAlloc* alloc, InfallibleVector<RangeType, 1>** ranges,
     842             :                     RangeType full_range)
     843             : {
     844             :     typedef InfallibleVector<RangeType, 1> RangeVector;
     845           0 :     RangeVector* tmp_ranges = alloc->newInfallible<RangeVector>(*alloc);
     846           0 :     tmp_ranges->append(full_range);
     847           0 :     RangeVector* result_ranges = alloc->newInfallible<RangeVector>(*alloc);
     848             : 
     849             :     // Perform the following calculation:
     850             :     //   result_ranges = tmp_ranges - ranges
     851             :     // with the following steps:
     852             :     //   result_ranges = tmp_ranges - ranges[0]
     853             :     //   SWAP(result_ranges, tmp_ranges)
     854             :     //   result_ranges = tmp_ranges - ranges[1]
     855             :     //   SWAP(result_ranges, tmp_ranges)
     856             :     //   ...
     857             :     //   result_ranges = tmp_ranges - ranges[N-1]
     858             :     //   SWAP(result_ranges, tmp_ranges)
     859             :     // The last SWAP is just for simplicity of the loop.
     860           0 :     for (size_t i = 0; i < (*ranges)->length(); i++) {
     861           0 :         result_ranges->clear();
     862             : 
     863           0 :         const RangeType& range = (**ranges)[i];
     864           0 :         for (size_t j = 0; j < tmp_ranges->length(); j++) {
     865           0 :             const RangeType& tmpRange = (*tmp_ranges)[j];
     866           0 :             auto from1 = tmpRange.from();
     867           0 :             auto to1 = tmpRange.to();
     868           0 :             auto from2 = range.from();
     869           0 :             auto to2 = range.to();
     870             : 
     871           0 :             if (from1 < from2) {
     872           0 :                 if (to1 < from2) {
     873           0 :                     result_ranges->append(tmpRange);
     874           0 :                 } else if (to1 <= to2) {
     875           0 :                     result_ranges->append(RangeType::Range(from1, from2 - 1));
     876             :                 } else {
     877           0 :                     result_ranges->append(RangeType::Range(from1, from2 - 1));
     878           0 :                     result_ranges->append(RangeType::Range(to2 + 1, to1));
     879             :                 }
     880           0 :             } else if (from1 <= to2) {
     881           0 :                 if (to1 > to2)
     882           0 :                     result_ranges->append(RangeType::Range(to2 + 1, to1));
     883             :             } else {
     884           0 :                 result_ranges->append(tmpRange);
     885             :             }
     886             :         }
     887             : 
     888           0 :         auto tmp = tmp_ranges;
     889           0 :         tmp_ranges = result_ranges;
     890           0 :         result_ranges = tmp;
     891             :     }
     892             : 
     893             :     // After the loop, result is pointed at by tmp_ranges, instead of
     894             :     // result_ranges.
     895           0 :     *ranges = tmp_ranges;
     896           0 : }
     897             : 
     898             : static bool
     899           0 : WideCharRangesContain(WideCharRangeVector* wide_ranges, widechar c)
     900             : {
     901           0 :     for (size_t i = 0; i < wide_ranges->length(); i++) {
     902           0 :         const WideCharRange& range = (*wide_ranges)[i];
     903           0 :         if (range.Contains(c))
     904           0 :             return true;
     905             :     }
     906           0 :     return false;
     907             : }
     908             : 
     909             : static void
     910           0 : CalculateCaseInsensitiveRanges(LifoAlloc* alloc, widechar from, widechar to, int32_t diff,
     911             :                                WideCharRangeVector* wide_ranges,
     912             :                                WideCharRangeVector** tmp_wide_ranges)
     913             : {
     914           0 :     widechar contains_from = 0;
     915           0 :     widechar contains_to = 0;
     916           0 :     for (widechar c = from; c <= to; c++) {
     917           0 :         if (WideCharRangesContain(wide_ranges, c) &&
     918           0 :             !WideCharRangesContain(wide_ranges, c + diff))
     919             :         {
     920           0 :             if (contains_from == 0)
     921           0 :                 contains_from = c;
     922           0 :             contains_to = c;
     923           0 :         } else if (contains_from != 0) {
     924           0 :             if (!*tmp_wide_ranges)
     925           0 :                 *tmp_wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc);
     926             : 
     927           0 :             (*tmp_wide_ranges)->append(WideCharRange::Range(contains_from + diff,
     928           0 :                                                             contains_to + diff));
     929           0 :             contains_from = 0;
     930             :         }
     931             :     }
     932             : 
     933           0 :     if (contains_from != 0) {
     934           0 :         if (!*tmp_wide_ranges)
     935           0 :             *tmp_wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc);
     936             : 
     937           0 :         (*tmp_wide_ranges)->append(WideCharRange::Range(contains_from + diff,
     938           0 :                                                         contains_to + diff));
     939             :     }
     940           0 : }
     941             : 
     942             : static RegExpTree*
     943           0 : UnicodeRangesAtom(LifoAlloc* alloc,
     944             :                   CharacterRangeVector* ranges,
     945             :                   CharacterRangeVector* lead_ranges,
     946             :                   CharacterRangeVector* trail_ranges,
     947             :                   WideCharRangeVector* wide_ranges,
     948             :                   bool is_negated,
     949             :                   bool ignore_case)
     950             : {
     951             :     // Calculate case folding for non-BMP first and negate the range if needed.
     952           0 :     if (ignore_case) {
     953           0 :         WideCharRangeVector* tmp_wide_ranges = nullptr;
     954             : #define CALL_CALC(FROM, TO, LEAD, TRAIL_FROM, TRAIL_TO, DIFF) \
     955             :         CalculateCaseInsensitiveRanges(alloc, FROM, TO, DIFF, wide_ranges, &tmp_wide_ranges);
     956           0 :         FOR_EACH_NON_BMP_CASE_FOLDING(CALL_CALC)
     957           0 :         FOR_EACH_NON_BMP_REV_CASE_FOLDING(CALL_CALC)
     958             : #undef CALL_CALC
     959             : 
     960           0 :         if (tmp_wide_ranges) {
     961           0 :             for (size_t i = 0; i < tmp_wide_ranges->length(); i++)
     962           0 :                 wide_ranges->append((*tmp_wide_ranges)[i]);
     963             :         }
     964             :     }
     965             : 
     966           0 :     if (is_negated) {
     967           0 :         NegateUnicodeRanges(alloc, &lead_ranges, LeadSurrogateRange());
     968           0 :         NegateUnicodeRanges(alloc, &trail_ranges, TrailSurrogateRange());
     969           0 :         NegateUnicodeRanges(alloc, &wide_ranges, NonBMPRange());
     970             :     }
     971             : 
     972           0 :     RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
     973             : 
     974           0 :     bool added = false;
     975             : 
     976           0 :     if (is_negated) {
     977           0 :         ranges->append(LeadSurrogateRange());
     978           0 :         ranges->append(TrailSurrogateRange());
     979             :     }
     980           0 :     if (ranges->length() > 0) {
     981           0 :         builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, is_negated));
     982           0 :         added = true;
     983             :     }
     984             : 
     985           0 :     if (lead_ranges->length() > 0) {
     986           0 :         if (added)
     987           0 :             builder->NewAlternative();
     988           0 :         builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(lead_ranges, false));
     989           0 :         builder->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin,
     990           0 :                                            unicode::TrailSurrogateMax));
     991           0 :         added = true;
     992             :     }
     993             : 
     994           0 :     if (trail_ranges->length() > 0) {
     995           0 :         if (added)
     996           0 :             builder->NewAlternative();
     997           0 :         builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(
     998           0 :             RegExpAssertion::NOT_AFTER_LEAD_SURROGATE));
     999           0 :         builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(trail_ranges, false));
    1000           0 :         added = true;
    1001             :     }
    1002             : 
    1003           0 :     for (size_t i = 0; i < wide_ranges->length(); i++) {
    1004           0 :         if (added)
    1005           0 :             builder->NewAlternative();
    1006             : 
    1007           0 :         const WideCharRange& range = (*wide_ranges)[i];
    1008           0 :         widechar from = range.from();
    1009           0 :         widechar to = range.to();
    1010             :         char16_t from_lead, from_trail;
    1011             :         char16_t to_lead, to_trail;
    1012             : 
    1013           0 :         unicode::UTF16Encode(from, &from_lead, &from_trail);
    1014           0 :         if (from == to) {
    1015           0 :             builder->AddCharacter(from_lead);
    1016           0 :             builder->AddCharacter(from_trail);
    1017             :         } else {
    1018           0 :             unicode::UTF16Encode(to, &to_lead, &to_trail);
    1019           0 :             if (from_lead == to_lead) {
    1020           0 :                 MOZ_ASSERT(from_trail != to_trail);
    1021           0 :                 builder->AddCharacter(from_lead);
    1022           0 :                 builder->AddAtom(RangeAtom(alloc, from_trail, to_trail));
    1023           0 :             } else if (from_trail == unicode::TrailSurrogateMin &&
    1024           0 :                        to_trail == unicode::TrailSurrogateMax)
    1025             :             {
    1026           0 :                 builder->AddAtom(RangeAtom(alloc, from_lead, to_lead));
    1027           0 :                 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin,
    1028           0 :                                            unicode::TrailSurrogateMax));
    1029           0 :             } else if (from_lead + 1 == to_lead) {
    1030           0 :                 builder->AddCharacter(from_lead);
    1031           0 :                 builder->AddAtom(RangeAtom(alloc, from_trail, unicode::TrailSurrogateMax));
    1032             : 
    1033           0 :                 builder->NewAlternative();
    1034             : 
    1035           0 :                 builder->AddCharacter(to_lead);
    1036           0 :                 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, to_trail));
    1037           0 :             } else if (from_lead + 2 == to_lead) {
    1038           0 :                 builder->AddCharacter(from_lead);
    1039           0 :                 builder->AddAtom(RangeAtom(alloc, from_trail, unicode::TrailSurrogateMax));
    1040             : 
    1041           0 :                 builder->NewAlternative();
    1042             : 
    1043           0 :                 builder->AddCharacter(from_lead + 1);
    1044           0 :                 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin,
    1045           0 :                                            unicode::TrailSurrogateMax));
    1046             : 
    1047           0 :                 builder->NewAlternative();
    1048             : 
    1049           0 :                 builder->AddCharacter(to_lead);
    1050           0 :                 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, to_trail));
    1051             :             } else {
    1052           0 :                 builder->AddCharacter(from_lead);
    1053           0 :                 builder->AddAtom(RangeAtom(alloc, from_trail, unicode::TrailSurrogateMax));
    1054             : 
    1055           0 :                 builder->NewAlternative();
    1056             : 
    1057           0 :                 builder->AddAtom(RangeAtom(alloc, from_lead + 1, to_lead - 1));
    1058           0 :                 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin,
    1059           0 :                                            unicode::TrailSurrogateMax));
    1060             : 
    1061           0 :                 builder->NewAlternative();
    1062             : 
    1063           0 :                 builder->AddCharacter(to_lead);
    1064           0 :                 builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, to_trail));
    1065             :             }
    1066             :         }
    1067           0 :         added = true;
    1068             :     }
    1069             : 
    1070           0 :     return builder->ToRegExp();
    1071             : }
    1072             : 
    1073             : template <typename CharT>
    1074             : RegExpTree*
    1075         377 : RegExpParser<CharT>::ParseCharacterClass()
    1076             : {
    1077         377 :     MOZ_ASSERT(current() == '[');
    1078         377 :     Advance();
    1079         377 :     bool is_negated = false;
    1080         377 :     if (current() == '^') {
    1081          91 :         is_negated = true;
    1082          91 :         Advance();
    1083             :     }
    1084         377 :     CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    1085         377 :     CharacterRangeVector* lead_ranges = nullptr;
    1086         377 :     CharacterRangeVector* trail_ranges = nullptr;
    1087         377 :     WideCharRangeVector* wide_ranges = nullptr;
    1088             : 
    1089         377 :     if (unicode_) {
    1090           0 :         lead_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    1091           0 :         trail_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    1092           0 :         wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc);
    1093             :     }
    1094             : 
    1095        2259 :     while (has_more() && current() != ']') {
    1096         946 :         char16_t char_class = kNoCharClass;
    1097         946 :         widechar first = 0;
    1098         946 :         if (!ParseClassAtom(&char_class, &first))
    1099           0 :             return nullptr;
    1100         946 :         if (current() == '-') {
    1101         336 :             Advance();
    1102         336 :             if (current() == kEndMarker) {
    1103             :                 // If we reach the end we break out of the loop and let the
    1104             :                 // following code report an error.
    1105           5 :                 break;
    1106         336 :             } else if (current() == ']') {
    1107           5 :                 if (unicode_) {
    1108           0 :                     AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
    1109           0 :                                            char_class, first, ignore_case_);
    1110             :                 } else {
    1111           5 :                     AddCharOrEscape(alloc, ranges, char_class, first);
    1112             :                 }
    1113           5 :                 ranges->append(CharacterRange::Singleton('-'));
    1114           5 :                 break;
    1115             :             }
    1116         331 :             char16_t char_class_2 = kNoCharClass;
    1117         331 :             widechar next = 0;
    1118         331 :             if (!ParseClassAtom(&char_class_2, &next))
    1119           0 :                 return nullptr;
    1120         331 :             if (char_class != kNoCharClass || char_class_2 != kNoCharClass) {
    1121           0 :                 if (unicode_)
    1122           0 :                     return ReportError(JSMSG_RANGE_WITH_CLASS_ESCAPE);
    1123             : 
    1124             :                 // Either end is an escaped character class. Treat the '-' verbatim.
    1125           0 :                 AddCharOrEscape(alloc, ranges, char_class, first);
    1126           0 :                 ranges->append(CharacterRange::Singleton('-'));
    1127           0 :                 AddCharOrEscape(alloc, ranges, char_class_2, next);
    1128           0 :                 continue;
    1129             :             }
    1130         331 :             if (first > next)
    1131           0 :                 return ReportError(JSMSG_BAD_CLASS_RANGE);
    1132         331 :             if (unicode_)
    1133           0 :                 AddUnicodeRange(alloc, ranges, lead_ranges, trail_ranges,wide_ranges, first, next);
    1134             :             else
    1135         331 :                 ranges->append(CharacterRange::Range(first, next));
    1136             :         } else {
    1137         610 :             if (unicode_) {
    1138           0 :                 AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges,
    1139           0 :                                        char_class, first, ignore_case_);
    1140             :             } else {
    1141         610 :                 AddCharOrEscape(alloc, ranges, char_class, first);
    1142             :             }
    1143             :         }
    1144             :     }
    1145         377 :     if (!has_more())
    1146           0 :         return ReportError(JSMSG_UNTERM_CLASS);
    1147         377 :     Advance();
    1148         377 :     if (!unicode_) {
    1149         377 :         if (ranges->length() == 0) {
    1150           0 :             ranges->append(CharacterRange::Everything());
    1151           0 :             is_negated = !is_negated;
    1152             :         }
    1153         377 :         return alloc->newInfallible<RegExpCharacterClass>(ranges, is_negated);
    1154             :     }
    1155             : 
    1156           0 :     if (!is_negated && ranges->length() == 0 && lead_ranges->length() == 0 &&
    1157           0 :         trail_ranges->length() == 0 && wide_ranges->length() == 0)
    1158             :     {
    1159           0 :         ranges->append(CharacterRange::Everything());
    1160           0 :         return alloc->newInfallible<RegExpCharacterClass>(ranges, true);
    1161             :     }
    1162             : 
    1163           0 :     return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, is_negated,
    1164           0 :                              ignore_case_);
    1165             : }
    1166             : 
    1167             : template <typename CharT>
    1168             : bool
    1169        1277 : RegExpParser<CharT>::ParseClassAtom(char16_t* char_class, widechar* value)
    1170             : {
    1171        1277 :     MOZ_ASSERT(*char_class == kNoCharClass);
    1172        1277 :     widechar first = current();
    1173        1277 :     if (first == '\\') {
    1174         399 :         switch (Next()) {
    1175             :           case 'w': case 'W': case 'd': case 'D': case 's': case 'S': {
    1176          37 :             *char_class = Next();
    1177          37 :             Advance(2);
    1178          37 :             return true;
    1179             :           }
    1180             :           case kEndMarker:
    1181           0 :             return ReportError(JSMSG_ESCAPE_AT_END_OF_REGEXP);
    1182             :           default:
    1183         362 :             if (!ParseClassCharacterEscape(value))
    1184           0 :                 return false;
    1185         362 :             return true;
    1186             :         }
    1187             :     } else {
    1188         878 :         if (unicode_) {
    1189             :             char16_t lead, trail;
    1190           0 :             if (ParseRawSurrogatePair(&lead, &trail)) {
    1191           0 :                 *value = unicode::UTF16Decode(lead, trail);
    1192           0 :                 return true;
    1193             :             }
    1194             :         }
    1195         878 :         Advance();
    1196         878 :         *value = first;
    1197         878 :         return true;
    1198             :     }
    1199             : }
    1200             : 
    1201             : // In order to know whether an escape is a backreference or not we have to scan
    1202             : // the entire regexp and find the number of capturing parentheses.  However we
    1203             : // don't want to scan the regexp twice unless it is necessary.  This mini-parser
    1204             : // is called when needed.  It can see the difference between capturing and
    1205             : // noncapturing parentheses and can skip character classes and backslash-escaped
    1206             : // characters.
    1207             : template <typename CharT>
    1208             : void
    1209           0 : RegExpParser<CharT>::ScanForCaptures()
    1210             : {
    1211             :     // Start with captures started previous to current position
    1212           0 :     int capture_count = captures_started();
    1213             :     // Add count of captures after this position.
    1214             :     widechar n;
    1215           0 :     while ((n = current()) != kEndMarker) {
    1216           0 :         Advance();
    1217           0 :         switch (n) {
    1218             :           case '\\':
    1219           0 :             Advance();
    1220           0 :             break;
    1221             :           case '[': {
    1222             :             widechar c;
    1223           0 :             while ((c = current()) != kEndMarker) {
    1224           0 :                 Advance();
    1225           0 :                 if (c == '\\') {
    1226           0 :                     Advance();
    1227             :                 } else {
    1228           0 :                     if (c == ']') break;
    1229             :                 }
    1230             :             }
    1231           0 :             break;
    1232             :           }
    1233             :           case '(':
    1234           0 :             if (current() != '?') capture_count++;
    1235           0 :             break;
    1236             :         }
    1237             :     }
    1238           0 :     capture_count_ = capture_count;
    1239           0 :     is_scanned_for_captures_ = true;
    1240           0 : }
    1241             : 
    1242             : template <typename CharT>
    1243             : bool
    1244           0 : RegExpParser<CharT>::ParseBackReferenceIndex(int* index_out)
    1245             : {
    1246           0 :     MOZ_ASSERT('\\' == current());
    1247           0 :     MOZ_ASSERT('1' <= Next() && Next() <= '9');
    1248             : 
    1249             :     // Try to parse a decimal literal that is no greater than the total number
    1250             :     // of left capturing parentheses in the input.
    1251           0 :     const CharT* start = position();
    1252           0 :     int value = Next() - '0';
    1253           0 :     Advance(2);
    1254           0 :     while (true) {
    1255           0 :         widechar c = current();
    1256           0 :         if (IsDecimalDigit(c)) {
    1257           0 :             value = 10 * value + (c - '0');
    1258           0 :             if (value > kMaxCaptures) {
    1259           0 :                 Reset(start);
    1260           0 :                 return false;
    1261             :             }
    1262           0 :             Advance();
    1263             :         } else {
    1264           0 :             break;
    1265             :         }
    1266             :     }
    1267           0 :     if (value > captures_started()) {
    1268           0 :         if (!is_scanned_for_captures_) {
    1269           0 :             const CharT* saved_position = position();
    1270           0 :             ScanForCaptures();
    1271           0 :             Reset(saved_position);
    1272             :         }
    1273           0 :         if (value > capture_count_) {
    1274           0 :             Reset(start);
    1275           0 :             return false;
    1276             :         }
    1277             :     }
    1278           0 :     *index_out = value;
    1279           0 :     return true;
    1280             : }
    1281             : 
    1282             : // QuantifierPrefix ::
    1283             : //   { DecimalDigits }
    1284             : //   { DecimalDigits , }
    1285             : //   { DecimalDigits , DecimalDigits }
    1286             : //
    1287             : // Returns true if parsing succeeds, and set the min_out and max_out
    1288             : // values. Values are truncated to RegExpTree::kInfinity if they overflow.
    1289             : template <typename CharT>
    1290             : bool
    1291          94 : RegExpParser<CharT>::ParseIntervalQuantifier(int* min_out, int* max_out)
    1292             : {
    1293          94 :     MOZ_ASSERT(current() == '{');
    1294          94 :     const CharT* start = position();
    1295          94 :     Advance();
    1296          94 :     int min = 0;
    1297          94 :     if (!IsDecimalDigit(current())) {
    1298           0 :         Reset(start);
    1299           0 :         return false;
    1300             :     }
    1301         116 :     while (IsDecimalDigit(current())) {
    1302         116 :         int next = current() - '0';
    1303         116 :         if (min > (RegExpTree::kInfinity - next) / 10) {
    1304             :             // Overflow. Skip past remaining decimal digits and return -1.
    1305           0 :             do {
    1306           0 :                 Advance();
    1307           0 :             } while (IsDecimalDigit(current()));
    1308           0 :             min = RegExpTree::kInfinity;
    1309           0 :             break;
    1310             :         }
    1311         116 :         min = 10 * min + next;
    1312         116 :         Advance();
    1313             :     }
    1314          94 :     int max = 0;
    1315          94 :     if (current() == '}') {
    1316          74 :         max = min;
    1317          74 :         Advance();
    1318          20 :     } else if (current() == ',') {
    1319          20 :         Advance();
    1320          20 :         if (current() == '}') {
    1321          10 :             max = RegExpTree::kInfinity;
    1322          10 :             Advance();
    1323             :         } else {
    1324          10 :             while (IsDecimalDigit(current())) {
    1325          10 :                 int next = current() - '0';
    1326          10 :                 if (max > (RegExpTree::kInfinity - next) / 10) {
    1327           0 :                     do {
    1328           0 :                         Advance();
    1329           0 :                     } while (IsDecimalDigit(current()));
    1330           0 :                     max = RegExpTree::kInfinity;
    1331           0 :                     break;
    1332             :                 }
    1333          10 :                 max = 10 * max + next;
    1334          10 :                 Advance();
    1335             :             }
    1336          10 :             if (current() != '}') {
    1337           0 :                 Reset(start);
    1338           0 :                 return false;
    1339             :             }
    1340          10 :             Advance();
    1341             :         }
    1342             :     } else {
    1343           0 :         Reset(start);
    1344           0 :         return false;
    1345             :     }
    1346          94 :     *min_out = min;
    1347          94 :     *max_out = max;
    1348          94 :     return true;
    1349             : }
    1350             : 
    1351             : // Pattern ::
    1352             : //   Disjunction
    1353             : template <typename CharT>
    1354             : RegExpTree*
    1355         757 : RegExpParser<CharT>::ParsePattern()
    1356             : {
    1357         757 :     RegExpTree* result = ParseDisjunction();
    1358         757 :     MOZ_ASSERT_IF(result, !has_more());
    1359         757 :     return result;
    1360             : }
    1361             : 
    1362             : static inline RegExpTree*
    1363           0 : CaseFoldingSurrogatePairAtom(LifoAlloc* alloc, char16_t lead, char16_t trail, int32_t diff)
    1364             : {
    1365           0 :     RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
    1366             : 
    1367           0 :     builder->AddCharacter(lead);
    1368           0 :     CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    1369           0 :     ranges->append(CharacterRange::Range(trail, trail));
    1370           0 :     ranges->append(CharacterRange::Range(trail + diff, trail + diff));
    1371           0 :     builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, false));
    1372             : 
    1373           0 :     return builder->ToRegExp();
    1374             : }
    1375             : 
    1376             : static inline RegExpTree*
    1377           0 : SurrogatePairAtom(LifoAlloc* alloc, char16_t lead, char16_t trail, bool ignore_case)
    1378             : {
    1379           0 :     if (ignore_case) {
    1380             : #define CALL_ATOM(FROM, TO, LEAD, TRAIL_FROM, TRAIL_TO, DIFF) \
    1381             :         if (lead == LEAD &&trail >= TRAIL_FROM && trail <= TRAIL_TO) \
    1382             :             return CaseFoldingSurrogatePairAtom(alloc, lead, trail, DIFF);
    1383           0 :         FOR_EACH_NON_BMP_CASE_FOLDING(CALL_ATOM)
    1384           0 :         FOR_EACH_NON_BMP_REV_CASE_FOLDING(CALL_ATOM)
    1385             : #undef CALL_ATOM
    1386             :     }
    1387             : 
    1388           0 :     RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
    1389           0 :     builder->AddCharacter(lead);
    1390           0 :     builder->AddCharacter(trail);
    1391           0 :     return builder->ToRegExp();
    1392             : }
    1393             : 
    1394             : static inline RegExpTree*
    1395           0 : LeadSurrogateAtom(LifoAlloc* alloc, char16_t value)
    1396             : {
    1397           0 :     RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
    1398           0 :     builder->AddCharacter(value);
    1399           0 :     builder->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin,
    1400           0 :                                        unicode::TrailSurrogateMax));
    1401           0 :     return builder->ToRegExp();
    1402             : }
    1403             : 
    1404             : static inline RegExpTree*
    1405           0 : TrailSurrogateAtom(LifoAlloc* alloc, char16_t value)
    1406             : {
    1407           0 :     RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
    1408           0 :     builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(
    1409           0 :         RegExpAssertion::NOT_AFTER_LEAD_SURROGATE));
    1410           0 :     builder->AddCharacter(value);
    1411           0 :     return builder->ToRegExp();
    1412             : }
    1413             : 
    1414             : static inline RegExpTree*
    1415           0 : UnicodeEverythingAtom(LifoAlloc* alloc)
    1416             : {
    1417           0 :     RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
    1418             : 
    1419             :     // everything except \x0a, \x0d, \u2028 and \u2029
    1420             : 
    1421           0 :     CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    1422           0 :     AddClassNegated(kLineTerminatorAndSurrogateRanges,
    1423             :                     kLineTerminatorAndSurrogateRangeCount,
    1424           0 :                     ranges);
    1425           0 :     builder->AddAtom(alloc->newInfallible<RegExpCharacterClass>(ranges, false));
    1426             : 
    1427           0 :     builder->NewAlternative();
    1428             : 
    1429           0 :     builder->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin, unicode::LeadSurrogateMax));
    1430           0 :     builder->AddAtom(NegativeLookahead(alloc, unicode::TrailSurrogateMin,
    1431           0 :                                        unicode::TrailSurrogateMax));
    1432             : 
    1433           0 :     builder->NewAlternative();
    1434             : 
    1435           0 :     builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(
    1436           0 :         RegExpAssertion::NOT_AFTER_LEAD_SURROGATE));
    1437           0 :     builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, unicode::TrailSurrogateMax));
    1438             : 
    1439           0 :     builder->NewAlternative();
    1440             : 
    1441           0 :     builder->AddAtom(RangeAtom(alloc, unicode::LeadSurrogateMin, unicode::LeadSurrogateMax));
    1442           0 :     builder->AddAtom(RangeAtom(alloc, unicode::TrailSurrogateMin, unicode::TrailSurrogateMax));
    1443             : 
    1444           0 :     return builder->ToRegExp();
    1445             : }
    1446             : 
    1447             : RegExpTree*
    1448           0 : UnicodeCharacterClassEscapeAtom(LifoAlloc* alloc, char16_t char_class, bool ignore_case)
    1449             : {
    1450           0 :     CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    1451           0 :     CharacterRangeVector* lead_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    1452           0 :     CharacterRangeVector* trail_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    1453           0 :     WideCharRangeVector* wide_ranges = alloc->newInfallible<WideCharRangeVector>(*alloc);
    1454           0 :     AddCharOrEscapeUnicode(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, char_class, 0,
    1455           0 :                            ignore_case);
    1456             : 
    1457           0 :     return UnicodeRangesAtom(alloc, ranges, lead_ranges, trail_ranges, wide_ranges, false, false);
    1458             : }
    1459             : 
    1460             : static inline RegExpTree*
    1461           0 : UnicodeBackReferenceAtom(LifoAlloc* alloc, RegExpTree* atom)
    1462             : {
    1463             :     // If a back reference has a standalone lead surrogate as its last
    1464             :     // character, then that lead surrogate shouldn't match lead surrogates that
    1465             :     // are paired with a corresponding trail surrogate.
    1466           0 :     RegExpBuilder* builder = alloc->newInfallible<RegExpBuilder>(alloc);
    1467             : 
    1468           0 :     builder->AddAtom(atom);
    1469           0 :     builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(
    1470           0 :         RegExpAssertion::NOT_IN_SURROGATE_PAIR));
    1471             : 
    1472           0 :     return builder->ToRegExp();
    1473             : }
    1474             : 
    1475             : // Disjunction ::
    1476             : //   Alternative
    1477             : //   Alternative | Disjunction
    1478             : // Alternative ::
    1479             : //   [empty]
    1480             : //   Term Alternative
    1481             : // Term ::
    1482             : //   Assertion
    1483             : //   Atom
    1484             : //   Atom Quantifier
    1485             : template <typename CharT>
    1486             : RegExpTree*
    1487         757 : RegExpParser<CharT>::ParseDisjunction()
    1488             : {
    1489             :     // Used to store current state while parsing subexpressions.
    1490         757 :     RegExpParserState initial_state(alloc, nullptr, INITIAL, 0);
    1491         757 :     RegExpParserState* stored_state = &initial_state;
    1492             :     // Cache the builder in a local variable for quick access.
    1493         757 :     RegExpBuilder* builder = initial_state.builder();
    1494        7501 :     while (true) {
    1495        8258 :         switch (current()) {
    1496             :           case kEndMarker:
    1497         757 :             if (stored_state->IsSubexpression()) {
    1498             :                 // Inside a parenthesized group when hitting end of input.
    1499         757 :                 return ReportError(JSMSG_MISSING_PAREN);
    1500             :             }
    1501         757 :             MOZ_ASSERT(INITIAL == stored_state->group_type());
    1502             :             // Parsing completed successfully.
    1503         757 :             return builder->ToRegExp();
    1504             :           case ')': {
    1505         364 :             if (!stored_state->IsSubexpression())
    1506           0 :                 return ReportError(JSMSG_UNMATCHED_RIGHT_PAREN);
    1507         364 :             MOZ_ASSERT(INITIAL != stored_state->group_type());
    1508             : 
    1509         364 :             Advance();
    1510             :             // End disjunction parsing and convert builder content to new single
    1511             :             // regexp atom.
    1512         364 :             RegExpTree* body = builder->ToRegExp();
    1513             : 
    1514         364 :             int end_capture_index = captures_started();
    1515             : 
    1516         364 :             int capture_index = stored_state->capture_index();
    1517         364 :             SubexpressionType group_type = stored_state->group_type();
    1518             : 
    1519             :             // Restore previous state.
    1520         364 :             stored_state = stored_state->previous_state();
    1521         364 :             builder = stored_state->builder();
    1522             : 
    1523             :             // Build result of subexpression.
    1524         364 :             if (group_type == CAPTURE) {
    1525         271 :                 RegExpCapture* capture = alloc->newInfallible<RegExpCapture>(body, capture_index);
    1526         271 :                 (*captures_)[capture_index - 1] = capture;
    1527         271 :                 body = capture;
    1528          93 :             } else if (group_type != GROUPING) {
    1529          27 :                 MOZ_ASSERT(group_type == POSITIVE_LOOKAHEAD ||
    1530             :                            group_type == NEGATIVE_LOOKAHEAD);
    1531          27 :                 bool is_positive = (group_type == POSITIVE_LOOKAHEAD);
    1532          27 :                 body = alloc->newInfallible<RegExpLookahead>(body,
    1533             :                                                    is_positive,
    1534          54 :                                                    end_capture_index - capture_index,
    1535             :                                                    capture_index);
    1536             :             }
    1537         364 :             builder->AddAtom(body);
    1538         364 :             if (unicode_ && (group_type == POSITIVE_LOOKAHEAD || group_type == NEGATIVE_LOOKAHEAD))
    1539           0 :                 continue;
    1540             :             // For compatability with JSC and ES3, we allow quantifiers after
    1541             :             // lookaheads, and break in all cases.
    1542         364 :             break;
    1543             :           }
    1544             :           case '|': {
    1545         430 :             Advance();
    1546         430 :             builder->NewAlternative();
    1547         430 :             continue;
    1548             :           }
    1549             :           case '*':
    1550             :           case '+':
    1551             :           case '?':
    1552           0 :             return ReportError(JSMSG_NOTHING_TO_REPEAT);
    1553             :           case '^': {
    1554         259 :             Advance();
    1555         259 :             if (multiline_) {
    1556           0 :                 builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(RegExpAssertion::START_OF_LINE));
    1557             :             } else {
    1558         259 :                 builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(RegExpAssertion::START_OF_INPUT));
    1559         259 :                 set_contains_anchor();
    1560             :             }
    1561         259 :             continue;
    1562             :           }
    1563             :           case '$': {
    1564         212 :             Advance();
    1565             :             RegExpAssertion::AssertionType assertion_type =
    1566         212 :                 multiline_ ? RegExpAssertion::END_OF_LINE :
    1567         212 :                 RegExpAssertion::END_OF_INPUT;
    1568         212 :             builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(assertion_type));
    1569         212 :             continue;
    1570             :           }
    1571             :           case '.': {
    1572         123 :             Advance();
    1573             :             // everything except \x0a, \x0d, \u2028 and \u2029
    1574         123 :             if (unicode_) {
    1575           0 :                 builder->AddAtom(UnicodeEverythingAtom(alloc));
    1576           0 :                 break;
    1577             :             }
    1578         123 :             CharacterRangeVector* ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    1579         123 :             CharacterRange::AddClassEscape(alloc, '.', ranges);
    1580         123 :             RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false);
    1581         123 :             builder->AddAtom(atom);
    1582         123 :             break;
    1583             :           }
    1584             :           case '(': {
    1585         364 :             SubexpressionType subexpr_type = CAPTURE;
    1586         364 :             Advance();
    1587         364 :             if (current() == '?') {
    1588          93 :                 switch (Next()) {
    1589             :                   case ':':
    1590          66 :                     subexpr_type = GROUPING;
    1591          66 :                     break;
    1592             :                   case '=':
    1593          20 :                     subexpr_type = POSITIVE_LOOKAHEAD;
    1594          20 :                     break;
    1595             :                   case '!':
    1596           7 :                     subexpr_type = NEGATIVE_LOOKAHEAD;
    1597           7 :                     break;
    1598             :                   default:
    1599           0 :                     return ReportError(JSMSG_INVALID_GROUP);
    1600             :                 }
    1601          93 :                 Advance(2);
    1602             :             } else {
    1603         271 :                 if (captures_ == nullptr)
    1604         184 :                     captures_ = alloc->newInfallible<RegExpCaptureVector>(*alloc);
    1605         271 :                 if (captures_started() >= kMaxCaptures)
    1606           0 :                     return ReportError(JSMSG_TOO_MANY_PARENS);
    1607         271 :                 captures_->append((RegExpCapture*) nullptr);
    1608             :             }
    1609             :             // Store current state and begin new disjunction parsing.
    1610         364 :             stored_state = alloc->newInfallible<RegExpParserState>(alloc, stored_state, subexpr_type,
    1611         728 :                                                                    captures_started());
    1612         364 :             builder = stored_state->builder();
    1613         364 :             continue;
    1614             :           }
    1615             :           case '[': {
    1616         377 :             RegExpTree* atom = ParseCharacterClass();
    1617         377 :             if (!atom)
    1618           0 :                 return nullptr;
    1619         377 :             builder->AddAtom(atom);
    1620         377 :             break;
    1621             :           }
    1622             :             // Atom ::
    1623             :             //   \ AtomEscape
    1624             :           case '\\':
    1625         599 :             switch (Next()) {
    1626             :               case kEndMarker:
    1627           0 :                 return ReportError(JSMSG_ESCAPE_AT_END_OF_REGEXP);
    1628             :               case 'b':
    1629          18 :                 Advance(2);
    1630          18 :                 builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(RegExpAssertion::BOUNDARY));
    1631          18 :                 continue;
    1632             :               case 'B':
    1633           0 :                 Advance(2);
    1634           0 :                 builder->AddAssertion(alloc->newInfallible<RegExpAssertion>(RegExpAssertion::NON_BOUNDARY));
    1635           0 :                 continue;
    1636             :                 // AtomEscape ::
    1637             :                 //   CharacterClassEscape
    1638             :                 //
    1639             :                 // CharacterClassEscape :: one of
    1640             :                 //   d D s S w W
    1641             :               case 'D': case 'S': case 'W':
    1642          36 :                 if (unicode_) {
    1643           0 :                     Advance();
    1644           0 :                     builder->AddAtom(UnicodeCharacterClassEscapeAtom(alloc, current(),
    1645           0 :                                                                      ignore_case_));
    1646           0 :                     Advance();
    1647           0 :                     break;
    1648             :                 }
    1649             :                 MOZ_FALLTHROUGH;
    1650             :               case 'd': case 's': case 'w': {
    1651         224 :                 widechar c = Next();
    1652         224 :                 Advance(2);
    1653             :                 CharacterRangeVector* ranges =
    1654         224 :                     alloc->newInfallible<CharacterRangeVector>(*alloc);
    1655         224 :                 if (unicode_)
    1656           0 :                     CharacterRange::AddClassEscapeUnicode(alloc, c, ranges, ignore_case_);
    1657             :                 else
    1658         224 :                     CharacterRange::AddClassEscape(alloc, c, ranges);
    1659         224 :                 RegExpTree* atom = alloc->newInfallible<RegExpCharacterClass>(ranges, false);
    1660         224 :                 builder->AddAtom(atom);
    1661         224 :                 break;
    1662             :               }
    1663             :               case '1': case '2': case '3': case '4': case '5': case '6':
    1664             :               case '7': case '8': case '9': {
    1665           0 :                 int index = 0;
    1666           0 :                 if (ParseBackReferenceIndex(&index)) {
    1667           0 :                     RegExpCapture* capture = nullptr;
    1668           0 :                     if (captures_ != nullptr && index <= (int) captures_->length()) {
    1669           0 :                         capture = (*captures_)[index - 1];
    1670             :                     }
    1671           0 :                     if (capture == nullptr) {
    1672           0 :                         builder->AddEmpty();
    1673           0 :                         break;
    1674             :                     }
    1675           0 :                     RegExpTree* atom = alloc->newInfallible<RegExpBackReference>(capture);
    1676           0 :                     if (unicode_)
    1677           0 :                         builder->AddAtom(UnicodeBackReferenceAtom(alloc, atom));
    1678             :                     else
    1679           0 :                         builder->AddAtom(atom);
    1680           0 :                     break;
    1681             :                 }
    1682           0 :                 if (unicode_)
    1683           0 :                     return ReportError(JSMSG_BACK_REF_OUT_OF_RANGE);
    1684           0 :                 widechar first_digit = Next();
    1685           0 :                 if (first_digit == '8' || first_digit == '9') {
    1686             :                     // Treat as identity escape
    1687           0 :                     builder->AddCharacter(first_digit);
    1688           0 :                     Advance(2);
    1689           0 :                     break;
    1690             :                 }
    1691             :                 MOZ_FALLTHROUGH;
    1692             :               }
    1693             :               case '0': {
    1694           2 :                 if (unicode_) {
    1695           0 :                     Advance(2);
    1696           0 :                     if (IsDecimalDigit(current()))
    1697           0 :                         return ReportError(JSMSG_INVALID_DECIMAL_ESCAPE);
    1698           0 :                     builder->AddCharacter(0);
    1699           0 :                     break;
    1700             :                 }
    1701             : 
    1702           2 :                 Advance();
    1703           2 :                 widechar octal = ParseOctalLiteral();
    1704           2 :                 builder->AddCharacter(octal);
    1705           2 :                 break;
    1706             :               }
    1707             :                 // ControlEscape :: one of
    1708             :                 //   f n r t v
    1709             :               case 'f':
    1710           0 :                 Advance(2);
    1711           0 :                 builder->AddCharacter('\f');
    1712           0 :                 break;
    1713             :               case 'n':
    1714          28 :                 Advance(2);
    1715          28 :                 builder->AddCharacter('\n');
    1716          28 :                 break;
    1717             :               case 'r':
    1718           9 :                 Advance(2);
    1719           9 :                 builder->AddCharacter('\r');
    1720           9 :                 break;
    1721             :               case 't':
    1722           4 :                 Advance(2);
    1723           4 :                 builder->AddCharacter('\t');
    1724           4 :                 break;
    1725             :               case 'v':
    1726           0 :                 Advance(2);
    1727           0 :                 builder->AddCharacter('\v');
    1728           0 :                 break;
    1729             :               case 'c': {
    1730           0 :                 Advance();
    1731           0 :                 widechar controlLetter = Next();
    1732             :                 // Special case if it is an ASCII letter.
    1733             :                 // Convert lower case letters to uppercase.
    1734           0 :                 widechar letter = controlLetter & ~('a' ^ 'A');
    1735           0 :                 if (letter < 'A' || 'Z' < letter) {
    1736           0 :                     if (unicode_)
    1737           0 :                         return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
    1738             :                     // controlLetter is not in range 'A'-'Z' or 'a'-'z'.
    1739             :                     // This is outside the specification. We match JSC in
    1740             :                     // reading the backslash as a literal character instead
    1741             :                     // of as starting an escape.
    1742           0 :                     builder->AddCharacter('\\');
    1743             :                 } else {
    1744           0 :                     Advance(2);
    1745           0 :                     builder->AddCharacter(controlLetter & 0x1f);
    1746             :                 }
    1747           0 :                 break;
    1748             :               }
    1749             :               case 'x': {
    1750           6 :                 Advance(2);
    1751             :                 widechar value;
    1752           6 :                 if (ParseHexEscape(2, &value)) {
    1753           6 :                     builder->AddCharacter(value);
    1754             :                 } else {
    1755           0 :                     if (unicode_)
    1756           0 :                         return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
    1757           0 :                     builder->AddCharacter('x');
    1758             :                 }
    1759           6 :                 break;
    1760             :               }
    1761             :               case 'u': {
    1762           5 :                 Advance(2);
    1763             :                 widechar value;
    1764           5 :                 if (unicode_) {
    1765           0 :                     if (current() == '{') {
    1766           0 :                         if (!ParseBracedHexEscape(&value))
    1767           0 :                             return nullptr;
    1768           0 :                         if (unicode::IsLeadSurrogate(value)) {
    1769           0 :                             builder->AddAtom(LeadSurrogateAtom(alloc, value));
    1770           0 :                         } else if (unicode::IsTrailSurrogate(value)) {
    1771           0 :                             builder->AddAtom(TrailSurrogateAtom(alloc, value));
    1772           0 :                         } else if (value >= unicode::NonBMPMin) {
    1773             :                             char16_t lead, trail;
    1774           0 :                             unicode::UTF16Encode(value, &lead, &trail);
    1775           0 :                             builder->AddAtom(SurrogatePairAtom(alloc, lead, trail,
    1776           0 :                                                                ignore_case_));
    1777             :                         } else {
    1778           0 :                             builder->AddCharacter(value);
    1779             :                         }
    1780           0 :                     } else if (ParseHexEscape(4, &value)) {
    1781           0 :                         if (unicode::IsLeadSurrogate(value)) {
    1782             :                             widechar trail;
    1783           0 :                             if (ParseTrailSurrogate(&trail)) {
    1784           0 :                                 builder->AddAtom(SurrogatePairAtom(alloc, value, trail,
    1785           0 :                                                                    ignore_case_));
    1786             :                             } else {
    1787           0 :                                 builder->AddAtom(LeadSurrogateAtom(alloc, value));
    1788             :                             }
    1789           0 :                         } else if (unicode::IsTrailSurrogate(value)) {
    1790           0 :                             builder->AddAtom(TrailSurrogateAtom(alloc, value));
    1791             :                         } else {
    1792           0 :                             builder->AddCharacter(value);
    1793             :                         }
    1794             :                     } else {
    1795           0 :                         return ReportError(JSMSG_INVALID_UNICODE_ESCAPE);
    1796             :                     }
    1797           5 :                     break;
    1798             :                 }
    1799           5 :                 if (ParseHexEscape(4, &value)) {
    1800           5 :                     builder->AddCharacter(value);
    1801             :                 } else {
    1802           0 :                     builder->AddCharacter('u');
    1803             :                 }
    1804           5 :                 break;
    1805             :               }
    1806             :               default:
    1807             :                 // Identity escape.
    1808         303 :                 if (unicode_ && !IsSyntaxCharacter(Next()))
    1809           0 :                     return ReportError(JSMSG_INVALID_IDENTITY_ESCAPE);
    1810         303 :                 builder->AddCharacter(Next());
    1811         303 :                 Advance(2);
    1812         303 :                 break;
    1813             :             }
    1814         581 :             break;
    1815             :           case '{': {
    1816           0 :             if (unicode_)
    1817           0 :                 return ReportError(JSMSG_RAW_BRACE_IN_REGEP);
    1818             :             int dummy;
    1819           0 :             if (ParseIntervalQuantifier(&dummy, &dummy))
    1820           0 :                 return ReportError(JSMSG_NOTHING_TO_REPEAT);
    1821             :             MOZ_FALLTHROUGH;
    1822             :           }
    1823             :           default:
    1824        4773 :             if (unicode_) {
    1825             :                 char16_t lead, trail;
    1826           0 :                 if (ParseRawSurrogatePair(&lead, &trail)) {
    1827           0 :                     builder->AddAtom(SurrogatePairAtom(alloc, lead, trail, ignore_case_));
    1828             :                 } else {
    1829           0 :                     widechar c = current();
    1830           0 :                     if (unicode::IsLeadSurrogate(c))
    1831           0 :                         builder->AddAtom(LeadSurrogateAtom(alloc, c));
    1832           0 :                     else if (unicode::IsTrailSurrogate(c))
    1833           0 :                         builder->AddAtom(TrailSurrogateAtom(alloc, c));
    1834           0 :                     else if (c == ']')
    1835           0 :                         return ReportError(JSMSG_RAW_BRACKET_IN_REGEP);
    1836           0 :                     else if (c == '}')
    1837           0 :                         return ReportError(JSMSG_RAW_BRACE_IN_REGEP);
    1838             :                     else
    1839           0 :                         builder->AddCharacter(c);
    1840           0 :                     Advance();
    1841             :                 }
    1842           0 :                 break;
    1843             :             }
    1844        4773 :             builder->AddCharacter(current());
    1845        4773 :             Advance();
    1846        4773 :             break;
    1847             :         }  // end switch(current())
    1848             : 
    1849             :         int min;
    1850             :         int max;
    1851        6218 :         switch (current()) {
    1852             :             // QuantifierPrefix ::
    1853             :             //   *
    1854             :             //   +
    1855             :             //   ?
    1856             :             //   {
    1857             :           case '*':
    1858         176 :             min = 0;
    1859         176 :             max = RegExpTree::kInfinity;
    1860         176 :             Advance();
    1861         176 :             break;
    1862             :           case '+':
    1863         297 :             min = 1;
    1864         297 :             max = RegExpTree::kInfinity;
    1865         297 :             Advance();
    1866         297 :             break;
    1867             :           case '?':
    1868          85 :             min = 0;
    1869          85 :             max = 1;
    1870          85 :             Advance();
    1871          85 :             break;
    1872             :           case '{':
    1873          94 :             if (ParseIntervalQuantifier(&min, &max)) {
    1874          94 :                 if (max < min)
    1875           0 :                     return ReportError(JSMSG_NUMBERS_OUT_OF_ORDER);
    1876          94 :                 break;
    1877             :             } else {
    1878           0 :                 continue;
    1879             :             }
    1880             :           default:
    1881        5566 :             continue;
    1882             :         }
    1883         652 :         RegExpQuantifier::QuantifierType quantifier_type = RegExpQuantifier::GREEDY;
    1884         652 :         if (current() == '?') {
    1885          18 :             quantifier_type = RegExpQuantifier::NON_GREEDY;
    1886          18 :             Advance();
    1887             :         }
    1888         652 :         builder->AddQuantifierToAtom(min, max, quantifier_type);
    1889             :     }
    1890             : }
    1891             : 
    1892             : template class irregexp::RegExpParser<Latin1Char>;
    1893             : template class irregexp::RegExpParser<char16_t>;
    1894             : 
    1895             : template <typename CharT>
    1896             : static bool
    1897          40 : ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
    1898             :              bool multiline, bool match_only, bool unicode, bool ignore_case,
    1899             :              bool global, bool sticky, RegExpCompileData* data)
    1900             : {
    1901             :     // We shouldn't strip pattern for exec, or test with global/sticky,
    1902             :     // to reflect correct match position and lastIndex.
    1903          40 :     if (match_only && !global && !sticky) {
    1904             :         // Try to strip a leading '.*' from the RegExp, but only if it is not
    1905             :         // followed by a '?' (which will affect how the .* is parsed). This
    1906             :         // pattern will affect the captures produced by the RegExp, but not
    1907             :         // whether there is a match or not.
    1908          18 :         if (length >= 3 && chars[0] == '.' && chars[1] == '*' && chars[2] != '?') {
    1909           0 :             chars += 2;
    1910           0 :             length -= 2;
    1911             :         }
    1912             : 
    1913             :         // Try to strip a trailing '.*' from the RegExp, which as above will
    1914             :         // affect the captures but not whether there is a match. Only do this
    1915             :         // when there are no other meta characters in the RegExp, so that we
    1916             :         // are sure this will not affect how the RegExp is parsed.
    1917          37 :         if (length >= 3 && !HasRegExpMetaChars(chars, length - 2) &&
    1918          19 :             chars[length - 2] == '.' && chars[length - 1] == '*')
    1919             :         {
    1920           0 :             length -= 2;
    1921             :         }
    1922             :     }
    1923             : 
    1924          40 :     RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, multiline, unicode, ignore_case);
    1925          40 :     data->tree = parser.ParsePattern();
    1926          40 :     if (!data->tree)
    1927           0 :         return false;
    1928             : 
    1929          40 :     data->simple = parser.simple();
    1930          40 :     data->contains_anchor = parser.contains_anchor();
    1931          40 :     data->capture_count = parser.captures_started();
    1932          40 :     return true;
    1933             : }
    1934             : 
    1935             : bool
    1936          40 : irregexp::ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
    1937             :                        bool multiline, bool match_only, bool unicode, bool ignore_case,
    1938             :                        bool global, bool sticky, RegExpCompileData* data)
    1939             : {
    1940          80 :     JS::AutoCheckCannotGC nogc;
    1941          40 :     return str->hasLatin1Chars()
    1942          40 :            ? ::ParsePattern(ts, alloc, str->latin1Chars(nogc), str->length(),
    1943             :                             multiline, match_only, unicode, ignore_case, global, sticky, data)
    1944           0 :            : ::ParsePattern(ts, alloc, str->twoByteChars(nogc), str->length(),
    1945          80 :                             multiline, match_only, unicode, ignore_case, global, sticky, data);
    1946             : }
    1947             : 
    1948             : template <typename CharT>
    1949             : static bool
    1950         717 : ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, const CharT* chars, size_t length,
    1951             :                    bool unicode)
    1952             : {
    1953        1434 :     LifoAllocScope scope(&alloc);
    1954             : 
    1955         717 :     RegExpParser<CharT> parser(ts, &alloc, chars, chars + length, false, unicode, false);
    1956        1434 :     return parser.ParsePattern() != nullptr;
    1957             : }
    1958             : 
    1959             : bool
    1960         676 : irregexp::ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
    1961             :                              bool unicode)
    1962             : {
    1963        1352 :     JS::AutoCheckCannotGC nogc;
    1964         676 :     return str->hasLatin1Chars()
    1965         676 :            ? ::ParsePatternSyntax(ts, alloc, str->latin1Chars(nogc), str->length(), unicode)
    1966        1352 :            : ::ParsePatternSyntax(ts, alloc, str->twoByteChars(nogc), str->length(), unicode);
    1967             : }
    1968             : 
    1969             : bool
    1970          41 : irregexp::ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc,
    1971             :                              const mozilla::Range<const char16_t> chars, bool unicode)
    1972             : {
    1973          41 :     return ::ParsePatternSyntax(ts, alloc, chars.begin().get(), chars.length(), unicode);
    1974             : }

Generated by: LCOV version 1.13