LCOV - code coverage report
Current view: top level - js/src/irregexp - RegExpEngine.cpp (source / functions) Hit Total Coverage
Test: output.info Lines: 1970 2462 80.0 %
Date: 2017-07-14 16:53:18 Functions: 180 198 90.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
       2             :  * vim: set ts=8 sts=4 et sw=4 tw=99: */
       3             : 
       4             : // Copyright 2012 the V8 project authors. All rights reserved.
       5             : // Redistribution and use in source and binary forms, with or without
       6             : // modification, are permitted provided that the following conditions are
       7             : // met:
       8             : //
       9             : //     * Redistributions of source code must retain the above copyright
      10             : //       notice, this list of conditions and the following disclaimer.
      11             : //     * Redistributions in binary form must reproduce the above
      12             : //       copyright notice, this list of conditions and the following
      13             : //       disclaimer in the documentation and/or other materials provided
      14             : //       with the distribution.
      15             : //     * Neither the name of Google Inc. nor the names of its
      16             : //       contributors may be used to endorse or promote products derived
      17             : //       from this software without specific prior written permission.
      18             : //
      19             : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      20             : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      21             : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      22             : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
      23             : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
      24             : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
      25             : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      26             : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      27             : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      28             : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
      29             : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      30             : 
      31             : #include "irregexp/RegExpEngine.h"
      32             : 
      33             : #include "irregexp/NativeRegExpMacroAssembler.h"
      34             : #include "irregexp/RegExpCharacters.h"
      35             : #include "irregexp/RegExpMacroAssembler.h"
      36             : #include "jit/ExecutableAllocator.h"
      37             : #include "jit/JitCommon.h"
      38             : 
      39             : #include "irregexp/RegExpCharacters-inl.h"
      40             : 
      41             : using namespace js;
      42             : using namespace js::irregexp;
      43             : 
      44             : using mozilla::ArrayLength;
      45             : using mozilla::DebugOnly;
      46             : using mozilla::Maybe;
      47             : 
      48             : #define DEFINE_ACCEPT(Type)                                          \
      49             :     void Type##Node::Accept(NodeVisitor* visitor) {                  \
      50             :         visitor->Visit##Type(this);                                  \
      51             :     }
      52         501 : FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
      53             : #undef DEFINE_ACCEPT
      54             : 
      55          56 : void LoopChoiceNode::Accept(NodeVisitor* visitor) {
      56          56 :     visitor->VisitLoopChoice(this);
      57          56 : }
      58             : 
      59             : static const int kMaxLookaheadForBoyerMoore = 8;
      60             : 
      61         566 : RegExpNode::RegExpNode(LifoAlloc* alloc)
      62         566 :   : replacement_(nullptr), trace_count_(0), alloc_(alloc)
      63             : {
      64         566 :     bm_info_[0] = bm_info_[1] = nullptr;
      65         566 : }
      66             : 
      67             : static const int kMaxOneByteCharCode = 0xff;
      68             : static const int kMaxUtf16CodeUnit = 0xffff;
      69             : 
      70             : static char16_t
      71         599 : MaximumCharacter(bool latin1)
      72             : {
      73         599 :     return latin1 ? kMaxOneByteCharCode : kMaxUtf16CodeUnit;
      74             : }
      75             : 
      76             : static void
      77         225 : AddClass(const int* elmv, int elmc,
      78             :          CharacterRangeVector* ranges)
      79             : {
      80         225 :     elmc--;
      81         225 :     MOZ_ASSERT(elmv[elmc] == 0x10000);
      82        1629 :     for (int i = 0; i < elmc; i += 2) {
      83        1404 :         MOZ_ASSERT(elmv[i] < elmv[i + 1]);
      84        1404 :         ranges->append(CharacterRange(elmv[i], elmv[i + 1] - 1));
      85             :     }
      86         225 : }
      87             : 
      88             : void
      89         159 : js::irregexp::AddClassNegated(const int* elmv,
      90             :                               int elmc,
      91             :                               CharacterRangeVector* ranges)
      92             : {
      93         159 :     elmc--;
      94         159 :     MOZ_ASSERT(elmv[elmc] == 0x10000);
      95         159 :     MOZ_ASSERT(elmv[0] != 0x0000);
      96         159 :     MOZ_ASSERT(elmv[elmc-1] != kMaxUtf16CodeUnit);
      97         159 :     char16_t last = 0x0000;
      98         816 :     for (int i = 0; i < elmc; i += 2) {
      99         657 :         MOZ_ASSERT(last <= elmv[i] - 1);
     100         657 :         MOZ_ASSERT(elmv[i] < elmv[i + 1]);
     101         657 :         ranges->append(CharacterRange(last, elmv[i] - 1));
     102         657 :         last = elmv[i + 1];
     103             :     }
     104         159 :     ranges->append(CharacterRange(last, kMaxUtf16CodeUnit));
     105         159 : }
     106             : 
     107             : void
     108         402 : CharacterRange::AddClassEscape(LifoAlloc* alloc, char16_t type,
     109             :                                CharacterRangeVector* ranges)
     110             : {
     111         402 :     switch (type) {
     112             :       case 's':
     113         124 :         AddClass(kSpaceRanges, kSpaceRangeCount, ranges);
     114         124 :         break;
     115             :       case 'S':
     116          24 :         AddClassNegated(kSpaceRanges, kSpaceRangeCount, ranges);
     117          24 :         break;
     118             :       case 'w':
     119          21 :         AddClass(kWordRanges, kWordRangeCount, ranges);
     120          21 :         break;
     121             :       case 'W':
     122          12 :         AddClassNegated(kWordRanges, kWordRangeCount, ranges);
     123          12 :         break;
     124             :       case 'd':
     125          80 :         AddClass(kDigitRanges, kDigitRangeCount, ranges);
     126          80 :         break;
     127             :       case 'D':
     128           0 :         AddClassNegated(kDigitRanges, kDigitRangeCount, ranges);
     129           0 :         break;
     130             :       case '.':
     131         123 :         AddClassNegated(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges);
     132         123 :         break;
     133             :         // This is not a character range as defined by the spec but a
     134             :         // convenient shorthand for a character class that matches any
     135             :         // character.
     136             :       case '*':
     137          18 :         ranges->append(CharacterRange::Everything());
     138          18 :         break;
     139             :         // This is the set of characters matched by the $ and ^ symbols
     140             :         // in multiline mode.
     141             :       case 'n':
     142           0 :         AddClass(kLineTerminatorRanges, kLineTerminatorRangeCount, ranges);
     143           0 :         break;
     144             :       default:
     145           0 :         MOZ_CRASH("Bad character class escape");
     146             :     }
     147         402 : }
     148             : 
     149             : // Add class escape, excluding surrogate pair range.
     150             : void
     151           0 : CharacterRange::AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type,
     152             :                                       CharacterRangeVector* ranges, bool ignore_case)
     153             : {
     154           0 :     switch (type) {
     155             :       case 's':
     156             :       case 'd':
     157           0 :         return AddClassEscape(alloc, type, ranges);
     158             :         break;
     159             :       case 'S':
     160           0 :         AddClassNegated(kSpaceAndSurrogateRanges, kSpaceAndSurrogateRangeCount, ranges);
     161           0 :         break;
     162             :       case 'w':
     163           0 :         if (ignore_case)
     164           0 :             AddClass(kIgnoreCaseWordRanges, kIgnoreCaseWordRangeCount, ranges);
     165             :         else
     166           0 :             AddClassEscape(alloc, type, ranges);
     167           0 :         break;
     168             :       case 'W':
     169           0 :         if (ignore_case) {
     170             :             AddClass(kNegatedIgnoreCaseWordAndSurrogateRanges,
     171           0 :                      kNegatedIgnoreCaseWordAndSurrogateRangeCount, ranges);
     172             :         } else {
     173           0 :             AddClassNegated(kWordAndSurrogateRanges, kWordAndSurrogateRangeCount, ranges);
     174             :         }
     175           0 :         break;
     176             :       case 'D':
     177           0 :         AddClassNegated(kDigitAndSurrogateRanges, kDigitAndSurrogateRangeCount, ranges);
     178           0 :         break;
     179             :       default:
     180           0 :         MOZ_CRASH("Bad type!");
     181             :     }
     182             : }
     183             : 
     184             : static bool
     185           0 : RangesContainLatin1Equivalents(const CharacterRangeVector& ranges, bool unicode)
     186             : {
     187           0 :     for (size_t i = 0; i < ranges.length(); i++) {
     188             :         // TODO(dcarney): this could be a lot more efficient.
     189           0 :         if (RangeContainsLatin1Equivalents(ranges[i], unicode))
     190           0 :             return true;
     191             :     }
     192           0 :     return false;
     193             : }
     194             : 
     195             : static const size_t kEcma262UnCanonicalizeMaxWidth = 4;
     196             : 
     197             : // Returns the number of characters in the equivalence class, omitting those
     198             : // that cannot occur in the source string if it is a one byte string.
     199             : static MOZ_ALWAYS_INLINE int
     200         562 : GetCaseIndependentLetters(char16_t character,
     201             :                           bool latin1_subject,
     202             :                           bool unicode,
     203             :                           const char16_t* choices,
     204             :                           size_t choices_length,
     205             :                           char16_t* letters)
     206             : {
     207         562 :     size_t count = 0;
     208        3372 :     for (size_t i = 0; i < choices_length; i++) {
     209        2810 :         char16_t c = choices[i];
     210             : 
     211             :         // Skip characters that can't appear in one byte strings.
     212        2810 :         if (!unicode && latin1_subject && c > kMaxOneByteCharCode)
     213           0 :             continue;
     214             : 
     215             :         // Watch for duplicates.
     216        2810 :         bool found = false;
     217        3212 :         for (size_t j = 0; j < count; j++) {
     218        2248 :             if (letters[j] == c) {
     219        1846 :                 found = true;
     220        1846 :                 break;
     221             :             }
     222             :         }
     223        2810 :         if (found)
     224        1846 :             continue;
     225             : 
     226         964 :         letters[count++] = c;
     227             :     }
     228             : 
     229         562 :     return count;
     230             : }
     231             : 
     232             : static int
     233         562 : GetCaseIndependentLetters(char16_t character,
     234             :                           bool latin1_subject,
     235             :                           bool unicode,
     236             :                           char16_t* letters)
     237             : {
     238         562 :     if (unicode) {
     239             :         const char16_t choices[] = {
     240             :             character,
     241           0 :             unicode::FoldCase(character),
     242           0 :             unicode::ReverseFoldCase1(character),
     243           0 :             unicode::ReverseFoldCase2(character),
     244           0 :             unicode::ReverseFoldCase3(character),
     245           0 :         };
     246           0 :         return GetCaseIndependentLetters(character, latin1_subject, unicode,
     247           0 :                                          choices, ArrayLength(choices), letters);
     248             :     }
     249             : 
     250         562 :     char16_t upper = unicode::ToUpperCase(character);
     251         562 :     unicode::CodepointsWithSameUpperCase others(character);
     252         562 :     char16_t other1 = others.other1();
     253         562 :     char16_t other2 = others.other2();
     254         562 :     char16_t other3 = others.other3();
     255             : 
     256             :     // ES 2017 draft 996af87b7072b3c3dd2b1def856c66f456102215 21.2.4.2
     257             :     // step 3.g.
     258             :     // The standard requires that non-ASCII characters cannot have ASCII
     259             :     // character codes in their equivalence class, even though this
     260             :     // situation occurs multiple times in the Unicode tables.
     261             :     static const unsigned kMaxAsciiCharCode = 127;
     262         562 :     if (upper <= kMaxAsciiCharCode) {
     263         562 :         if (character > kMaxAsciiCharCode) {
     264             :             // If Canonicalize(character) == character, all other characters
     265             :             // should be ignored.
     266           0 :             return GetCaseIndependentLetters(character, latin1_subject, unicode,
     267           0 :                                              &character, 1, letters);
     268             :         }
     269             : 
     270         562 :         if (other1 > kMaxAsciiCharCode)
     271           0 :             other1 = character;
     272         562 :         if (other2 > kMaxAsciiCharCode)
     273          59 :             other2 = character;
     274         562 :         if (other3 > kMaxAsciiCharCode)
     275           0 :             other3 = character;
     276             :     }
     277             : 
     278             :     const char16_t choices[] = {
     279             :         character,
     280             :         upper,
     281             :         other1,
     282             :         other2,
     283             :         other3
     284         562 :     };
     285         562 :     return GetCaseIndependentLetters(character, latin1_subject, unicode,
     286         562 :                                      choices, ArrayLength(choices), letters);
     287             : }
     288             : 
     289             : void
     290          25 : CharacterRange::AddCaseEquivalents(bool is_latin1, bool unicode, CharacterRangeVector* ranges)
     291             : {
     292          25 :     char16_t bottom = from();
     293          25 :     char16_t top = to();
     294             : 
     295          25 :     if (is_latin1 && !RangeContainsLatin1Equivalents(*this, unicode)) {
     296          15 :         if (bottom > kMaxOneByteCharCode)
     297           0 :             return;
     298          15 :         if (top > kMaxOneByteCharCode)
     299           0 :             top = kMaxOneByteCharCode;
     300             :     } else {
     301             :         // Nothing to do for surrogates.
     302          10 :         if (bottom >= unicode::LeadSurrogateMin && top <= unicode::TrailSurrogateMax)
     303           0 :             return;
     304             :     }
     305             : 
     306         190 :     for (char16_t c = bottom;; c++) {
     307             :         char16_t chars[kEcma262UnCanonicalizeMaxWidth];
     308         190 :         size_t length = GetCaseIndependentLetters(c, is_latin1, unicode, chars);
     309             : 
     310         488 :         for (size_t i = 0; i < length; i++) {
     311         298 :             char16_t other = chars[i];
     312         298 :             if (other == c)
     313         190 :                 continue;
     314             : 
     315             :             // Try to combine with an existing range.
     316         108 :             bool found = false;
     317         454 :             for (size_t i = 0; i < ranges->length(); i++) {
     318         448 :                 CharacterRange& range = (*ranges)[i];
     319         448 :                 if (range.Contains(other)) {
     320          52 :                     found = true;
     321          52 :                     break;
     322         396 :                 } else if (other == range.from() - 1) {
     323           0 :                     range.set_from(other);
     324           0 :                     found = true;
     325           0 :                     break;
     326         396 :                 } else if (other == range.to() + 1) {
     327          50 :                     range.set_to(other);
     328          50 :                     found = true;
     329          50 :                     break;
     330             :                 }
     331             :             }
     332             : 
     333         108 :             if (!found)
     334           6 :                 ranges->append(CharacterRange::Singleton(other));
     335             :         }
     336             : 
     337         190 :         if (c == top)
     338          25 :             break;
     339         165 :     }
     340             : }
     341             : 
     342             : static bool
     343         208 : CompareInverseRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length)
     344             : {
     345         208 :     length--;  // Remove final 0x10000.
     346         208 :     MOZ_ASSERT(special_class[length] == 0x10000);
     347         208 :     MOZ_ASSERT(ranges.length() != 0);
     348         208 :     MOZ_ASSERT(length != 0);
     349         208 :     MOZ_ASSERT(special_class[0] != 0);
     350         208 :     if (ranges.length() != (length >> 1) + 1)
     351         192 :         return false;
     352          16 :     CharacterRange range = ranges[0];
     353          16 :     if (range.from() != 0)
     354           7 :         return false;
     355          40 :     for (size_t i = 0; i < length; i += 2) {
     356          32 :         if (special_class[i] != (range.to() + 1))
     357           1 :             return false;
     358          31 :         range = ranges[(i >> 1) + 1];
     359          31 :         if (special_class[i+1] != range.from())
     360           0 :             return false;
     361             :     }
     362           8 :     if (range.to() != 0xffff)
     363           0 :         return false;
     364           8 :     return true;
     365             : }
     366             : 
     367             : static bool
     368         193 : CompareRanges(const CharacterRangeVector& ranges, const int* special_class, size_t length)
     369             : {
     370         193 :     length--;  // Remove final 0x10000.
     371         193 :     MOZ_ASSERT(special_class[length] == 0x10000);
     372         193 :     if (ranges.length() * 2 != length)
     373         181 :         return false;
     374          32 :     for (size_t i = 0; i < length; i += 2) {
     375          30 :         CharacterRange range = ranges[i >> 1];
     376          30 :         if (range.from() != special_class[i] || range.to() != special_class[i + 1] - 1)
     377          10 :             return false;
     378             :     }
     379           2 :     return true;
     380             : }
     381             : 
     382             : bool
     383          96 : RegExpCharacterClass::is_standard(LifoAlloc* alloc)
     384             : {
     385             :     // TODO(lrn): Remove need for this function, by not throwing away information
     386             :     // along the way.
     387          96 :     if (is_negated_)
     388          15 :         return false;
     389          81 :     if (set_.is_standard())
     390          10 :         return true;
     391          71 :     if (CompareRanges(set_.ranges(alloc), kSpaceRanges, kSpaceRangeCount)) {
     392           2 :         set_.set_standard_set_type('s');
     393           2 :         return true;
     394             :     }
     395          69 :     if (CompareInverseRanges(set_.ranges(alloc), kSpaceRanges, kSpaceRangeCount)) {
     396           1 :         set_.set_standard_set_type('S');
     397           1 :         return true;
     398             :     }
     399          68 :     if (CompareInverseRanges(set_.ranges(alloc),
     400             :                              kLineTerminatorRanges,
     401             :                              kLineTerminatorRangeCount)) {
     402           7 :         set_.set_standard_set_type('.');
     403           7 :         return true;
     404             :     }
     405          61 :     if (CompareRanges(set_.ranges(alloc),
     406             :                       kLineTerminatorRanges,
     407             :                       kLineTerminatorRangeCount)) {
     408           0 :         set_.set_standard_set_type('n');
     409           0 :         return true;
     410             :     }
     411          61 :     if (CompareRanges(set_.ranges(alloc), kWordRanges, kWordRangeCount)) {
     412           0 :         set_.set_standard_set_type('w');
     413           0 :         return true;
     414             :     }
     415          61 :     if (CompareInverseRanges(set_.ranges(alloc), kWordRanges, kWordRangeCount)) {
     416           0 :         set_.set_standard_set_type('W');
     417           0 :         return true;
     418             :     }
     419          61 :     return false;
     420             : }
     421             : 
     422             : bool
     423         207 : CharacterRange::IsCanonical(const CharacterRangeVector& ranges)
     424             : {
     425         207 :     int n = ranges.length();
     426         207 :     if (n <= 1)
     427         100 :         return true;
     428             : 
     429         107 :     int max = ranges[0].to();
     430         416 :     for (int i = 1; i < n; i++) {
     431         329 :         CharacterRange next_range = ranges[i];
     432         329 :         if (next_range.from() <= max + 1)
     433          20 :             return false;
     434         309 :         max = next_range.to();
     435             :     }
     436          87 :     return true;
     437             : }
     438             : 
     439             : // Move a number of elements in a zonelist to another position
     440             : // in the same list. Handles overlapping source and target areas.
     441             : static
     442          24 : void MoveRanges(CharacterRangeVector& list, int from, int to, int count)
     443             : {
     444             :     // Ranges are potentially overlapping.
     445          24 :     if (from < to) {
     446          68 :         for (int i = count - 1; i >= 0; i--)
     447          45 :             list[to + i] = list[from + i];
     448             :     } else {
     449           4 :         for (int i = 0; i < count; i++)
     450           3 :             list[to + i] = list[from + i];
     451             :     }
     452          24 : }
     453             : 
     454             : static int
     455          53 : InsertRangeInCanonicalList(CharacterRangeVector& list,
     456             :                            int count,
     457             :                            CharacterRange insert)
     458             : {
     459             :     // Inserts a range into list[0..count[, which must be sorted
     460             :     // by from value and non-overlapping and non-adjacent, using at most
     461             :     // list[0..count] for the result. Returns the number of resulting
     462             :     // canonicalized ranges. Inserting a range may collapse existing ranges into
     463             :     // fewer ranges, so the return value can be anything in the range 1..count+1.
     464          53 :     char16_t from = insert.from();
     465          53 :     char16_t to = insert.to();
     466          53 :     int start_pos = 0;
     467          53 :     int end_pos = count;
     468         140 :     for (int i = count - 1; i >= 0; i--) {
     469         120 :         CharacterRange current = list[i];
     470         120 :         if (current.from() > to + 1) {
     471          65 :             end_pos = i;
     472          55 :         } else if (current.to() + 1 < from) {
     473          33 :             start_pos = i + 1;
     474          33 :             break;
     475             :         }
     476             :     }
     477             : 
     478             :     // Inserted range overlaps, or is adjacent to, ranges at positions
     479             :     // [start_pos..end_pos[. Ranges before start_pos or at or after end_pos are
     480             :     // not affected by the insertion.
     481             :     // If start_pos == end_pos, the range must be inserted before start_pos.
     482             :     // if start_pos < end_pos, the entire range from start_pos to end_pos
     483             :     // must be merged with the insert range.
     484             : 
     485          53 :     if (start_pos == end_pos) {
     486             :         // Insert between existing ranges at position start_pos.
     487          32 :         if (start_pos < count) {
     488          23 :             MoveRanges(list, start_pos, start_pos + 1, count - start_pos);
     489             :         }
     490          32 :         list[start_pos] = insert;
     491          32 :         return count + 1;
     492             :     }
     493          21 :     if (start_pos + 1 == end_pos) {
     494             :         // Replace single existing range at position start_pos.
     495          20 :         CharacterRange to_replace = list[start_pos];
     496          20 :         int new_from = Min(to_replace.from(), from);
     497          20 :         int new_to = Max(to_replace.to(), to);
     498          20 :         list[start_pos] = CharacterRange(new_from, new_to);
     499          20 :         return count;
     500             :     }
     501             :     // Replace a number of existing ranges from start_pos to end_pos - 1.
     502             :     // Move the remaining ranges down.
     503             : 
     504           1 :     int new_from = Min(list[start_pos].from(), from);
     505           1 :     int new_to = Max(list[end_pos - 1].to(), to);
     506           1 :     if (end_pos < count) {
     507           1 :         MoveRanges(list, end_pos, start_pos + 1, count - end_pos);
     508             :     }
     509           1 :     list[start_pos] = CharacterRange(new_from, new_to);
     510           1 :     return count - (end_pos - start_pos) + 1;
     511             : }
     512             : 
     513             : void
     514          20 : CharacterRange::Canonicalize(CharacterRangeVector& character_ranges)
     515             : {
     516          20 :     if (character_ranges.length() <= 1) return;
     517             :     // Check whether ranges are already canonical (increasing, non-overlapping,
     518             :     // non-adjacent).
     519          20 :     int n = character_ranges.length();
     520          20 :     int max = character_ranges[0].to();
     521          20 :     int i = 1;
     522          56 :     while (i < n) {
     523          38 :         CharacterRange current = character_ranges[i];
     524          38 :         if (current.from() <= max + 1) {
     525          20 :             break;
     526             :         }
     527          18 :         max = current.to();
     528          18 :         i++;
     529             :     }
     530             :     // Canonical until the i'th range. If that's all of them, we are done.
     531          20 :     if (i == n) return;
     532             : 
     533             :     // The ranges at index i and forward are not canonicalized. Make them so by
     534             :     // doing the equivalent of insertion sort (inserting each into the previous
     535             :     // list, in order).
     536             :     // Notice that inserting a range can reduce the number of ranges in the
     537             :     // result due to combining of adjacent and overlapping ranges.
     538          20 :     int read = i;  // Range to insert.
     539          20 :     size_t num_canonical = i;  // Length of canonicalized part of list.
     540          33 :     do {
     541          53 :         num_canonical = InsertRangeInCanonicalList(character_ranges,
     542             :                                                    num_canonical,
     543          53 :                                                    character_ranges[read]);
     544          53 :         read++;
     545          53 :     } while (read < n);
     546             : 
     547          22 :     while (character_ranges.length() > num_canonical)
     548          22 :         character_ranges.popBack();
     549             : 
     550          20 :     MOZ_ASSERT(CharacterRange::IsCanonical(character_ranges));
     551             : }
     552             : 
     553             : // -------------------------------------------------------------------
     554             : // SeqRegExpNode
     555             : 
     556             : class VisitMarker
     557             : {
     558             :   public:
     559         473 :     explicit VisitMarker(NodeInfo* info)
     560         473 :       : info_(info)
     561             :     {
     562         473 :         MOZ_ASSERT(!info->visited);
     563         473 :         info->visited = true;
     564         473 :     }
     565         946 :     ~VisitMarker() {
     566         473 :         info_->visited = false;
     567         473 :     }
     568             :   private:
     569             :     NodeInfo* info_;
     570             : };
     571             : 
     572             : bool
     573           0 : SeqRegExpNode::FillInBMInfo(int offset,
     574             :                             int budget,
     575             :                             BoyerMooreLookahead* bm,
     576             :                             bool not_at_start)
     577             : {
     578           0 :     if (!bm->CheckOverRecursed())
     579           0 :         return false;
     580           0 :     if (!on_success_->FillInBMInfo(offset, budget - 1, bm, not_at_start))
     581           0 :         return false;
     582           0 :     if (offset == 0)
     583           0 :         set_bm_info(not_at_start, bm);
     584           0 :     return true;
     585             : }
     586             : 
     587             : RegExpNode*
     588         249 : SeqRegExpNode::FilterLATIN1(int depth, bool ignore_case, bool unicode)
     589             : {
     590         249 :     if (info()->replacement_calculated)
     591          86 :         return replacement();
     592             : 
     593         163 :     if (depth < 0)
     594           0 :         return this;
     595             : 
     596         163 :     MOZ_ASSERT(!info()->visited);
     597         326 :     VisitMarker marker(info());
     598         163 :     return FilterSuccessor(depth - 1, ignore_case, unicode);
     599             : }
     600             : 
     601             : RegExpNode*
     602         330 : SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case, bool unicode)
     603             : {
     604         330 :     RegExpNode* next = on_success_->FilterLATIN1(depth - 1, ignore_case, unicode);
     605         330 :     if (next == nullptr)
     606           1 :         return set_replacement(nullptr);
     607             : 
     608         329 :     on_success_ = next;
     609         329 :     return set_replacement(this);
     610             : }
     611             : 
     612             : // -------------------------------------------------------------------
     613             : // ActionNode
     614             : 
     615             : int
     616         256 : ActionNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
     617             : {
     618         256 :     if (budget <= 0)
     619          10 :         return 0;
     620         246 :     if (action_type_ == POSITIVE_SUBMATCH_SUCCESS)
     621          10 :         return 0;  // Rewinds input!
     622         472 :     return on_success()->EatsAtLeast(still_to_find,
     623             :                                      budget - 1,
     624         472 :                                      not_at_start);
     625             : }
     626             : 
     627             : bool
     628          23 : ActionNode::FillInBMInfo(int offset,
     629             :                          int budget,
     630             :                          BoyerMooreLookahead* bm,
     631             :                          bool not_at_start)
     632             : {
     633          23 :     if (!bm->CheckOverRecursed())
     634           0 :         return false;
     635             : 
     636          23 :     if (action_type_ == BEGIN_SUBMATCH) {
     637           0 :         bm->SetRest(offset);
     638          23 :     } else if (action_type_ != POSITIVE_SUBMATCH_SUCCESS) {
     639          23 :         if (!on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start))
     640           0 :             return false;
     641             :     }
     642          23 :     SaveBMInfo(bm, not_at_start, offset);
     643             : 
     644          23 :     return true;
     645             : }
     646             : 
     647             : /* static */ ActionNode*
     648          11 : ActionNode::SetRegister(int reg,
     649             :                         int val,
     650             :                         RegExpNode* on_success)
     651             : {
     652          11 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(SET_REGISTER, on_success);
     653          11 :     result->data_.u_store_register.reg = reg;
     654          11 :     result->data_.u_store_register.value = val;
     655          11 :     return result;
     656             : }
     657             : 
     658             : /* static */ ActionNode*
     659          11 : ActionNode::IncrementRegister(int reg, RegExpNode* on_success)
     660             : {
     661          11 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(INCREMENT_REGISTER, on_success);
     662          11 :     result->data_.u_increment_register.reg = reg;
     663          11 :     return result;
     664             : }
     665             : 
     666             : /* static */ ActionNode*
     667         128 : ActionNode::StorePosition(int reg, bool is_capture, RegExpNode* on_success)
     668             : {
     669         128 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(STORE_POSITION, on_success);
     670         128 :     result->data_.u_position_register.reg = reg;
     671         128 :     result->data_.u_position_register.is_capture = is_capture;
     672         128 :     return result;
     673             : }
     674             : 
     675             : /* static */ ActionNode*
     676           6 : ActionNode::ClearCaptures(Interval range, RegExpNode* on_success)
     677             : {
     678           6 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(CLEAR_CAPTURES, on_success);
     679           6 :     result->data_.u_clear_captures.range_from = range.from();
     680           6 :     result->data_.u_clear_captures.range_to = range.to();
     681           6 :     return result;
     682             : }
     683             : 
     684             : /* static */ ActionNode*
     685          12 : ActionNode::BeginSubmatch(int stack_pointer_reg, int position_reg, RegExpNode* on_success)
     686             : {
     687          12 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(BEGIN_SUBMATCH, on_success);
     688          12 :     result->data_.u_submatch.stack_pointer_register = stack_pointer_reg;
     689          12 :     result->data_.u_submatch.current_position_register = position_reg;
     690          12 :     return result;
     691             : }
     692             : 
     693             : /* static */ ActionNode*
     694          10 : ActionNode::PositiveSubmatchSuccess(int stack_pointer_reg,
     695             :                                     int restore_reg,
     696             :                                     int clear_capture_count,
     697             :                                     int clear_capture_from,
     698             :                                     RegExpNode* on_success)
     699             : {
     700          10 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(POSITIVE_SUBMATCH_SUCCESS, on_success);
     701          10 :     result->data_.u_submatch.stack_pointer_register = stack_pointer_reg;
     702          10 :     result->data_.u_submatch.current_position_register = restore_reg;
     703          10 :     result->data_.u_submatch.clear_register_count = clear_capture_count;
     704          10 :     result->data_.u_submatch.clear_register_from = clear_capture_from;
     705          10 :     return result;
     706             : }
     707             : 
     708             : /* static */ ActionNode*
     709           0 : ActionNode::EmptyMatchCheck(int start_register,
     710             :                             int repetition_register,
     711             :                             int repetition_limit,
     712             :                             RegExpNode* on_success)
     713             : {
     714           0 :     ActionNode* result = on_success->alloc()->newInfallible<ActionNode>(EMPTY_MATCH_CHECK, on_success);
     715           0 :     result->data_.u_empty_match_check.start_register = start_register;
     716           0 :     result->data_.u_empty_match_check.repetition_register = repetition_register;
     717           0 :     result->data_.u_empty_match_check.repetition_limit = repetition_limit;
     718           0 :     return result;
     719             : }
     720             : 
     721             : // -------------------------------------------------------------------
     722             : // TextNode
     723             : 
     724             : int
     725         183 : TextNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
     726             : {
     727         183 :     int answer = Length();
     728         183 :     if (answer >= still_to_find)
     729          59 :         return answer;
     730         124 :     if (budget <= 0)
     731           0 :         return answer;
     732             : 
     733             :     // We are not at start after this node so we set the last argument to 'true'.
     734         248 :     return answer + on_success()->EatsAtLeast(still_to_find - answer,
     735             :                                               budget - 1,
     736         248 :                                               true);
     737             : }
     738             : 
     739             : int
     740          87 : TextNode::GreedyLoopTextLength()
     741             : {
     742          87 :     TextElement elm = elements()[elements().length() - 1];
     743          87 :     return elm.cp_offset() + elm.length();
     744             : }
     745             : 
     746             : RegExpNode*
     747         177 : TextNode::FilterLATIN1(int depth, bool ignore_case, bool unicode)
     748             : {
     749         177 :     if (info()->replacement_calculated)
     750           7 :         return replacement();
     751             : 
     752         170 :     if (depth < 0)
     753           0 :         return this;
     754             : 
     755         170 :     MOZ_ASSERT(!info()->visited);
     756         340 :     VisitMarker marker(info());
     757         170 :     int element_count = elements().length();
     758         337 :     for (int i = 0; i < element_count; i++) {
     759         170 :         TextElement elm = elements()[i];
     760         170 :         if (elm.text_type() == TextElement::ATOM) {
     761         103 :             CharacterVector& quarks = const_cast<CharacterVector&>(elm.atom()->data());
     762         675 :             for (size_t j = 0; j < quarks.length(); j++) {
     763         573 :                 uint16_t c = quarks[j];
     764         573 :                 if (c <= kMaxOneByteCharCode)
     765         572 :                     continue;
     766           1 :                 if (!ignore_case)
     767           4 :                     return set_replacement(nullptr);
     768             : 
     769             :                 // Here, we need to check for characters whose upper and lower cases
     770             :                 // are outside the Latin-1 range.
     771           0 :                 char16_t converted = ConvertNonLatin1ToLatin1(c, unicode);
     772           0 :                 if (converted == 0) {
     773             :                     // Character is outside Latin-1 completely
     774           0 :                     return set_replacement(nullptr);
     775             :                 }
     776             : 
     777             :                 // Convert quark to Latin-1 in place.
     778           0 :                 quarks[j] = converted;
     779             :             }
     780             :         } else {
     781          67 :             MOZ_ASSERT(elm.text_type() == TextElement::CHAR_CLASS);
     782          67 :             RegExpCharacterClass* cc = elm.char_class();
     783             : 
     784          67 :             CharacterRangeVector& ranges = cc->ranges(alloc());
     785          67 :             if (!CharacterRange::IsCanonical(ranges))
     786          14 :                 CharacterRange::Canonicalize(ranges);
     787             : 
     788             :             // Now they are in order so we only need to look at the first.
     789          67 :             int range_count = ranges.length();
     790          67 :             if (cc->is_negated()) {
     791          20 :                 if (range_count != 0 &&
     792          10 :                     ranges[0].from() == 0 &&
     793           0 :                     ranges[0].to() >= kMaxOneByteCharCode)
     794             :                 {
     795             :                     // This will be handled in a later filter.
     796           0 :                     if (ignore_case && RangesContainLatin1Equivalents(ranges, unicode))
     797           0 :                         continue;
     798           0 :                     return set_replacement(nullptr);
     799             :                 }
     800             :             } else {
     801         114 :                 if (range_count == 0 ||
     802          57 :                     ranges[0].from() > kMaxOneByteCharCode)
     803             :                 {
     804             :                     // This will be handled in a later filter.
     805           2 :                     if (ignore_case && RangesContainLatin1Equivalents(ranges, unicode))
     806           0 :                         continue;
     807           2 :                     return set_replacement(nullptr);
     808             :                 }
     809             :             }
     810             :         }
     811             :     }
     812         167 :     return FilterSuccessor(depth - 1, ignore_case, unicode);
     813             : }
     814             : 
     815             : void
     816         211 : TextNode::CalculateOffsets()
     817             : {
     818         211 :     int element_count = elements().length();
     819             : 
     820             :     // Set up the offsets of the elements relative to the start.  This is a fixed
     821             :     // quantity since a TextNode can only contain fixed-width things.
     822         211 :     int cp_offset = 0;
     823         423 :     for (int i = 0; i < element_count; i++) {
     824         212 :         TextElement& elm = elements()[i];
     825         212 :         elm.set_cp_offset(cp_offset);
     826         212 :         cp_offset += elm.length();
     827             :     }
     828         211 : }
     829             : 
     830          62 : void TextNode::MakeCaseIndependent(bool is_latin1, bool unicode)
     831             : {
     832          62 :     int element_count = elements().length();
     833         124 :     for (int i = 0; i < element_count; i++) {
     834          62 :         TextElement elm = elements()[i];
     835          62 :         if (elm.text_type() == TextElement::CHAR_CLASS) {
     836          14 :             RegExpCharacterClass* cc = elm.char_class();
     837             : 
     838             :             // None of the standard character classes is different in the case
     839             :             // independent case and it slows us down if we don't know that.
     840          14 :             if (cc->is_standard(alloc()))
     841           8 :                 continue;
     842             : 
     843             :             // Similarly, there's nothing to do for the character class
     844             :             // containing all characters except line terminators and surrogates.
     845             :             // This one is added by UnicodeEverythingAtom.
     846          10 :             CharacterRangeVector& ranges = cc->ranges(alloc());
     847          10 :             if (CompareInverseRanges(ranges,
     848             :                                      kLineTerminatorAndSurrogateRanges,
     849             :                                      kLineTerminatorAndSurrogateRangeCount))
     850             :             {
     851           0 :                 continue;
     852             :             }
     853             : 
     854          10 :             int range_count = ranges.length();
     855          35 :             for (int j = 0; j < range_count; j++)
     856          25 :                 ranges[j].AddCaseEquivalents(is_latin1, unicode, &ranges);
     857             :         }
     858             :     }
     859          62 : }
     860             : 
     861             : // -------------------------------------------------------------------
     862             : // AssertionNode
     863             : 
     864             : int
     865          47 : AssertionNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
     866             : {
     867          47 :     if (budget <= 0)
     868           0 :         return 0;
     869             : 
     870             :     // If we know we are not at the start and we are asked "how many characters
     871             :     // will you match if you succeed?" then we can answer anything since false
     872             :     // implies false.  So lets just return the max answer (still_to_find) since
     873             :     // that won't prevent us from preloading a lot of characters for the other
     874             :     // branches in the node graph.
     875          47 :     if (assertion_type() == AT_START && not_at_start)
     876           0 :         return still_to_find;
     877             : 
     878          47 :     return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
     879             : }
     880             : 
     881             : bool
     882           3 : AssertionNode::FillInBMInfo(int offset, int budget, BoyerMooreLookahead* bm, bool not_at_start)
     883             : {
     884           3 :     if (!bm->CheckOverRecursed())
     885           0 :         return false;
     886             : 
     887             :     // Match the behaviour of EatsAtLeast on this node.
     888           3 :     if (assertion_type() == AT_START && not_at_start)
     889           0 :         return true;
     890             : 
     891           3 :     if (!on_success()->FillInBMInfo(offset, budget - 1, bm, not_at_start))
     892           0 :         return false;
     893           3 :     SaveBMInfo(bm, not_at_start, offset);
     894           3 :     return true;
     895             : }
     896             : 
     897             : // -------------------------------------------------------------------
     898             : // BackReferenceNode
     899             : 
     900             : int
     901           0 : BackReferenceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
     902             : {
     903           0 :     if (budget <= 0)
     904           0 :         return 0;
     905           0 :     return on_success()->EatsAtLeast(still_to_find, budget - 1, not_at_start);
     906             : }
     907             : 
     908             : bool
     909           0 : BackReferenceNode::FillInBMInfo(int offset, int budget, BoyerMooreLookahead* bm, bool not_at_start)
     910             : {
     911             :     // Working out the set of characters that a backreference can match is too
     912             :     // hard, so we just say that any character can match.
     913           0 :     bm->SetRest(offset);
     914           0 :     SaveBMInfo(bm, not_at_start, offset);
     915           0 :     return true;
     916             : }
     917             : 
     918             : // -------------------------------------------------------------------
     919             : // ChoiceNode
     920             : 
     921             : int
     922         180 : ChoiceNode::EatsAtLeastHelper(int still_to_find,
     923             :                               int budget,
     924             :                               RegExpNode* ignore_this_node,
     925             :                               bool not_at_start)
     926             : {
     927         180 :     if (budget <= 0)
     928           0 :         return 0;
     929             : 
     930         180 :     int min = 100;
     931         180 :     size_t choice_count = alternatives().length();
     932         180 :     budget = (budget - 1) / choice_count;
     933         490 :     for (size_t i = 0; i < choice_count; i++) {
     934         403 :         RegExpNode* node = alternatives()[i].node();
     935         403 :         if (node == ignore_this_node) continue;
     936             :         int node_eats_at_least =
     937         279 :             node->EatsAtLeast(still_to_find, budget, not_at_start);
     938         279 :         if (node_eats_at_least < min)
     939         221 :             min = node_eats_at_least;
     940         279 :         if (min == 0)
     941          93 :             return 0;
     942             :     }
     943          87 :     return min;
     944             : }
     945             : 
     946             : int
     947          52 : ChoiceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
     948             : {
     949          52 :     return EatsAtLeastHelper(still_to_find,
     950             :                              budget,
     951             :                              nullptr,
     952          52 :                              not_at_start);
     953             : }
     954             : 
     955             : void
     956          37 : ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
     957             :                                  RegExpCompiler* compiler,
     958             :                                  int characters_filled_in,
     959             :                                  bool not_at_start)
     960             : {
     961          37 :     not_at_start = (not_at_start || not_at_start_);
     962          37 :     int choice_count = alternatives().length();
     963          37 :     MOZ_ASSERT(choice_count > 0);
     964          74 :     alternatives()[0].node()->GetQuickCheckDetails(details,
     965             :                                                    compiler,
     966             :                                                    characters_filled_in,
     967          74 :                                                    not_at_start);
     968          83 :     for (int i = 1; i < choice_count; i++) {
     969          46 :         QuickCheckDetails new_details(details->characters());
     970          46 :         RegExpNode* node = alternatives()[i].node();
     971          46 :         node->GetQuickCheckDetails(&new_details, compiler,
     972             :                                    characters_filled_in,
     973          92 :                                    not_at_start);
     974             :         // Here we merge the quick match details of the two branches.
     975          46 :         details->Merge(&new_details, characters_filled_in);
     976             :     }
     977          37 : }
     978             : 
     979             : bool
     980           4 : ChoiceNode::FillInBMInfo(int offset,
     981             :                          int budget,
     982             :                          BoyerMooreLookahead* bm,
     983             :                          bool not_at_start)
     984             : {
     985           4 :     if (!bm->CheckOverRecursed())
     986           0 :         return false;
     987             : 
     988           4 :     const GuardedAlternativeVector& alts = alternatives();
     989           4 :     budget = (budget - 1) / alts.length();
     990          21 :     for (size_t i = 0; i < alts.length(); i++) {
     991          17 :         const GuardedAlternative& alt = alts[i];
     992          17 :         if (alt.guards() != nullptr && alt.guards()->length() != 0) {
     993           0 :             bm->SetRest(offset);  // Give up trying to fill in info.
     994           0 :             SaveBMInfo(bm, not_at_start, offset);
     995           0 :             return true;
     996             :         }
     997          17 :         if (!alt.node()->FillInBMInfo(offset, budget, bm, not_at_start))
     998           0 :             return false;
     999             :     }
    1000           4 :     SaveBMInfo(bm, not_at_start, offset);
    1001           4 :     return true;
    1002             : }
    1003             : 
    1004             : RegExpNode*
    1005          84 : ChoiceNode::FilterLATIN1(int depth, bool ignore_case, bool unicode)
    1006             : {
    1007          84 :     if (info()->replacement_calculated)
    1008          19 :         return replacement();
    1009          65 :     if (depth < 0)
    1010           0 :         return this;
    1011          65 :     if (info()->visited)
    1012           0 :         return this;
    1013         130 :     VisitMarker marker(info());
    1014          65 :     int choice_count = alternatives().length();
    1015             : 
    1016         232 :     for (int i = 0; i < choice_count; i++) {
    1017         173 :         const GuardedAlternative alternative = alternatives()[i];
    1018         173 :         if (alternative.guards() != nullptr && alternative.guards()->length() != 0) {
    1019           6 :             set_replacement(this);
    1020           6 :             return this;
    1021             :         }
    1022             :     }
    1023             : 
    1024          59 :     int surviving = 0;
    1025          59 :     RegExpNode* survivor = nullptr;
    1026         226 :     for (int i = 0; i < choice_count; i++) {
    1027         167 :         GuardedAlternative alternative = alternatives()[i];
    1028             :         RegExpNode* replacement =
    1029         167 :             alternative.node()->FilterLATIN1(depth - 1, ignore_case, unicode);
    1030         167 :         MOZ_ASSERT(replacement != this);  // No missing EMPTY_MATCH_CHECK.
    1031         167 :         if (replacement != nullptr) {
    1032         165 :             alternatives()[i].set_node(replacement);
    1033         165 :             surviving++;
    1034         165 :             survivor = replacement;
    1035             :         }
    1036             :     }
    1037          59 :     if (surviving < 2)
    1038           1 :         return set_replacement(survivor);
    1039             : 
    1040          58 :     set_replacement(this);
    1041          58 :     if (surviving == choice_count)
    1042          58 :         return this;
    1043             : 
    1044             :     // Only some of the nodes survived the filtering.  We need to rebuild the
    1045             :     // alternatives list.
    1046           0 :     GuardedAlternativeVector new_alternatives(*alloc());
    1047           0 :     new_alternatives.reserve(surviving);
    1048           0 :     for (int i = 0; i < choice_count; i++) {
    1049             :         RegExpNode* replacement =
    1050           0 :             alternatives()[i].node()->FilterLATIN1(depth - 1, ignore_case, unicode);
    1051           0 :         if (replacement != nullptr) {
    1052           0 :             alternatives()[i].set_node(replacement);
    1053           0 :             new_alternatives.append(alternatives()[i]);
    1054             :         }
    1055             :     }
    1056             : 
    1057           0 :     alternatives_ = Move(new_alternatives);
    1058           0 :     return this;
    1059             : }
    1060             : 
    1061             : // -------------------------------------------------------------------
    1062             : // NegativeLookaheadChoiceNode
    1063             : 
    1064             : bool
    1065           0 : NegativeLookaheadChoiceNode::FillInBMInfo(int offset,
    1066             :                                           int budget,
    1067             :                                           BoyerMooreLookahead* bm,
    1068             :                                           bool not_at_start)
    1069             : {
    1070           0 :     if (!bm->CheckOverRecursed())
    1071           0 :         return false;
    1072             : 
    1073           0 :     if (!alternatives()[1].node()->FillInBMInfo(offset, budget - 1, bm, not_at_start))
    1074           0 :         return false;
    1075           0 :     if (offset == 0)
    1076           0 :         set_bm_info(not_at_start, bm);
    1077           0 :     return true;
    1078             : }
    1079             : 
    1080             : int
    1081           6 : NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find, int budget, bool not_at_start)
    1082             : {
    1083           6 :     if (budget <= 0)
    1084           0 :         return 0;
    1085             : 
    1086             :     // Alternative 0 is the negative lookahead, alternative 1 is what comes
    1087             :     // afterwards.
    1088           6 :     RegExpNode* node = alternatives()[1].node();
    1089           6 :     return node->EatsAtLeast(still_to_find, budget - 1, not_at_start);
    1090             : }
    1091             : 
    1092             : void
    1093           0 : NegativeLookaheadChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
    1094             :                                                   RegExpCompiler* compiler,
    1095             :                                                   int filled_in,
    1096             :                                                   bool not_at_start)
    1097             : {
    1098             :     // Alternative 0 is the negative lookahead, alternative 1 is what comes
    1099             :     // afterwards.
    1100           0 :     RegExpNode* node = alternatives()[1].node();
    1101           0 :     return node->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
    1102             : }
    1103             : 
    1104             : RegExpNode*
    1105           2 : NegativeLookaheadChoiceNode::FilterLATIN1(int depth, bool ignore_case, bool unicode)
    1106             : {
    1107           2 :     if (info()->replacement_calculated)
    1108           0 :         return replacement();
    1109           2 :     if (depth < 0)
    1110           0 :         return this;
    1111           2 :     if (info()->visited)
    1112           0 :         return this;
    1113             : 
    1114           4 :     VisitMarker marker(info());
    1115             : 
    1116             :     // Alternative 0 is the negative lookahead, alternative 1 is what comes
    1117             :     // afterwards.
    1118           2 :     RegExpNode* node = alternatives()[1].node();
    1119           2 :     RegExpNode* replacement = node->FilterLATIN1(depth - 1, ignore_case, unicode);
    1120             : 
    1121           2 :     if (replacement == nullptr)
    1122           0 :         return set_replacement(nullptr);
    1123           2 :     alternatives()[1].set_node(replacement);
    1124             : 
    1125           2 :     RegExpNode* neg_node = alternatives()[0].node();
    1126           2 :     RegExpNode* neg_replacement = neg_node->FilterLATIN1(depth - 1, ignore_case, unicode);
    1127             : 
    1128             :     // If the negative lookahead is always going to fail then
    1129             :     // we don't need to check it.
    1130           2 :     if (neg_replacement == nullptr)
    1131           0 :         return set_replacement(replacement);
    1132             : 
    1133           2 :     alternatives()[0].set_node(neg_replacement);
    1134           2 :     return set_replacement(this);
    1135             : }
    1136             : 
    1137             : // -------------------------------------------------------------------
    1138             : // LoopChoiceNode
    1139             : 
    1140             : void
    1141          16 : GuardedAlternative::AddGuard(LifoAlloc* alloc, Guard* guard)
    1142             : {
    1143          16 :     if (guards_ == nullptr)
    1144          16 :         guards_ = alloc->newInfallible<GuardVector>(*alloc);
    1145          16 :     guards_->append(guard);
    1146          16 : }
    1147             : 
    1148             : void
    1149          57 : LoopChoiceNode::AddLoopAlternative(GuardedAlternative alt)
    1150             : {
    1151          57 :     MOZ_ASSERT(loop_node_ == nullptr);
    1152          57 :     AddAlternative(alt);
    1153          57 :     loop_node_ = alt.node();
    1154          57 : }
    1155             : 
    1156             : 
    1157             : void
    1158          57 : LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt)
    1159             : {
    1160          57 :     MOZ_ASSERT(continue_node_ == nullptr);
    1161          57 :     AddAlternative(alt);
    1162          57 :     continue_node_ = alt.node();
    1163          57 : }
    1164             : 
    1165             : int
    1166         128 : LoopChoiceNode::EatsAtLeast(int still_to_find,  int budget, bool not_at_start)
    1167             : {
    1168         128 :     return EatsAtLeastHelper(still_to_find,
    1169             :                              budget - 1,
    1170             :                              loop_node_,
    1171         128 :                              not_at_start);
    1172             : }
    1173             : 
    1174             : void
    1175          35 : LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
    1176             :                                      RegExpCompiler* compiler,
    1177             :                                      int characters_filled_in,
    1178             :                                      bool not_at_start)
    1179             : {
    1180          35 :     if (body_can_be_zero_length_ || info()->visited)
    1181           4 :         return;
    1182          62 :     VisitMarker marker(info());
    1183          31 :     return ChoiceNode::GetQuickCheckDetails(details,
    1184             :                                             compiler,
    1185             :                                             characters_filled_in,
    1186          31 :                                             not_at_start);
    1187             : }
    1188             : 
    1189             : bool
    1190           2 : LoopChoiceNode::FillInBMInfo(int offset,
    1191             :                              int budget,
    1192             :                              BoyerMooreLookahead* bm,
    1193             :                              bool not_at_start)
    1194             : {
    1195           2 :     if (body_can_be_zero_length_ || budget <= 0) {
    1196           0 :         bm->SetRest(offset);
    1197           0 :         SaveBMInfo(bm, not_at_start, offset);
    1198           0 :         return true;
    1199             :     }
    1200           2 :     if (!ChoiceNode::FillInBMInfo(offset, budget - 1, bm, not_at_start))
    1201           0 :         return false;
    1202           2 :     SaveBMInfo(bm, not_at_start, offset);
    1203           2 :     return true;
    1204             : }
    1205             : 
    1206             : RegExpNode*
    1207          95 : LoopChoiceNode::FilterLATIN1(int depth, bool ignore_case, bool unicode)
    1208             : {
    1209          95 :     if (info()->replacement_calculated)
    1210          18 :         return replacement();
    1211          77 :     if (depth < 0)
    1212           0 :         return this;
    1213          77 :     if (info()->visited)
    1214          35 :         return this;
    1215             : 
    1216             :     {
    1217          83 :         VisitMarker marker(info());
    1218             : 
    1219             :         RegExpNode* continue_replacement =
    1220          42 :             continue_node_->FilterLATIN1(depth - 1, ignore_case, unicode);
    1221             : 
    1222             :         // If we can't continue after the loop then there is no sense in doing the
    1223             :         // loop.
    1224          42 :         if (continue_replacement == nullptr)
    1225           1 :             return set_replacement(nullptr);
    1226             :     }
    1227             : 
    1228          41 :     return ChoiceNode::FilterLATIN1(depth - 1, ignore_case, unicode);
    1229             : }
    1230             : 
    1231             : // -------------------------------------------------------------------
    1232             : // Analysis
    1233             : 
    1234             : void
    1235         687 : Analysis::EnsureAnalyzed(RegExpNode* that)
    1236             : {
    1237         687 :     if (!CheckRecursionLimit(cx)) {
    1238           0 :         failASCII("Stack overflow");
    1239           0 :         return;
    1240             :     }
    1241             : 
    1242         687 :     if (that->info()->been_analyzed || that->info()->being_analyzed)
    1243         130 :         return;
    1244         557 :     that->info()->being_analyzed = true;
    1245         557 :     that->Accept(this);
    1246         557 :     that->info()->being_analyzed = false;
    1247         557 :     that->info()->been_analyzed = true;
    1248             : }
    1249             : 
    1250             : void
    1251          42 : Analysis::VisitEnd(EndNode* that)
    1252             : {
    1253             :     // nothing to do
    1254          42 : }
    1255             : 
    1256             : void
    1257         211 : Analysis::VisitText(TextNode* that)
    1258             : {
    1259         211 :     if (ignore_case_)
    1260          62 :         that->MakeCaseIndependent(is_latin1_, unicode_);
    1261         211 :     EnsureAnalyzed(that->on_success());
    1262         211 :     if (!has_failed()) {
    1263         211 :         that->CalculateOffsets();
    1264             :     }
    1265         211 : }
    1266             : 
    1267             : void
    1268         176 : Analysis::VisitAction(ActionNode* that)
    1269             : {
    1270         176 :     RegExpNode* target = that->on_success();
    1271         176 :     EnsureAnalyzed(target);
    1272             : 
    1273         176 :     if (!has_failed()) {
    1274             :         // If the next node is interested in what it follows then this node
    1275             :         // has to be interested too so it can pass the information on.
    1276         176 :         that->info()->AddFromFollowing(target->info());
    1277             :     }
    1278         176 : }
    1279             : 
    1280             : void
    1281          28 : Analysis::VisitChoice(ChoiceNode* that)
    1282             : {
    1283          28 :     NodeInfo* info = that->info();
    1284             : 
    1285         132 :     for (size_t i = 0; i < that->alternatives().length(); i++) {
    1286         104 :         RegExpNode* node = that->alternatives()[i].node();
    1287         104 :         EnsureAnalyzed(node);
    1288         104 :         if (has_failed()) return;
    1289             : 
    1290             :         // Anything the following nodes need to know has to be known by
    1291             :         // this node also, so it can pass it on.
    1292         104 :         info->AddFromFollowing(node->info());
    1293             :     }
    1294             : }
    1295             : 
    1296             : void
    1297          56 : Analysis::VisitLoopChoice(LoopChoiceNode* that)
    1298             : {
    1299          56 :     NodeInfo* info = that->info();
    1300         168 :     for (size_t i = 0; i < that->alternatives().length(); i++) {
    1301         112 :         RegExpNode* node = that->alternatives()[i].node();
    1302         112 :         if (node != that->loop_node()) {
    1303          56 :             EnsureAnalyzed(node);
    1304          56 :             if (has_failed()) return;
    1305          56 :             info->AddFromFollowing(node->info());
    1306             :         }
    1307             :     }
    1308             : 
    1309             :     // Check the loop last since it may need the value of this node
    1310             :     // to get a correct result.
    1311          56 :     EnsureAnalyzed(that->loop_node());
    1312          56 :     if (!has_failed())
    1313          56 :         info->AddFromFollowing(that->loop_node()->info());
    1314             : }
    1315             : 
    1316             : void
    1317           0 : Analysis::VisitBackReference(BackReferenceNode* that)
    1318             : {
    1319           0 :     EnsureAnalyzed(that->on_success());
    1320           0 : }
    1321             : 
    1322             : void
    1323          44 : Analysis::VisitAssertion(AssertionNode* that)
    1324             : {
    1325          44 :     EnsureAnalyzed(that->on_success());
    1326          44 : }
    1327             : 
    1328             : // -------------------------------------------------------------------
    1329             : // Implementation of the Irregexp regular expression engine.
    1330             : //
    1331             : // The Irregexp regular expression engine is intended to be a complete
    1332             : // implementation of ECMAScript regular expressions.  It generates either
    1333             : // bytecodes or native code.
    1334             : 
    1335             : //   The Irregexp regexp engine is structured in three steps.
    1336             : //   1) The parser generates an abstract syntax tree.  See RegExpAST.cpp.
    1337             : //   2) From the AST a node network is created.  The nodes are all
    1338             : //      subclasses of RegExpNode.  The nodes represent states when
    1339             : //      executing a regular expression.  Several optimizations are
    1340             : //      performed on the node network.
    1341             : //   3) From the nodes we generate either byte codes or native code
    1342             : //      that can actually execute the regular expression (perform
    1343             : //      the search).  The code generation step is described in more
    1344             : //      detail below.
    1345             : 
    1346             : // Code generation.
    1347             : //
    1348             : //   The nodes are divided into four main categories.
    1349             : //   * Choice nodes
    1350             : //        These represent places where the regular expression can
    1351             : //        match in more than one way.  For example on entry to an
    1352             : //        alternation (foo|bar) or a repetition (*, +, ? or {}).
    1353             : //   * Action nodes
    1354             : //        These represent places where some action should be
    1355             : //        performed.  Examples include recording the current position
    1356             : //        in the input string to a register (in order to implement
    1357             : //        captures) or other actions on register for example in order
    1358             : //        to implement the counters needed for {} repetitions.
    1359             : //   * Matching nodes
    1360             : //        These attempt to match some element part of the input string.
    1361             : //        Examples of elements include character classes, plain strings
    1362             : //        or back references.
    1363             : //   * End nodes
    1364             : //        These are used to implement the actions required on finding
    1365             : //        a successful match or failing to find a match.
    1366             : //
    1367             : //   The code generated (whether as byte codes or native code) maintains
    1368             : //   some state as it runs.  This consists of the following elements:
    1369             : //
    1370             : //   * The capture registers.  Used for string captures.
    1371             : //   * Other registers.  Used for counters etc.
    1372             : //   * The current position.
    1373             : //   * The stack of backtracking information.  Used when a matching node
    1374             : //     fails to find a match and needs to try an alternative.
    1375             : //
    1376             : // Conceptual regular expression execution model:
    1377             : //
    1378             : //   There is a simple conceptual model of regular expression execution
    1379             : //   which will be presented first.  The actual code generated is a more
    1380             : //   efficient simulation of the simple conceptual model:
    1381             : //
    1382             : //   * Choice nodes are implemented as follows:
    1383             : //     For each choice except the last {
    1384             : //       push current position
    1385             : //       push backtrack code location
    1386             : //       <generate code to test for choice>
    1387             : //       backtrack code location:
    1388             : //       pop current position
    1389             : //     }
    1390             : //     <generate code to test for last choice>
    1391             : //
    1392             : //   * Actions nodes are generated as follows
    1393             : //     <push affected registers on backtrack stack>
    1394             : //     <generate code to perform action>
    1395             : //     push backtrack code location
    1396             : //     <generate code to test for following nodes>
    1397             : //     backtrack code location:
    1398             : //     <pop affected registers to restore their state>
    1399             : //     <pop backtrack location from stack and go to it>
    1400             : //
    1401             : //   * Matching nodes are generated as follows:
    1402             : //     if input string matches at current position
    1403             : //       update current position
    1404             : //       <generate code to test for following nodes>
    1405             : //     else
    1406             : //       <pop backtrack location from stack and go to it>
    1407             : //
    1408             : //   Thus it can be seen that the current position is saved and restored
    1409             : //   by the choice nodes, whereas the registers are saved and restored by
    1410             : //   by the action nodes that manipulate them.
    1411             : //
    1412             : //   The other interesting aspect of this model is that nodes are generated
    1413             : //   at the point where they are needed by a recursive call to Emit().  If
    1414             : //   the node has already been code generated then the Emit() call will
    1415             : //   generate a jump to the previously generated code instead.  In order to
    1416             : //   limit recursion it is possible for the Emit() function to put the node
    1417             : //   on a work list for later generation and instead generate a jump.  The
    1418             : //   destination of the jump is resolved later when the code is generated.
    1419             : //
    1420             : // Actual regular expression code generation.
    1421             : //
    1422             : //   Code generation is actually more complicated than the above.  In order
    1423             : //   to improve the efficiency of the generated code some optimizations are
    1424             : //   performed
    1425             : //
    1426             : //   * Choice nodes have 1-character lookahead.
    1427             : //     A choice node looks at the following character and eliminates some of
    1428             : //     the choices immediately based on that character.  This is not yet
    1429             : //     implemented.
    1430             : //   * Simple greedy loops store reduced backtracking information.
    1431             : //     A quantifier like /.*foo/m will greedily match the whole input.  It will
    1432             : //     then need to backtrack to a point where it can match "foo".  The naive
    1433             : //     implementation of this would push each character position onto the
    1434             : //     backtracking stack, then pop them off one by one.  This would use space
    1435             : //     proportional to the length of the input string.  However since the "."
    1436             : //     can only match in one way and always has a constant length (in this case
    1437             : //     of 1) it suffices to store the current position on the top of the stack
    1438             : //     once.  Matching now becomes merely incrementing the current position and
    1439             : //     backtracking becomes decrementing the current position and checking the
    1440             : //     result against the stored current position.  This is faster and saves
    1441             : //     space.
    1442             : //   * The current state is virtualized.
    1443             : //     This is used to defer expensive operations until it is clear that they
    1444             : //     are needed and to generate code for a node more than once, allowing
    1445             : //     specialized an efficient versions of the code to be created. This is
    1446             : //     explained in the section below.
    1447             : //
    1448             : // Execution state virtualization.
    1449             : //
    1450             : //   Instead of emitting code, nodes that manipulate the state can record their
    1451             : //   manipulation in an object called the Trace.  The Trace object can record a
    1452             : //   current position offset, an optional backtrack code location on the top of
    1453             : //   the virtualized backtrack stack and some register changes.  When a node is
    1454             : //   to be emitted it can flush the Trace or update it.  Flushing the Trace
    1455             : //   will emit code to bring the actual state into line with the virtual state.
    1456             : //   Avoiding flushing the state can postpone some work (e.g. updates of capture
    1457             : //   registers).  Postponing work can save time when executing the regular
    1458             : //   expression since it may be found that the work never has to be done as a
    1459             : //   failure to match can occur.  In addition it is much faster to jump to a
    1460             : //   known backtrack code location than it is to pop an unknown backtrack
    1461             : //   location from the stack and jump there.
    1462             : //
    1463             : //   The virtual state found in the Trace affects code generation.  For example
    1464             : //   the virtual state contains the difference between the actual current
    1465             : //   position and the virtual current position, and matching code needs to use
    1466             : //   this offset to attempt a match in the correct location of the input
    1467             : //   string.  Therefore code generated for a non-trivial trace is specialized
    1468             : //   to that trace.  The code generator therefore has the ability to generate
    1469             : //   code for each node several times.  In order to limit the size of the
    1470             : //   generated code there is an arbitrary limit on how many specialized sets of
    1471             : //   code may be generated for a given node.  If the limit is reached, the
    1472             : //   trace is flushed and a generic version of the code for a node is emitted.
    1473             : //   This is subsequently used for that node.  The code emitted for non-generic
    1474             : //   trace is not recorded in the node and so it cannot currently be reused in
    1475             : //   the event that code generation is requested for an identical trace.
    1476             : 
    1477             : /* static */ TextElement
    1478         156 : TextElement::Atom(RegExpAtom* atom)
    1479             : {
    1480         156 :     return TextElement(ATOM, atom);
    1481             : }
    1482             : 
    1483             : /* static */ TextElement
    1484         131 : TextElement::CharClass(RegExpCharacterClass* char_class)
    1485             : {
    1486         131 :     return TextElement(CHAR_CLASS, char_class);
    1487             : }
    1488             : 
    1489             : int
    1490        1021 : TextElement::length() const
    1491             : {
    1492        1021 :     switch (text_type()) {
    1493             :       case ATOM:
    1494         554 :         return atom()->length();
    1495             :       case CHAR_CLASS:
    1496         467 :         return 1;
    1497             :     }
    1498           0 :     MOZ_CRASH("Bad text type");
    1499             : }
    1500             : 
    1501             : class FrequencyCollator
    1502             : {
    1503             :   public:
    1504          40 :     FrequencyCollator() : total_samples_(0) {
    1505        5160 :         for (int i = 0; i < RegExpMacroAssembler::kTableSize; i++) {
    1506        5120 :             frequencies_[i] = CharacterFrequency(i);
    1507             :         }
    1508          40 :     }
    1509             : 
    1510        1012 :     void CountCharacter(int character) {
    1511        1012 :         int index = (character & RegExpMacroAssembler::kTableMask);
    1512        1012 :         frequencies_[index].Increment();
    1513        1012 :         total_samples_++;
    1514        1012 :     }
    1515             : 
    1516             :     // Does not measure in percent, but rather per-128 (the table size from the
    1517             :     // regexp macro assembler).
    1518         179 :     int Frequency(int in_character) {
    1519         179 :         MOZ_ASSERT((in_character & RegExpMacroAssembler::kTableMask) == in_character);
    1520         179 :         if (total_samples_ < 1) return 1;  // Division by zero.
    1521             :         int freq_in_per128 =
    1522         179 :             (frequencies_[in_character].counter() * 128) / total_samples_;
    1523         179 :         return freq_in_per128;
    1524             :     }
    1525             : 
    1526             :   private:
    1527             :     class CharacterFrequency {
    1528             :       public:
    1529        5120 :         CharacterFrequency() : counter_(0), character_(-1) { }
    1530        5120 :         explicit CharacterFrequency(int character)
    1531        5120 :           : counter_(0), character_(character)
    1532        5120 :         {}
    1533             : 
    1534        1012 :         void Increment() { counter_++; }
    1535         179 :         int counter() { return counter_; }
    1536             :         int character() { return character_; }
    1537             : 
    1538             :      private:
    1539             :         int counter_;
    1540             :         int character_;
    1541             :     };
    1542             : 
    1543             :   private:
    1544             :     CharacterFrequency frequencies_[RegExpMacroAssembler::kTableSize];
    1545             :     int total_samples_;
    1546             : };
    1547             : 
    1548          40 : class irregexp::RegExpCompiler
    1549             : {
    1550             :   public:
    1551             :     RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_count,
    1552             :                    bool ignore_case, bool is_latin1, bool match_only, bool unicode);
    1553             : 
    1554          35 :     int AllocateRegister() {
    1555          35 :         if (next_register_ >= RegExpMacroAssembler::kMaxRegister) {
    1556           0 :             reg_exp_too_big_ = true;
    1557           0 :             return next_register_;
    1558             :         }
    1559          35 :         return next_register_++;
    1560             :     }
    1561             : 
    1562             :     RegExpCode Assemble(JSContext* cx,
    1563             :                         RegExpMacroAssembler* assembler,
    1564             :                         RegExpNode* start,
    1565             :                         int capture_count);
    1566             : 
    1567           0 :     inline void AddWork(RegExpNode* node) {
    1568           0 :         AutoEnterOOMUnsafeRegion oomUnsafe;
    1569           0 :         if (!work_list_.append(node))
    1570           0 :             oomUnsafe.crash("AddWork");
    1571           0 :     }
    1572             : 
    1573             :     static const int kImplementationOffset = 0;
    1574             :     static const int kNumberOfRegistersOffset = 0;
    1575             :     static const int kCodeOffset = 1;
    1576             : 
    1577        3243 :     RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
    1578          40 :     EndNode* accept() { return accept_; }
    1579             : 
    1580             :     static const int kMaxRecursion = 100;
    1581         547 :     inline int recursion_depth() { return recursion_depth_; }
    1582         573 :     inline void IncrementRecursionDepth() { recursion_depth_++; }
    1583         573 :     inline void DecrementRecursionDepth() { recursion_depth_--; }
    1584             : 
    1585           0 :     void SetRegExpTooBig() { reg_exp_too_big_ = true; }
    1586          40 :     inline bool isRegExpTooBig() { return reg_exp_too_big_; }
    1587             : 
    1588        1350 :     inline bool ignore_case() { return ignore_case_; }
    1589        1989 :     inline bool latin1() { return latin1_; }
    1590         393 :     inline bool unicode() { return unicode_; }
    1591         219 :     FrequencyCollator* frequency_collator() { return &frequency_collator_; }
    1592             : 
    1593          80 :     int current_expansion_factor() { return current_expansion_factor_; }
    1594         160 :     void set_current_expansion_factor(int value) {
    1595         160 :         current_expansion_factor_ = value;
    1596         160 :     }
    1597             : 
    1598         300 :     JSContext* cx() const { return cx_; }
    1599        1008 :     LifoAlloc* alloc() const { return alloc_; }
    1600             : 
    1601             :     static const int kNoRegister = -1;
    1602             : 
    1603             :     bool CheckOverRecursed();
    1604             : 
    1605             :   private:
    1606             :     EndNode* accept_;
    1607             :     int next_register_;
    1608             :     Vector<RegExpNode*, 4, SystemAllocPolicy> work_list_;
    1609             :     int recursion_depth_;
    1610             :     RegExpMacroAssembler* macro_assembler_;
    1611             :     bool ignore_case_;
    1612             :     bool latin1_;
    1613             :     bool match_only_;
    1614             :     bool unicode_;
    1615             :     bool reg_exp_too_big_;
    1616             :     int current_expansion_factor_;
    1617             :     FrequencyCollator frequency_collator_;
    1618             :     JSContext* cx_;
    1619             :     LifoAlloc* alloc_;
    1620             : };
    1621             : 
    1622             : class RecursionCheck
    1623             : {
    1624             :   public:
    1625         573 :     explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
    1626         573 :         compiler->IncrementRecursionDepth();
    1627         573 :     }
    1628         573 :     ~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
    1629             : 
    1630             :   private:
    1631             :     RegExpCompiler* compiler_;
    1632             : };
    1633             : 
    1634             : static inline bool
    1635         377 : IsLatin1Equivalent(char16_t c, RegExpCompiler* compiler)
    1636             : {
    1637         377 :     if (c <= kMaxOneByteCharCode)
    1638         377 :         return true;
    1639             : 
    1640           0 :     if (!compiler->ignore_case())
    1641           0 :         return false;
    1642             : 
    1643           0 :     char16_t converted = ConvertNonLatin1ToLatin1(c, compiler->unicode());
    1644             : 
    1645           0 :     return converted != 0 && converted <= kMaxOneByteCharCode;
    1646             : }
    1647             : 
    1648             : // Attempts to compile the regexp using an Irregexp code generator.  Returns
    1649             : // a fixed array or a null handle depending on whether it succeeded.
    1650          40 : RegExpCompiler::RegExpCompiler(JSContext* cx, LifoAlloc* alloc, int capture_count,
    1651          40 :                                bool ignore_case, bool latin1, bool match_only, bool unicode)
    1652          40 :   : next_register_(2 * (capture_count + 1)),
    1653             :     recursion_depth_(0),
    1654             :     ignore_case_(ignore_case),
    1655             :     latin1_(latin1),
    1656             :     match_only_(match_only),
    1657             :     unicode_(unicode),
    1658             :     reg_exp_too_big_(false),
    1659             :     current_expansion_factor_(1),
    1660             :     frequency_collator_(),
    1661             :     cx_(cx),
    1662          40 :     alloc_(alloc)
    1663             : {
    1664          40 :     accept_ = alloc->newInfallible<EndNode>(alloc, EndNode::ACCEPT);
    1665          40 :     MOZ_ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
    1666          40 : }
    1667             : 
    1668             : RegExpCode
    1669          40 : RegExpCompiler::Assemble(JSContext* cx,
    1670             :                          RegExpMacroAssembler* assembler,
    1671             :                          RegExpNode* start,
    1672             :                          int capture_count)
    1673             : {
    1674          40 :     macro_assembler_ = assembler;
    1675          40 :     macro_assembler_->set_slow_safe(false);
    1676             : 
    1677             :     // The LifoAlloc used by the regexp compiler is infallible and is currently
    1678             :     // expected to crash on OOM. Thus we have to disable the assertions made to
    1679             :     // prevent us from allocating any new chunk in the LifoAlloc. This is needed
    1680             :     // because the jit::MacroAssembler turns these assertions on by default.
    1681          80 :     LifoAlloc::AutoFallibleScope fallibleAllocator(alloc());
    1682             : 
    1683          80 :     jit::Label fail;
    1684          40 :     macro_assembler_->PushBacktrack(&fail);
    1685          40 :     Trace new_trace;
    1686          40 :     start->Emit(this, &new_trace);
    1687          40 :     macro_assembler_->BindBacktrack(&fail);
    1688          40 :     macro_assembler_->Fail();
    1689             : 
    1690          40 :     while (!work_list_.empty())
    1691           0 :         work_list_.popCopy()->Emit(this, &new_trace);
    1692             : 
    1693          40 :     RegExpCode code = macro_assembler_->GenerateCode(cx, match_only_);
    1694          40 :     if (code.empty())
    1695           0 :         return RegExpCode();
    1696             : 
    1697          40 :     if (reg_exp_too_big_) {
    1698           0 :         code.destroy();
    1699           0 :         JS_ReportErrorASCII(cx, "regexp too big");
    1700           0 :         return RegExpCode();
    1701             :     }
    1702             : 
    1703          40 :     return code;
    1704             : }
    1705             : 
    1706             : template <typename CharT>
    1707             : static void
    1708          40 : SampleChars(FrequencyCollator* collator, const CharT* chars, size_t length)
    1709             : {
    1710             :     // Sample some characters from the middle of the string.
    1711             :     static const int kSampleSize = 128;
    1712             : 
    1713          40 :     int chars_sampled = 0;
    1714          40 :     int half_way = (int(length) - kSampleSize) / 2;
    1715        1052 :     for (size_t i = Max(0, half_way);
    1716        1052 :          i < length && chars_sampled < kSampleSize;
    1717             :          i++, chars_sampled++)
    1718             :     {
    1719        1012 :         collator->CountCharacter(chars[i]);
    1720             :     }
    1721          40 : }
    1722             : 
    1723             : static bool
    1724          40 : IsNativeRegExpEnabled(JSContext* cx)
    1725             : {
    1726             : #ifdef JS_CODEGEN_NONE
    1727             :     return false;
    1728             : #else
    1729          40 :     return cx->options().nativeRegExp();
    1730             : #endif
    1731             : }
    1732             : 
    1733             : RegExpCode
    1734          40 : irregexp::CompilePattern(JSContext* cx, HandleRegExpShared shared, RegExpCompileData* data,
    1735             :                          HandleLinearString sample, bool is_global, bool ignore_case,
    1736             :                          bool is_latin1, bool match_only, bool force_bytecode, bool sticky,
    1737             :                          bool unicode, RegExpShared::JitCodeTables& tables)
    1738             : {
    1739          40 :     if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
    1740           0 :         JS_ReportErrorASCII(cx, "regexp too big");
    1741           0 :         return RegExpCode();
    1742             :     }
    1743             : 
    1744          40 :     LifoAlloc& alloc = cx->tempLifoAlloc();
    1745             :     RegExpCompiler compiler(cx, &alloc, data->capture_count, ignore_case, is_latin1, match_only,
    1746          80 :                             unicode);
    1747             : 
    1748             :     // Sample some characters from the middle of the string.
    1749          40 :     if (sample->hasLatin1Chars()) {
    1750          70 :         JS::AutoCheckCannotGC nogc;
    1751          35 :         SampleChars(compiler.frequency_collator(), sample->latin1Chars(nogc), sample->length());
    1752             :     } else {
    1753          10 :         JS::AutoCheckCannotGC nogc;
    1754           5 :         SampleChars(compiler.frequency_collator(), sample->twoByteChars(nogc), sample->length());
    1755             :     }
    1756             : 
    1757             :     // Wrap the body of the regexp in capture #0.
    1758          40 :     RegExpNode* captured_body = RegExpCapture::ToNode(data->tree,
    1759             :                                                       0,
    1760             :                                                       &compiler,
    1761          80 :                                                       compiler.accept());
    1762          40 :     RegExpNode* node = captured_body;
    1763          40 :     bool is_end_anchored = data->tree->IsAnchoredAtEnd();
    1764          40 :     bool is_start_anchored = sticky || data->tree->IsAnchoredAtStart();
    1765          40 :     int max_length = data->tree->max_match();
    1766          40 :     if (!is_start_anchored) {
    1767             :         // Add a .*? at the beginning, outside the body capture, unless
    1768             :         // this expression is anchored at the beginning.
    1769             :         RegExpNode* loop_node =
    1770          38 :             RegExpQuantifier::ToNode(0,
    1771             :                                      RegExpTree::kInfinity,
    1772             :                                      false,
    1773          38 :                                      alloc.newInfallible<RegExpCharacterClass>('*'),
    1774             :                                      &compiler,
    1775             :                                      captured_body,
    1776          38 :                                      data->contains_anchor);
    1777             : 
    1778          19 :         if (data->contains_anchor) {
    1779             :             // Unroll loop once, to take care of the case that might start
    1780             :             // at the start of input.
    1781           0 :             ChoiceNode* first_step_node = alloc.newInfallible<ChoiceNode>(&alloc, 2);
    1782             :             RegExpNode* char_class =
    1783           0 :                 alloc.newInfallible<TextNode>(alloc.newInfallible<RegExpCharacterClass>('*'), loop_node);
    1784           0 :             first_step_node->AddAlternative(GuardedAlternative(captured_body));
    1785           0 :             first_step_node->AddAlternative(GuardedAlternative(char_class));
    1786           0 :             node = first_step_node;
    1787             :         } else {
    1788          19 :             node = loop_node;
    1789             :         }
    1790             :     }
    1791             : 
    1792          40 :     if (compiler.isRegExpTooBig()) {
    1793           0 :         MOZ_ASSERT(compiler.cx()->isExceptionPending()); // over recursed
    1794           0 :         JS_ReportErrorASCII(cx, "regexp too big");
    1795           0 :         return RegExpCode();
    1796             :     }
    1797             : 
    1798          40 :     if (is_latin1) {
    1799          35 :         node = node->FilterLATIN1(RegExpCompiler::kMaxRecursion, ignore_case, unicode);
    1800             :         // Do it again to propagate the new nodes to places where they were not
    1801             :         // put because they had not been calculated yet.
    1802          35 :         if (node != nullptr) {
    1803          34 :             node = node->FilterLATIN1(RegExpCompiler::kMaxRecursion, ignore_case, unicode);
    1804             :         }
    1805             :     }
    1806             : 
    1807          40 :     if (node == nullptr)
    1808           1 :         node = alloc.newInfallible<EndNode>(&alloc, EndNode::BACKTRACK);
    1809             : 
    1810          80 :     Analysis analysis(cx, ignore_case, is_latin1, unicode);
    1811          40 :     analysis.EnsureAnalyzed(node);
    1812          40 :     if (analysis.has_failed()) {
    1813           0 :         JS_ReportErrorASCII(cx, "%s", analysis.errorMessage());
    1814           0 :         return RegExpCode();
    1815             :     }
    1816             : 
    1817          80 :     Maybe<jit::JitContext> ctx;
    1818          80 :     Maybe<NativeRegExpMacroAssembler> native_assembler;
    1819          80 :     Maybe<InterpretedRegExpMacroAssembler> interpreted_assembler;
    1820             : 
    1821             :     RegExpMacroAssembler* assembler;
    1822         120 :     if (IsNativeRegExpEnabled(cx) &&
    1823          80 :         !force_bytecode &&
    1824         120 :         jit::CanLikelyAllocateMoreExecutableMemory() &&
    1825          40 :         shared->getSource()->length() < 32 * 1024)
    1826             :     {
    1827             :         NativeRegExpMacroAssembler::Mode mode =
    1828          40 :             is_latin1 ? NativeRegExpMacroAssembler::LATIN1
    1829          40 :                       : NativeRegExpMacroAssembler::CHAR16;
    1830             : 
    1831          40 :         ctx.emplace(cx, (jit::TempAllocator*) nullptr);
    1832          40 :         native_assembler.emplace(cx, &alloc, mode, (data->capture_count + 1) * 2, tables);
    1833          40 :         assembler = native_assembler.ptr();
    1834             :     } else {
    1835           0 :         interpreted_assembler.emplace(cx, &alloc, (data->capture_count + 1) * 2);
    1836           0 :         assembler = interpreted_assembler.ptr();
    1837             :     }
    1838             : 
    1839             :     // Inserted here, instead of in Assembler, because it depends on information
    1840             :     // in the AST that isn't replicated in the Node structure.
    1841             :     static const int kMaxBacksearchLimit = 1024;
    1842          49 :     if (is_end_anchored &&
    1843          11 :         !is_start_anchored &&
    1844             :         max_length < kMaxBacksearchLimit) {
    1845           0 :         assembler->SetCurrentPositionFromEnd(max_length);
    1846             :     }
    1847             : 
    1848          40 :     if (is_global) {
    1849           0 :         assembler->set_global_mode((data->tree->min_match() > 0)
    1850             :                                    ? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
    1851           0 :                                    : RegExpMacroAssembler::GLOBAL);
    1852             :     }
    1853             : 
    1854          40 :     return compiler.Assemble(cx, assembler, node, data->capture_count);
    1855             : }
    1856             : 
    1857             : template <typename CharT>
    1858             : RegExpRunStatus
    1859         245 : irregexp::ExecuteCode(JSContext* cx, jit::JitCode* codeBlock, const CharT* chars, size_t start,
    1860             :                       size_t length, MatchPairs* matches, size_t* endIndex)
    1861             : {
    1862             :     typedef void (*RegExpCodeSignature)(InputOutputData*);
    1863             : 
    1864         245 :     InputOutputData data(chars, chars + length, start, matches, endIndex);
    1865             : 
    1866         245 :     RegExpCodeSignature function = reinterpret_cast<RegExpCodeSignature>(codeBlock->raw());
    1867             : 
    1868             :     {
    1869         490 :         JS::AutoSuppressGCAnalysis nogc;
    1870         245 :         CALL_GENERATED_1(function, &data);
    1871             :     }
    1872             : 
    1873         245 :     return (RegExpRunStatus) data.result;
    1874             : }
    1875             : 
    1876             : template RegExpRunStatus
    1877             : irregexp::ExecuteCode(JSContext* cx, jit::JitCode* codeBlock, const Latin1Char* chars, size_t start,
    1878             :                       size_t length, MatchPairs* matches, size_t* endIndex);
    1879             : 
    1880             : template RegExpRunStatus
    1881             : irregexp::ExecuteCode(JSContext* cx, jit::JitCode* codeBlock, const char16_t* chars, size_t start,
    1882             :                       size_t length, MatchPairs* matches, size_t* endIndex);
    1883             : 
    1884             : // -------------------------------------------------------------------
    1885             : // Tree to graph conversion
    1886             : 
    1887             : RegExpNode*
    1888         122 : RegExpAtom::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    1889             : {
    1890             :     TextElementVector* elms =
    1891         122 :         compiler->alloc()->newInfallible<TextElementVector>(*compiler->alloc());
    1892         122 :     elms->append(TextElement::Atom(this));
    1893         122 :     return compiler->alloc()->newInfallible<TextNode>(elms, on_success);
    1894             : }
    1895             : 
    1896             : RegExpNode*
    1897           3 : RegExpText::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    1898             : {
    1899           3 :     return compiler->alloc()->newInfallible<TextNode>(&elements_, on_success);
    1900             : }
    1901             : 
    1902             : RegExpNode*
    1903          90 : RegExpCharacterClass::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    1904             : {
    1905          90 :     return compiler->alloc()->newInfallible<TextNode>(this, on_success);
    1906             : }
    1907             : 
    1908             : RegExpNode*
    1909          24 : RegExpDisjunction::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    1910             : {
    1911          24 :     if (!compiler->CheckOverRecursed())
    1912           0 :         return on_success;
    1913             : 
    1914          24 :     const RegExpTreeVector& alternatives = this->alternatives();
    1915          24 :     size_t length = alternatives.length();
    1916          24 :     ChoiceNode* result = compiler->alloc()->newInfallible<ChoiceNode>(compiler->alloc(), length);
    1917         121 :     for (size_t i = 0; i < length; i++) {
    1918          97 :         GuardedAlternative alternative(alternatives[i]->ToNode(compiler, on_success));
    1919          97 :         result->AddAlternative(alternative);
    1920             :     }
    1921          24 :     return result;
    1922             : }
    1923             : 
    1924             : RegExpNode*
    1925          42 : RegExpQuantifier::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    1926             : {
    1927          84 :     return ToNode(min(),
    1928             :                   max(),
    1929          42 :                   is_greedy(),
    1930             :                   body(),
    1931             :                   compiler,
    1932          42 :                   on_success);
    1933             : }
    1934             : 
    1935             : // Scoped object to keep track of how much we unroll quantifier loops in the
    1936             : // regexp graph generator.
    1937             : class RegExpExpansionLimiter
    1938             : {
    1939             :   public:
    1940             :     static const int kMaxExpansionFactor = 6;
    1941          80 :     RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
    1942          80 :       : compiler_(compiler),
    1943          80 :         saved_expansion_factor_(compiler->current_expansion_factor()),
    1944         160 :         ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor)
    1945             :     {
    1946          80 :         MOZ_ASSERT(factor > 0);
    1947          80 :         if (ok_to_expand_) {
    1948          80 :             if (factor > kMaxExpansionFactor) {
    1949             :                 // Avoid integer overflow of the current expansion factor.
    1950           2 :                 ok_to_expand_ = false;
    1951           2 :                 compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
    1952             :             } else {
    1953          78 :                 int new_factor = saved_expansion_factor_ * factor;
    1954          78 :                 ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
    1955          78 :                 compiler->set_current_expansion_factor(new_factor);
    1956             :             }
    1957             :         }
    1958          80 :     }
    1959             : 
    1960         160 :     ~RegExpExpansionLimiter() {
    1961          80 :         compiler_->set_current_expansion_factor(saved_expansion_factor_);
    1962          80 :     }
    1963             : 
    1964          26 :     bool ok_to_expand() { return ok_to_expand_; }
    1965             : 
    1966             :   private:
    1967             :     RegExpCompiler* compiler_;
    1968             :     int saved_expansion_factor_;
    1969             :     bool ok_to_expand_;
    1970             : };
    1971             : 
    1972             : /* static */ RegExpNode*
    1973          84 : RegExpQuantifier::ToNode(int min,
    1974             :                          int max,
    1975             :                          bool is_greedy,
    1976             :                          RegExpTree* body,
    1977             :                          RegExpCompiler* compiler,
    1978             :                          RegExpNode* on_success,
    1979             :                          bool not_at_start /* = false */)
    1980             : {
    1981             :     // x{f, t} becomes this:
    1982             :     //
    1983             :     //             (r++)<-.
    1984             :     //               |     `
    1985             :     //               |     (x)
    1986             :     //               v     ^
    1987             :     //      (r=0)-->(?)---/ [if r < t]
    1988             :     //               |
    1989             :     //   [if r >= f] \----> ...
    1990             :     //
    1991             : 
    1992             :     // 15.10.2.5 RepeatMatcher algorithm.
    1993             :     // The parser has already eliminated the case where max is 0.  In the case
    1994             :     // where max_match is zero the parser has removed the quantifier if min was
    1995             :     // > 0 and removed the atom if min was 0.  See AddQuantifierToAtom.
    1996             : 
    1997             :     // If we know that we cannot match zero length then things are a little
    1998             :     // simpler since we don't need to make the special zero length match check
    1999             :     // from step 2.1.  If the min and max are small we can unroll a little in
    2000             :     // this case.
    2001             :     static const int kMaxUnrolledMinMatches = 3;  // Unroll (foo)+ and (foo){3,}
    2002             :     static const int kMaxUnrolledMaxMatches = 3;  // Unroll (foo)? and (foo){x,3}
    2003             : 
    2004          84 :     if (max == 0)
    2005           1 :         return on_success;  // This can happen due to recursion.
    2006             : 
    2007          83 :     if (!compiler->CheckOverRecursed())
    2008           0 :         return on_success;
    2009             : 
    2010          83 :     bool body_can_be_empty = (body->min_match() == 0);
    2011          83 :     int body_start_reg = RegExpCompiler::kNoRegister;
    2012          83 :     Interval capture_registers = body->CaptureRegisters();
    2013          83 :     bool needs_capture_clearing = !capture_registers.is_empty();
    2014          83 :     LifoAlloc* alloc = compiler->alloc();
    2015             : 
    2016          83 :     if (body_can_be_empty) {
    2017           0 :         body_start_reg = compiler->AllocateRegister();
    2018          83 :     } else if (!needs_capture_clearing) {
    2019             :         // Only unroll if there are no captures and the body can't be
    2020             :         // empty.
    2021             :         {
    2022         131 :             RegExpExpansionLimiter limiter(compiler, min + ((max != min) ? 1 : 0));
    2023          77 :             if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
    2024          23 :                 int new_max = (max == kInfinity) ? max : max - min;
    2025             :                 // Recurse once to get the loop or optional matches after the fixed
    2026             :                 // ones.
    2027          23 :                 RegExpNode* answer = ToNode(0, new_max, is_greedy, body, compiler, on_success, true);
    2028             :                 // Unroll the forced matches from 0 to min.  This can cause chains of
    2029             :                 // TextNodes (which the parser does not generate).  These should be
    2030             :                 // combined if it turns out they hinder good code generation.
    2031          47 :                 for (int i = 0; i < min; i++)
    2032          24 :                     answer = body->ToNode(compiler, answer);
    2033          23 :                 return answer;
    2034             :             }
    2035             :         }
    2036          54 :         if (max <= kMaxUnrolledMaxMatches && min == 0) {
    2037           3 :             MOZ_ASSERT(max > 0);  // Due to the 'if' above.
    2038           3 :             RegExpExpansionLimiter limiter(compiler, max);
    2039           3 :             if (limiter.ok_to_expand()) {
    2040             :                 // Unroll the optional matches up to max.
    2041           3 :                 RegExpNode* answer = on_success;
    2042           6 :                 for (int i = 0; i < max; i++) {
    2043           3 :                     ChoiceNode* alternation = alloc->newInfallible<ChoiceNode>(alloc, 2);
    2044           3 :                     if (is_greedy) {
    2045           3 :                         alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, answer)));
    2046           3 :                         alternation->AddAlternative(GuardedAlternative(on_success));
    2047             :                     } else {
    2048           0 :                         alternation->AddAlternative(GuardedAlternative(on_success));
    2049           0 :                         alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler, answer)));
    2050             :                     }
    2051           3 :                     answer = alternation;
    2052           3 :                     if (not_at_start) alternation->set_not_at_start();
    2053             :                 }
    2054           3 :                 return answer;
    2055             :             }
    2056             :         }
    2057             :     }
    2058          57 :     bool has_min = min > 0;
    2059          57 :     bool has_max = max < RegExpTree::kInfinity;
    2060          57 :     bool needs_counter = has_min || has_max;
    2061             :     int reg_ctr = needs_counter
    2062          57 :         ? compiler->AllocateRegister()
    2063          57 :         : RegExpCompiler::kNoRegister;
    2064          57 :     LoopChoiceNode* center = alloc->newInfallible<LoopChoiceNode>(alloc, body->min_match() == 0);
    2065          57 :     if (not_at_start)
    2066          22 :         center->set_not_at_start();
    2067             :     RegExpNode* loop_return = needs_counter
    2068          57 :         ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
    2069          57 :         : static_cast<RegExpNode*>(center);
    2070          57 :     if (body_can_be_empty) {
    2071             :         // If the body can be empty we need to check if it was and then
    2072             :         // backtrack.
    2073           0 :         loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
    2074             :                                                   reg_ctr,
    2075             :                                                   min,
    2076           0 :                                                   loop_return);
    2077             :     }
    2078          57 :     RegExpNode* body_node = body->ToNode(compiler, loop_return);
    2079          57 :     if (body_can_be_empty) {
    2080             :         // If the body can be empty we need to store the start position
    2081             :         // so we can bail out if it was empty.
    2082           0 :         body_node = ActionNode::StorePosition(body_start_reg, false, body_node);
    2083             :     }
    2084          57 :     if (needs_capture_clearing) {
    2085             :         // Before entering the body of this loop we need to clear captures.
    2086           6 :         body_node = ActionNode::ClearCaptures(capture_registers, body_node);
    2087             :     }
    2088          57 :     GuardedAlternative body_alt(body_node);
    2089          57 :     if (has_max) {
    2090          11 :         Guard* body_guard = alloc->newInfallible<Guard>(reg_ctr, Guard::LT, max);
    2091          11 :         body_alt.AddGuard(alloc, body_guard);
    2092             :     }
    2093          57 :     GuardedAlternative rest_alt(on_success);
    2094          57 :     if (has_min) {
    2095           5 :         Guard* rest_guard = alloc->newInfallible<Guard>(reg_ctr, Guard::GEQ, min);
    2096           5 :         rest_alt.AddGuard(alloc, rest_guard);
    2097             :     }
    2098          57 :     if (is_greedy) {
    2099          37 :         center->AddLoopAlternative(body_alt);
    2100          37 :         center->AddContinueAlternative(rest_alt);
    2101             :     } else {
    2102          20 :         center->AddContinueAlternative(rest_alt);
    2103          20 :         center->AddLoopAlternative(body_alt);
    2104             :     }
    2105          57 :     if (needs_counter)
    2106          11 :         return ActionNode::SetRegister(reg_ctr, 0, center);
    2107          46 :     return center;
    2108             : }
    2109             : 
    2110             : RegExpNode*
    2111          44 : RegExpAssertion::ToNode(RegExpCompiler* compiler,
    2112             :                         RegExpNode* on_success)
    2113             : {
    2114          44 :     NodeInfo info;
    2115          44 :     LifoAlloc* alloc = compiler->alloc();
    2116             : 
    2117          44 :     switch (assertion_type()) {
    2118             :       case START_OF_LINE:
    2119           0 :         return AssertionNode::AfterNewline(on_success);
    2120             :       case START_OF_INPUT:
    2121          21 :         return AssertionNode::AtStart(on_success);
    2122             :       case BOUNDARY:
    2123           2 :         return AssertionNode::AtBoundary(on_success);
    2124             :       case NON_BOUNDARY:
    2125           0 :         return AssertionNode::AtNonBoundary(on_success);
    2126             :       case END_OF_INPUT:
    2127          21 :         return AssertionNode::AtEnd(on_success);
    2128             :       case END_OF_LINE: {
    2129             :         // Compile $ in multiline regexps as an alternation with a positive
    2130             :         // lookahead in one side and an end-of-input on the other side.
    2131             :         // We need two registers for the lookahead.
    2132           0 :         int stack_pointer_register = compiler->AllocateRegister();
    2133           0 :         int position_register = compiler->AllocateRegister();
    2134             :         // The ChoiceNode to distinguish between a newline and end-of-input.
    2135           0 :         ChoiceNode* result = alloc->newInfallible<ChoiceNode>(alloc, 2);
    2136             :         // Create a newline atom.
    2137           0 :         CharacterRangeVector* newline_ranges = alloc->newInfallible<CharacterRangeVector>(*alloc);
    2138           0 :         CharacterRange::AddClassEscape(alloc, 'n', newline_ranges);
    2139           0 :         RegExpCharacterClass* newline_atom = alloc->newInfallible<RegExpCharacterClass>('n');
    2140             :         TextNode* newline_matcher =
    2141           0 :             alloc->newInfallible<TextNode>(newline_atom,
    2142           0 :                 ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
    2143             :                                                     position_register,
    2144             :                                                     0,  // No captures inside.
    2145             :                                                     -1,  // Ignored if no captures.
    2146           0 :                                                     on_success));
    2147             :         // Create an end-of-input matcher.
    2148             :         RegExpNode* end_of_line =
    2149           0 :             ActionNode::BeginSubmatch(stack_pointer_register, position_register, newline_matcher);
    2150             : 
    2151             :         // Add the two alternatives to the ChoiceNode.
    2152           0 :         GuardedAlternative eol_alternative(end_of_line);
    2153           0 :         result->AddAlternative(eol_alternative);
    2154           0 :         GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
    2155           0 :         result->AddAlternative(end_alternative);
    2156           0 :         return result;
    2157             :       }
    2158             :       case NOT_AFTER_LEAD_SURROGATE:
    2159           0 :         return AssertionNode::NotAfterLeadSurrogate(on_success);
    2160             :       case NOT_IN_SURROGATE_PAIR:
    2161           0 :         return AssertionNode::NotInSurrogatePair(on_success);
    2162             :       default:
    2163           0 :         MOZ_CRASH("Bad assertion type");
    2164             :     }
    2165             :     return on_success;
    2166             : }
    2167             : 
    2168             : RegExpNode*
    2169           0 : RegExpBackReference::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    2170             : {
    2171           0 :     return compiler->alloc()->newInfallible<BackReferenceNode>(RegExpCapture::StartRegister(index()),
    2172           0 :                                                                RegExpCapture::EndRegister(index()),
    2173           0 :                                                                on_success);
    2174             : }
    2175             : 
    2176             : RegExpNode*
    2177           1 : RegExpEmpty::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    2178             : {
    2179           1 :     return on_success;
    2180             : }
    2181             : 
    2182             : RegExpNode*
    2183          12 : RegExpLookahead::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    2184             : {
    2185          12 :     int stack_pointer_register = compiler->AllocateRegister();
    2186          12 :     int position_register = compiler->AllocateRegister();
    2187             : 
    2188          12 :     const int registers_per_capture = 2;
    2189          12 :     const int register_of_first_capture = 2;
    2190          12 :     int register_count = capture_count_ * registers_per_capture;
    2191             :     int register_start =
    2192          12 :         register_of_first_capture + capture_from_ * registers_per_capture;
    2193             : 
    2194          12 :     if (!compiler->CheckOverRecursed())
    2195           0 :         return on_success;
    2196             : 
    2197          12 :     if (is_positive()) {
    2198             :         RegExpNode* bodyNode =
    2199          20 :             body()->ToNode(compiler,
    2200          10 :                            ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
    2201             :                                                                position_register,
    2202             :                                                                register_count,
    2203             :                                                                register_start,
    2204          20 :                                                                on_success));
    2205          10 :         return ActionNode::BeginSubmatch(stack_pointer_register,
    2206             :                                          position_register,
    2207          10 :                                          bodyNode);
    2208             :     }
    2209             : 
    2210             :     // We use a ChoiceNode for a negative lookahead because it has most of
    2211             :     // the characteristics we need.  It has the body of the lookahead as its
    2212             :     // first alternative and the expression after the lookahead of the second
    2213             :     // alternative.  If the first alternative succeeds then the
    2214             :     // NegativeSubmatchSuccess will unwind the stack including everything the
    2215             :     // choice node set up and backtrack.  If the first alternative fails then
    2216             :     // the second alternative is tried, which is exactly the desired result
    2217             :     // for a negative lookahead.  The NegativeLookaheadChoiceNode is a special
    2218             :     // ChoiceNode that knows to ignore the first exit when calculating quick
    2219             :     // checks.
    2220           2 :     LifoAlloc* alloc = compiler->alloc();
    2221             : 
    2222             :     RegExpNode* success =
    2223           2 :         alloc->newInfallible<NegativeSubmatchSuccess>(alloc,
    2224             :                                                       stack_pointer_register,
    2225             :                                                       position_register,
    2226             :                                                       register_count,
    2227           2 :                                                       register_start);
    2228           2 :     GuardedAlternative body_alt(body()->ToNode(compiler, success));
    2229             : 
    2230             :     ChoiceNode* choice_node =
    2231           2 :         alloc->newInfallible<NegativeLookaheadChoiceNode>(alloc, body_alt, GuardedAlternative(on_success));
    2232             : 
    2233           2 :     return ActionNode::BeginSubmatch(stack_pointer_register,
    2234             :                                      position_register,
    2235           2 :                                      choice_node);
    2236             : }
    2237             : 
    2238             : RegExpNode*
    2239          24 : RegExpCapture::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    2240             : {
    2241          24 :     return ToNode(body(), index(), compiler, on_success);
    2242             : }
    2243             : 
    2244             : /* static */ RegExpNode*
    2245          64 : RegExpCapture::ToNode(RegExpTree* body,
    2246             :                       int index,
    2247             :                       RegExpCompiler* compiler,
    2248             :                       RegExpNode* on_success)
    2249             : {
    2250          64 :     if (!compiler->CheckOverRecursed())
    2251           0 :         return on_success;
    2252             : 
    2253          64 :     int start_reg = RegExpCapture::StartRegister(index);
    2254          64 :     int end_reg = RegExpCapture::EndRegister(index);
    2255          64 :     RegExpNode* store_end = ActionNode::StorePosition(end_reg, true, on_success);
    2256          64 :     RegExpNode* body_node = body->ToNode(compiler, store_end);
    2257          64 :     return ActionNode::StorePosition(start_reg, true, body_node);
    2258             : }
    2259             : 
    2260             : RegExpNode*
    2261          49 : RegExpAlternative::ToNode(RegExpCompiler* compiler, RegExpNode* on_success)
    2262             : {
    2263          49 :     if (!compiler->CheckOverRecursed())
    2264           0 :         return on_success;
    2265             : 
    2266          49 :     const RegExpTreeVector& children = nodes();
    2267          49 :     RegExpNode* current = on_success;
    2268         203 :     for (int i = children.length() - 1; i >= 0; i--)
    2269         154 :         current = children[i]->ToNode(compiler, current);
    2270          49 :     return current;
    2271             : }
    2272             : 
    2273             : // -------------------------------------------------------------------
    2274             : // BoyerMooreLookahead
    2275             : 
    2276             : ContainedInLattice
    2277         404 : irregexp::AddRange(ContainedInLattice containment,
    2278             :                    const int* ranges,
    2279             :                    int ranges_length,
    2280             :                    Interval new_range)
    2281             : {
    2282         404 :     MOZ_ASSERT((ranges_length & 1) == 1);
    2283         404 :     MOZ_ASSERT(ranges[ranges_length - 1] == kMaxUtf16CodeUnit + 1);
    2284         404 :     if (containment == kLatticeUnknown) return containment;
    2285         393 :     bool inside = false;
    2286         393 :     int last = 0;
    2287        1279 :     for (int i = 0; i < ranges_length; inside = !inside, last = ranges[i], i++) {
    2288             :         // Consider the range from last to ranges[i].
    2289             :         // We haven't got to the new range yet.
    2290        1279 :         if (ranges[i] <= new_range.from())
    2291         886 :             continue;
    2292             : 
    2293             :         // New range is wholly inside last-ranges[i].  Note that new_range.to() is
    2294             :         // inclusive, but the values in ranges are not.
    2295         393 :         if (last <= new_range.from() && new_range.to() < ranges[i])
    2296         392 :             return Combine(containment, inside ? kLatticeIn : kLatticeOut);
    2297             : 
    2298           1 :         return kLatticeUnknown;
    2299             :     }
    2300           0 :     return containment;
    2301             : }
    2302             : 
    2303             : void
    2304          66 : BoyerMoorePositionInfo::Set(int character)
    2305             : {
    2306          66 :     SetInterval(Interval(character, character));
    2307          66 : }
    2308             : 
    2309             : void
    2310         101 : BoyerMoorePositionInfo::SetInterval(const Interval& interval)
    2311             : {
    2312         101 :     s_ = AddRange(s_, kSpaceRanges, kSpaceRangeCount, interval);
    2313         101 :     if (unicode_ignore_case_)
    2314           0 :         w_ = AddRange(w_, kIgnoreCaseWordRanges, kIgnoreCaseWordRangeCount, interval);
    2315             :     else
    2316         101 :         w_ = AddRange(w_, kWordRanges, kWordRangeCount, interval);
    2317         101 :     d_ = AddRange(d_, kDigitRanges, kDigitRangeCount, interval);
    2318         101 :     surrogate_ =
    2319         101 :         AddRange(surrogate_, kSurrogateRanges, kSurrogateRangeCount, interval);
    2320         101 :     if (interval.to() - interval.from() >= kMapSize - 1) {
    2321           0 :         if (map_count_ != kMapSize) {
    2322           0 :             map_count_ = kMapSize;
    2323           0 :             for (int i = 0; i < kMapSize; i++)
    2324           0 :                 map_[i] = true;
    2325             :         }
    2326           0 :         return;
    2327             :     }
    2328         384 :     for (int i = interval.from(); i <= interval.to(); i++) {
    2329         283 :         int mod_character = (i & kMask);
    2330         283 :         if (!map_[mod_character]) {
    2331         237 :             map_count_++;
    2332         237 :             map_[mod_character] = true;
    2333             :         }
    2334         283 :         if (map_count_ == kMapSize)
    2335           0 :             return;
    2336             :     }
    2337             : }
    2338             : 
    2339             : void
    2340           2 : BoyerMoorePositionInfo::SetAll()
    2341             : {
    2342           2 :     s_ = w_ = d_ = kLatticeUnknown;
    2343           2 :     if (map_count_ != kMapSize) {
    2344           2 :         map_count_ = kMapSize;
    2345         258 :         for (int i = 0; i < kMapSize; i++)
    2346         256 :             map_[i] = true;
    2347             :     }
    2348           2 : }
    2349             : 
    2350          19 : BoyerMooreLookahead::BoyerMooreLookahead(LifoAlloc* alloc, size_t length, RegExpCompiler* compiler)
    2351          19 :   : length_(length), compiler_(compiler), bitmaps_(*alloc)
    2352             : {
    2353          19 :     bool unicode_ignore_case = compiler->unicode() && compiler->ignore_case();
    2354          19 :     max_char_ = MaximumCharacter(compiler->latin1());
    2355             : 
    2356          19 :     bitmaps_.reserve(length);
    2357          67 :     for (size_t i = 0; i < length; i++)
    2358          48 :         bitmaps_.append(alloc->newInfallible<BoyerMoorePositionInfo>(alloc, unicode_ignore_case));
    2359          19 : }
    2360             : 
    2361             : // Find the longest range of lookahead that has the fewest number of different
    2362             : // characters that can occur at a given position.  Since we are optimizing two
    2363             : // different parameters at once this is a tradeoff.
    2364          18 : bool BoyerMooreLookahead::FindWorthwhileInterval(int* from, int* to) {
    2365          18 :   int biggest_points = 0;
    2366             :   // If more than 32 characters out of 128 can occur it is unlikely that we can
    2367             :   // be lucky enough to step forwards much of the time.
    2368          18 :   const int kMaxMax = 32;
    2369          72 :   for (int max_number_of_chars = 4;
    2370          72 :        max_number_of_chars < kMaxMax;
    2371          54 :        max_number_of_chars *= 2) {
    2372             :     biggest_points =
    2373          54 :         FindBestInterval(max_number_of_chars, biggest_points, from, to);
    2374             :   }
    2375          18 :   if (biggest_points == 0) return false;
    2376          15 :   return true;
    2377             : }
    2378             : 
    2379             : // Find the highest-points range between 0 and length_ where the character
    2380             : // information is not too vague.  'Too vague' means that there are more than
    2381             : // max_number_of_chars that can occur at this position.  Calculates the number
    2382             : // of points as the product of width-of-the-range and
    2383             : // probability-of-finding-one-of-the-characters, where the probability is
    2384             : // calculated using the frequency distribution of the sample subject string.
    2385             : int
    2386          54 : BoyerMooreLookahead::FindBestInterval(int max_number_of_chars, int old_biggest_points,
    2387             :                                       int* from, int* to)
    2388             : {
    2389          54 :     int biggest_points = old_biggest_points;
    2390             :     static const int kSize = RegExpMacroAssembler::kTableSize;
    2391          96 :     for (int i = 0; i < length_; ) {
    2392          90 :         while (i < length_ && Count(i) > max_number_of_chars) i++;
    2393          63 :         if (i == length_) break;
    2394          42 :         int remembered_from = i;
    2395             :         bool union_map[kSize];
    2396          42 :         for (int j = 0; j < kSize; j++) union_map[j] = false;
    2397         270 :         while (i < length_ && Count(i) <= max_number_of_chars) {
    2398         114 :             BoyerMoorePositionInfo* map = bitmaps_[i];
    2399         114 :             for (int j = 0; j < kSize; j++) union_map[j] |= map->at(j);
    2400         114 :             i++;
    2401             :         }
    2402          42 :         int frequency = 0;
    2403        5418 :         for (int j = 0; j < kSize; j++) {
    2404        5376 :             if (union_map[j]) {
    2405             :                 // Add 1 to the frequency to give a small per-character boost for
    2406             :                 // the cases where our sampling is not good enough and many
    2407             :                 // characters have a frequency of zero.  This means the frequency
    2408             :                 // can theoretically be up to 2*kSize though we treat it mostly as
    2409             :                 // a fraction of kSize.
    2410         179 :                 frequency += compiler_->frequency_collator()->Frequency(j) + 1;
    2411             :             }
    2412             :         }
    2413             :         // We use the probability of skipping times the distance we are skipping to
    2414             :         // judge the effectiveness of this.  Actually we have a cut-off:  By
    2415             :         // dividing by 2 we switch off the skipping if the probability of skipping
    2416             :         // is less than 50%.  This is because the multibyte mask-and-compare
    2417             :         // skipping in quickcheck is more likely to do well on this case.
    2418          99 :         bool in_quickcheck_range = ((i - remembered_from < 4) ||
    2419          60 :                                     (compiler_->latin1() ? remembered_from <= 4 : remembered_from <= 2));
    2420             :         // Called 'probability' but it is only a rough estimate and can actually
    2421             :         // be outside the 0-kSize range.
    2422          42 :         int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
    2423          42 :         int points = (i - remembered_from) * probability;
    2424          42 :         if (points > biggest_points) {
    2425          15 :             *from = remembered_from;
    2426          15 :             *to = i - 1;
    2427          15 :             biggest_points = points;
    2428             :         }
    2429             :     }
    2430          54 :     return biggest_points;
    2431             : }
    2432             : 
    2433             : // Take all the characters that will not prevent a successful match if they
    2434             : // occur in the subject string in the range between min_lookahead and
    2435             : // max_lookahead (inclusive) measured from the current position.  If the
    2436             : // character at max_lookahead offset is not one of these characters, then we
    2437             : // can safely skip forwards by the number of characters in the range.
    2438          10 : int BoyerMooreLookahead::GetSkipTable(int min_lookahead,
    2439             :                                       int max_lookahead,
    2440             :                                       uint8_t* boolean_skip_table)
    2441             : {
    2442          10 :     const int kSize = RegExpMacroAssembler::kTableSize;
    2443             : 
    2444          10 :     const int kSkipArrayEntry = 0;
    2445          10 :     const int kDontSkipArrayEntry = 1;
    2446             : 
    2447        1290 :     for (int i = 0; i < kSize; i++)
    2448        1280 :         boolean_skip_table[i] = kSkipArrayEntry;
    2449          10 :     int skip = max_lookahead + 1 - min_lookahead;
    2450             : 
    2451          43 :     for (int i = max_lookahead; i >= min_lookahead; i--) {
    2452          33 :         BoyerMoorePositionInfo* map = bitmaps_[i];
    2453        4257 :         for (int j = 0; j < kSize; j++) {
    2454        4224 :             if (map->at(j))
    2455          62 :                 boolean_skip_table[j] = kDontSkipArrayEntry;
    2456             :         }
    2457             :     }
    2458             : 
    2459          10 :     return skip;
    2460             : }
    2461             : 
    2462             : // See comment on the implementation of GetSkipTable.
    2463             : bool
    2464          18 : BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm)
    2465             : {
    2466          18 :     const int kSize = RegExpMacroAssembler::kTableSize;
    2467             : 
    2468          18 :     int min_lookahead = 0;
    2469          18 :     int max_lookahead = 0;
    2470             : 
    2471          18 :     if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead))
    2472           3 :         return false;
    2473             : 
    2474          15 :     bool found_single_character = false;
    2475          15 :     int single_character = 0;
    2476          23 :     for (int i = max_lookahead; i >= min_lookahead; i--) {
    2477          18 :         BoyerMoorePositionInfo* map = bitmaps_[i];
    2478          28 :         if (map->map_count() > 1 ||
    2479           3 :             (found_single_character && map->map_count() != 0)) {
    2480          10 :             found_single_character = false;
    2481          10 :             break;
    2482             :         }
    2483         619 :         for (int j = 0; j < kSize; j++) {
    2484         619 :             if (map->at(j)) {
    2485           8 :                 found_single_character = true;
    2486           8 :                 single_character = j;
    2487           8 :                 break;
    2488             :             }
    2489             :         }
    2490             :     }
    2491             : 
    2492          15 :     int lookahead_width = max_lookahead + 1 - min_lookahead;
    2493             : 
    2494          15 :     if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
    2495             :         // The mask-compare can probably handle this better.
    2496           5 :         return false;
    2497             :     }
    2498             : 
    2499          10 :     if (found_single_character) {
    2500           0 :         jit::Label cont, again;
    2501           0 :         masm->Bind(&again);
    2502           0 :         masm->LoadCurrentCharacter(max_lookahead, &cont, true);
    2503           0 :         if (max_char_ > kSize) {
    2504           0 :             masm->CheckCharacterAfterAnd(single_character,
    2505             :                                          RegExpMacroAssembler::kTableMask,
    2506           0 :                                          &cont);
    2507             :         } else {
    2508           0 :             masm->CheckCharacter(single_character, &cont);
    2509             :         }
    2510           0 :         masm->AdvanceCurrentPosition(lookahead_width);
    2511           0 :         masm->JumpOrBacktrack(&again);
    2512           0 :         masm->Bind(&cont);
    2513           0 :         return true;
    2514             :     }
    2515             : 
    2516          20 :     RegExpShared::JitCodeTable boolean_skip_table;
    2517             :     {
    2518          20 :         AutoEnterOOMUnsafeRegion oomUnsafe;
    2519          10 :         boolean_skip_table.reset(static_cast<uint8_t*>(js_malloc(kSize)));
    2520          10 :         if (!boolean_skip_table)
    2521           0 :             oomUnsafe.crash("Table malloc");
    2522             :     }
    2523             : 
    2524          10 :     int skip_distance = GetSkipTable(min_lookahead, max_lookahead, boolean_skip_table.get());
    2525          10 :     MOZ_ASSERT(skip_distance != 0);
    2526             : 
    2527          20 :     jit::Label cont, again;
    2528          10 :     masm->Bind(&again);
    2529          10 :     masm->LoadCurrentCharacter(max_lookahead, &cont, true);
    2530          10 :     masm->CheckBitInTable(Move(boolean_skip_table), &cont);
    2531          10 :     masm->AdvanceCurrentPosition(skip_distance);
    2532          10 :     masm->JumpOrBacktrack(&again);
    2533          10 :     masm->Bind(&cont);
    2534             : 
    2535          10 :     return true;
    2536             : }
    2537             : 
    2538             : bool
    2539         300 : RegExpCompiler::CheckOverRecursed()
    2540             : {
    2541         300 :     if (!CheckRecursionLimit(cx())) {
    2542           0 :         SetRegExpTooBig();
    2543           0 :         return false;
    2544             :     }
    2545             : 
    2546         300 :     return true;
    2547             : }
    2548             : 
    2549             : bool
    2550          68 : BoyerMooreLookahead::CheckOverRecursed()
    2551             : {
    2552          68 :     return compiler()->CheckOverRecursed();
    2553             : }
    2554             : 
    2555             : // -------------------------------------------------------------------
    2556             : // Trace
    2557             : 
    2558         589 : bool Trace::DeferredAction::Mentions(int that)
    2559             : {
    2560         589 :     if (action_type() == ActionNode::CLEAR_CAPTURES) {
    2561          24 :         Interval range = static_cast<DeferredClearCaptures*>(this)->range();
    2562          24 :         return range.Contains(that);
    2563             :     }
    2564         565 :     return reg() == that;
    2565             : }
    2566             : 
    2567          40 : bool Trace::mentions_reg(int reg)
    2568             : {
    2569          40 :     for (DeferredAction* action = actions_; action != nullptr; action = action->next()) {
    2570           0 :         if (action->Mentions(reg))
    2571           0 :             return true;
    2572             :     }
    2573          40 :     return false;
    2574             : }
    2575             : 
    2576             : bool
    2577           0 : Trace::GetStoredPosition(int reg, int* cp_offset)
    2578             : {
    2579           0 :     MOZ_ASSERT(0 == *cp_offset);
    2580           0 :     for (DeferredAction* action = actions_; action != nullptr; action = action->next()) {
    2581           0 :         if (action->Mentions(reg)) {
    2582           0 :             if (action->action_type() == ActionNode::STORE_POSITION) {
    2583           0 :                 *cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
    2584           0 :                 return true;
    2585             :             }
    2586           0 :             return false;
    2587             :         }
    2588             :     }
    2589           0 :     return false;
    2590             : }
    2591             : 
    2592             : int
    2593         166 : Trace::FindAffectedRegisters(LifoAlloc* alloc, OutSet* affected_registers)
    2594             : {
    2595         166 :     int max_register = RegExpCompiler::kNoRegister;
    2596         439 :     for (DeferredAction* action = actions_; action != nullptr; action = action->next()) {
    2597         273 :         if (action->action_type() == ActionNode::CLEAR_CAPTURES) {
    2598           8 :             Interval range = static_cast<DeferredClearCaptures*>(action)->range();
    2599          28 :             for (int i = range.from(); i <= range.to(); i++)
    2600          20 :                 affected_registers->Set(alloc, i);
    2601           8 :             if (range.to() > max_register) max_register = range.to();
    2602             :         } else {
    2603         265 :             affected_registers->Set(alloc, action->reg());
    2604         265 :             if (action->reg() > max_register) max_register = action->reg();
    2605             :         }
    2606             :     }
    2607         166 :     return max_register;
    2608             : }
    2609             : 
    2610             : void
    2611         166 : Trace::RestoreAffectedRegisters(RegExpMacroAssembler* assembler,
    2612             :                                 int max_register,
    2613             :                                 OutSet& registers_to_pop,
    2614             :                                 OutSet& registers_to_clear)
    2615             : {
    2616         870 :     for (int reg = max_register; reg >= 0; reg--) {
    2617         704 :         if (registers_to_pop.Get(reg)) assembler->PopRegister(reg);
    2618         660 :         else if (registers_to_clear.Get(reg)) {
    2619          52 :             int clear_to = reg;
    2620          82 :             while (reg > 0 && registers_to_clear.Get(reg - 1))
    2621          15 :                 reg--;
    2622          52 :             assembler->ClearRegisters(reg, clear_to);
    2623             :         }
    2624             :     }
    2625         166 : }
    2626             : 
    2627             : enum DeferredActionUndoType {
    2628             :     DEFER_IGNORE,
    2629             :     DEFER_RESTORE,
    2630             :     DEFER_CLEAR
    2631             : };
    2632             : 
    2633             : void
    2634         166 : Trace::PerformDeferredActions(LifoAlloc* alloc,
    2635             :                               RegExpMacroAssembler* assembler,
    2636             :                               int max_register,
    2637             :                               OutSet& affected_registers,
    2638             :                               OutSet* registers_to_pop,
    2639             :                               OutSet* registers_to_clear)
    2640             : {
    2641             :     // The "+1" is to avoid a push_limit of zero if stack_limit_slack() is 1.
    2642         166 :     const int push_limit = (assembler->stack_limit_slack() + 1) / 2;
    2643             : 
    2644             :     // Count pushes performed to force a stack limit check occasionally.
    2645         166 :     int pushes = 0;
    2646             : 
    2647         885 :     for (int reg = 0; reg <= max_register; reg++) {
    2648         719 :         if (!affected_registers.Get(reg))
    2649         446 :             continue;
    2650             : 
    2651             :         // The chronologically first deferred action in the trace
    2652             :         // is used to infer the action needed to restore a register
    2653             :         // to its previous state (or not, if it's safe to ignore it).
    2654         273 :         DeferredActionUndoType undo_action = DEFER_IGNORE;
    2655             : 
    2656         273 :         int value = 0;
    2657         273 :         bool absolute = false;
    2658         273 :         bool clear = false;
    2659         273 :         int store_position = -1;
    2660             :         // This is a little tricky because we are scanning the actions in reverse
    2661             :         // historical order (newest first).
    2662         862 :         for (DeferredAction* action = actions_;
    2663         862 :              action != nullptr;
    2664             :              action = action->next()) {
    2665         589 :             if (action->Mentions(reg)) {
    2666         285 :                 switch (action->action_type()) {
    2667             :                   case ActionNode::SET_REGISTER: {
    2668             :                     Trace::DeferredSetRegister* psr =
    2669          11 :                         static_cast<Trace::DeferredSetRegister*>(action);
    2670          11 :                     if (!absolute) {
    2671          11 :                         value += psr->value();
    2672          11 :                         absolute = true;
    2673             :                     }
    2674             :                     // SET_REGISTER is currently only used for newly introduced loop
    2675             :                     // counters. They can have a significant previous value if they
    2676             :                     // occour in a loop. TODO(lrn): Propagate this information, so
    2677             :                     // we can set undo_action to IGNORE if we know there is no value to
    2678             :                     // restore.
    2679          11 :                     undo_action = DEFER_RESTORE;
    2680          11 :                     MOZ_ASSERT(store_position == -1);
    2681          11 :                     MOZ_ASSERT(!clear);
    2682          11 :                     break;
    2683             :                   }
    2684             :                   case ActionNode::INCREMENT_REGISTER:
    2685          13 :                     if (!absolute) {
    2686          13 :                         value++;
    2687             :                     }
    2688          13 :                     MOZ_ASSERT(store_position == -1);
    2689          13 :                     MOZ_ASSERT(!clear);
    2690          13 :                     undo_action = DEFER_RESTORE;
    2691          13 :                     break;
    2692             :                   case ActionNode::STORE_POSITION: {
    2693             :                     Trace::DeferredCapture* pc =
    2694         241 :                         static_cast<Trace::DeferredCapture*>(action);
    2695         241 :                     if (!clear && store_position == -1) {
    2696         241 :                         store_position = pc->cp_offset();
    2697             :                     }
    2698             : 
    2699             :                     // For captures we know that stores and clears alternate.
    2700             :                     // Other register, are never cleared, and if the occur
    2701             :                     // inside a loop, they might be assigned more than once.
    2702         241 :                     if (reg <= 1) {
    2703             :                         // Registers zero and one, aka "capture zero", is
    2704             :                         // always set correctly if we succeed. There is no
    2705             :                         // need to undo a setting on backtrack, because we
    2706             :                         // will set it again or fail.
    2707         162 :                         undo_action = DEFER_IGNORE;
    2708             :                     } else {
    2709          79 :                         undo_action = pc->is_capture() ? DEFER_CLEAR : DEFER_RESTORE;
    2710             :                     }
    2711         241 :                     MOZ_ASSERT(!absolute);
    2712         241 :                     MOZ_ASSERT(value == 0);
    2713         241 :                     break;
    2714             :                   }
    2715             :                   case ActionNode::CLEAR_CAPTURES: {
    2716             :                     // Since we're scanning in reverse order, if we've already
    2717             :                     // set the position we have to ignore historically earlier
    2718             :                     // clearing operations.
    2719          20 :                     if (store_position == -1) {
    2720           8 :                         clear = true;
    2721             :                     }
    2722          20 :                     undo_action = DEFER_RESTORE;
    2723          20 :                     MOZ_ASSERT(!absolute);
    2724          20 :                     MOZ_ASSERT(value == 0);
    2725          20 :                     break;
    2726             :                   }
    2727             :                   default:
    2728           0 :                     MOZ_CRASH("Bad action");
    2729             :                 }
    2730             :             }
    2731             :         }
    2732             :         // Prepare for the undo-action (e.g., push if it's going to be popped).
    2733         273 :         if (undo_action == DEFER_RESTORE) {
    2734          44 :             pushes++;
    2735             :             RegExpMacroAssembler::StackCheckFlag stack_check =
    2736          44 :                 RegExpMacroAssembler::kNoStackLimitCheck;
    2737          44 :             if (pushes == push_limit) {
    2738           0 :                 stack_check = RegExpMacroAssembler::kCheckStackLimit;
    2739           0 :                 pushes = 0;
    2740             :             }
    2741             : 
    2742          44 :             assembler->PushRegister(reg, stack_check);
    2743          44 :             registers_to_pop->Set(alloc, reg);
    2744         229 :         } else if (undo_action == DEFER_CLEAR) {
    2745          67 :             registers_to_clear->Set(alloc, reg);
    2746             :         }
    2747             :         // Perform the chronologically last action (or accumulated increment)
    2748             :         // for the register.
    2749         273 :         if (store_position != -1) {
    2750         241 :             assembler->WriteCurrentPositionToRegister(reg, store_position);
    2751          32 :         } else if (clear) {
    2752           8 :             assembler->ClearRegisters(reg, reg);
    2753          24 :         } else if (absolute) {
    2754          11 :             assembler->SetRegister(reg, value);
    2755          13 :         } else if (value != 0) {
    2756          13 :             assembler->AdvanceRegister(reg, value);
    2757             :         }
    2758             :     }
    2759         166 : }
    2760             : 
    2761             : // This is called as we come into a loop choice node and some other tricky
    2762             : // nodes.  It normalizes the state of the code generator to ensure we can
    2763             : // generate generic code.
    2764         196 : void Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor)
    2765             : {
    2766         196 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    2767             : 
    2768         196 :     MOZ_ASSERT(!is_trivial());
    2769             : 
    2770         196 :     if (actions_ == nullptr && backtrack() == nullptr) {
    2771             :         // Here we just have some deferred cp advances to fix and we are back to
    2772             :         // a normal situation.  We may also have to forget some information gained
    2773             :         // through a quick check that was already performed.
    2774          30 :         if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
    2775             :         // Create a new trivial state and generate the node with that.
    2776          30 :         Trace new_state;
    2777          30 :         successor->Emit(compiler, &new_state);
    2778          30 :         return;
    2779             :     }
    2780             : 
    2781             :     // Generate deferred actions here along with code to undo them again.
    2782         166 :     OutSet affected_registers;
    2783             : 
    2784         166 :     if (backtrack() != nullptr) {
    2785             :         // Here we have a concrete backtrack location.  These are set up by choice
    2786             :         // nodes and so they indicate that we have a deferred save of the current
    2787             :         // position which we may need to emit here.
    2788         129 :         assembler->PushCurrentPosition();
    2789             :     }
    2790             : 
    2791         166 :     int max_register = FindAffectedRegisters(compiler->alloc(), &affected_registers);
    2792         166 :     OutSet registers_to_pop;
    2793         166 :     OutSet registers_to_clear;
    2794         166 :     PerformDeferredActions(compiler->alloc(),
    2795             :                            assembler,
    2796             :                            max_register,
    2797             :                            affected_registers,
    2798             :                            &registers_to_pop,
    2799         166 :                            &registers_to_clear);
    2800         166 :     if (cp_offset_ != 0)
    2801         129 :         assembler->AdvanceCurrentPosition(cp_offset_);
    2802             : 
    2803             :     // Create a new trivial state and generate the node with that.
    2804         332 :     jit::Label undo;
    2805         166 :     assembler->PushBacktrack(&undo);
    2806         166 :     Trace new_state;
    2807         166 :     successor->Emit(compiler, &new_state);
    2808             : 
    2809             :     // On backtrack we need to restore state.
    2810         166 :     assembler->BindBacktrack(&undo);
    2811             :     RestoreAffectedRegisters(assembler,
    2812             :                              max_register,
    2813             :                              registers_to_pop,
    2814         166 :                              registers_to_clear);
    2815         166 :     if (backtrack() == nullptr) {
    2816          37 :         assembler->Backtrack();
    2817             :     } else {
    2818         129 :         assembler->PopCurrentPosition();
    2819         129 :         assembler->JumpOrBacktrack(backtrack());
    2820             :     }
    2821             : }
    2822             : 
    2823             : void
    2824          58 : Trace::InvalidateCurrentCharacter()
    2825             : {
    2826          58 :     characters_preloaded_ = 0;
    2827          58 : }
    2828             : 
    2829             : void
    2830         232 : Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler)
    2831             : {
    2832         232 :     MOZ_ASSERT(by > 0);
    2833             :     // We don't have an instruction for shifting the current character register
    2834             :     // down or for using a shifted value for anything so lets just forget that
    2835             :     // we preloaded any characters into it.
    2836         232 :     characters_preloaded_ = 0;
    2837             :     // Adjust the offsets of the quick check performed information.  This
    2838             :     // information is used to find out what we already determined about the
    2839             :     // characters by means of mask and compare.
    2840         232 :     quick_check_performed_.Advance(by);
    2841         232 :     cp_offset_ += by;
    2842         232 :     if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
    2843           0 :         compiler->SetRegExpTooBig();
    2844           0 :         cp_offset_ = 0;
    2845             :     }
    2846         232 :     bound_checked_up_to_ = Max(0, bound_checked_up_to_ - by);
    2847         232 : }
    2848             : 
    2849             : void
    2850         396 : OutSet::Set(LifoAlloc* alloc, unsigned value)
    2851             : {
    2852         396 :     if (value < kFirstLimit) {
    2853         396 :         first_ |= (1 << value);
    2854             :     } else {
    2855           0 :         if (remaining_ == nullptr)
    2856           0 :             remaining_ = alloc->newInfallible<RemainingVector>(*alloc);
    2857             : 
    2858           0 :         for (size_t i = 0; i < remaining().length(); i++) {
    2859           0 :             if (remaining()[i] == value)
    2860           0 :                 return;
    2861             :         }
    2862           0 :         remaining().append(value);
    2863             :     }
    2864             : }
    2865             : 
    2866             : bool
    2867        2150 : OutSet::Get(unsigned value)
    2868             : {
    2869        2150 :     if (value < kFirstLimit)
    2870        2150 :         return (first_ & (1 << value)) != 0;
    2871           0 :     if (remaining_ == nullptr)
    2872           0 :         return false;
    2873           0 :     for (size_t i = 0; i < remaining().length(); i++) {
    2874           0 :         if (remaining()[i] == value)
    2875           0 :             return true;
    2876             :     }
    2877           0 :     return false;
    2878             : }
    2879             : 
    2880             : // -------------------------------------------------------------------
    2881             : // Graph emitting
    2882             : 
    2883             : void
    2884          12 : NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace)
    2885             : {
    2886          12 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    2887             : 
    2888             :     // Omit flushing the trace. We discard the entire stack frame anyway.
    2889             : 
    2890          12 :     if (!label()->bound()) {
    2891             :         // We are completely independent of the trace, since we ignore it,
    2892             :         // so this code can be used as the generic version.
    2893           2 :         assembler->Bind(label());
    2894             :     }
    2895             : 
    2896             :     // Throw away everything on the backtrack stack since the start
    2897             :     // of the negative submatch and restore the character position.
    2898          12 :     assembler->ReadCurrentPositionFromRegister(current_position_register_);
    2899          12 :     assembler->ReadBacktrackStackPointerFromRegister(stack_pointer_register_);
    2900             : 
    2901          12 :     if (clear_capture_count_ > 0) {
    2902             :         // Clear any captures that might have been performed during the success
    2903             :         // of the body of the negative look-ahead.
    2904           0 :         int clear_capture_end = clear_capture_start_ + clear_capture_count_ - 1;
    2905           0 :         assembler->ClearRegisters(clear_capture_start_, clear_capture_end);
    2906             :     }
    2907             : 
    2908             :     // Now that we have unwound the stack we find at the top of the stack the
    2909             :     // backtrack that the BeginSubmatch node got.
    2910          12 :     assembler->Backtrack();
    2911          12 : }
    2912             : 
    2913             : void
    2914         133 : EndNode::Emit(RegExpCompiler* compiler, Trace* trace)
    2915             : {
    2916         133 :     if (!trace->is_trivial()) {
    2917          66 :         trace->Flush(compiler, this);
    2918          66 :         return;
    2919             :     }
    2920          67 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    2921          67 :     if (!label()->bound()) {
    2922          40 :         assembler->Bind(label());
    2923             :     }
    2924          67 :     switch (action_) {
    2925             :     case ACCEPT:
    2926          66 :         assembler->Succeed();
    2927          66 :         return;
    2928             :     case BACKTRACK:
    2929           1 :         assembler->JumpOrBacktrack(trace->backtrack());
    2930           1 :         return;
    2931             :     case NEGATIVE_SUBMATCH_SUCCESS:
    2932             :         // This case is handled in a different virtual method.
    2933           0 :         MOZ_CRASH("Bad action: NEGATIVE_SUBMATCH_SUCCESS");
    2934             :     }
    2935           0 :     MOZ_CRASH("Bad action");
    2936             : }
    2937             : 
    2938             : // Emit the code to check for a ^ in multiline mode (1-character lookbehind
    2939             : // that matches newline or the start of input).
    2940             : static void
    2941           0 : EmitHat(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace)
    2942             : {
    2943           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    2944             : 
    2945             :     // We will be loading the previous character into the current character
    2946             :     // register.
    2947           0 :     Trace new_trace(*trace);
    2948           0 :     new_trace.InvalidateCurrentCharacter();
    2949             : 
    2950           0 :     jit::Label ok;
    2951           0 :     if (new_trace.cp_offset() == 0) {
    2952             :         // The start of input counts as a newline in this context, so skip to
    2953             :         // ok if we are at the start.
    2954           0 :         assembler->CheckAtStart(&ok);
    2955             :     }
    2956             : 
    2957             :     // We already checked that we are not at the start of input so it must be
    2958             :     // OK to load the previous character.
    2959           0 :     assembler->LoadCurrentCharacter(new_trace.cp_offset() -1, new_trace.backtrack(), false);
    2960             : 
    2961           0 :     if (!assembler->CheckSpecialCharacterClass('n', new_trace.backtrack())) {
    2962             :         // Newline means \n, \r, 0x2028 or 0x2029.
    2963           0 :         if (!compiler->latin1())
    2964           0 :             assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
    2965           0 :         assembler->CheckCharacter('\n', &ok);
    2966           0 :         assembler->CheckNotCharacter('\r', new_trace.backtrack());
    2967             :     }
    2968           0 :     assembler->Bind(&ok);
    2969           0 :     on_success->Emit(compiler, &new_trace);
    2970           0 : }
    2971             : 
    2972             : // Assert that the next character cannot be a part of a surrogate pair.
    2973             : static void
    2974           0 : EmitNotAfterLeadSurrogate(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace)
    2975             : {
    2976           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    2977             : 
    2978             :     // We will be loading the previous character into the current character
    2979             :     // register.
    2980           0 :     Trace new_trace(*trace);
    2981           0 :     new_trace.InvalidateCurrentCharacter();
    2982             : 
    2983           0 :     jit::Label ok;
    2984           0 :     if (new_trace.cp_offset() == 0)
    2985           0 :         assembler->CheckAtStart(&ok);
    2986             : 
    2987             :     // We already checked that we are not at the start of input so it must be
    2988             :     // OK to load the previous character.
    2989           0 :     assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, new_trace.backtrack(), false);
    2990           0 :     assembler->CheckCharacterInRange(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax,
    2991           0 :                                      new_trace.backtrack());
    2992             : 
    2993           0 :     assembler->Bind(&ok);
    2994           0 :     on_success->Emit(compiler, &new_trace);
    2995           0 : }
    2996             : 
    2997             : // Assert that the next character is not a trail surrogate that has a
    2998             : // corresponding lead surrogate.
    2999             : static void
    3000           0 : EmitNotInSurrogatePair(RegExpCompiler* compiler, RegExpNode* on_success, Trace* trace)
    3001             : {
    3002           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3003             : 
    3004           0 :     jit::Label ok;
    3005           0 :     assembler->CheckPosition(trace->cp_offset(), &ok);
    3006             : 
    3007             :     // We will be loading the next and previous characters into the current
    3008             :     // character register.
    3009           0 :     Trace new_trace(*trace);
    3010           0 :     new_trace.InvalidateCurrentCharacter();
    3011             : 
    3012           0 :     if (new_trace.cp_offset() == 0)
    3013           0 :         assembler->CheckAtStart(&ok);
    3014             : 
    3015             :     // First check if next character is a trail surrogate.
    3016           0 :     assembler->LoadCurrentCharacter(new_trace.cp_offset(), new_trace.backtrack(), false);
    3017             :     assembler->CheckCharacterNotInRange(unicode::TrailSurrogateMin, unicode::TrailSurrogateMax,
    3018           0 :                                         &ok);
    3019             : 
    3020             :     // Next check if previous character is a lead surrogate.
    3021             :     // We already checked that we are not at the start of input so it must be
    3022             :     // OK to load the previous character.
    3023           0 :     assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, new_trace.backtrack(), false);
    3024           0 :     assembler->CheckCharacterInRange(unicode::LeadSurrogateMin, unicode::LeadSurrogateMax,
    3025           0 :                                      new_trace.backtrack());
    3026             : 
    3027           0 :     assembler->Bind(&ok);
    3028           0 :     on_success->Emit(compiler, &new_trace);
    3029           0 : }
    3030             : 
    3031             : // Check for [0-9A-Z_a-z].
    3032             : static void
    3033           2 : EmitWordCheck(RegExpMacroAssembler* assembler,
    3034             :               jit::Label* word, jit::Label* non_word, bool fall_through_on_word,
    3035             :               bool unicode_ignore_case)
    3036             : {
    3037           4 :     if (!unicode_ignore_case &&
    3038           2 :         assembler->CheckSpecialCharacterClass(fall_through_on_word ? 'w' : 'W',
    3039           2 :                                               fall_through_on_word ? non_word : word))
    3040             :     {
    3041             :         // Optimized implementation available.
    3042           2 :         return;
    3043             :     }
    3044             : 
    3045           0 :     if (unicode_ignore_case) {
    3046           0 :         assembler->CheckCharacter(0x017F, word);
    3047           0 :         assembler->CheckCharacter(0x212A, word);
    3048             :     }
    3049             : 
    3050           0 :     assembler->CheckCharacterGT('z', non_word);
    3051           0 :     assembler->CheckCharacterLT('0', non_word);
    3052           0 :     assembler->CheckCharacterGT('a' - 1, word);
    3053           0 :     assembler->CheckCharacterLT('9' + 1, word);
    3054           0 :     assembler->CheckCharacterLT('A', non_word);
    3055           0 :     assembler->CheckCharacterLT('Z' + 1, word);
    3056             : 
    3057           0 :     if (fall_through_on_word)
    3058           0 :         assembler->CheckNotCharacter('_', non_word);
    3059             :     else
    3060           0 :         assembler->CheckCharacter('_', word);
    3061             : }
    3062             : 
    3063             : // Emit the code to handle \b and \B (word-boundary or non-word-boundary).
    3064             : void
    3065           2 : AssertionNode::EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace)
    3066             : {
    3067           2 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3068           2 :     Trace::TriBool next_is_word_character = Trace::UNKNOWN;
    3069           2 :     bool not_at_start = (trace->at_start() == Trace::FALSE_VALUE);
    3070           2 :     BoyerMooreLookahead* lookahead = bm_info(not_at_start);
    3071           2 :     if (lookahead == nullptr) {
    3072             :         int eats_at_least =
    3073           1 :             Min(kMaxLookaheadForBoyerMoore, EatsAtLeast(kMaxLookaheadForBoyerMoore,
    3074             :                                                         kRecursionBudget,
    3075           2 :                                                         not_at_start));
    3076           1 :         if (eats_at_least >= 1) {
    3077             :             BoyerMooreLookahead* bm =
    3078           1 :                 alloc()->newInfallible<BoyerMooreLookahead>(alloc(), eats_at_least, compiler);
    3079           1 :             FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
    3080           1 :             if (bm->at(0)->is_non_word())
    3081           1 :                 next_is_word_character = Trace::FALSE_VALUE;
    3082           1 :             if (bm->at(0)->is_word()) next_is_word_character = Trace::TRUE_VALUE;
    3083             :         }
    3084             :     } else {
    3085           1 :         if (lookahead->at(0)->is_non_word())
    3086           0 :             next_is_word_character = Trace::FALSE_VALUE;
    3087           1 :         if (lookahead->at(0)->is_word())
    3088           1 :             next_is_word_character = Trace::TRUE_VALUE;
    3089             :     }
    3090           2 :     bool at_boundary = (assertion_type_ == AssertionNode::AT_BOUNDARY);
    3091           2 :     if (next_is_word_character == Trace::UNKNOWN) {
    3092           0 :         jit::Label before_non_word;
    3093           0 :         jit::Label before_word;
    3094           0 :         if (trace->characters_preloaded() != 1) {
    3095           0 :             assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
    3096             :         }
    3097             :         // Fall through on non-word.
    3098           0 :         EmitWordCheck(assembler, &before_word, &before_non_word, false,
    3099           0 :                       compiler->unicode() && compiler->ignore_case());
    3100             :         // Next character is not a word character.
    3101           0 :         assembler->Bind(&before_non_word);
    3102           0 :         jit::Label ok;
    3103           0 :         BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
    3104           0 :         assembler->JumpOrBacktrack(&ok);
    3105             : 
    3106           0 :         assembler->Bind(&before_word);
    3107           0 :         BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
    3108           0 :         assembler->Bind(&ok);
    3109           2 :     } else if (next_is_word_character == Trace::TRUE_VALUE) {
    3110           1 :         BacktrackIfPrevious(compiler, trace, at_boundary ? kIsWord : kIsNonWord);
    3111             :     } else {
    3112           1 :         MOZ_ASSERT(next_is_word_character == Trace::FALSE_VALUE);
    3113           1 :         BacktrackIfPrevious(compiler, trace, at_boundary ? kIsNonWord : kIsWord);
    3114             :     }
    3115           2 : }
    3116             : 
    3117             : void
    3118           2 : AssertionNode::BacktrackIfPrevious(RegExpCompiler* compiler,
    3119             :                                    Trace* trace,
    3120             :                                    AssertionNode::IfPrevious backtrack_if_previous)
    3121             : {
    3122           2 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3123           2 :     Trace new_trace(*trace);
    3124           2 :     new_trace.InvalidateCurrentCharacter();
    3125             : 
    3126           4 :     jit::Label fall_through, dummy;
    3127             : 
    3128           2 :     jit::Label* non_word = backtrack_if_previous == kIsNonWord ? new_trace.backtrack() : &fall_through;
    3129           2 :     jit::Label* word     = backtrack_if_previous == kIsNonWord ? &fall_through : new_trace.backtrack();
    3130             : 
    3131           2 :     if (new_trace.cp_offset() == 0) {
    3132             :         // The start of input counts as a non-word character, so the question is
    3133             :         // decided if we are at the start.
    3134           1 :         assembler->CheckAtStart(non_word);
    3135             :     }
    3136             :     // We already checked that we are not at the start of input so it must be
    3137             :     // OK to load the previous character.
    3138           2 :     assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1, &dummy, false);
    3139           2 :     EmitWordCheck(assembler, word, non_word, backtrack_if_previous == kIsNonWord,
    3140           4 :                   compiler->unicode() && compiler->ignore_case());
    3141             : 
    3142           2 :     assembler->Bind(&fall_through);
    3143           2 :     on_success()->Emit(compiler, &new_trace);
    3144           2 : }
    3145             : 
    3146             : void
    3147           2 : AssertionNode::GetQuickCheckDetails(QuickCheckDetails* details,
    3148             :                                     RegExpCompiler* compiler,
    3149             :                                     int filled_in,
    3150             :                                     bool not_at_start)
    3151             : {
    3152           2 :     if (assertion_type_ == AT_START && not_at_start) {
    3153           0 :         details->set_cannot_match();
    3154           0 :         return;
    3155             :     }
    3156           2 :     return on_success()->GetQuickCheckDetails(details, compiler, filled_in, not_at_start);
    3157             : }
    3158             : 
    3159             : void
    3160          54 : AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace)
    3161             : {
    3162          54 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3163          54 :     switch (assertion_type_) {
    3164             :       case AT_END: {
    3165          62 :         jit::Label ok;
    3166          31 :         assembler->CheckPosition(trace->cp_offset(), &ok);
    3167          31 :         assembler->JumpOrBacktrack(trace->backtrack());
    3168          31 :         assembler->Bind(&ok);
    3169          31 :         break;
    3170             :       }
    3171             :       case AT_START: {
    3172          21 :         if (trace->at_start() == Trace::FALSE_VALUE) {
    3173           0 :             assembler->JumpOrBacktrack(trace->backtrack());
    3174           0 :             return;
    3175             :         }
    3176          21 :         if (trace->at_start() == Trace::UNKNOWN) {
    3177          21 :             assembler->CheckNotAtStart(trace->backtrack());
    3178          21 :             Trace at_start_trace = *trace;
    3179          21 :             at_start_trace.set_at_start(true);
    3180          21 :             on_success()->Emit(compiler, &at_start_trace);
    3181          21 :             return;
    3182             :         }
    3183             :       }
    3184           0 :         break;
    3185             :       case AFTER_NEWLINE:
    3186           0 :         EmitHat(compiler, on_success(), trace);
    3187           0 :         return;
    3188             :       case AT_BOUNDARY:
    3189             :       case AT_NON_BOUNDARY: {
    3190           2 :         EmitBoundaryCheck(compiler, trace);
    3191           2 :         return;
    3192             :       }
    3193             :       case NOT_AFTER_LEAD_SURROGATE:
    3194           0 :         EmitNotAfterLeadSurrogate(compiler, on_success(), trace);
    3195           0 :         return;
    3196             :       case NOT_IN_SURROGATE_PAIR:
    3197           0 :         EmitNotInSurrogatePair(compiler, on_success(), trace);
    3198           0 :         return;
    3199             :     }
    3200          31 :     on_success()->Emit(compiler, trace);
    3201             : }
    3202             : 
    3203             : static bool
    3204        2168 : DeterminedAlready(QuickCheckDetails* quick_check, int offset)
    3205             : {
    3206        2168 :     if (quick_check == nullptr)
    3207           0 :         return false;
    3208        2168 :     if (offset >= quick_check->characters())
    3209        1343 :         return false;
    3210         825 :     return quick_check->positions(offset)->determines_perfectly;
    3211             : }
    3212             : 
    3213             : static void
    3214         496 : UpdateBoundsCheck(int index, int* checked_up_to)
    3215             : {
    3216         496 :     if (index > *checked_up_to)
    3217         145 :         *checked_up_to = index;
    3218         496 : }
    3219             : 
    3220             : static void
    3221           1 : EmitBoundaryTest(RegExpMacroAssembler* masm,
    3222             :                  int border,
    3223             :                  jit::Label* fall_through,
    3224             :                  jit::Label* above_or_equal,
    3225             :                  jit::Label* below)
    3226             : {
    3227           1 :     if (below != fall_through) {
    3228           0 :         masm->CheckCharacterLT(border, below);
    3229           0 :         if (above_or_equal != fall_through)
    3230           0 :             masm->JumpOrBacktrack(above_or_equal);
    3231             :     } else {
    3232           1 :         masm->CheckCharacterGT(border - 1, above_or_equal);
    3233             :     }
    3234           1 : }
    3235             : 
    3236             : static void
    3237         106 : EmitDoubleBoundaryTest(RegExpMacroAssembler* masm,
    3238             :                        int first,
    3239             :                        int last,
    3240             :                        jit::Label* fall_through,
    3241             :                        jit::Label* in_range,
    3242             :                        jit::Label* out_of_range)
    3243             : {
    3244         106 :     if (in_range == fall_through) {
    3245          44 :         if (first == last)
    3246          13 :             masm->CheckNotCharacter(first, out_of_range);
    3247             :         else
    3248          31 :             masm->CheckCharacterNotInRange(first, last, out_of_range);
    3249             :     } else {
    3250          62 :         if (first == last)
    3251          48 :             masm->CheckCharacter(first, in_range);
    3252             :         else
    3253          14 :             masm->CheckCharacterInRange(first, last, in_range);
    3254          62 :         if (out_of_range != fall_through)
    3255           0 :             masm->JumpOrBacktrack(out_of_range);
    3256             :     }
    3257         106 : }
    3258             : 
    3259             : typedef InfallibleVector<int, 4> RangeBoundaryVector;
    3260             : 
    3261             : // even_label is for ranges[i] to ranges[i + 1] where i - start_index is even.
    3262             : // odd_label is for ranges[i] to ranges[i + 1] where i - start_index is odd.
    3263             : static void
    3264           6 : EmitUseLookupTable(RegExpMacroAssembler* masm,
    3265             :                    RangeBoundaryVector& ranges,
    3266             :                    int start_index,
    3267             :                    int end_index,
    3268             :                    int min_char,
    3269             :                    jit::Label* fall_through,
    3270             :                    jit::Label* even_label,
    3271             :                    jit::Label* odd_label)
    3272             : {
    3273             :     static const int kSize = RegExpMacroAssembler::kTableSize;
    3274             :     static const int kMask = RegExpMacroAssembler::kTableMask;
    3275             : 
    3276          12 :     DebugOnly<int> base = (min_char & ~kMask);
    3277             : 
    3278             :     // Assert that everything is on one kTableSize page.
    3279          68 :     for (int i = start_index; i <= end_index; i++)
    3280          62 :         MOZ_ASSERT((ranges[i] & ~kMask) == base);
    3281           6 :     MOZ_ASSERT(start_index == 0 || (ranges[start_index - 1] & ~kMask) <= base);
    3282             : 
    3283             :     char templ[kSize];
    3284             :     jit::Label* on_bit_set;
    3285             :     jit::Label* on_bit_clear;
    3286             :     int bit;
    3287           6 :     if (even_label == fall_through) {
    3288           0 :         on_bit_set = odd_label;
    3289           0 :         on_bit_clear = even_label;
    3290           0 :         bit = 1;
    3291             :     } else {
    3292           6 :         on_bit_set = even_label;
    3293           6 :         on_bit_clear = odd_label;
    3294           6 :         bit = 0;
    3295             :     }
    3296         305 :     for (int i = 0; i < (ranges[start_index] & kMask) && i < kSize; i++)
    3297         299 :         templ[i] = bit;
    3298           6 :     int j = 0;
    3299           6 :     bit ^= 1;
    3300          62 :     for (int i = start_index; i < end_index; i++) {
    3301         390 :         for (j = (ranges[i] & kMask); j < (ranges[i + 1] & kMask); j++) {
    3302         334 :             templ[j] = bit;
    3303             :         }
    3304          56 :         bit ^= 1;
    3305             :     }
    3306         141 :     for (int i = j; i < kSize; i++) {
    3307         135 :         templ[i] = bit;
    3308             :     }
    3309             : 
    3310             :     // TODO(erikcorry): Cache these.
    3311          12 :     RegExpShared::JitCodeTable ba;
    3312             :     {
    3313          12 :         AutoEnterOOMUnsafeRegion oomUnsafe;
    3314           6 :         ba.reset(static_cast<uint8_t*>(js_malloc(kSize)));
    3315           6 :         if (!ba)
    3316           0 :             oomUnsafe.crash("Table malloc");
    3317             :     }
    3318             : 
    3319         774 :     for (int i = 0; i < kSize; i++)
    3320         768 :         ba[i] = templ[i];
    3321             : 
    3322           6 :     masm->CheckBitInTable(Move(ba), on_bit_set);
    3323           6 :     if (on_bit_clear != fall_through)
    3324           6 :         masm->JumpOrBacktrack(on_bit_clear);
    3325           6 : }
    3326             : 
    3327             : static void
    3328          44 : CutOutRange(RegExpMacroAssembler* masm,
    3329             :             RangeBoundaryVector& ranges,
    3330             :             int start_index,
    3331             :             int end_index,
    3332             :             int cut_index,
    3333             :             jit::Label* even_label,
    3334             :             jit::Label* odd_label)
    3335             : {
    3336          44 :     bool odd = (((cut_index - start_index) & 1) == 1);
    3337          44 :     jit::Label* in_range_label = odd ? odd_label : even_label;
    3338          88 :     jit::Label dummy;
    3339          88 :     EmitDoubleBoundaryTest(masm,
    3340          44 :                            ranges[cut_index],
    3341          44 :                            ranges[cut_index + 1] - 1,
    3342             :                            &dummy,
    3343             :                            in_range_label,
    3344          44 :                            &dummy);
    3345          44 :     MOZ_ASSERT(!dummy.used());
    3346             :     // Cut out the single range by rewriting the array.  This creates a new
    3347             :     // range that is a merger of the two ranges on either side of the one we
    3348             :     // are cutting out.  The oddity of the labels is preserved.
    3349          67 :     for (int j = cut_index; j > start_index; j--)
    3350          23 :         ranges[j] = ranges[j - 1];
    3351         134 :     for (int j = cut_index + 1; j < end_index; j++)
    3352          90 :         ranges[j] = ranges[j + 1];
    3353          44 : }
    3354             : 
    3355             : // Unicode case.  Split the search space into kSize spaces that are handled
    3356             : // with recursion.
    3357             : static void
    3358           6 : SplitSearchSpace(RangeBoundaryVector& ranges,
    3359             :                  int start_index,
    3360             :                  int end_index,
    3361             :                  int* new_start_index,
    3362             :                  int* new_end_index,
    3363             :                  int* border)
    3364             : {
    3365             :     static const int kSize = RegExpMacroAssembler::kTableSize;
    3366             :     static const int kMask = RegExpMacroAssembler::kTableMask;
    3367             : 
    3368           6 :     int first = ranges[start_index];
    3369           6 :     int last = ranges[end_index] - 1;
    3370             : 
    3371           6 :     *new_start_index = start_index;
    3372           6 :     *border = (ranges[start_index] & ~kMask) + kSize;
    3373         130 :     while (*new_start_index < end_index) {
    3374          62 :         if (ranges[*new_start_index] > *border)
    3375           0 :             break;
    3376          62 :         (*new_start_index)++;
    3377             :     }
    3378             :     // new_start_index is the index of the first edge that is beyond the
    3379             :     // current kSize space.
    3380             : 
    3381             :     // For very large search spaces we do a binary chop search of the non-ASCII
    3382             :     // space instead of just going to the end of the current kSize space.  The
    3383             :     // heuristics are complicated a little by the fact that any 128-character
    3384             :     // encoding space can be quickly tested with a table lookup, so we don't
    3385             :     // wish to do binary chop search at a smaller granularity than that.  A
    3386             :     // 128-character space can take up a lot of space in the ranges array if,
    3387             :     // for example, we only want to match every second character (eg. the lower
    3388             :     // case characters on some Unicode pages).
    3389           6 :     int binary_chop_index = (end_index + start_index) / 2;
    3390             :     // The first test ensures that we get to the code that handles the ASCII
    3391             :     // range with a single not-taken branch, speeding up this important
    3392             :     // character range (even non-ASCII charset-based text has spaces and
    3393             :     // punctuation).
    3394          12 :     if (*border - 1 > kMaxOneByteCharCode &&  // ASCII case.
    3395           0 :         end_index - start_index > (*new_start_index - start_index) * 2 &&
    3396           0 :         last - first > kSize * 2 &&
    3397           6 :         binary_chop_index > *new_start_index &&
    3398           0 :         ranges[binary_chop_index] >= first + 2 * kSize)
    3399             :     {
    3400           0 :         int scan_forward_for_section_border = binary_chop_index;;
    3401           0 :         int new_border = (ranges[binary_chop_index] | kMask) + 1;
    3402             : 
    3403           0 :         while (scan_forward_for_section_border < end_index) {
    3404           0 :             if (ranges[scan_forward_for_section_border] > new_border) {
    3405           0 :                 *new_start_index = scan_forward_for_section_border;
    3406           0 :                 *border = new_border;
    3407           0 :                 break;
    3408             :             }
    3409           0 :             scan_forward_for_section_border++;
    3410             :         }
    3411             :     }
    3412             : 
    3413           6 :     MOZ_ASSERT(*new_start_index > start_index);
    3414           6 :     *new_end_index = *new_start_index - 1;
    3415           6 :     if (ranges[*new_end_index] == *border)
    3416           0 :         (*new_end_index)--;
    3417           6 :     if (*border >= ranges[end_index]) {
    3418           6 :         *border = ranges[end_index];
    3419           6 :         *new_start_index = end_index;  // Won't be used.
    3420           6 :         *new_end_index = end_index - 1;
    3421             :     }
    3422           6 : }
    3423             : 
    3424             : // Gets a series of segment boundaries representing a character class.  If the
    3425             : // character is in the range between an even and an odd boundary (counting from
    3426             : // start_index) then go to even_label, otherwise go to odd_label.  We already
    3427             : // know that the character is in the range of min_char to max_char inclusive.
    3428             : // Either label can be nullptr indicating backtracking.  Either label can also be
    3429             : // equal to the fall_through label.
    3430             : static void
    3431         119 : GenerateBranches(RegExpMacroAssembler* masm,
    3432             :                  RangeBoundaryVector& ranges,
    3433             :                  int start_index,
    3434             :                  int end_index,
    3435             :                  char16_t min_char,
    3436             :                  char16_t max_char,
    3437             :                  jit::Label* fall_through,
    3438             :                  jit::Label* even_label,
    3439             :                  jit::Label* odd_label)
    3440             : {
    3441         119 :     int first = ranges[start_index];
    3442         119 :     int last = ranges[end_index] - 1;
    3443             : 
    3444         119 :     MOZ_ASSERT(min_char < first);
    3445             : 
    3446             :     // Just need to test if the character is before or on-or-after
    3447             :     // a particular character.
    3448         119 :     if (start_index == end_index) {
    3449           1 :         EmitBoundaryTest(masm, first, fall_through, even_label, odd_label);
    3450           1 :         return;
    3451             :     }
    3452             : 
    3453             :     // Another almost trivial case:  There is one interval in the middle that is
    3454             :     // different from the end intervals.
    3455         118 :     if (start_index + 1 == end_index) {
    3456          62 :         EmitDoubleBoundaryTest(masm, first, last, fall_through, even_label, odd_label);
    3457          62 :         return;
    3458             :     }
    3459             : 
    3460             :     // It's not worth using table lookup if there are very few intervals in the
    3461             :     // character class.
    3462          56 :     if (end_index - start_index <= 6) {
    3463             :         // It is faster to test for individual characters, so we look for those
    3464             :         // first, then try arbitrary ranges in the second round.
    3465             :         static int kNoCutIndex = -1;
    3466          44 :         int cut = kNoCutIndex;
    3467         109 :         for (int i = start_index; i < end_index; i++) {
    3468          98 :             if (ranges[i] == ranges[i + 1] - 1) {
    3469          33 :                 cut = i;
    3470          33 :                 break;
    3471             :             }
    3472             :         }
    3473          44 :         if (cut == kNoCutIndex) cut = start_index;
    3474          44 :         CutOutRange(masm, ranges, start_index, end_index, cut, even_label, odd_label);
    3475          44 :         MOZ_ASSERT(end_index - start_index >= 2);
    3476          44 :         GenerateBranches(masm,
    3477             :                          ranges,
    3478             :                          start_index + 1,
    3479             :                          end_index - 1,
    3480             :                          min_char,
    3481             :                          max_char,
    3482             :                          fall_through,
    3483             :                          even_label,
    3484          44 :                          odd_label);
    3485          44 :         return;
    3486             :     }
    3487             : 
    3488             :     // If there are a lot of intervals in the regexp, then we will use tables to
    3489             :     // determine whether the character is inside or outside the character class.
    3490             :     static const int kBits = RegExpMacroAssembler::kTableSizeBits;
    3491             : 
    3492          12 :     if ((max_char >> kBits) == (min_char >> kBits)) {
    3493           6 :         EmitUseLookupTable(masm,
    3494             :                            ranges,
    3495             :                            start_index,
    3496             :                            end_index,
    3497             :                            min_char,
    3498             :                            fall_through,
    3499             :                            even_label,
    3500           6 :                            odd_label);
    3501           6 :         return;
    3502             :     }
    3503             : 
    3504           6 :     if ((min_char >> kBits) != (first >> kBits)) {
    3505           0 :         masm->CheckCharacterLT(first, odd_label);
    3506           0 :         GenerateBranches(masm,
    3507             :                          ranges,
    3508             :                          start_index + 1,
    3509             :                          end_index,
    3510             :                          first,
    3511             :                          max_char,
    3512             :                          fall_through,
    3513             :                          odd_label,
    3514           0 :                          even_label);
    3515           0 :         return;
    3516             :     }
    3517             : 
    3518           6 :     int new_start_index = 0;
    3519           6 :     int new_end_index = 0;
    3520           6 :     int border = 0;
    3521             : 
    3522             :     SplitSearchSpace(ranges,
    3523             :                      start_index,
    3524             :                      end_index,
    3525             :                      &new_start_index,
    3526             :                      &new_end_index,
    3527           6 :                      &border);
    3528             : 
    3529          12 :     jit::Label handle_rest;
    3530           6 :     jit::Label* above = &handle_rest;
    3531           6 :     if (border == last + 1) {
    3532             :         // We didn't find any section that started after the limit, so everything
    3533             :         // above the border is one of the terminal labels.
    3534           6 :         above = (end_index & 1) != (start_index & 1) ? odd_label : even_label;
    3535           6 :         MOZ_ASSERT(new_end_index == end_index - 1);
    3536             :     }
    3537             : 
    3538           6 :     MOZ_ASSERT(start_index <= new_end_index);
    3539           6 :     MOZ_ASSERT(new_start_index <= end_index);
    3540           6 :     MOZ_ASSERT(start_index < new_start_index);
    3541           6 :     MOZ_ASSERT(new_end_index < end_index);
    3542           6 :     MOZ_ASSERT(new_end_index + 1 == new_start_index ||
    3543             :                (new_end_index + 2 == new_start_index &&
    3544             :                 border == ranges[new_end_index + 1]));
    3545           6 :     MOZ_ASSERT(min_char < border - 1);
    3546           6 :     MOZ_ASSERT(border < max_char);
    3547           6 :     MOZ_ASSERT(ranges[new_end_index] < border);
    3548           6 :     MOZ_ASSERT(border < ranges[new_start_index] ||
    3549             :                (border == ranges[new_start_index] &&
    3550             :                 new_start_index == end_index &&
    3551             :                 new_end_index == end_index - 1 &&
    3552             :                 border == last + 1));
    3553           6 :     MOZ_ASSERT(new_start_index == 0 || border >= ranges[new_start_index - 1]);
    3554             : 
    3555           6 :     masm->CheckCharacterGT(border - 1, above);
    3556          12 :     jit::Label dummy;
    3557           6 :     GenerateBranches(masm,
    3558             :                      ranges,
    3559             :                      start_index,
    3560             :                      new_end_index,
    3561             :                      min_char,
    3562           6 :                      border - 1,
    3563             :                      &dummy,
    3564             :                      even_label,
    3565           6 :                      odd_label);
    3566           6 :     if (handle_rest.used()) {
    3567           0 :         masm->Bind(&handle_rest);
    3568           0 :         bool flip = (new_start_index & 1) != (start_index & 1);
    3569           0 :         GenerateBranches(masm,
    3570             :                          ranges,
    3571             :                          new_start_index,
    3572             :                          end_index,
    3573             :                          border,
    3574             :                          max_char,
    3575             :                          &dummy,
    3576             :                          flip ? odd_label : even_label,
    3577           0 :                          flip ? even_label : odd_label);
    3578             :     }
    3579             : }
    3580             : 
    3581             : static void
    3582         100 : EmitCharClass(LifoAlloc* alloc,
    3583             :               RegExpMacroAssembler* macro_assembler,
    3584             :               RegExpCharacterClass* cc,
    3585             :               bool latin1,
    3586             :               jit::Label* on_failure,
    3587             :               int cp_offset,
    3588             :               bool check_offset,
    3589             :               bool preloaded)
    3590             : {
    3591         100 :     CharacterRangeVector& ranges = cc->ranges(alloc);
    3592         100 :     if (!CharacterRange::IsCanonical(ranges)) {
    3593           6 :         CharacterRange::Canonicalize(ranges);
    3594             :     }
    3595             : 
    3596         100 :     int max_char = MaximumCharacter(latin1);
    3597         100 :     int range_count = ranges.length();
    3598             : 
    3599         100 :     int last_valid_range = range_count - 1;
    3600         196 :     while (last_valid_range >= 0) {
    3601         148 :         CharacterRange& range = ranges[last_valid_range];
    3602         148 :         if (range.from() <= max_char) {
    3603         100 :             break;
    3604             :         }
    3605          48 :         last_valid_range--;
    3606             :     }
    3607             : 
    3608         100 :     if (last_valid_range < 0) {
    3609           0 :         if (!cc->is_negated()) {
    3610           0 :             macro_assembler->JumpOrBacktrack(on_failure);
    3611             :         }
    3612           0 :         if (check_offset) {
    3613           0 :             macro_assembler->CheckPosition(cp_offset, on_failure);
    3614             :         }
    3615          31 :         return;
    3616             :     }
    3617             : 
    3618         150 :     if (last_valid_range == 0 &&
    3619          50 :         ranges[0].IsEverything(max_char)) {
    3620          18 :         if (cc->is_negated()) {
    3621           0 :             macro_assembler->JumpOrBacktrack(on_failure);
    3622             :         } else {
    3623             :             // This is a common case hit by non-anchored expressions.
    3624          18 :             if (check_offset) {
    3625           3 :                 macro_assembler->CheckPosition(cp_offset, on_failure);
    3626             :             }
    3627             :         }
    3628          18 :         return;
    3629             :     }
    3630         114 :     if (last_valid_range == 0 &&
    3631         101 :         !cc->is_negated() &&
    3632          19 :         ranges[0].IsEverything(max_char)) {
    3633             :         // This is a common case hit by non-anchored expressions.
    3634           0 :         if (check_offset) {
    3635           0 :             macro_assembler->CheckPosition(cp_offset, on_failure);
    3636             :         }
    3637           0 :         return;
    3638             :     }
    3639             : 
    3640          82 :     if (!preloaded) {
    3641          74 :         macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
    3642             :     }
    3643             : 
    3644          98 :     if (cc->is_standard(alloc) &&
    3645          16 :         macro_assembler->CheckSpecialCharacterClass(cc->standard_type(),
    3646          16 :                                                     on_failure)) {
    3647          13 :         return;
    3648             :     }
    3649             : 
    3650             :     // A new list with ascending entries.  Each entry is a code unit
    3651             :     // where there is a boundary between code units that are part of
    3652             :     // the class and code units that are not.  Normally we insert an
    3653             :     // entry at zero which goes to the failure label, but if there
    3654             :     // was already one there we fall through for success on that entry.
    3655             :     // Subsequent entries have alternating meaning (success/failure).
    3656             :     RangeBoundaryVector* range_boundaries =
    3657          69 :         alloc->newInfallible<RangeBoundaryVector>(*alloc);
    3658             : 
    3659          69 :     bool zeroth_entry_is_failure = !cc->is_negated();
    3660             : 
    3661          69 :     range_boundaries->reserve(last_valid_range);
    3662         213 :     for (int i = 0; i <= last_valid_range; i++) {
    3663         144 :         CharacterRange& range = ranges[i];
    3664         144 :         if (range.from() == 0) {
    3665           4 :             MOZ_ASSERT(i == 0);
    3666           4 :             zeroth_entry_is_failure = !zeroth_entry_is_failure;
    3667             :         } else {
    3668         140 :             range_boundaries->append(range.from());
    3669             :         }
    3670         144 :         range_boundaries->append(range.to() + 1);
    3671             :     }
    3672          69 :     int end_index = range_boundaries->length() - 1;
    3673          69 :     if ((*range_boundaries)[end_index] > max_char)
    3674           3 :         end_index--;
    3675             : 
    3676         138 :     jit::Label fall_through;
    3677          69 :     GenerateBranches(macro_assembler,
    3678             :                      *range_boundaries,
    3679             :                      0,  // start_index.
    3680             :                      end_index,
    3681             :                      0,  // min_char.
    3682             :                      max_char,
    3683             :                      &fall_through,
    3684             :                      zeroth_entry_is_failure ? &fall_through : on_failure,
    3685          69 :                      zeroth_entry_is_failure ? on_failure : &fall_through);
    3686          69 :     macro_assembler->Bind(&fall_through);
    3687             : }
    3688             : 
    3689             : typedef bool EmitCharacterFunction(RegExpCompiler* compiler,
    3690             :                                    char16_t c,
    3691             :                                    jit::Label* on_failure,
    3692             :                                    int cp_offset,
    3693             :                                    bool check,
    3694             :                                    bool preloaded);
    3695             : 
    3696             : static inline bool
    3697         303 : EmitSimpleCharacter(RegExpCompiler* compiler,
    3698             :                     char16_t c,
    3699             :                     jit::Label* on_failure,
    3700             :                     int cp_offset,
    3701             :                     bool check,
    3702             :                     bool preloaded)
    3703             : {
    3704         303 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3705         303 :     bool bound_checked = false;
    3706         303 :     if (!preloaded) {
    3707         303 :         assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
    3708         303 :         bound_checked = true;
    3709             :     }
    3710         303 :     assembler->CheckNotCharacter(c, on_failure);
    3711         303 :     return bound_checked;
    3712             : }
    3713             : 
    3714             : // Emit character for case independent match, when GetCaseIndependentLetters
    3715             : // returns single character.
    3716             : // This is used by the following 2 cases:
    3717             : //   * non-letters (things that don't have case)
    3718             : //   * letters that map across Latin1 and non-Latin1, and non-Latin1 case is
    3719             : //     filtered out because of Latin1 match
    3720             : static inline bool
    3721          93 : EmitAtomSingle(RegExpCompiler* compiler,
    3722             :                char16_t c,
    3723             :                jit::Label* on_failure,
    3724             :                int cp_offset,
    3725             :                bool check,
    3726             :                bool preloaded)
    3727             : {
    3728          93 :     RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    3729          93 :     bool latin1 = compiler->latin1();
    3730             :     char16_t chars[kEcma262UnCanonicalizeMaxWidth];
    3731          93 :     int length = GetCaseIndependentLetters(c, latin1, compiler->unicode(), chars);
    3732          93 :     if (length != 1)
    3733          93 :         return false;
    3734             : 
    3735           0 :     bool checked = false;
    3736           0 :     if (!preloaded) {
    3737           0 :         macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
    3738           0 :         checked = check;
    3739             :     }
    3740           0 :     macro_assembler->CheckNotCharacter(chars[0], on_failure);
    3741           0 :     return checked;
    3742             : }
    3743             : 
    3744             : static bool
    3745          93 : ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
    3746             :                           bool latin1,
    3747             :                           char16_t c1,
    3748             :                           char16_t c2,
    3749             :                           jit::Label* on_failure)
    3750             : {
    3751          93 :     char16_t char_mask = MaximumCharacter(latin1);
    3752             : 
    3753          93 :     MOZ_ASSERT(c1 != c2);
    3754          93 :     if (c1 > c2) {
    3755          93 :         char16_t tmp = c1;
    3756          93 :         c1 = c2;
    3757          93 :         c2 = tmp;
    3758             :     }
    3759             : 
    3760          93 :     char16_t exor = c1 ^ c2;
    3761             :     // Check whether exor has only one bit set.
    3762          93 :     if (((exor - 1) & exor) == 0) {
    3763             :         // If c1 and c2 differ only by one bit.
    3764          93 :         char16_t mask = char_mask ^ exor;
    3765          93 :         macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
    3766          93 :         return true;
    3767             :     }
    3768             : 
    3769           0 :     char16_t diff = c2 - c1;
    3770           0 :     if (((diff - 1) & diff) == 0 && c1 >= diff) {
    3771             :         // If the characters differ by 2^n but don't differ by one bit then
    3772             :         // subtract the difference from the found character, then do the or
    3773             :         // trick.  We avoid the theoretical case where negative numbers are
    3774             :         // involved in order to simplify code generation.
    3775           0 :         char16_t mask = char_mask ^ diff;
    3776           0 :         macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
    3777             :                                                         diff,
    3778             :                                                         mask,
    3779           0 :                                                         on_failure);
    3780           0 :         return true;
    3781             :     }
    3782           0 :     return false;
    3783             : }
    3784             : 
    3785             : // Emit character for case independent match, when GetCaseIndependentLetters
    3786             : // returns multiple characters.
    3787             : static inline bool
    3788          93 : EmitAtomMulti(RegExpCompiler* compiler,
    3789             :               char16_t c,
    3790             :               jit::Label* on_failure,
    3791             :               int cp_offset,
    3792             :               bool check,
    3793             :               bool preloaded)
    3794             : {
    3795          93 :     RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    3796          93 :     bool latin1 = compiler->latin1();
    3797             :     char16_t chars[kEcma262UnCanonicalizeMaxWidth];
    3798          93 :     int length = GetCaseIndependentLetters(c, latin1, compiler->unicode(), chars);
    3799          93 :     if (length <= 1) return false;
    3800             :     // We may not need to check against the end of the input string
    3801             :     // if this character lies before a character that matched.
    3802          93 :     if (!preloaded)
    3803          93 :         macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
    3804         186 :     jit::Label ok;
    3805             :     MOZ_ASSERT(kEcma262UnCanonicalizeMaxWidth == 4);
    3806          93 :     switch (length) {
    3807             :       case 2: {
    3808         186 :         if (ShortCutEmitCharacterPair(macro_assembler,
    3809             :                                       latin1,
    3810          93 :                                       chars[0],
    3811          93 :                                       chars[1],
    3812             :                                       on_failure)) {
    3813             :         } else {
    3814           0 :             macro_assembler->CheckCharacter(chars[0], &ok);
    3815           0 :             macro_assembler->CheckNotCharacter(chars[1], on_failure);
    3816           0 :             macro_assembler->Bind(&ok);
    3817             :         }
    3818          93 :         break;
    3819             :       }
    3820             :       case 4:
    3821           0 :         macro_assembler->CheckCharacter(chars[3], &ok);
    3822             :         MOZ_FALLTHROUGH;
    3823             :       case 3:
    3824           0 :         macro_assembler->CheckCharacter(chars[0], &ok);
    3825           0 :         macro_assembler->CheckCharacter(chars[1], &ok);
    3826           0 :         macro_assembler->CheckNotCharacter(chars[2], on_failure);
    3827           0 :         macro_assembler->Bind(&ok);
    3828           0 :         break;
    3829             :       default:
    3830           0 :         MOZ_CRASH("Bad length");
    3831             :     }
    3832          93 :     return true;
    3833             : }
    3834             : 
    3835             : // We call this repeatedly to generate code for each pass over the text node.
    3836             : // The passes are in increasing order of difficulty because we hope one
    3837             : // of the first passes will fail in which case we are saved the work of the
    3838             : // later passes.  for example for the case independent regexp /%[asdfghjkl]a/
    3839             : // we will check the '%' in the first pass, the case independent 'a' in the
    3840             : // second pass and the character class in the last pass.
    3841             : //
    3842             : // The passes are done from right to left, so for example to test for /bar/
    3843             : // we will first test for an 'r' with offset 2, then an 'a' with offset 1
    3844             : // and then a 'b' with offset 0.  This means we can avoid the end-of-input
    3845             : // bounds check most of the time.  In the example we only need to check for
    3846             : // end-of-input when loading the putative 'r'.
    3847             : //
    3848             : // A slight complication involves the fact that the first character may already
    3849             : // be fetched into a register by the previous node.  In this case we want to
    3850             : // do the test for that character first.  We do this in separate passes.  The
    3851             : // 'preloaded' argument indicates that we are doing such a 'pass'.  If such a
    3852             : // pass has been performed then subsequent passes will have true in
    3853             : // first_element_checked to indicate that that character does not need to be
    3854             : // checked again.
    3855             : //
    3856             : // In addition to all this we are passed a Trace, which can
    3857             : // contain an AlternativeGeneration object.  In this AlternativeGeneration
    3858             : // object we can see details of any quick check that was already passed in
    3859             : // order to get to the code we are now generating.  The quick check can involve
    3860             : // loading characters, which means we do not need to recheck the bounds
    3861             : // up to the limit the quick check already checked.  In addition the quick
    3862             : // check can have involved a mask and compare operation which may simplify
    3863             : // or obviate the need for further checks at some character positions.
    3864             : void
    3865         785 : TextNode::TextEmitPass(RegExpCompiler* compiler,
    3866             :                        TextEmitPassType pass,
    3867             :                        bool preloaded,
    3868             :                        Trace* trace,
    3869             :                        bool first_element_checked,
    3870             :                        int* checked_up_to)
    3871             : {
    3872         785 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    3873         785 :     bool latin1 = compiler->latin1();
    3874         785 :     jit::Label* backtrack = trace->backtrack();
    3875         785 :     QuickCheckDetails* quick_check = trace->quick_check_performed();
    3876         785 :     int element_count = elements().length();
    3877        1573 :     for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
    3878         788 :         TextElement elm = elements()[i];
    3879         788 :         int cp_offset = trace->cp_offset() + elm.cp_offset();
    3880         788 :         if (elm.text_type() == TextElement::ATOM) {
    3881         453 :             const CharacterVector& quarks = elm.atom()->data();
    3882        2545 :             for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
    3883        2092 :                 if (first_element_checked && i == 0 && j == 0) continue;
    3884        2066 :                 if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
    3885        1262 :                 EmitCharacterFunction* emit_function = nullptr;
    3886        1262 :                 switch (pass) {
    3887             :                   case NON_LATIN1_MATCH:
    3888         377 :                     MOZ_ASSERT(latin1);
    3889         377 :                     if (!IsLatin1Equivalent(quarks[j], compiler)) {
    3890           0 :                         assembler->JumpOrBacktrack(backtrack);
    3891           0 :                         return;
    3892             :                     }
    3893         377 :                     break;
    3894             :                   case CASE_SINGLE_CHARACTER_MATCH:
    3895          93 :                     emit_function = &EmitAtomSingle;
    3896          93 :                     break;
    3897             :                   case SIMPLE_CHARACTER_MATCH:
    3898         303 :                     emit_function = &EmitSimpleCharacter;
    3899         303 :                     break;
    3900             :                   case CASE_MUTLI_CHARACTER_MATCH:
    3901          93 :                     emit_function = &EmitAtomMulti;
    3902          93 :                     break;
    3903             :                   default:
    3904         396 :                     break;
    3905             :                 }
    3906        1262 :                 if (emit_function != nullptr) {
    3907        1467 :                     bool bound_checked = emit_function(compiler,
    3908         489 :                                                        quarks[j],
    3909             :                                                        backtrack,
    3910             :                                                        cp_offset + j,
    3911         489 :                                                        *checked_up_to < cp_offset + j,
    3912         489 :                                                        preloaded);
    3913         489 :                     if (bound_checked) UpdateBoundsCheck(cp_offset + j, checked_up_to);
    3914             :                 }
    3915             :             }
    3916             :         } else {
    3917         335 :             MOZ_ASSERT(TextElement::CHAR_CLASS == elm.text_type());
    3918         335 :             if (pass == CHARACTER_CLASS_MATCH) {
    3919         124 :                 if (first_element_checked && i == 0) continue;
    3920         102 :                 if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
    3921         100 :                 RegExpCharacterClass* cc = elm.char_class();
    3922         200 :                 EmitCharClass(alloc(),
    3923             :                               assembler,
    3924             :                               cc,
    3925             :                               latin1,
    3926             :                               backtrack,
    3927             :                               cp_offset,
    3928         100 :                               *checked_up_to < cp_offset,
    3929         100 :                               preloaded);
    3930         100 :                 UpdateBoundsCheck(cp_offset, checked_up_to);
    3931             :             }
    3932             :         }
    3933             :     }
    3934             : }
    3935             : 
    3936             : int
    3937         647 : TextNode::Length()
    3938             : {
    3939         647 :     TextElement elm = elements()[elements().length() - 1];
    3940         647 :     MOZ_ASSERT(elm.cp_offset() >= 0);
    3941         647 :     return elm.cp_offset() + elm.length();
    3942             : }
    3943             : 
    3944             : bool
    3945        1016 : TextNode::SkipPass(int int_pass, bool ignore_case)
    3946             : {
    3947        1016 :     TextEmitPassType pass = static_cast<TextEmitPassType>(int_pass);
    3948        1016 :     if (ignore_case)
    3949         300 :         return pass == SIMPLE_CHARACTER_MATCH;
    3950         716 :     return pass == CASE_SINGLE_CHARACTER_MATCH || pass == CASE_MUTLI_CHARACTER_MATCH;
    3951             : }
    3952             : 
    3953             : // This generates the code to match a text node.  A text node can contain
    3954             : // straight character sequences (possibly to be matched in a case-independent
    3955             : // way) and character classes.  For efficiency we do not do this in a single
    3956             : // pass from left to right.  Instead we pass over the text node several times,
    3957             : // emitting code for some character positions every time.  See the comment on
    3958             : // TextEmitPass for details.
    3959             : void
    3960         233 : TextNode::Emit(RegExpCompiler* compiler, Trace* trace)
    3961             : {
    3962         233 :     LimitResult limit_result = LimitVersions(compiler, trace);
    3963         234 :     if (limit_result == DONE) return;
    3964         232 :     MOZ_ASSERT(limit_result == CONTINUE);
    3965             : 
    3966         232 :     if (trace->cp_offset() + Length() > RegExpMacroAssembler::kMaxCPOffset) {
    3967           0 :         compiler->SetRegExpTooBig();
    3968           0 :         return;
    3969             :     }
    3970             : 
    3971         232 :     if (compiler->latin1()) {
    3972         202 :         int dummy = 0;
    3973         202 :         TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
    3974             :     }
    3975             : 
    3976         232 :     bool first_elt_done = false;
    3977         232 :     int bound_checked_to = trace->cp_offset() - 1;
    3978         232 :     bound_checked_to += trace->bound_checked_up_to();
    3979             : 
    3980             :     // If a character is preloaded into the current character register then
    3981             :     // check that now.
    3982         232 :     if (trace->characters_preloaded() == 1) {
    3983         110 :         for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
    3984          88 :             if (!SkipPass(pass, compiler->ignore_case())) {
    3985          48 :                 TextEmitPass(compiler,
    3986             :                              static_cast<TextEmitPassType>(pass),
    3987             :                              true,
    3988             :                              trace,
    3989             :                              false,
    3990          48 :                              &bound_checked_to);
    3991             :             }
    3992             :         }
    3993          22 :         first_elt_done = true;
    3994             :     }
    3995             : 
    3996        1160 :     for (int pass = kFirstRealPass; pass <= kLastPass; pass++) {
    3997         928 :         if (!SkipPass(pass, compiler->ignore_case())) {
    3998         535 :             TextEmitPass(compiler,
    3999             :                          static_cast<TextEmitPassType>(pass),
    4000             :                          false,
    4001             :                          trace,
    4002             :                          first_elt_done,
    4003         535 :                          &bound_checked_to);
    4004             :         }
    4005             :     }
    4006             : 
    4007         232 :     Trace successor_trace(*trace);
    4008         232 :     successor_trace.set_at_start(false);
    4009         232 :     successor_trace.AdvanceCurrentPositionInTrace(Length(), compiler);
    4010         464 :     RecursionCheck rc(compiler);
    4011         232 :     on_success()->Emit(compiler, &successor_trace);
    4012             : }
    4013             : 
    4014             : void
    4015         194 : LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
    4016             : {
    4017         194 :     RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    4018         194 :     if (trace->stop_node() == this) {
    4019             :         int text_length =
    4020          26 :             GreedyLoopTextLengthForAlternative(&alternatives()[0]);
    4021          26 :         MOZ_ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
    4022             :         // Update the counter-based backtracking info on the stack.  This is an
    4023             :         // optimization for greedy loops (see below).
    4024          26 :         MOZ_ASSERT(trace->cp_offset() == text_length);
    4025          26 :         macro_assembler->AdvanceCurrentPosition(text_length);
    4026          26 :         macro_assembler->JumpOrBacktrack(trace->loop_label());
    4027          26 :         return;
    4028             :     }
    4029         168 :     MOZ_ASSERT(trace->stop_node() == nullptr);
    4030         168 :     if (!trace->is_trivial()) {
    4031          75 :         trace->Flush(compiler, this);
    4032          75 :         return;
    4033             :     }
    4034          93 :     ChoiceNode::Emit(compiler, trace);
    4035             : }
    4036             : 
    4037             : /* Code generation for choice nodes.
    4038             :  *
    4039             :  * We generate quick checks that do a mask and compare to eliminate a
    4040             :  * choice.  If the quick check succeeds then it jumps to the continuation to
    4041             :  * do slow checks and check subsequent nodes.  If it fails (the common case)
    4042             :  * it falls through to the next choice.
    4043             :  *
    4044             :  * Here is the desired flow graph.  Nodes directly below each other imply
    4045             :  * fallthrough.  Alternatives 1 and 2 have quick checks.  Alternative
    4046             :  * 3 doesn't have a quick check so we have to call the slow check.
    4047             :  * Nodes are marked Qn for quick checks and Sn for slow checks.  The entire
    4048             :  * regexp continuation is generated directly after the Sn node, up to the
    4049             :  * next JumpOrBacktrack if we decide to reuse some already generated code.  Some
    4050             :  * nodes expect preload_characters to be preloaded into the current
    4051             :  * character register.  R nodes do this preloading.  Vertices are marked
    4052             :  * F for failures and S for success (possible success in the case of quick
    4053             :  * nodes).  L, V, < and > are used as arrow heads.
    4054             :  *
    4055             :  * ----------> R
    4056             :  *             |
    4057             :  *             V
    4058             :  *            Q1 -----> S1
    4059             :  *             |   S   /
    4060             :  *            F|      /
    4061             :  *             |    F/
    4062             :  *             |    /
    4063             :  *             |   R
    4064             :  *             |  /
    4065             :  *             V L
    4066             :  *            Q2 -----> S2
    4067             :  *             |   S   /
    4068             :  *            F|      /
    4069             :  *             |    F/
    4070             :  *             |    /
    4071             :  *             |   R
    4072             :  *             |  /
    4073             :  *             V L
    4074             :  *            S3
    4075             :  *             |
    4076             :  *            F|
    4077             :  *             |
    4078             :  *             R
    4079             :  *             |
    4080             :  * backtrack   V
    4081             :  * <----------Q4
    4082             :  *   \    F    |
    4083             :  *    \        |S
    4084             :  *     \   F   V
    4085             :  *      \-----S4
    4086             :  *
    4087             :  * For greedy loops we reverse our expectation and expect to match rather
    4088             :  * than fail. Therefore we want the loop code to look like this (U is the
    4089             :  * unwind code that steps back in the greedy loop).  The following alternatives
    4090             :  * look the same as above.
    4091             :  *              _____
    4092             :  *             /     \
    4093             :  *             V     |
    4094             :  * ----------> S1    |
    4095             :  *            /|     |
    4096             :  *           / |S    |
    4097             :  *         F/  \_____/
    4098             :  *         /
    4099             :  *        |<-----------
    4100             :  *        |            \
    4101             :  *        V             \
    4102             :  *        Q2 ---> S2     \
    4103             :  *        |  S   /       |
    4104             :  *       F|     /        |
    4105             :  *        |   F/         |
    4106             :  *        |   /          |
    4107             :  *        |  R           |
    4108             :  *        | /            |
    4109             :  *   F    VL             |
    4110             :  * <------U              |
    4111             :  * back   |S             |
    4112             :  *        \______________/
    4113             :  */
    4114             : 
    4115             : // This class is used when generating the alternatives in a choice node.  It
    4116             : // records the way the alternative is being code generated.
    4117         942 : class irregexp::AlternativeGeneration
    4118             : {
    4119             :   public:
    4120         942 :     AlternativeGeneration()
    4121         942 :       : possible_success(),
    4122             :         expects_preload(false),
    4123             :         after(),
    4124         942 :         quick_check_details()
    4125         942 :     {}
    4126             : 
    4127             :     jit::Label possible_success;
    4128             :     bool expects_preload;
    4129             :     jit::Label after;
    4130             :     QuickCheckDetails quick_check_details;
    4131             : };
    4132             : 
    4133             : void
    4134          16 : ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
    4135             :                           Guard* guard, Trace* trace)
    4136             : {
    4137          16 :     switch (guard->op()) {
    4138             :       case Guard::LT:
    4139          11 :         MOZ_ASSERT(!trace->mentions_reg(guard->reg()));
    4140          11 :         macro_assembler->IfRegisterGE(guard->reg(),
    4141             :                                       guard->value(),
    4142          22 :                                       trace->backtrack());
    4143          11 :         break;
    4144             :       case Guard::GEQ:
    4145           5 :         MOZ_ASSERT(!trace->mentions_reg(guard->reg()));
    4146           5 :         macro_assembler->IfRegisterLT(guard->reg(),
    4147             :                                       guard->value(),
    4148          10 :                                       trace->backtrack());
    4149           5 :         break;
    4150             :     }
    4151          16 : }
    4152             : 
    4153             : int
    4154          93 : ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least)
    4155             : {
    4156          93 :     int preload_characters = Min(4, eats_at_least);
    4157          93 :     if (compiler->macro_assembler()->CanReadUnaligned()) {
    4158          93 :         bool latin1 = compiler->latin1();
    4159          93 :         if (latin1) {
    4160          81 :             if (preload_characters > 4)
    4161           0 :                 preload_characters = 4;
    4162             :             // We can't preload 3 characters because there is no machine instruction
    4163             :             // to do that.  We can't just load 4 because we could be reading
    4164             :             // beyond the end of the string, which could cause a memory fault.
    4165          81 :             if (preload_characters == 3)
    4166           1 :                 preload_characters = 2;
    4167             :         } else {
    4168          12 :             if (preload_characters > 2)
    4169           2 :                 preload_characters = 2;
    4170             :         }
    4171             :     } else {
    4172           0 :         if (preload_characters > 1)
    4173           0 :             preload_characters = 1;
    4174             :     }
    4175          93 :     return preload_characters;
    4176             : }
    4177             : 
    4178             : RegExpNode*
    4179          34 : TextNode::GetSuccessorOfOmnivorousTextNode(RegExpCompiler* compiler)
    4180             : {
    4181          34 :     if (elements().length() != 1)
    4182           1 :         return nullptr;
    4183             : 
    4184          33 :     TextElement elm = elements()[0];
    4185          33 :     if (elm.text_type() != TextElement::CHAR_CLASS)
    4186          13 :         return nullptr;
    4187             : 
    4188          20 :     RegExpCharacterClass* node = elm.char_class();
    4189          20 :     CharacterRangeVector& ranges = node->ranges(alloc());
    4190             : 
    4191          20 :     if (!CharacterRange::IsCanonical(ranges))
    4192           0 :         CharacterRange::Canonicalize(ranges);
    4193             : 
    4194          20 :     if (node->is_negated())
    4195           0 :         return ranges.length() == 0 ? on_success() : nullptr;
    4196             : 
    4197          20 :     if (ranges.length() != 1)
    4198           2 :         return nullptr;
    4199             : 
    4200          18 :     uint32_t max_char = MaximumCharacter(compiler->latin1());
    4201          18 :     return ranges[0].IsEverything(max_char) ? on_success() : nullptr;
    4202             : }
    4203             : 
    4204             : // Finds the fixed match length of a sequence of nodes that goes from
    4205             : // this alternative and back to this choice node.  If there are variable
    4206             : // length nodes or other complications in the way then return a sentinel
    4207             : // value indicating that a greedy loop cannot be constructed.
    4208             : int
    4209         119 : ChoiceNode::GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative)
    4210             : {
    4211         119 :     int length = 0;
    4212         119 :     RegExpNode* node = alternative->node();
    4213             :     // Later we will generate code for all these text nodes using recursion
    4214             :     // so we have to limit the max number.
    4215         119 :     int recursion_depth = 0;
    4216         293 :     while (node != this) {
    4217         154 :         if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
    4218           0 :             return kNodeIsTooComplexForGreedyLoops;
    4219             :         }
    4220         154 :         int node_length = node->GreedyLoopTextLength();
    4221         154 :         if (node_length == kNodeIsTooComplexForGreedyLoops) {
    4222          67 :             return kNodeIsTooComplexForGreedyLoops;
    4223             :         }
    4224          87 :         length += node_length;
    4225          87 :         SeqRegExpNode* seq_node = static_cast<SeqRegExpNode*>(node);
    4226          87 :         node = seq_node->on_success();
    4227             :     }
    4228          52 :     return length;
    4229             : }
    4230             : 
    4231             : // Creates a list of AlternativeGenerations.  If the list has a reasonable
    4232             : // size then it is on the stack, otherwise the excess is on the heap.
    4233             : class AlternativeGenerationList
    4234             : {
    4235             :   public:
    4236          93 :     AlternativeGenerationList(LifoAlloc* alloc, size_t count)
    4237          93 :       : alt_gens_(*alloc)
    4238             :     {
    4239          93 :         alt_gens_.reserve(count);
    4240         315 :         for (size_t i = 0; i < count && i < kAFew; i++)
    4241         222 :             alt_gens_.append(a_few_alt_gens_ + i);
    4242         105 :         for (size_t i = kAFew; i < count; i++) {
    4243          24 :             AutoEnterOOMUnsafeRegion oomUnsafe;
    4244          12 :             AlternativeGeneration* gen = js_new<AlternativeGeneration>();
    4245          12 :             if (!gen)
    4246           0 :                 oomUnsafe.crash("AlternativeGenerationList js_new");
    4247          12 :             alt_gens_.append(gen);
    4248             :         }
    4249          93 :     }
    4250             : 
    4251         186 :     ~AlternativeGenerationList() {
    4252         105 :         for (size_t i = kAFew; i < alt_gens_.length(); i++) {
    4253          12 :             js_delete(alt_gens_[i]);
    4254          12 :             alt_gens_[i] = nullptr;
    4255             :         }
    4256          93 :     }
    4257             : 
    4258         438 :     AlternativeGeneration* at(int i) {
    4259         438 :         return alt_gens_[i];
    4260             :     }
    4261             : 
    4262             :   private:
    4263             :     static const size_t kAFew = 10;
    4264             :     InfallibleVector<AlternativeGeneration*, 1> alt_gens_;
    4265             :     AlternativeGeneration a_few_alt_gens_[kAFew];
    4266             : };
    4267             : 
    4268             : void
    4269         149 : ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace)
    4270             : {
    4271         149 :     RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    4272         149 :     size_t choice_count = alternatives().length();
    4273             : #ifdef DEBUG
    4274         346 :     for (size_t i = 0; i < choice_count - 1; i++) {
    4275         197 :         const GuardedAlternative& alternative = alternatives()[i];
    4276         197 :         const GuardVector* guards = alternative.guards();
    4277         197 :         if (guards) {
    4278          48 :             for (size_t j = 0; j < guards->length(); j++)
    4279          24 :                 MOZ_ASSERT(!trace->mentions_reg((*guards)[j]->reg()));
    4280             :         }
    4281             :     }
    4282             : #endif
    4283             : 
    4284         149 :     LimitResult limit_result = LimitVersions(compiler, trace);
    4285         205 :     if (limit_result == DONE) return;
    4286          93 :     MOZ_ASSERT(limit_result == CONTINUE);
    4287             : 
    4288          93 :     int new_flush_budget = trace->flush_budget() / choice_count;
    4289          93 :     if (trace->flush_budget() == 0 && trace->actions() != nullptr) {
    4290           0 :         trace->Flush(compiler, this);
    4291           0 :         return;
    4292             :     }
    4293             : 
    4294         186 :     RecursionCheck rc(compiler);
    4295             : 
    4296          93 :     Trace* current_trace = trace;
    4297             : 
    4298          93 :     int text_length = GreedyLoopTextLengthForAlternative(&alternatives()[0]);
    4299          93 :     bool greedy_loop = false;
    4300         186 :     jit::Label greedy_loop_label;
    4301          93 :     Trace counter_backtrack_trace;
    4302          93 :     counter_backtrack_trace.set_backtrack(&greedy_loop_label);
    4303          93 :     if (not_at_start()) counter_backtrack_trace.set_at_start(false);
    4304             : 
    4305          93 :     if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
    4306             :         // Here we have special handling for greedy loops containing only text nodes
    4307             :         // and other simple nodes.  These are handled by pushing the current
    4308             :         // position on the stack and then incrementing the current position each
    4309             :         // time around the switch.  On backtrack we decrement the current position
    4310             :         // and check it against the pushed value.  This avoids pushing backtrack
    4311             :         // information for each iteration of the loop, which could take up a lot of
    4312             :         // space.
    4313          26 :         greedy_loop = true;
    4314          26 :         MOZ_ASSERT(trace->stop_node() == nullptr);
    4315          26 :         macro_assembler->PushCurrentPosition();
    4316          26 :         current_trace = &counter_backtrack_trace;
    4317          52 :         jit::Label greedy_match_failed;
    4318          26 :         Trace greedy_match_trace;
    4319          26 :         if (not_at_start()) greedy_match_trace.set_at_start(false);
    4320          26 :         greedy_match_trace.set_backtrack(&greedy_match_failed);
    4321          52 :         jit::Label loop_label;
    4322          26 :         macro_assembler->Bind(&loop_label);
    4323          26 :         greedy_match_trace.set_stop_node(this);
    4324          26 :         greedy_match_trace.set_loop_label(&loop_label);
    4325          26 :         alternatives()[0].node()->Emit(compiler, &greedy_match_trace);
    4326          26 :         macro_assembler->Bind(&greedy_match_failed);
    4327             :     }
    4328             : 
    4329         186 :     jit::Label second_choice;  // For use in greedy matches.
    4330          93 :     macro_assembler->Bind(&second_choice);
    4331             : 
    4332          93 :     size_t first_normal_choice = greedy_loop ? 1 : 0;
    4333             : 
    4334          93 :     bool not_at_start = current_trace->at_start() == Trace::FALSE_VALUE;
    4335          93 :     const int kEatsAtLeastNotYetInitialized = -1;
    4336          93 :     int eats_at_least = kEatsAtLeastNotYetInitialized;
    4337             : 
    4338          93 :     bool skip_was_emitted = false;
    4339             : 
    4340          93 :     if (!greedy_loop && choice_count == 2) {
    4341          60 :         GuardedAlternative alt1 = alternatives()[1];
    4342          60 :         if (!alt1.guards() || alt1.guards()->length() == 0) {
    4343          55 :             RegExpNode* eats_anything_node = alt1.node();
    4344          55 :             if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) == this) {
    4345             :                 // At this point we know that we are at a non-greedy loop that will eat
    4346             :                 // any character one at a time.  Any non-anchored regexp has such a
    4347             :                 // loop prepended to it in order to find where it starts.  We look for
    4348             :                 // a pattern of the form ...abc... where we can look 6 characters ahead
    4349             :                 // and step forwards 3 if the character is not one of abc.  Abc need
    4350             :                 // not be atoms, they can be any reasonably limited character class or
    4351             :                 // small alternation.
    4352          18 :                 MOZ_ASSERT(trace->is_trivial());  // This is the case on LoopChoiceNodes.
    4353          18 :                 BoyerMooreLookahead* lookahead = bm_info(not_at_start);
    4354          18 :                 if (lookahead == nullptr) {
    4355          18 :                     eats_at_least = Min(kMaxLookaheadForBoyerMoore,
    4356             :                                         EatsAtLeast(kMaxLookaheadForBoyerMoore,
    4357             :                                                     kRecursionBudget,
    4358          18 :                                                     not_at_start));
    4359          18 :                     if (eats_at_least >= 1) {
    4360             :                         BoyerMooreLookahead* bm =
    4361          18 :                             alloc()->newInfallible<BoyerMooreLookahead>(alloc(), eats_at_least, compiler);
    4362          18 :                         GuardedAlternative alt0 = alternatives()[0];
    4363          18 :                         alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
    4364          18 :                         skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
    4365             :                     }
    4366             :                 } else {
    4367           0 :                     skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
    4368             :                 }
    4369             :             }
    4370             :         }
    4371             :     }
    4372             : 
    4373          93 :     if (eats_at_least == kEatsAtLeastNotYetInitialized) {
    4374             :         // Save some time by looking at most one machine word ahead.
    4375          75 :         eats_at_least =
    4376          75 :             EatsAtLeast(compiler->latin1() ? 4 : 2, kRecursionBudget, not_at_start);
    4377             :     }
    4378          93 :     int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);
    4379             : 
    4380         176 :     bool preload_is_current = !skip_was_emitted &&
    4381         176 :         (current_trace->characters_preloaded() == preload_characters);
    4382          93 :     bool preload_has_checked_bounds = preload_is_current;
    4383             : 
    4384         186 :     AlternativeGenerationList alt_gens(alloc(), choice_count);
    4385             : 
    4386             :     // For now we just call all choices one after the other.  The idea ultimately
    4387             :     // is to use the Dispatch table to try only the relevant ones.
    4388         301 :     for (size_t i = first_normal_choice; i < choice_count; i++) {
    4389         208 :         GuardedAlternative alternative = alternatives()[i];
    4390         208 :         AlternativeGeneration* alt_gen = alt_gens.at(i);
    4391         208 :         alt_gen->quick_check_details.set_characters(preload_characters);
    4392         208 :         const GuardVector* guards = alternative.guards();
    4393         208 :         Trace new_trace(*current_trace);
    4394         208 :         new_trace.set_characters_preloaded(preload_is_current ?
    4395             :                                            preload_characters :
    4396         208 :                                            0);
    4397         208 :         if (preload_has_checked_bounds) {
    4398         150 :             new_trace.set_bound_checked_up_to(preload_characters);
    4399             :         }
    4400         208 :         new_trace.quick_check_performed()->Clear();
    4401         208 :         if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
    4402         208 :         alt_gen->expects_preload = preload_is_current;
    4403         208 :         bool generate_full_check_inline = false;
    4404         414 :         if (try_to_emit_quick_check_for_alternative(i) &&
    4405         206 :             alternative.node()->EmitQuickCheck(compiler,
    4406             :                                                &new_trace,
    4407             :                                                preload_has_checked_bounds,
    4408             :                                                &alt_gen->possible_success,
    4409             :                                                &alt_gen->quick_check_details,
    4410         206 :                                                i < choice_count - 1)) {
    4411             :             // Quick check was generated for this choice.
    4412         110 :             preload_is_current = true;
    4413         110 :             preload_has_checked_bounds = true;
    4414             :             // On the last choice in the ChoiceNode we generated the quick
    4415             :             // check to fall through on possible success.  So now we need to
    4416             :             // generate the full check inline.
    4417         110 :             if (i == choice_count - 1) {
    4418          36 :                 macro_assembler->Bind(&alt_gen->possible_success);
    4419          36 :                 new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
    4420          36 :                 new_trace.set_characters_preloaded(preload_characters);
    4421          36 :                 new_trace.set_bound_checked_up_to(preload_characters);
    4422          36 :                 generate_full_check_inline = true;
    4423             :             }
    4424          98 :         } else if (alt_gen->quick_check_details.cannot_match()) {
    4425           0 :             if (i == choice_count - 1 && !greedy_loop) {
    4426           0 :                 macro_assembler->JumpOrBacktrack(trace->backtrack());
    4427             :             }
    4428           0 :             continue;
    4429             :         } else {
    4430             :             // No quick check was generated.  Put the full code here.
    4431             :             // If this is not the first choice then there could be slow checks from
    4432             :             // previous cases that go here when they fail.  There's no reason to
    4433             :             // insist that they preload characters since the slow check we are about
    4434             :             // to generate probably can't use it.
    4435          98 :             if (i != first_normal_choice) {
    4436          56 :                 alt_gen->expects_preload = false;
    4437          56 :                 new_trace.InvalidateCurrentCharacter();
    4438             :             }
    4439          98 :             if (i < choice_count - 1) {
    4440          41 :                 new_trace.set_backtrack(&alt_gen->after);
    4441             :             }
    4442          98 :             generate_full_check_inline = true;
    4443             :         }
    4444         208 :         if (generate_full_check_inline) {
    4445         134 :             if (new_trace.actions() != nullptr)
    4446          41 :                 new_trace.set_flush_budget(new_flush_budget);
    4447         134 :             if (guards) {
    4448          14 :                 for (size_t j = 0; j < guards->length(); j++)
    4449           7 :                     GenerateGuard(macro_assembler, (*guards)[j], &new_trace);
    4450             :             }
    4451         134 :             alternative.node()->Emit(compiler, &new_trace);
    4452         134 :             preload_is_current = false;
    4453             :         }
    4454         208 :         macro_assembler->Bind(&alt_gen->after);
    4455             :     }
    4456          93 :     if (greedy_loop) {
    4457          26 :         macro_assembler->Bind(&greedy_loop_label);
    4458             :         // If we have unwound to the bottom then backtrack.
    4459          26 :         macro_assembler->CheckGreedyLoop(trace->backtrack());
    4460             :         // Otherwise try the second priority at an earlier position.
    4461          26 :         macro_assembler->AdvanceCurrentPosition(-text_length);
    4462          26 :         macro_assembler->JumpOrBacktrack(&second_choice);
    4463             :     }
    4464             : 
    4465             :     // At this point we need to generate slow checks for the alternatives where
    4466             :     // the quick check was inlined.  We can recognize these because the associated
    4467             :     // label was bound.
    4468         208 :     for (size_t i = first_normal_choice; i < choice_count - 1; i++) {
    4469         115 :         AlternativeGeneration* alt_gen = alt_gens.at(i);
    4470         115 :         Trace new_trace(*current_trace);
    4471             :         // If there are actions to be flushed we have to limit how many times
    4472             :         // they are flushed.  Take the budget of the parent trace and distribute
    4473             :         // it fairly amongst the children.
    4474         115 :         if (new_trace.actions() != nullptr) {
    4475          60 :             new_trace.set_flush_budget(new_flush_budget);
    4476             :         }
    4477         115 :         EmitOutOfLineContinuation(compiler,
    4478             :                                   &new_trace,
    4479         115 :                                   alternatives()[i],
    4480             :                                   alt_gen,
    4481             :                                   preload_characters,
    4482         230 :                                   alt_gens.at(i + 1)->expects_preload);
    4483             :     }
    4484             : }
    4485             : 
    4486             : void
    4487         115 : ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
    4488             :                                       Trace* trace,
    4489             :                                       GuardedAlternative alternative,
    4490             :                                       AlternativeGeneration* alt_gen,
    4491             :                                       int preload_characters,
    4492             :                                       bool next_expects_preload)
    4493             : {
    4494         115 :     if (!alt_gen->possible_success.used())
    4495          41 :         return;
    4496             : 
    4497          74 :     RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    4498          74 :     macro_assembler->Bind(&alt_gen->possible_success);
    4499          74 :     Trace out_of_line_trace(*trace);
    4500          74 :     out_of_line_trace.set_characters_preloaded(preload_characters);
    4501          74 :     out_of_line_trace.set_quick_check_performed(&alt_gen->quick_check_details);
    4502          74 :     if (not_at_start_) out_of_line_trace.set_at_start(Trace::FALSE_VALUE);
    4503          74 :     const GuardVector* guards = alternative.guards();
    4504          74 :     if (next_expects_preload) {
    4505         118 :         jit::Label reload_current_char;
    4506          59 :         out_of_line_trace.set_backtrack(&reload_current_char);
    4507          59 :         if (guards) {
    4508          18 :             for (size_t j = 0; j < guards->length(); j++)
    4509           9 :                 GenerateGuard(macro_assembler, (*guards)[j], &out_of_line_trace);
    4510             :         }
    4511          59 :         alternative.node()->Emit(compiler, &out_of_line_trace);
    4512          59 :         macro_assembler->Bind(&reload_current_char);
    4513             :         // Reload the current character, since the next quick check expects that.
    4514             :         // We don't need to check bounds here because we only get into this
    4515             :         // code through a quick check which already did the checked load.
    4516          59 :         macro_assembler->LoadCurrentCharacter(trace->cp_offset(),
    4517             :                                               nullptr,
    4518             :                                               false,
    4519         118 :                                               preload_characters);
    4520          59 :         macro_assembler->JumpOrBacktrack(&(alt_gen->after));
    4521             :     } else {
    4522          15 :         out_of_line_trace.set_backtrack(&(alt_gen->after));
    4523          15 :         if (guards) {
    4524           0 :             for (size_t j = 0; j < guards->length(); j++)
    4525           0 :                 GenerateGuard(macro_assembler, (*guards)[j], &out_of_line_trace);
    4526             :         }
    4527          15 :         alternative.node()->Emit(compiler, &out_of_line_trace);
    4528             :     }
    4529             : }
    4530             : 
    4531             : void
    4532         290 : ActionNode::Emit(RegExpCompiler* compiler, Trace* trace)
    4533             : {
    4534         290 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    4535         290 :     LimitResult limit_result = LimitVersions(compiler, trace);
    4536         362 :     if (limit_result == DONE) return;
    4537         248 :     MOZ_ASSERT(limit_result == CONTINUE);
    4538             : 
    4539         466 :     RecursionCheck rc(compiler);
    4540             : 
    4541         248 :     switch (action_type_) {
    4542             :       case STORE_POSITION: {
    4543             :         Trace::DeferredCapture
    4544             :             new_capture(data_.u_position_register.reg,
    4545         164 :                         data_.u_position_register.is_capture,
    4546         164 :                         trace);
    4547         164 :         Trace new_trace = *trace;
    4548         164 :         new_trace.add_action(&new_capture);
    4549         164 :         on_success()->Emit(compiler, &new_trace);
    4550         164 :         break;
    4551             :       }
    4552             :       case INCREMENT_REGISTER: {
    4553             :         Trace::DeferredIncrementRegister
    4554          13 :             new_increment(data_.u_increment_register.reg);
    4555          13 :         Trace new_trace = *trace;
    4556          13 :         new_trace.add_action(&new_increment);
    4557          13 :         on_success()->Emit(compiler, &new_trace);
    4558          13 :         break;
    4559             :       }
    4560             :       case SET_REGISTER: {
    4561             :         Trace::DeferredSetRegister
    4562          11 :             new_set(data_.u_store_register.reg, data_.u_store_register.value);
    4563          11 :         Trace new_trace = *trace;
    4564          11 :         new_trace.add_action(&new_set);
    4565          11 :         on_success()->Emit(compiler, &new_trace);
    4566          11 :         break;
    4567             :       }
    4568             :       case CLEAR_CAPTURES: {
    4569             :         Trace::DeferredClearCaptures
    4570             :             new_capture(Interval(data_.u_clear_captures.range_from,
    4571           6 :                                  data_.u_clear_captures.range_to));
    4572           6 :         Trace new_trace = *trace;
    4573           6 :         new_trace.add_action(&new_capture);
    4574           6 :         on_success()->Emit(compiler, &new_trace);
    4575           6 :         break;
    4576             :       }
    4577             :       case BEGIN_SUBMATCH:
    4578          24 :         if (!trace->is_trivial()) {
    4579          12 :             trace->Flush(compiler, this);
    4580             :         } else {
    4581          12 :             assembler->WriteCurrentPositionToRegister(data_.u_submatch.current_position_register, 0);
    4582          12 :             assembler->WriteBacktrackStackPointerToRegister(data_.u_submatch.stack_pointer_register);
    4583          12 :             on_success()->Emit(compiler, trace);
    4584             :         }
    4585          24 :         break;
    4586             :       case EMPTY_MATCH_CHECK: {
    4587           0 :         int start_pos_reg = data_.u_empty_match_check.start_register;
    4588           0 :         int stored_pos = 0;
    4589           0 :         int rep_reg = data_.u_empty_match_check.repetition_register;
    4590           0 :         bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
    4591           0 :         bool know_dist = trace->GetStoredPosition(start_pos_reg, &stored_pos);
    4592           0 :         if (know_dist && !has_minimum && stored_pos == trace->cp_offset()) {
    4593             :             // If we know we haven't advanced and there is no minimum we
    4594             :             // can just backtrack immediately.
    4595           0 :             assembler->JumpOrBacktrack(trace->backtrack());
    4596           0 :         } else if (know_dist && stored_pos < trace->cp_offset()) {
    4597             :             // If we know we've advanced we can generate the continuation
    4598             :             // immediately.
    4599           0 :             on_success()->Emit(compiler, trace);
    4600           0 :         } else if (!trace->is_trivial()) {
    4601           0 :             trace->Flush(compiler, this);
    4602             :         } else {
    4603           0 :             jit::Label skip_empty_check;
    4604             :             // If we have a minimum number of repetitions we check the current
    4605             :             // number first and skip the empty check if it's not enough.
    4606           0 :             if (has_minimum) {
    4607           0 :                 int limit = data_.u_empty_match_check.repetition_limit;
    4608           0 :                 assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
    4609             :             }
    4610             :             // If the match is empty we bail out, otherwise we fall through
    4611             :             // to the on-success continuation.
    4612           0 :             assembler->IfRegisterEqPos(data_.u_empty_match_check.start_register,
    4613           0 :                                        trace->backtrack());
    4614           0 :             assembler->Bind(&skip_empty_check);
    4615           0 :             on_success()->Emit(compiler, trace);
    4616             :         }
    4617           0 :         break;
    4618             :       }
    4619             :       case POSITIVE_SUBMATCH_SUCCESS: {
    4620          30 :         if (!trace->is_trivial()) {
    4621          20 :             trace->Flush(compiler, this);
    4622          20 :             return;
    4623             :         }
    4624          10 :         assembler->ReadCurrentPositionFromRegister(data_.u_submatch.current_position_register);
    4625          10 :         assembler->ReadBacktrackStackPointerFromRegister(data_.u_submatch.stack_pointer_register);
    4626          10 :         int clear_register_count = data_.u_submatch.clear_register_count;
    4627          10 :         if (clear_register_count == 0) {
    4628          10 :             on_success()->Emit(compiler, trace);
    4629          10 :             return;
    4630             :         }
    4631           0 :         int clear_registers_from = data_.u_submatch.clear_register_from;
    4632           0 :         jit::Label clear_registers_backtrack;
    4633           0 :         Trace new_trace = *trace;
    4634           0 :         new_trace.set_backtrack(&clear_registers_backtrack);
    4635           0 :         on_success()->Emit(compiler, &new_trace);
    4636             : 
    4637           0 :         assembler->Bind(&clear_registers_backtrack);
    4638           0 :         int clear_registers_to = clear_registers_from + clear_register_count - 1;
    4639           0 :         assembler->ClearRegisters(clear_registers_from, clear_registers_to);
    4640             : 
    4641           0 :         MOZ_ASSERT(trace->backtrack() == nullptr);
    4642           0 :         assembler->Backtrack();
    4643           0 :         return;
    4644             :       }
    4645             :       default:
    4646           0 :         MOZ_CRASH("Bad action");
    4647             :     }
    4648             : }
    4649             : 
    4650             : void
    4651           0 : BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace)
    4652             : {
    4653           0 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    4654           0 :     if (!trace->is_trivial()) {
    4655           0 :         trace->Flush(compiler, this);
    4656           0 :         return;
    4657             :     }
    4658             : 
    4659           0 :     LimitResult limit_result = LimitVersions(compiler, trace);
    4660           0 :     if (limit_result == DONE) return;
    4661           0 :     MOZ_ASSERT(limit_result == CONTINUE);
    4662             : 
    4663           0 :     RecursionCheck rc(compiler);
    4664             : 
    4665           0 :     MOZ_ASSERT(start_reg_ + 1 == end_reg_);
    4666           0 :     if (compiler->ignore_case()) {
    4667           0 :         assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
    4668             :                                                    trace->backtrack(),
    4669           0 :                                                    compiler->unicode());
    4670             :     } else {
    4671           0 :         assembler->CheckNotBackReference(start_reg_, trace->backtrack());
    4672             :     }
    4673           0 :     on_success()->Emit(compiler, trace);
    4674             : }
    4675             : 
    4676             : RegExpNode::LimitResult
    4677         672 : RegExpNode::LimitVersions(RegExpCompiler* compiler, Trace* trace)
    4678             : {
    4679             :     // If we are generating a greedy loop then don't stop and don't reuse code.
    4680         672 :     if (trace->stop_node() != nullptr)
    4681          26 :         return CONTINUE;
    4682             : 
    4683         646 :     RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    4684         646 :     if (trace->is_trivial()) {
    4685         209 :         if (label()->bound()) {
    4686             :             // We are being asked to generate a generic version, but that's already
    4687             :             // been done so just go to it.
    4688          76 :             macro_assembler->JumpOrBacktrack(label());
    4689          76 :             return DONE;
    4690             :         }
    4691         133 :         if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {
    4692             :             // To avoid too deep recursion we push the node to the work queue and just
    4693             :             // generate a goto here.
    4694           0 :             compiler->AddWork(this);
    4695           0 :             macro_assembler->JumpOrBacktrack(label());
    4696           0 :             return DONE;
    4697             :         }
    4698             :         // Generate generic version of the node and bind the label for later use.
    4699         133 :         macro_assembler->Bind(label());
    4700         133 :         return CONTINUE;
    4701             :     }
    4702             : 
    4703             :     // We are being asked to make a non-generic version.  Keep track of how many
    4704             :     // non-generic versions we generate so as not to overdo it.
    4705         437 :     trace_count_++;
    4706         851 :     if (trace_count_ < kMaxCopiesCodeGenerated &&
    4707         414 :         compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
    4708         414 :         return CONTINUE;
    4709             :     }
    4710             : 
    4711             :     // If we get here code has been generated for this node too many times or
    4712             :     // recursion is too deep.  Time to switch to a generic version.  The code for
    4713             :     // generic versions above can handle deep recursion properly.
    4714          23 :     trace->Flush(compiler, this);
    4715          23 :     return DONE;
    4716             : }
    4717             : 
    4718             : bool
    4719         206 : RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
    4720             :                            Trace* trace,
    4721             :                            bool preload_has_checked_bounds,
    4722             :                            jit::Label* on_possible_success,
    4723             :                            QuickCheckDetails* details,
    4724             :                            bool fall_through_on_failure)
    4725             : {
    4726         206 :     if (details->characters() == 0) return false;
    4727         133 :     GetQuickCheckDetails(
    4728         266 :                          details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);
    4729         133 :     if (details->cannot_match()) return false;
    4730         133 :     if (!details->Rationalize(compiler->latin1())) return false;
    4731         110 :     MOZ_ASSERT(details->characters() == 1 ||
    4732             :                compiler->macro_assembler()->CanReadUnaligned());
    4733         110 :     uint32_t mask = details->mask();
    4734         110 :     uint32_t value = details->value();
    4735             : 
    4736         110 :     RegExpMacroAssembler* assembler = compiler->macro_assembler();
    4737             : 
    4738         110 :     if (trace->characters_preloaded() != details->characters()) {
    4739         100 :         assembler->LoadCurrentCharacter(trace->cp_offset(),
    4740             :                                         trace->backtrack(),
    4741          50 :                                         !preload_has_checked_bounds,
    4742         100 :                                         details->characters());
    4743             :     }
    4744             : 
    4745         110 :     bool need_mask = true;
    4746             : 
    4747         110 :     if (details->characters() == 1) {
    4748             :         // If number of characters preloaded is 1 then we used a byte or 16 bit
    4749             :         // load so the value is already masked down.
    4750          30 :         uint32_t char_mask = MaximumCharacter(compiler->latin1());
    4751          30 :         if ((mask & char_mask) == char_mask) need_mask = false;
    4752          30 :         mask &= char_mask;
    4753             :     } else {
    4754             :         // For 2-character preloads in LATIN1 mode or 1-character preloads in
    4755             :         // TWO_BYTE mode we also use a 16 bit load with zero extend.
    4756          80 :         if (details->characters() == 2 && compiler->latin1()) {
    4757          29 :             if ((mask & 0xffff) == 0xffff) need_mask = false;
    4758          51 :         } else if (details->characters() == 1 && !compiler->latin1()) {
    4759           0 :             if ((mask & 0xffff) == 0xffff) need_mask = false;
    4760             :         } else {
    4761          51 :             if (mask == 0xffffffff) need_mask = false;
    4762             :         }
    4763             :     }
    4764             : 
    4765         110 :     if (fall_through_on_failure) {
    4766          74 :         if (need_mask) {
    4767          49 :             assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
    4768             :         } else {
    4769          25 :             assembler->CheckCharacter(value, on_possible_success);
    4770             :         }
    4771             :     } else {
    4772          36 :         if (need_mask) {
    4773          18 :             assembler->CheckNotCharacterAfterAnd(value, mask, trace->backtrack());
    4774             :         } else {
    4775          18 :             assembler->CheckNotCharacter(value, trace->backtrack());
    4776             :         }
    4777             :     }
    4778         110 :     return true;
    4779             : }
    4780             : 
    4781             : bool
    4782          38 : TextNode::FillInBMInfo(int initial_offset,
    4783             :                        int budget,
    4784             :                        BoyerMooreLookahead* bm,
    4785             :                        bool not_at_start)
    4786             : {
    4787          38 :     if (!bm->CheckOverRecursed())
    4788           0 :         return false;
    4789             : 
    4790          38 :     if (initial_offset >= bm->length())
    4791           0 :         return true;
    4792             : 
    4793          38 :     int offset = initial_offset;
    4794          38 :     int max_char = bm->max_char();
    4795          74 :     for (size_t i = 0; i < elements().length(); i++) {
    4796          38 :         if (offset >= bm->length()) {
    4797           0 :             if (initial_offset == 0)
    4798           0 :                 set_bm_info(not_at_start, bm);
    4799           2 :             return true;
    4800             :         }
    4801          38 :         TextElement text = elements()[i];
    4802          38 :         if (text.text_type() == TextElement::ATOM) {
    4803          22 :             RegExpAtom* atom = text.atom();
    4804          77 :             for (int j = 0; j < atom->length(); j++, offset++) {
    4805          57 :                 if (offset >= bm->length()) {
    4806           2 :                     if (initial_offset == 0)
    4807           2 :                         set_bm_info(not_at_start, bm);
    4808           2 :                     return true;
    4809             :                 }
    4810          55 :                 char16_t character = atom->data()[j];
    4811          55 :                 if (bm->compiler()->ignore_case()) {
    4812             :                     char16_t chars[kEcma262UnCanonicalizeMaxWidth];
    4813          60 :                     int length = GetCaseIndependentLetters(character,
    4814          30 :                                                            bm->max_char() == kMaxOneByteCharCode,
    4815          30 :                                                            bm->compiler()->unicode(),
    4816          30 :                                                            chars);
    4817          71 :                     for (int j = 0; j < length; j++)
    4818          41 :                         bm->Set(offset, chars[j]);
    4819             :                 } else {
    4820          25 :                     if (character <= max_char) bm->Set(offset, character);
    4821             :                 }
    4822             :             }
    4823             :         } else {
    4824          16 :             MOZ_ASSERT(TextElement::CHAR_CLASS == text.text_type());
    4825          16 :             RegExpCharacterClass* char_class = text.char_class();
    4826          16 :             const CharacterRangeVector& ranges = char_class->ranges(alloc());
    4827          16 :             if (char_class->is_negated()) {
    4828           2 :                 bm->SetAll(offset);
    4829             :             } else {
    4830          79 :                 for (size_t k = 0; k < ranges.length(); k++) {
    4831          65 :                     const CharacterRange& range = ranges[k];
    4832          65 :                     if (range.from() > max_char)
    4833          30 :                         continue;
    4834          35 :                     int to = Min(max_char, static_cast<int>(range.to()));
    4835          35 :                     bm->SetInterval(offset, Interval(range.from(), to));
    4836             :                 }
    4837             :             }
    4838          16 :             offset++;
    4839             :         }
    4840             :     }
    4841          36 :     if (offset >= bm->length()) {
    4842          30 :         if (initial_offset == 0) set_bm_info(not_at_start, bm);
    4843          30 :         return true;
    4844             :     }
    4845          12 :     if (!on_success()->FillInBMInfo(offset,
    4846             :                                     budget - 1,
    4847             :                                     bm,
    4848           6 :                                     true))   // Not at start after a text node.
    4849           0 :         return false;
    4850           6 :     if (initial_offset == 0)
    4851           4 :         set_bm_info(not_at_start, bm);
    4852           6 :     return true;
    4853             : }
    4854             : 
    4855             : // -------------------------------------------------------------------
    4856             : // QuickCheckDetails
    4857             : 
    4858             : // Takes the left-most 1-bit and smears it out, setting all bits to its right.
    4859             : static inline uint32_t
    4860         162 : SmearBitsRight(uint32_t v)
    4861             : {
    4862         162 :     v |= v >> 1;
    4863         162 :     v |= v >> 2;
    4864         162 :     v |= v >> 4;
    4865         162 :     v |= v >> 8;
    4866         162 :     v |= v >> 16;
    4867         162 :     return v;
    4868             : }
    4869             : 
    4870             : // Here is the meat of GetQuickCheckDetails (see also the comment on the
    4871             : // super-class in the .h file).
    4872             : //
    4873             : // We iterate along the text object, building up for each character a
    4874             : // mask and value that can be used to test for a quick failure to match.
    4875             : // The masks and values for the positions will be combined into a single
    4876             : // machine word for the current character width in order to be used in
    4877             : // generating a quick check.
    4878             : void
    4879         206 : TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
    4880             :                                RegExpCompiler* compiler,
    4881             :                                int characters_filled_in,
    4882             :                                bool not_at_start)
    4883             : {
    4884         206 :     MOZ_ASSERT(characters_filled_in < details->characters());
    4885         206 :     int characters = details->characters();
    4886         206 :     int char_mask = MaximumCharacter(compiler->latin1());
    4887             : 
    4888         238 :     for (size_t k = 0; k < elements().length(); k++) {
    4889         207 :         TextElement elm = elements()[k];
    4890         207 :         if (elm.text_type() == TextElement::ATOM) {
    4891         134 :             const CharacterVector& quarks = elm.atom()->data();
    4892         296 :             for (size_t i = 0; i < (size_t) characters && i < quarks.length(); i++) {
    4893             :                 QuickCheckDetails::Position* pos =
    4894         279 :                     details->positions(characters_filled_in);
    4895         279 :                 char16_t c = quarks[i];
    4896         279 :                 if (c > char_mask) {
    4897             :                     // If we expect a non-LATIN1 character from an LATIN1 string,
    4898             :                     // there is no way we can match. Not even case independent
    4899             :                     // matching can turn an LATIN1 character into non-LATIN1 or
    4900             :                     // vice versa.
    4901           0 :                     details->set_cannot_match();
    4902           0 :                     pos->determines_perfectly = false;
    4903         175 :                     return;
    4904             :                 }
    4905         279 :                 if (compiler->ignore_case()) {
    4906             :                     char16_t chars[kEcma262UnCanonicalizeMaxWidth];
    4907         156 :                     size_t length = GetCaseIndependentLetters(c, compiler->latin1(),
    4908         312 :                                                               compiler->unicode(), chars);
    4909         156 :                     MOZ_ASSERT(length != 0);  // Can only happen if c > char_mask (see above).
    4910         156 :                     if (length == 1) {
    4911             :                         // This letter has no case equivalents, so it's nice and simple
    4912             :                         // and the mask-compare will determine definitely whether we have
    4913             :                         // a match at this character position.
    4914          59 :                         pos->mask = char_mask;
    4915          59 :                         pos->value = c;
    4916          59 :                         pos->determines_perfectly = true;
    4917             :                     } else {
    4918          97 :                         uint32_t common_bits = char_mask;
    4919          97 :                         uint32_t bits = chars[0];
    4920         194 :                         for (size_t j = 1; j < length; j++) {
    4921          97 :                             uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
    4922          97 :                             common_bits ^= differing_bits;
    4923          97 :                             bits &= common_bits;
    4924             :                         }
    4925             :                         // If length is 2 and common bits has only one zero in it then
    4926             :                         // our mask and compare instruction will determine definitely
    4927             :                         // whether we have a match at this character position.  Otherwise
    4928             :                         // it can only be an approximate check.
    4929          97 :                         uint32_t one_zero = (common_bits | ~char_mask);
    4930          97 :                         if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
    4931          97 :                             pos->determines_perfectly = true;
    4932             :                         }
    4933          97 :                         pos->mask = common_bits;
    4934          97 :                         pos->value = bits;
    4935             :                     }
    4936             :                 } else {
    4937             :                     // Don't ignore case.  Nice simple case where the mask-compare will
    4938             :                     // determine definitely whether we have a match at this character
    4939             :                     // position.
    4940         123 :                     pos->mask = char_mask;
    4941         123 :                     pos->value = c;
    4942         123 :                     pos->determines_perfectly = true;
    4943             :                 }
    4944         279 :                 characters_filled_in++;
    4945         279 :                 MOZ_ASSERT(characters_filled_in <= details->characters());
    4946         279 :                 if (characters_filled_in == details->characters()) {
    4947         117 :                     return;
    4948             :                 }
    4949             :             }
    4950             :         } else {
    4951             :             QuickCheckDetails::Position* pos =
    4952          73 :                 details->positions(characters_filled_in);
    4953          73 :             RegExpCharacterClass* tree = elm.char_class();
    4954          73 :             const CharacterRangeVector& ranges = tree->ranges(alloc());
    4955          73 :             if (tree->is_negated()) {
    4956             :                 // A quick check uses multi-character mask and compare.  There is no
    4957             :                 // useful way to incorporate a negative char class into this scheme
    4958             :                 // so we just conservatively create a mask and value that will always
    4959             :                 // succeed.
    4960           6 :                 pos->mask = 0;
    4961           6 :                 pos->value = 0;
    4962             :             } else {
    4963          67 :                 size_t first_range = 0;
    4964          67 :                 while (ranges[first_range].from() > char_mask) {
    4965           0 :                     first_range++;
    4966           0 :                     if (first_range == ranges.length()) {
    4967           0 :                         details->set_cannot_match();
    4968           0 :                         pos->determines_perfectly = false;
    4969           0 :                         return;
    4970             :                     }
    4971             :                 }
    4972          67 :                 CharacterRange range = ranges[first_range];
    4973          67 :                 char16_t from = range.from();
    4974          67 :                 char16_t to = range.to();
    4975          67 :                 if (to > char_mask) {
    4976          21 :                     to = char_mask;
    4977             :                 }
    4978          67 :                 uint32_t differing_bits = (from ^ to);
    4979             :                 // A mask and compare is only perfect if the differing bits form a
    4980             :                 // number like 00011111 with one single block of trailing 1s.
    4981         109 :                 if ((differing_bits & (differing_bits + 1)) == 0 &&
    4982          42 :                     from + differing_bits == to) {
    4983          32 :                     pos->determines_perfectly = true;
    4984             :                 }
    4985          67 :                 uint32_t common_bits = ~SmearBitsRight(differing_bits);
    4986          67 :                 uint32_t bits = (from & common_bits);
    4987         200 :                 for (size_t i = first_range + 1; i < ranges.length(); i++) {
    4988         133 :                     CharacterRange range = ranges[i];
    4989         133 :                     char16_t from = range.from();
    4990         133 :                     char16_t to = range.to();
    4991         133 :                     if (from > char_mask) continue;
    4992          95 :                     if (to > char_mask) to = char_mask;
    4993             :                     // Here we are combining more ranges into the mask and compare
    4994             :                     // value.  With each new range the mask becomes more sparse and
    4995             :                     // so the chances of a false positive rise.  A character class
    4996             :                     // with multiple ranges is assumed never to be equivalent to a
    4997             :                     // mask and compare operation.
    4998          95 :                     pos->determines_perfectly = false;
    4999          95 :                     uint32_t new_common_bits = (from ^ to);
    5000          95 :                     new_common_bits = ~SmearBitsRight(new_common_bits);
    5001          95 :                     common_bits &= new_common_bits;
    5002          95 :                     bits &= new_common_bits;
    5003          95 :                     uint32_t differing_bits = (from & common_bits) ^ bits;
    5004          95 :                     common_bits ^= differing_bits;
    5005          95 :                     bits &= common_bits;
    5006             :                 }
    5007          67 :                 pos->mask = common_bits;
    5008          67 :                 pos->value = bits;
    5009             :             }
    5010          73 :             characters_filled_in++;
    5011          73 :             MOZ_ASSERT(characters_filled_in <= details->characters());
    5012          73 :             if (characters_filled_in == details->characters()) {
    5013          58 :                 return;
    5014             :             }
    5015             :         }
    5016             :     }
    5017          31 :     MOZ_ASSERT(characters_filled_in != details->characters());
    5018          31 :     if (!details->cannot_match()) {
    5019          31 :         on_success()-> GetQuickCheckDetails(details,
    5020             :                                             compiler,
    5021             :                                             characters_filled_in,
    5022          31 :                                             true);
    5023             :     }
    5024             : }
    5025             : 
    5026             : void
    5027         420 : QuickCheckDetails::Clear()
    5028             : {
    5029         657 :     for (int i = 0; i < characters_; i++) {
    5030         237 :         positions_[i].mask = 0;
    5031         237 :         positions_[i].value = 0;
    5032         237 :         positions_[i].determines_perfectly = false;
    5033             :     }
    5034         420 :     characters_ = 0;
    5035         420 : }
    5036             : 
    5037             : void
    5038         232 : QuickCheckDetails::Advance(int by)
    5039             : {
    5040         232 :     MOZ_ASSERT(by >= 0);
    5041         232 :     if (by >= characters_) {
    5042         212 :         Clear();
    5043         212 :         return;
    5044             :     }
    5045          40 :     for (int i = 0; i < characters_ - by; i++) {
    5046          20 :         positions_[i] = positions_[by + i];
    5047             :     }
    5048          50 :     for (int i = characters_ - by; i < characters_; i++) {
    5049          30 :         positions_[i].mask = 0;
    5050          30 :         positions_[i].value = 0;
    5051          30 :         positions_[i].determines_perfectly = false;
    5052             :     }
    5053          20 :     characters_ -= by;
    5054             :     // We could change mask_ and value_ here but we would never advance unless
    5055             :     // they had already been used in a check and they won't be used again because
    5056             :     // it would gain us nothing.  So there's no point.
    5057             : }
    5058             : 
    5059             : bool
    5060         133 : QuickCheckDetails::Rationalize(bool is_latin1)
    5061             : {
    5062         133 :     bool found_useful_op = false;
    5063         133 :     uint32_t char_mask = MaximumCharacter(is_latin1);
    5064             : 
    5065         133 :     mask_ = 0;
    5066         133 :     value_ = 0;
    5067         133 :     int char_shift = 0;
    5068         439 :     for (int i = 0; i < characters_; i++) {
    5069         306 :         Position* pos = &positions_[i];
    5070         306 :         if ((pos->mask & kMaxOneByteCharCode) != 0)
    5071         263 :             found_useful_op = true;
    5072         306 :         mask_ |= (pos->mask & char_mask) << char_shift;
    5073         306 :         value_ |= (pos->value & char_mask) << char_shift;
    5074         306 :         char_shift += is_latin1 ? 8 : 16;
    5075             :     }
    5076         133 :     return found_useful_op;
    5077             : }
    5078             : 
    5079          46 : void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index)
    5080             : {
    5081          46 :     MOZ_ASSERT(characters_ == other->characters_);
    5082          46 :     if (other->cannot_match_)
    5083           0 :         return;
    5084          46 :     if (cannot_match_) {
    5085           0 :         *this = *other;
    5086           0 :         return;
    5087             :     }
    5088         100 :     for (int i = from_index; i < characters_; i++) {
    5089          54 :         QuickCheckDetails::Position* pos = positions(i);
    5090          54 :         QuickCheckDetails::Position* other_pos = other->positions(i);
    5091          58 :         if (pos->mask != other_pos->mask ||
    5092           4 :             pos->value != other_pos->value ||
    5093           0 :             !other_pos->determines_perfectly) {
    5094             :             // Our mask-compare operation will be approximate unless we have the
    5095             :             // exact same operation on both sides of the alternation.
    5096          54 :             pos->determines_perfectly = false;
    5097             :         }
    5098          54 :         pos->mask &= other_pos->mask;
    5099          54 :         pos->value &= pos->mask;
    5100          54 :         other_pos->value &= pos->mask;
    5101          54 :         char16_t differing_bits = (pos->value ^ other_pos->value);
    5102          54 :         pos->mask &= ~differing_bits;
    5103          54 :         pos->value &= pos->mask;
    5104             :     }
    5105             : }

Generated by: LCOV version 1.13