LCOV - code coverage report
Current view: top level - js/src/irregexp - RegExpEngine.h (source / functions) Hit Total Coverage
Test: output.info Lines: 298 351 84.9 %
Date: 2017-07-14 16:53:18 Functions: 194 250 77.6 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
       2             :  * vim: set ts=8 sts=4 et sw=4 tw=99: */
       3             : 
       4             : // Copyright 2012 the V8 project authors. All rights reserved.
       5             : // Redistribution and use in source and binary forms, with or without
       6             : // modification, are permitted provided that the following conditions are
       7             : // met:
       8             : //
       9             : //     * Redistributions of source code must retain the above copyright
      10             : //       notice, this list of conditions and the following disclaimer.
      11             : //     * Redistributions in binary form must reproduce the above
      12             : //       copyright notice, this list of conditions and the following
      13             : //       disclaimer in the documentation and/or other materials provided
      14             : //       with the distribution.
      15             : //     * Neither the name of Google Inc. nor the names of its
      16             : //       contributors may be used to endorse or promote products derived
      17             : //       from this software without specific prior written permission.
      18             : //
      19             : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      20             : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      21             : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      22             : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
      23             : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
      24             : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
      25             : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      26             : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      27             : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      28             : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
      29             : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      30             : 
      31             : #ifndef V8_JSREGEXP_H_
      32             : #define V8_JSREGEXP_H_
      33             : 
      34             : #include "jscntxt.h"
      35             : 
      36             : #include "ds/SplayTree.h"
      37             : #include "jit/Label.h"
      38             : #include "vm/RegExpObject.h"
      39             : 
      40             : namespace js {
      41             : 
      42             : class MatchPairs;
      43             : class RegExpShared;
      44             : 
      45             : namespace jit {
      46             :     class Label;
      47             :     class JitCode;
      48             : }
      49             : 
      50             : namespace irregexp {
      51             : 
      52             : class RegExpTree;
      53             : class RegExpMacroAssembler;
      54             : 
      55             : struct RegExpCompileData
      56             : {
      57          40 :     RegExpCompileData()
      58          40 :       : tree(nullptr),
      59             :         simple(true),
      60             :         contains_anchor(false),
      61          40 :         capture_count(0)
      62          40 :     {}
      63             : 
      64             :     RegExpTree* tree;
      65             :     bool simple;
      66             :     bool contains_anchor;
      67             :     int capture_count;
      68             : };
      69             : 
      70             : struct RegExpCode
      71             : {
      72             :     jit::JitCode* jitCode;
      73             :     uint8_t* byteCode;
      74             : 
      75          40 :     RegExpCode()
      76          40 :       : jitCode(nullptr), byteCode(nullptr)
      77          40 :     {}
      78             : 
      79          80 :     bool empty() {
      80          80 :         return !jitCode && !byteCode;
      81             :     }
      82             : 
      83           0 :     void destroy() {
      84           0 :         js_free(byteCode);
      85           0 :     }
      86             : };
      87             : 
      88             : RegExpCode
      89             : CompilePattern(JSContext* cx, HandleRegExpShared shared, RegExpCompileData* data,
      90             :                HandleLinearString sample,  bool is_global, bool ignore_case,
      91             :                bool is_latin1, bool match_only, bool force_bytecode, bool sticky,
      92             :                bool unicode, RegExpShared::JitCodeTables& tables);
      93             : 
      94             : // Note: this may return RegExpRunStatus_Error if an interrupt was requested
      95             : // while the code was executing.
      96             : template <typename CharT>
      97             : RegExpRunStatus
      98             : ExecuteCode(JSContext* cx, jit::JitCode* codeBlock, const CharT* chars, size_t start,
      99             :             size_t length, MatchPairs* matches, size_t* endIndex);
     100             : 
     101             : template <typename CharT>
     102             : RegExpRunStatus
     103             : InterpretCode(JSContext* cx, const uint8_t* byteCode, const CharT* chars, size_t start,
     104             :               size_t length, MatchPairs* matches, size_t* endIndex);
     105             : 
     106             : #define FOR_EACH_NODE_TYPE(VISIT)                                    \
     107             :   VISIT(End)                                                         \
     108             :   VISIT(Action)                                                      \
     109             :   VISIT(Choice)                                                      \
     110             :   VISIT(BackReference)                                               \
     111             :   VISIT(Assertion)                                                   \
     112             :   VISIT(Text)
     113             : 
     114             : #define FOR_EACH_REG_EXP_TREE_TYPE(VISIT)                            \
     115             :   VISIT(Disjunction)                                                 \
     116             :   VISIT(Alternative)                                                 \
     117             :   VISIT(Assertion)                                                   \
     118             :   VISIT(CharacterClass)                                              \
     119             :   VISIT(Atom)                                                        \
     120             :   VISIT(Quantifier)                                                  \
     121             :   VISIT(Capture)                                                     \
     122             :   VISIT(Lookahead)                                                   \
     123             :   VISIT(BackReference)                                               \
     124             :   VISIT(Empty)                                                       \
     125             :   VISIT(Text)
     126             : 
     127             : #define FORWARD_DECLARE(Name) class RegExp##Name;
     128             : FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
     129             : #undef FORWARD_DECLARE
     130             : 
     131             : // InfallibleVector is like Vector, but all its methods are infallible (they
     132             : // crash on OOM). We use this class instead of Vector to avoid a ton of
     133             : // MOZ_MUST_USE warnings in irregexp code (imported from V8).
     134             : template<typename T, size_t N>
     135          93 : class InfallibleVector
     136             : {
     137             :     Vector<T, N, LifoAllocPolicy<Infallible>> vector_;
     138             : 
     139             :     InfallibleVector(const InfallibleVector&) = delete;
     140             :     void operator=(const InfallibleVector&) = delete;
     141             : 
     142             :   public:
     143        3706 :     explicit InfallibleVector(const LifoAllocPolicy<Infallible>& alloc) : vector_(alloc) {}
     144             : 
     145       18465 :     void append(const T& t) { MOZ_ALWAYS_TRUE(vector_.append(t)); }
     146          28 :     void append(const T* begin, size_t length) { MOZ_ALWAYS_TRUE(vector_.append(begin, length)); }
     147             : 
     148           0 :     void clear() { vector_.clear(); }
     149          22 :     void popBack() { vector_.popBack(); }
     150        1265 :     void reserve(size_t n) { MOZ_ALWAYS_TRUE(vector_.reserve(n)); }
     151             : 
     152       16132 :     size_t length() const { return vector_.length(); }
     153         238 :     T popCopy() { return vector_.popCopy(); }
     154             : 
     155          28 :     T* begin() { return vector_.begin(); }
     156           0 :     const T* begin() const { return vector_.begin(); }
     157             : 
     158       31696 :     T& operator[](size_t index) { return vector_[index]; }
     159        2439 :     const T& operator[](size_t index) const { return vector_[index]; }
     160             : 
     161           0 :     InfallibleVector& operator=(InfallibleVector&& rhs) { vector_ = Move(rhs.vector_); return *this; }
     162             : };
     163             : 
     164             : class CharacterRange;
     165             : typedef InfallibleVector<CharacterRange, 1> CharacterRangeVector;
     166             : 
     167             : // Represents code units in the range from from_ to to_, both ends are
     168             : // inclusive.
     169             : class CharacterRange
     170             : {
     171             :   public:
     172             :     CharacterRange()
     173             :       : from_(0), to_(0)
     174             :     {}
     175             : 
     176        3179 :     CharacterRange(char16_t from, char16_t to)
     177        3179 :       : from_(from), to_(to)
     178        3179 :     {}
     179             : 
     180             :     static void AddClassEscape(LifoAlloc* alloc, char16_t type, CharacterRangeVector* ranges);
     181             :     static void AddClassEscapeUnicode(LifoAlloc* alloc, char16_t type,
     182             :                                       CharacterRangeVector* ranges, bool ignoreCase);
     183             : 
     184         589 :     static inline CharacterRange Singleton(char16_t value) {
     185         589 :         return CharacterRange(value, value);
     186             :     }
     187         331 :     static inline CharacterRange Range(char16_t from, char16_t to) {
     188         331 :         MOZ_ASSERT(from <= to);
     189         331 :         return CharacterRange(from, to);
     190             :     }
     191          18 :     static inline CharacterRange Everything() {
     192          18 :         return CharacterRange(0, 0xFFFF);
     193             :     }
     194         493 :     bool Contains(char16_t i) { return from_ <= i && i <= to_; }
     195        1925 :     char16_t from() const { return from_; }
     196           0 :     void set_from(char16_t value) { from_ = value; }
     197        1443 :     char16_t to() const { return to_; }
     198          50 :     void set_to(char16_t value) { to_ = value; }
     199             :     bool is_valid() { return from_ <= to_; }
     200          87 :     bool IsEverything(char16_t max) { return from_ == 0 && to_ >= max; }
     201             :     bool IsSingleton() { return (from_ == to_); }
     202             :     void AddCaseEquivalents(bool is_latin1, bool unicode, CharacterRangeVector* ranges);
     203             : 
     204             :     static void Split(const LifoAlloc* alloc,
     205             :                       CharacterRangeVector base,
     206             :                       const Vector<int>& overlay,
     207             :                       CharacterRangeVector* included,
     208             :                       CharacterRangeVector* excluded);
     209             : 
     210             :     // Whether a range list is in canonical form: Ranges ordered by from value,
     211             :     // and ranges non-overlapping and non-adjacent.
     212             :     static bool IsCanonical(const CharacterRangeVector& ranges);
     213             : 
     214             :     // Convert range list to canonical form. The characters covered by the ranges
     215             :     // will still be the same, but no character is in more than one range, and
     216             :     // adjacent ranges are merged. The resulting list may be shorter than the
     217             :     // original, but cannot be longer.
     218             :     static void Canonicalize(CharacterRangeVector& ranges);
     219             : 
     220             :     // Negate the contents of a character range in canonical form.
     221             :     static void Negate(const LifoAlloc* alloc,
     222             :                        CharacterRangeVector src,
     223             :                        CharacterRangeVector* dst);
     224             : 
     225             :     static const int kStartMarker = (1 << 24);
     226             :     static const int kPayloadMask = (1 << 24) - 1;
     227             : 
     228             :   private:
     229             :     char16_t from_;
     230             :     char16_t to_;
     231             : };
     232             : 
     233             : // A set of unsigned integers that behaves especially well on small
     234             : // integers (< 32).
     235             : class OutSet
     236             : {
     237             :   public:
     238         498 :     OutSet()
     239         498 :       : first_(0), remaining_(nullptr), successors_(nullptr)
     240         498 :     {}
     241             : 
     242             :     OutSet* Extend(LifoAlloc* alloc, unsigned value);
     243             :     bool Get(unsigned value);
     244             :     static const unsigned kFirstLimit = 32;
     245             : 
     246             :   private:
     247             :     typedef InfallibleVector<OutSet*, 1> OutSetVector;
     248             :     typedef InfallibleVector<unsigned, 1> RemainingVector;
     249             : 
     250             :     // Destructively set a value in this set.  In most cases you want
     251             :     // to use Extend instead to ensure that only one instance exists
     252             :     // that contains the same values.
     253             :     void Set(LifoAlloc* alloc, unsigned value);
     254             : 
     255             :     // The successors are a list of sets that contain the same values
     256             :     // as this set and the one more value that is not present in this
     257             :     // set.
     258             :     OutSetVector* successors() { return successors_; }
     259             : 
     260             :     OutSet(uint32_t first, RemainingVector* remaining)
     261             :       : first_(first), remaining_(remaining), successors_(nullptr)
     262             :     {}
     263             : 
     264           0 :     RemainingVector& remaining() { return *remaining_; }
     265             : 
     266             :     uint32_t first_;
     267             :     RemainingVector* remaining_;
     268             :     OutSetVector* successors_;
     269             :     friend class Trace;
     270             : };
     271             : 
     272             : // A mapping from integers, specified as ranges, to a set of integers.
     273             : // Used for mapping character ranges to choices.
     274             : class DispatchTable
     275             : {
     276             :   public:
     277             :     explicit DispatchTable(LifoAlloc* alloc)
     278             :     {}
     279             : 
     280             :     class Entry {
     281             :       public:
     282             :         Entry()
     283             :           : from_(0), to_(0), out_set_(nullptr)
     284             :         {}
     285             : 
     286             :         Entry(char16_t from, char16_t to, OutSet* out_set)
     287             :           : from_(from), to_(to), out_set_(out_set)
     288             :         {}
     289             : 
     290             :         char16_t from() { return from_; }
     291             :         char16_t to() { return to_; }
     292             :         void set_to(char16_t value) { to_ = value; }
     293             :         void AddValue(LifoAlloc* alloc, int value) {
     294             :             out_set_ = out_set_->Extend(alloc, value);
     295             :         }
     296             :         OutSet* out_set() { return out_set_; }
     297             :       private:
     298             :         char16_t from_;
     299             :         char16_t to_;
     300             :         OutSet* out_set_;
     301             :     };
     302             : 
     303             :     void AddRange(LifoAlloc* alloc, CharacterRange range, int value);
     304             :     OutSet* Get(char16_t value);
     305             :     void Dump();
     306             : 
     307             :   private:
     308             :     // There can't be a static empty set since it allocates its
     309             :     // successors in a LifoAlloc and caches them.
     310             :     OutSet* empty() { return &empty_; }
     311             :     OutSet empty_;
     312             : };
     313             : 
     314             : class TextElement
     315             : {
     316             :   public:
     317             :     enum TextType {
     318             :         ATOM,
     319             :         CHAR_CLASS
     320             :     };
     321             : 
     322             :     static TextElement Atom(RegExpAtom* atom);
     323             :     static TextElement CharClass(RegExpCharacterClass* char_class);
     324             : 
     325        4337 :     int cp_offset() const { return cp_offset_; }
     326         212 :     void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
     327             :     int length() const;
     328             : 
     329        4293 :     TextType text_type() const { return text_type_; }
     330             : 
     331        1556 :     RegExpTree* tree() const { return tree_; }
     332             : 
     333        1266 :     RegExpAtom* atom() const {
     334        1266 :         MOZ_ASSERT(text_type() == ATOM);
     335        1266 :         return reinterpret_cast<RegExpAtom*>(tree());
     336             :     }
     337             : 
     338         290 :     RegExpCharacterClass* char_class() const {
     339         290 :         MOZ_ASSERT(text_type() == CHAR_CLASS);
     340         290 :         return reinterpret_cast<RegExpCharacterClass*>(tree());
     341             :     }
     342             : 
     343             :   private:
     344         287 :     TextElement(TextType text_type, RegExpTree* tree)
     345         287 :       : cp_offset_(-1), text_type_(text_type), tree_(tree)
     346         287 :     {}
     347             : 
     348             :     int cp_offset_;
     349             :     TextType text_type_;
     350             :     RegExpTree* tree_;
     351             : };
     352             : 
     353             : typedef InfallibleVector<TextElement, 1> TextElementVector;
     354             : 
     355             : class NodeVisitor;
     356             : class RegExpCompiler;
     357             : class Trace;
     358             : class BoyerMooreLookahead;
     359             : 
     360             : struct NodeInfo
     361             : {
     362         610 :     NodeInfo()
     363         610 :       : being_analyzed(false),
     364             :         been_analyzed(false),
     365             :         follows_word_interest(false),
     366             :         follows_newline_interest(false),
     367             :         follows_start_interest(false),
     368             :         at_end(false),
     369             :         visited(false),
     370         610 :         replacement_calculated(false)
     371         610 :     {}
     372             : 
     373             :     // Returns true if the interests and assumptions of this node
     374             :     // matches the given one.
     375             :     bool Matches(NodeInfo* that) {
     376             :         return (at_end == that->at_end) &&
     377             :             (follows_word_interest == that->follows_word_interest) &&
     378             :             (follows_newline_interest == that->follows_newline_interest) &&
     379             :             (follows_start_interest == that->follows_start_interest);
     380             :     }
     381             : 
     382             :     // Updates the interests of this node given the interests of the
     383             :     // node preceding it.
     384             :     void AddFromPreceding(NodeInfo* that) {
     385             :         at_end |= that->at_end;
     386             :         follows_word_interest |= that->follows_word_interest;
     387             :         follows_newline_interest |= that->follows_newline_interest;
     388             :         follows_start_interest |= that->follows_start_interest;
     389             :     }
     390             : 
     391             :     bool HasLookbehind() {
     392             :         return follows_word_interest ||
     393             :             follows_newline_interest ||
     394             :             follows_start_interest;
     395             :     }
     396             : 
     397             :     // Sets the interests of this node to include the interests of the
     398             :     // following node.
     399         392 :     void AddFromFollowing(NodeInfo* that) {
     400         392 :         follows_word_interest |= that->follows_word_interest;
     401         392 :         follows_newline_interest |= that->follows_newline_interest;
     402         392 :         follows_start_interest |= that->follows_start_interest;
     403         392 :     }
     404             : 
     405             :     void ResetCompilationState() {
     406             :         being_analyzed = false;
     407             :         been_analyzed = false;
     408             :     }
     409             : 
     410             :     bool being_analyzed: 1;
     411             :     bool been_analyzed: 1;
     412             : 
     413             :     // These bits are set of this node has to know what the preceding
     414             :     // character was.
     415             :     bool follows_word_interest: 1;
     416             :     bool follows_newline_interest: 1;
     417             :     bool follows_start_interest: 1;
     418             : 
     419             :     bool at_end: 1;
     420             :     bool visited: 1;
     421             :     bool replacement_calculated: 1;
     422             : };
     423             : 
     424             : // Details of a quick mask-compare check that can look ahead in the
     425             : // input stream.
     426             : class QuickCheckDetails
     427             : {
     428             :   public:
     429        1297 :     QuickCheckDetails()
     430        1297 :       : characters_(0),
     431             :         mask_(0),
     432             :         value_(0),
     433        1297 :         cannot_match_(false)
     434        1297 :     {}
     435             : 
     436          46 :     explicit QuickCheckDetails(int characters)
     437          46 :       : characters_(characters),
     438             :         mask_(0),
     439             :         value_(0),
     440          46 :         cannot_match_(false)
     441          46 :     {}
     442             : 
     443             :     bool Rationalize(bool latin1);
     444             : 
     445             :     // Merge in the information from another branch of an alternation.
     446             :     void Merge(QuickCheckDetails* other, int from_index);
     447             : 
     448             :     // Advance the current position by some amount.
     449             :     void Advance(int by);
     450             : 
     451             :     void Clear();
     452             : 
     453         262 :     bool cannot_match() { return cannot_match_; }
     454           0 :     void set_cannot_match() { cannot_match_ = true; }
     455             : 
     456        4488 :     int characters() { return characters_; }
     457         208 :     void set_characters(int characters) { characters_ = characters; }
     458             : 
     459             :     struct Position {
     460        5372 :         Position() : mask(0), value(0), determines_perfectly(false) { }
     461             :         char16_t mask;
     462             :         char16_t value;
     463             :         bool determines_perfectly;
     464             :     };
     465             : 
     466        1285 :     Position* positions(int index) {
     467        1285 :         MOZ_ASSERT(index >= 0);
     468        1285 :         MOZ_ASSERT(index < characters_);
     469        1285 :         return positions_ + index;
     470             :     }
     471             : 
     472         110 :     uint32_t mask() { return mask_; }
     473         110 :     uint32_t value() { return value_; }
     474             : 
     475             :   private:
     476             :     // How many characters do we have quick check information from.  This is
     477             :     // the same for all branches of a choice node.
     478             :     int characters_;
     479             :     Position positions_[4];
     480             : 
     481             :     // These values are the condensate of the above array after Rationalize().
     482             :     uint32_t mask_;
     483             :     uint32_t value_;
     484             : 
     485             :     // If set to true, there is no way this quick check can match at all.
     486             :     // E.g., if it requires to be at the start of the input, and isn't.
     487             :     bool cannot_match_;
     488             : };
     489             : 
     490             : class RegExpNode
     491             : {
     492             :   public:
     493             :     explicit RegExpNode(LifoAlloc* alloc);
     494           0 :     virtual ~RegExpNode() {}
     495             :     virtual void Accept(NodeVisitor* visitor) = 0;
     496             : 
     497             :     // Generates a goto to this node or actually generates the code at this point.
     498             :     virtual void Emit(RegExpCompiler* compiler, Trace* trace) = 0;
     499             : 
     500             :     // How many characters must this node consume at a minimum in order to
     501             :     // succeed.  If we have found at least 'still_to_find' characters that
     502             :     // must be consumed there is no need to ask any following nodes whether
     503             :     // they are sure to eat any more characters.  The not_at_start argument is
     504             :     // used to indicate that we know we are not at the start of the input.  In
     505             :     // this case anchored branches will always fail and can be ignored when
     506             :     // determining how many characters are consumed on success.
     507             :     virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start) = 0;
     508             : 
     509             :     // Emits some quick code that checks whether the preloaded characters match.
     510             :     // Falls through on certain failure, jumps to the label on possible success.
     511             :     // If the node cannot make a quick check it does nothing and returns false.
     512             :     bool EmitQuickCheck(RegExpCompiler* compiler,
     513             :                         Trace* trace,
     514             :                         bool preload_has_checked_bounds,
     515             :                         jit::Label* on_possible_success,
     516             :                         QuickCheckDetails* details_return,
     517             :                         bool fall_through_on_failure);
     518             : 
     519             :     // For a given number of characters this returns a mask and a value.  The
     520             :     // next n characters are anded with the mask and compared with the value.
     521             :     // A comparison failure indicates the node cannot match the next n characters.
     522             :     // A comparison success indicates the node may match.
     523             :     virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     524             :                                       RegExpCompiler* compiler,
     525             :                                       int characters_filled_in,
     526             :                                       bool not_at_start) = 0;
     527             : 
     528             :     static const int kNodeIsTooComplexForGreedyLoops = -1;
     529             : 
     530          18 :     virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
     531             : 
     532             :     // Only returns the successor for a text node of length 1 that matches any
     533             :     // character and that has no guards on it.
     534          21 :     virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(RegExpCompiler* compiler) {
     535          21 :         return nullptr;
     536             :     }
     537             : 
     538             :     static const int kRecursionBudget = 200;
     539             : 
     540             :     // Collects information on the possible code units (mod 128) that can match if
     541             :     // we look forward.  This is used for a Boyer-Moore-like string searching
     542             :     // implementation.  TODO(erikcorry):  This should share more code with
     543             :     // EatsAtLeast, GetQuickCheckDetails.  The budget argument is used to limit
     544             :     // the number of nodes we are willing to look at in order to create this data.
     545           0 :     virtual bool FillInBMInfo(int offset,
     546             :                               int budget,
     547             :                               BoyerMooreLookahead* bm,
     548             :                               bool not_at_start) {
     549           0 :         MOZ_CRASH("Bad call");
     550             :     }
     551             : 
     552             :     // If we know that the input is ASCII then there are some nodes that can
     553             :     // never match.  This method returns a node that can be substituted for
     554             :     // itself, or nullptr if the node can never match.
     555          46 :     virtual RegExpNode* FilterLATIN1(int depth, bool ignore_case, bool unicode) { return this; }
     556             : 
     557             :     // Helper for FilterLATIN1.
     558         130 :     RegExpNode* replacement() {
     559         130 :         MOZ_ASSERT(info()->replacement_calculated);
     560         130 :         return replacement_;
     561             :     }
     562         401 :     RegExpNode* set_replacement(RegExpNode* replacement) {
     563         401 :         info()->replacement_calculated = true;
     564         401 :         replacement_ =  replacement;
     565         401 :         return replacement;  // For convenience.
     566             :     }
     567             : 
     568             :     // We want to avoid recalculating the lookahead info, so we store it on the
     569             :     // node.  Only info that is for this node is stored.  We can tell that the
     570             :     // info is for this node when offset == 0, so the information is calculated
     571             :     // relative to this node.
     572          32 :     void SaveBMInfo(BoyerMooreLookahead* bm, bool not_at_start, int offset) {
     573          32 :         if (offset == 0) set_bm_info(not_at_start, bm);
     574          32 :     }
     575             : 
     576         539 :     jit::Label* label() { return &label_; }
     577             : 
     578             :     // If non-generic code is generated for a node (i.e. the node is not at the
     579             :     // start of the trace) then it cannot be reused.  This variable sets a limit
     580             :     // on how often we allow that to happen before we insist on starting a new
     581             :     // trace and generating generic code for a node that can be reused by flushing
     582             :     // the deferred actions in the current trace and generating a goto.
     583             :     static const int kMaxCopiesCodeGenerated = 10;
     584             : 
     585        5746 :     NodeInfo* info() { return &info_; }
     586             : 
     587          20 :     BoyerMooreLookahead* bm_info(bool not_at_start) {
     588          20 :         return bm_info_[not_at_start ? 1 : 0];
     589             :     }
     590             : 
     591        1270 :     LifoAlloc* alloc() const { return alloc_; }
     592             : 
     593             :   protected:
     594             :     enum LimitResult { DONE, CONTINUE };
     595             :     RegExpNode* replacement_;
     596             : 
     597             :     LimitResult LimitVersions(RegExpCompiler* compiler, Trace* trace);
     598             : 
     599          42 :     void set_bm_info(bool not_at_start, BoyerMooreLookahead* bm) {
     600          42 :         bm_info_[not_at_start ? 1 : 0] = bm;
     601          42 :     }
     602             : 
     603             :   private:
     604             :     static const int kFirstCharBudget = 10;
     605             :     jit::Label label_;
     606             :     NodeInfo info_;
     607             : 
     608             :     // This variable keeps track of how many times code has been generated for
     609             :     // this node (in different traces).  We don't keep track of where the
     610             :     // generated code is located unless the code is generated at the start of
     611             :     // a trace, in which case it is generic and can be reused by flushing the
     612             :     // deferred operations in the current trace and generating a goto.
     613             :     int trace_count_;
     614             :     BoyerMooreLookahead* bm_info_[2];
     615             : 
     616             :     LifoAlloc* alloc_;
     617             : };
     618             : 
     619             : // A simple closed interval.
     620             : class Interval
     621             : {
     622             :   public:
     623         100 :     Interval() : from_(kNone), to_(kNone) { }
     624             : 
     625         115 :     Interval(int from, int to) : from_(from), to_(to) { }
     626             : 
     627          24 :     Interval Union(Interval that) {
     628          24 :         if (that.from_ == kNone)
     629          19 :             return *this;
     630           5 :         else if (from_ == kNone)
     631           4 :             return that;
     632             :         else
     633           1 :             return Interval(Min(from_, that.from_), Max(to_, that.to_));
     634             :     }
     635             : 
     636          24 :     bool Contains(int value) {
     637          24 :         return (from_ <= value) && (value <= to_);
     638             :     }
     639             : 
     640          83 :     bool is_empty() { return from_ == kNone; }
     641             : 
     642        1923 :     int from() const { return from_; }
     643         959 :     int to() const { return to_; }
     644             : 
     645         100 :     static Interval Empty() { return Interval(); }
     646             :     static const int kNone = -1;
     647             : 
     648             :   private:
     649             :     int from_;
     650             :     int to_;
     651             : };
     652             : 
     653           0 : class SeqRegExpNode : public RegExpNode
     654             : {
     655             :   public:
     656         437 :     explicit SeqRegExpNode(RegExpNode* on_success)
     657         437 :       : RegExpNode(on_success->alloc()), on_success_(on_success)
     658         437 :     {}
     659             : 
     660        1629 :     RegExpNode* on_success() { return on_success_; }
     661             :     void set_on_success(RegExpNode* node) { on_success_ = node; }
     662             :     virtual RegExpNode* FilterLATIN1(int depth, bool ignore_case, bool unicode);
     663             :     virtual bool FillInBMInfo(int offset,
     664             :                               int budget,
     665             :                               BoyerMooreLookahead* bm,
     666             :                               bool not_at_start);
     667             : 
     668             :   protected:
     669             :     RegExpNode* FilterSuccessor(int depth, bool ignore_case, bool unicode);
     670             : 
     671             :   private:
     672             :     RegExpNode* on_success_;
     673             : };
     674             : 
     675           0 : class ActionNode : public SeqRegExpNode
     676             : {
     677             :   public:
     678             :     enum ActionType {
     679             :         SET_REGISTER,
     680             :         INCREMENT_REGISTER,
     681             :         STORE_POSITION,
     682             :         BEGIN_SUBMATCH,
     683             :         POSITIVE_SUBMATCH_SUCCESS,
     684             :         EMPTY_MATCH_CHECK,
     685             :         CLEAR_CAPTURES
     686             :     };
     687             : 
     688         178 :     ActionNode(ActionType action_type, RegExpNode* on_success)
     689         178 :       : SeqRegExpNode(on_success),
     690         178 :         action_type_(action_type)
     691         178 :     {}
     692             : 
     693             :     static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
     694             :     static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
     695             :     static ActionNode* StorePosition(int reg,
     696             :                                      bool is_capture,
     697             :                                      RegExpNode* on_success);
     698             :     static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success);
     699             :     static ActionNode* BeginSubmatch(int stack_pointer_reg,
     700             :                                      int position_reg,
     701             :                                      RegExpNode* on_success);
     702             :     static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
     703             :                                                int restore_reg,
     704             :                                                int clear_capture_count,
     705             :                                                int clear_capture_from,
     706             :                                                RegExpNode* on_success);
     707             :     static ActionNode* EmptyMatchCheck(int start_register,
     708             :                                        int repetition_register,
     709             :                                        int repetition_limit,
     710             :                                        RegExpNode* on_success);
     711             :     virtual void Accept(NodeVisitor* visitor);
     712             :     virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     713             :     virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
     714         119 :     virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     715             :                                       RegExpCompiler* compiler,
     716             :                                       int filled_in,
     717             :                                       bool not_at_start) {
     718         238 :         return on_success()->GetQuickCheckDetails(
     719         238 :                                                   details, compiler, filled_in, not_at_start);
     720             :     }
     721             :     virtual bool FillInBMInfo(int offset,
     722             :                               int budget,
     723             :                               BoyerMooreLookahead* bm,
     724             :                               bool not_at_start);
     725             :     ActionType action_type() { return action_type_; }
     726             :     // TODO(erikcorry): We should allow some action nodes in greedy loops.
     727          49 :     virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
     728             : 
     729             :   private:
     730             :     union {
     731             :         struct {
     732             :             int reg;
     733             :             int value;
     734             :         } u_store_register;
     735             :         struct {
     736             :             int reg;
     737             :         } u_increment_register;
     738             :         struct {
     739             :             int reg;
     740             :             bool is_capture;
     741             :         } u_position_register;
     742             :         struct {
     743             :             int stack_pointer_register;
     744             :             int current_position_register;
     745             :             int clear_register_count;
     746             :             int clear_register_from;
     747             :         } u_submatch;
     748             :         struct {
     749             :             int start_register;
     750             :             int repetition_register;
     751             :             int repetition_limit;
     752             :         } u_empty_match_check;
     753             :         struct {
     754             :             int range_from;
     755             :             int range_to;
     756             :         } u_clear_captures;
     757             :     } data_;
     758             :     ActionType action_type_;
     759             :     friend class DotPrinter;
     760             : };
     761             : 
     762           0 : class TextNode : public SeqRegExpNode
     763             : {
     764             :   public:
     765         125 :     TextNode(TextElementVector* elements,
     766             :              RegExpNode* on_success)
     767         125 :       : SeqRegExpNode(on_success),
     768         125 :         elements_(elements)
     769         125 :     {}
     770             : 
     771          90 :     TextNode(RegExpCharacterClass* that,
     772             :              RegExpNode* on_success)
     773          90 :       : SeqRegExpNode(on_success),
     774          90 :         elements_(alloc()->newInfallible<TextElementVector>(*alloc()))
     775             :     {
     776          90 :         elements_->append(TextElement::CharClass(that));
     777          90 :     }
     778             : 
     779             :     virtual void Accept(NodeVisitor* visitor);
     780             :     virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     781             :     virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
     782             :     virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     783             :                                       RegExpCompiler* compiler,
     784             :                                       int characters_filled_in,
     785             :                                       bool not_at_start);
     786        4552 :     TextElementVector& elements() { return *elements_; }
     787             :     void MakeCaseIndependent(bool is_latin1, bool unicode);
     788             :     virtual int GreedyLoopTextLength();
     789             :     virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
     790             :                                                          RegExpCompiler* compiler);
     791             :     virtual bool FillInBMInfo(int offset,
     792             :                               int budget,
     793             :                               BoyerMooreLookahead* bm,
     794             :                               bool not_at_start);
     795             :     void CalculateOffsets();
     796             :     virtual RegExpNode* FilterLATIN1(int depth, bool ignore_case, bool unicode);
     797             : 
     798             :   private:
     799             :     enum TextEmitPassType {
     800             :         NON_LATIN1_MATCH,             // Check for characters that can't match.
     801             :         SIMPLE_CHARACTER_MATCH,      // Case-dependent single character check.
     802             :         CASE_SINGLE_CHARACTER_MATCH, // Case-independent single character check.
     803             :         CASE_MUTLI_CHARACTER_MATCH,  // Case-independent single character with
     804             :                                      // multiple variation.
     805             :         CHARACTER_CLASS_MATCH        // Character class.
     806             :     };
     807             :     static bool SkipPass(int pass, bool ignore_case);
     808             :     static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
     809             :     static const int kLastPass = CHARACTER_CLASS_MATCH;
     810             :     void TextEmitPass(RegExpCompiler* compiler,
     811             :                       TextEmitPassType pass,
     812             :                       bool preloaded,
     813             :                       Trace* trace,
     814             :                       bool first_element_checked,
     815             :                       int* checked_up_to);
     816             :     int Length();
     817             :     TextElementVector* elements_;
     818             : };
     819             : 
     820           0 : class AssertionNode : public SeqRegExpNode
     821             : {
     822             :   public:
     823             :     enum AssertionType {
     824             :         AT_END,
     825             :         AT_START,
     826             :         AT_BOUNDARY,
     827             :         AT_NON_BOUNDARY,
     828             :         AFTER_NEWLINE,
     829             :         NOT_AFTER_LEAD_SURROGATE,
     830             :         NOT_IN_SURROGATE_PAIR
     831             :     };
     832          44 :     AssertionNode(AssertionType t, RegExpNode* on_success)
     833          44 :       : SeqRegExpNode(on_success), assertion_type_(t)
     834          44 :     {}
     835             : 
     836          21 :     static AssertionNode* AtEnd(RegExpNode* on_success) {
     837          21 :         return on_success->alloc()->newInfallible<AssertionNode>(AT_END, on_success);
     838             :     }
     839          21 :     static AssertionNode* AtStart(RegExpNode* on_success) {
     840          21 :         return on_success->alloc()->newInfallible<AssertionNode>(AT_START, on_success);
     841             :     }
     842           2 :     static AssertionNode* AtBoundary(RegExpNode* on_success) {
     843           2 :         return on_success->alloc()->newInfallible<AssertionNode>(AT_BOUNDARY, on_success);
     844             :     }
     845           0 :     static AssertionNode* AtNonBoundary(RegExpNode* on_success) {
     846           0 :         return on_success->alloc()->newInfallible<AssertionNode>(AT_NON_BOUNDARY, on_success);
     847             :     }
     848           0 :     static AssertionNode* AfterNewline(RegExpNode* on_success) {
     849           0 :         return on_success->alloc()->newInfallible<AssertionNode>(AFTER_NEWLINE, on_success);
     850             :     }
     851           0 :     static AssertionNode* NotAfterLeadSurrogate(RegExpNode* on_success) {
     852           0 :         return on_success->alloc()->newInfallible<AssertionNode>(NOT_AFTER_LEAD_SURROGATE,
     853           0 :                                                                  on_success);
     854             :     }
     855           0 :     static AssertionNode* NotInSurrogatePair(RegExpNode* on_success) {
     856           0 :         return on_success->alloc()->newInfallible<AssertionNode>(NOT_IN_SURROGATE_PAIR,
     857           0 :                                                                  on_success);
     858             :     }
     859             :     virtual void Accept(NodeVisitor* visitor);
     860             :     virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     861             :     virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
     862             :     virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     863             :                                       RegExpCompiler* compiler,
     864             :                                       int filled_in,
     865             :                                       bool not_at_start);
     866             :     virtual bool FillInBMInfo(int offset,
     867             :                               int budget,
     868             :                               BoyerMooreLookahead* bm,
     869             :                               bool not_at_start);
     870          50 :     AssertionType assertion_type() { return assertion_type_; }
     871             : 
     872             :   private:
     873             :     void EmitBoundaryCheck(RegExpCompiler* compiler, Trace* trace);
     874             :     enum IfPrevious { kIsNonWord, kIsWord };
     875             :     void BacktrackIfPrevious(RegExpCompiler* compiler,
     876             :                              Trace* trace,
     877             :                              IfPrevious backtrack_if_previous);
     878             :     AssertionType assertion_type_;
     879             : };
     880             : 
     881           0 : class BackReferenceNode : public SeqRegExpNode
     882             : {
     883             :   public:
     884           0 :     BackReferenceNode(int start_reg,
     885             :                       int end_reg,
     886             :                       RegExpNode* on_success)
     887           0 :       : SeqRegExpNode(on_success),
     888             :         start_reg_(start_reg),
     889           0 :         end_reg_(end_reg)
     890           0 :     {}
     891             : 
     892             :     virtual void Accept(NodeVisitor* visitor);
     893             :     int start_register() { return start_reg_; }
     894             :     int end_register() { return end_reg_; }
     895             :     virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     896             :     virtual int EatsAtLeast(int still_to_find,
     897             :                             int recursion_depth,
     898             :                             bool not_at_start);
     899           0 :     virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     900             :                                       RegExpCompiler* compiler,
     901             :                                       int characters_filled_in,
     902             :                                       bool not_at_start) {
     903           0 :         return;
     904             :     }
     905             :     virtual bool FillInBMInfo(int offset,
     906             :                               int budget,
     907             :                               BoyerMooreLookahead* bm,
     908             :                               bool not_at_start);
     909             : 
     910             :   private:
     911             :     int start_reg_;
     912             :     int end_reg_;
     913             : };
     914             : 
     915           0 : class EndNode : public RegExpNode
     916             : {
     917             :   public:
     918             :     enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
     919             : 
     920          43 :     explicit EndNode(LifoAlloc* alloc, Action action)
     921          43 :       : RegExpNode(alloc), action_(action)
     922          43 :     {}
     923             : 
     924             :     virtual void Accept(NodeVisitor* visitor);
     925             :     virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     926         114 :     virtual int EatsAtLeast(int still_to_find,
     927             :                             int recursion_depth,
     928         114 :                             bool not_at_start) { return 0; }
     929           0 :     virtual void GetQuickCheckDetails(QuickCheckDetails* details,
     930             :                                       RegExpCompiler* compiler,
     931             :                                       int characters_filled_in,
     932             :                                       bool not_at_start)
     933             :     {
     934             :         // Returning 0 from EatsAtLeast should ensure we never get here.
     935           0 :         MOZ_CRASH("Bad call");
     936             :     }
     937           0 :     virtual bool FillInBMInfo(int offset,
     938             :                               int budget,
     939             :                               BoyerMooreLookahead* bm,
     940             :                               bool not_at_start) {
     941             :         // Returning 0 from EatsAtLeast should ensure we never get here.
     942           0 :         MOZ_CRASH("Bad call");
     943             :     }
     944             : 
     945             :   private:
     946             :     Action action_;
     947             : };
     948             : 
     949           0 : class NegativeSubmatchSuccess : public EndNode
     950             : {
     951             :   public:
     952           2 :     NegativeSubmatchSuccess(LifoAlloc* alloc,
     953             :                             int stack_pointer_reg,
     954             :                             int position_reg,
     955             :                             int clear_capture_count,
     956             :                             int clear_capture_start)
     957           2 :       : EndNode(alloc, NEGATIVE_SUBMATCH_SUCCESS),
     958             :         stack_pointer_register_(stack_pointer_reg),
     959             :         current_position_register_(position_reg),
     960             :         clear_capture_count_(clear_capture_count),
     961           2 :         clear_capture_start_(clear_capture_start)
     962           2 :     {}
     963             : 
     964             :     virtual void Emit(RegExpCompiler* compiler, Trace* trace);
     965             : 
     966             :   private:
     967             :     int stack_pointer_register_;
     968             :     int current_position_register_;
     969             :     int clear_capture_count_;
     970             :     int clear_capture_start_;
     971             : };
     972             : 
     973             : class Guard
     974             : {
     975             :   public:
     976             :     enum Relation { LT, GEQ };
     977          16 :     Guard(int reg, Relation op, int value)
     978          16 :         : reg_(reg),
     979             :           op_(op),
     980          16 :           value_(value)
     981          16 :     {}
     982             : 
     983          56 :     int reg() { return reg_; }
     984          16 :     Relation op() { return op_; }
     985          16 :     int value() { return value_; }
     986             : 
     987             :   private:
     988             :     int reg_;
     989             :     Relation op_;
     990             :     int value_;
     991             : };
     992             : 
     993             : typedef InfallibleVector<Guard*, 1> GuardVector;
     994             : 
     995             : class GuardedAlternative
     996             : {
     997             :   public:
     998         221 :     explicit GuardedAlternative(RegExpNode* node)
     999         221 :       : node_(node), guards_(nullptr)
    1000         221 :     {}
    1001             : 
    1002             :     void AddGuard(LifoAlloc* alloc, Guard* guard);
    1003        1642 :     RegExpNode* node() const { return node_; }
    1004         169 :     void set_node(RegExpNode* node) { node_ = node; }
    1005         740 :     const GuardVector* guards() const { return guards_; }
    1006             : 
    1007             :   private:
    1008             :     RegExpNode* node_;
    1009             :     GuardVector* guards_;
    1010             : };
    1011             : 
    1012             : typedef InfallibleVector<GuardedAlternative, 0> GuardedAlternativeVector;
    1013             : 
    1014             : class AlternativeGeneration;
    1015             : 
    1016           0 : class ChoiceNode : public RegExpNode
    1017             : {
    1018             :   public:
    1019          86 :     explicit ChoiceNode(LifoAlloc* alloc, int expected_size)
    1020          86 :       : RegExpNode(alloc),
    1021             :         alternatives_(*alloc),
    1022             :         table_(nullptr),
    1023             :         not_at_start_(false),
    1024          86 :         being_calculated_(false)
    1025             :     {
    1026          86 :         alternatives_.reserve(expected_size);
    1027          86 :     }
    1028             : 
    1029             :     virtual void Accept(NodeVisitor* visitor);
    1030         221 :     void AddAlternative(GuardedAlternative node) {
    1031         221 :         alternatives_.append(node);
    1032         221 :     }
    1033             : 
    1034        2699 :     GuardedAlternativeVector& alternatives() { return alternatives_; }
    1035             :     DispatchTable* GetTable(bool ignore_case);
    1036             :     virtual void Emit(RegExpCompiler* compiler, Trace* trace);
    1037             :     virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
    1038             :     int EatsAtLeastHelper(int still_to_find,
    1039             :                           int budget,
    1040             :                           RegExpNode* ignore_this_node,
    1041             :                           bool not_at_start);
    1042             :     virtual void GetQuickCheckDetails(QuickCheckDetails* details,
    1043             :                                       RegExpCompiler* compiler,
    1044             :                                       int characters_filled_in,
    1045             :                                       bool not_at_start);
    1046             :     virtual bool FillInBMInfo(int offset,
    1047             :                               int budget,
    1048             :                               BoyerMooreLookahead* bm,
    1049             :                               bool not_at_start);
    1050             : 
    1051             :     bool being_calculated() { return being_calculated_; }
    1052         119 :     bool not_at_start() { return not_at_start_; }
    1053          22 :     void set_not_at_start() { not_at_start_ = true; }
    1054             :     void set_being_calculated(bool b) { being_calculated_ = b; }
    1055         204 :     virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
    1056             :     virtual RegExpNode* FilterLATIN1(int depth, bool ignore_case, bool unicode);
    1057             : 
    1058             :   protected:
    1059             :     int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
    1060             :     GuardedAlternativeVector alternatives_;
    1061             : 
    1062             :   private:
    1063             :     friend class Analysis;
    1064             :     void GenerateGuard(RegExpMacroAssembler* macro_assembler,
    1065             :                        Guard* guard,
    1066             :                        Trace* trace);
    1067             :     int CalculatePreloadCharacters(RegExpCompiler* compiler, int eats_at_least);
    1068             :     void EmitOutOfLineContinuation(RegExpCompiler* compiler,
    1069             :                                    Trace* trace,
    1070             :                                    GuardedAlternative alternative,
    1071             :                                    AlternativeGeneration* alt_gen,
    1072             :                                    int preload_characters,
    1073             :                                    bool next_expects_preload);
    1074             :     DispatchTable* table_;
    1075             : 
    1076             :     // If true, this node is never checked at the start of the input.
    1077             :     // Allows a new trace to start with at_start() set to false.
    1078             :     bool not_at_start_;
    1079             :     bool being_calculated_;
    1080             : };
    1081             : 
    1082           0 : class NegativeLookaheadChoiceNode : public ChoiceNode
    1083             : {
    1084             :   public:
    1085           2 :     explicit NegativeLookaheadChoiceNode(LifoAlloc* alloc,
    1086             :                                          GuardedAlternative this_must_fail,
    1087             :                                          GuardedAlternative then_do_this)
    1088           2 :       : ChoiceNode(alloc, 2)
    1089             :     {
    1090           2 :         AddAlternative(this_must_fail);
    1091           2 :         AddAlternative(then_do_this);
    1092           2 :     }
    1093             :     virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
    1094             :     virtual void GetQuickCheckDetails(QuickCheckDetails* details,
    1095             :                                       RegExpCompiler* compiler,
    1096             :                                       int characters_filled_in,
    1097             :                                       bool not_at_start);
    1098             :     virtual bool FillInBMInfo(int offset,
    1099             :                               int budget,
    1100             :                               BoyerMooreLookahead* bm,
    1101             :                               bool not_at_start);
    1102             : 
    1103             :     // For a negative lookahead we don't emit the quick check for the
    1104             :     // alternative that is expected to fail.  This is because quick check code
    1105             :     // starts by loading enough characters for the alternative that takes fewest
    1106             :     // characters, but on a negative lookahead the negative branch did not take
    1107             :     // part in that calculation (EatsAtLeast) so the assumptions don't hold.
    1108           4 :     virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
    1109             :     virtual RegExpNode* FilterLATIN1(int depth, bool ignore_case, bool unicode);
    1110             : };
    1111             : 
    1112           0 : class LoopChoiceNode : public ChoiceNode
    1113             : {
    1114             :   public:
    1115          57 :     explicit LoopChoiceNode(LifoAlloc* alloc, bool body_can_be_zero_length)
    1116          57 :       : ChoiceNode(alloc, 2),
    1117             :         loop_node_(nullptr),
    1118             :         continue_node_(nullptr),
    1119          57 :         body_can_be_zero_length_(body_can_be_zero_length)
    1120          57 :     {}
    1121             : 
    1122             :     void AddLoopAlternative(GuardedAlternative alt);
    1123             :     void AddContinueAlternative(GuardedAlternative alt);
    1124             :     virtual void Emit(RegExpCompiler* compiler, Trace* trace);
    1125             :     virtual int EatsAtLeast(int still_to_find,  int budget, bool not_at_start);
    1126             :     virtual void GetQuickCheckDetails(QuickCheckDetails* details,
    1127             :                                       RegExpCompiler* compiler,
    1128             :                                       int characters_filled_in,
    1129             :                                       bool not_at_start);
    1130             :     virtual bool FillInBMInfo(int offset,
    1131             :                               int budget,
    1132             :                               BoyerMooreLookahead* bm,
    1133             :                               bool not_at_start);
    1134         224 :     RegExpNode* loop_node() { return loop_node_; }
    1135             :     RegExpNode* continue_node() { return continue_node_; }
    1136             :     bool body_can_be_zero_length() { return body_can_be_zero_length_; }
    1137             :     virtual void Accept(NodeVisitor* visitor);
    1138             :     virtual RegExpNode* FilterLATIN1(int depth, bool ignore_case, bool unicode);
    1139             : 
    1140             :   private:
    1141             :     // AddAlternative is made private for loop nodes because alternatives
    1142             :     // should not be added freely, we need to keep track of which node
    1143             :     // goes back to the node itself.
    1144         114 :     void AddAlternative(GuardedAlternative node) {
    1145         114 :         ChoiceNode::AddAlternative(node);
    1146         114 :     }
    1147             : 
    1148             :     RegExpNode* loop_node_;
    1149             :     RegExpNode* continue_node_;
    1150             :     bool body_can_be_zero_length_;
    1151             : };
    1152             : 
    1153             : // Improve the speed that we scan for an initial point where a non-anchored
    1154             : // regexp can match by using a Boyer-Moore-like table. This is done by
    1155             : // identifying non-greedy non-capturing loops in the nodes that eat any
    1156             : // character one at a time.  For example in the middle of the regexp
    1157             : // /foo[\s\S]*?bar/ we find such a loop.  There is also such a loop implicitly
    1158             : // inserted at the start of any non-anchored regexp.
    1159             : //
    1160             : // When we have found such a loop we look ahead in the nodes to find the set of
    1161             : // characters that can come at given distances. For example for the regexp
    1162             : // /.?foo/ we know that there are at least 3 characters ahead of us, and the
    1163             : // sets of characters that can occur are [any, [f, o], [o]]. We find a range in
    1164             : // the lookahead info where the set of characters is reasonably constrained. In
    1165             : // our example this is from index 1 to 2 (0 is not constrained). We can now
    1166             : // look 3 characters ahead and if we don't find one of [f, o] (the union of
    1167             : // [f, o] and [o]) then we can skip forwards by the range size (in this case 2).
    1168             : //
    1169             : // For Unicode input strings we do the same, but modulo 128.
    1170             : //
    1171             : // We also look at the first string fed to the regexp and use that to get a hint
    1172             : // of the character frequencies in the inputs. This affects the assessment of
    1173             : // whether the set of characters is 'reasonably constrained'.
    1174             : //
    1175             : // We also have another lookahead mechanism (called quick check in the code),
    1176             : // which uses a wide load of multiple characters followed by a mask and compare
    1177             : // to determine whether a match is possible at this point.
    1178             : enum ContainedInLattice {
    1179             :   kNotYet = 0,
    1180             :   kLatticeIn = 1,
    1181             :   kLatticeOut = 2,
    1182             :   kLatticeUnknown = 3  // Can also mean both in and out.
    1183             : };
    1184             : 
    1185             : inline ContainedInLattice
    1186         392 : Combine(ContainedInLattice a, ContainedInLattice b) {
    1187         392 :     return static_cast<ContainedInLattice>(a | b);
    1188             : }
    1189             : 
    1190             : ContainedInLattice
    1191             : AddRange(ContainedInLattice a,
    1192             :          const int* ranges,
    1193             :          int ranges_size,
    1194             :          Interval new_range);
    1195             : 
    1196             : class BoyerMoorePositionInfo
    1197             : {
    1198             :   public:
    1199          48 :     explicit BoyerMoorePositionInfo(LifoAlloc* alloc, bool unicode_ignore_case)
    1200          48 :       : map_(*alloc),
    1201             :         map_count_(0),
    1202             :         w_(kNotYet),
    1203             :         s_(kNotYet),
    1204             :         d_(kNotYet),
    1205             :         surrogate_(kNotYet),
    1206          48 :         unicode_ignore_case_(unicode_ignore_case)
    1207             :     {
    1208          48 :         map_.reserve(kMapSize);
    1209        6192 :         for (int i = 0; i < kMapSize; i++)
    1210        6144 :             map_.append(false);
    1211          48 :     }
    1212             : 
    1213       19435 :     bool& at(int i) { return map_[i]; }
    1214             : 
    1215             :     static const int kMapSize = 128;
    1216             :     static const int kMask = kMapSize - 1;
    1217             : 
    1218         213 :     int map_count() const { return map_count_; }
    1219             : 
    1220             :     void Set(int character);
    1221             :     void SetInterval(const Interval& interval);
    1222             :     void SetAll();
    1223           2 :     bool is_non_word() { return w_ == kLatticeOut; }
    1224           2 :     bool is_word() { return w_ == kLatticeIn; }
    1225             : 
    1226             :   private:
    1227             :     InfallibleVector<bool, 0> map_;
    1228             :     int map_count_;  // Number of set bits in the map.
    1229             :     ContainedInLattice w_;  // The \w character class.
    1230             :     ContainedInLattice s_;  // The \s character class.
    1231             :     ContainedInLattice d_;  // The \d character class.
    1232             :     ContainedInLattice surrogate_;  // Surrogate UTF-16 code units.
    1233             : 
    1234             :     // True if the RegExp has unicode and ignoreCase flags.
    1235             :     bool unicode_ignore_case_;
    1236             : };
    1237             : 
    1238             : typedef InfallibleVector<BoyerMoorePositionInfo*, 1> BoyerMoorePositionInfoVector;
    1239             : 
    1240             : class BoyerMooreLookahead
    1241             : {
    1242             :   public:
    1243             :     BoyerMooreLookahead(LifoAlloc* alloc, size_t length, RegExpCompiler* compiler);
    1244             : 
    1245         169 :     int length() { return length_; }
    1246          68 :     int max_char() { return max_char_; }
    1247         153 :     RegExpCompiler* compiler() { return compiler_; }
    1248             : 
    1249         192 :     int Count(int map_number) {
    1250         192 :         return bitmaps_[map_number]->map_count();
    1251             :     }
    1252             : 
    1253           4 :     BoyerMoorePositionInfo* at(int i) { return bitmaps_[i]; }
    1254             : 
    1255          66 :     void Set(int map_number, int character) {
    1256          66 :         if (character > max_char_) return;
    1257          66 :         BoyerMoorePositionInfo* info = bitmaps_[map_number];
    1258          66 :         info->Set(character);
    1259             :     }
    1260             : 
    1261          35 :     void SetInterval(int map_number, const Interval& interval) {
    1262          35 :         if (interval.from() > max_char_) return;
    1263          35 :         BoyerMoorePositionInfo* info = bitmaps_[map_number];
    1264          35 :         if (interval.to() > max_char_) {
    1265           0 :             info->SetInterval(Interval(interval.from(), max_char_));
    1266             :         } else {
    1267          35 :             info->SetInterval(interval);
    1268             :         }
    1269             :     }
    1270             : 
    1271           2 :     void SetAll(int map_number) {
    1272           2 :         bitmaps_[map_number]->SetAll();
    1273           2 :     }
    1274             : 
    1275           0 :     void SetRest(int from_map) {
    1276           0 :         for (int i = from_map; i < length_; i++) SetAll(i);
    1277           0 :     }
    1278             :     bool EmitSkipInstructions(RegExpMacroAssembler* masm);
    1279             : 
    1280             :     bool CheckOverRecursed();
    1281             : 
    1282             :   private:
    1283             :     // This is the value obtained by EatsAtLeast.  If we do not have at least this
    1284             :     // many characters left in the sample string then the match is bound to fail.
    1285             :     // Therefore it is OK to read a character this far ahead of the current match
    1286             :     // point.
    1287             :     int length_;
    1288             :     RegExpCompiler* compiler_;
    1289             : 
    1290             :     // 0xff for LATIN1, 0xffff for UTF-16.
    1291             :     int max_char_;
    1292             :     BoyerMoorePositionInfoVector bitmaps_;
    1293             : 
    1294             :     int GetSkipTable(int min_lookahead,
    1295             :                      int max_lookahead,
    1296             :                      uint8_t* boolean_skip_table);
    1297             :     bool FindWorthwhileInterval(int* from, int* to);
    1298             :     int FindBestInterval(int max_number_of_chars, int old_biggest_points, int* from, int* to);
    1299             : };
    1300             : 
    1301             : // There are many ways to generate code for a node.  This class encapsulates
    1302             : // the current way we should be generating.  In other words it encapsulates
    1303             : // the current state of the code generator.  The effect of this is that we
    1304             : // generate code for paths that the matcher can take through the regular
    1305             : // expression.  A given node in the regexp can be code-generated several times
    1306             : // as it can be part of several traces.  For example for the regexp:
    1307             : // /foo(bar|ip)baz/ the code to match baz will be generated twice, once as part
    1308             : // of the foo-bar-baz trace and once as part of the foo-ip-baz trace.  The code
    1309             : // to match foo is generated only once (the traces have a common prefix).  The
    1310             : // code to store the capture is deferred and generated (twice) after the places
    1311             : // where baz has been matched.
    1312             : class Trace
    1313             : {
    1314             :   public:
    1315             :     // A value for a property that is either known to be true, know to be false,
    1316             :     // or not known.
    1317             :     enum TriBool {
    1318             :         UNKNOWN = -1, FALSE_VALUE = 0, TRUE_VALUE = 1
    1319             :     };
    1320             : 
    1321             :     class DeferredAction {
    1322             :       public:
    1323         194 :         DeferredAction(ActionNode::ActionType action_type, int reg)
    1324         194 :           : action_type_(action_type), reg_(reg), next_(nullptr)
    1325         194 :         {}
    1326             : 
    1327         862 :         DeferredAction* next() { return next_; }
    1328             :         bool Mentions(int reg);
    1329        1265 :         int reg() { return reg_; }
    1330        1147 :         ActionNode::ActionType action_type() { return action_type_; }
    1331             :       private:
    1332             :         ActionNode::ActionType action_type_;
    1333             :         int reg_;
    1334             :         DeferredAction* next_;
    1335             :         friend class Trace;
    1336             :     };
    1337             : 
    1338             :     class DeferredCapture : public DeferredAction {
    1339             :       public:
    1340         164 :         DeferredCapture(int reg, bool is_capture, Trace* trace)
    1341         164 :           : DeferredAction(ActionNode::STORE_POSITION, reg),
    1342         164 :             cp_offset_(trace->cp_offset()),
    1343         328 :             is_capture_(is_capture)
    1344         164 :         {}
    1345             : 
    1346         241 :         int cp_offset() { return cp_offset_; }
    1347          79 :         bool is_capture() { return is_capture_; }
    1348             :       private:
    1349             :         int cp_offset_;
    1350             :         bool is_capture_;
    1351             :         void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
    1352             :     };
    1353             : 
    1354             :     class DeferredSetRegister : public DeferredAction {
    1355             :       public:
    1356          11 :         DeferredSetRegister(int reg, int value)
    1357          11 :           : DeferredAction(ActionNode::SET_REGISTER, reg),
    1358          11 :             value_(value)
    1359          11 :         {}
    1360          11 :         int value() { return value_; }
    1361             :       private:
    1362             :         int value_;
    1363             :     };
    1364             : 
    1365             :     class DeferredClearCaptures : public DeferredAction {
    1366             :       public:
    1367           6 :         explicit DeferredClearCaptures(Interval range)
    1368           6 :           : DeferredAction(ActionNode::CLEAR_CAPTURES, -1),
    1369           6 :             range_(range)
    1370           6 :         {}
    1371             : 
    1372          32 :         Interval range() { return range_; }
    1373             :       private:
    1374             :         Interval range_;
    1375             :     };
    1376             : 
    1377             :     class DeferredIncrementRegister : public DeferredAction {
    1378             :       public:
    1379          13 :         explicit DeferredIncrementRegister(int reg)
    1380          13 :           : DeferredAction(ActionNode::INCREMENT_REGISTER, reg)
    1381          13 :         {}
    1382             :     };
    1383             : 
    1384         355 :     Trace()
    1385         355 :       : cp_offset_(0),
    1386             :         actions_(nullptr),
    1387             :         backtrack_(nullptr),
    1388             :         stop_node_(nullptr),
    1389             :         loop_label_(nullptr),
    1390             :         characters_preloaded_(0),
    1391             :         bound_checked_up_to_(0),
    1392             :         flush_budget_(100),
    1393         355 :         at_start_(UNKNOWN)
    1394         355 :     {}
    1395             : 
    1396             :     // End the trace.  This involves flushing the deferred actions in the trace
    1397             :     // and pushing a backtrack location onto the backtrack stack.  Once this is
    1398             :     // done we can start a new trace or go to one that has already been
    1399             :     // generated.
    1400             :     void Flush(RegExpCompiler* compiler, RegExpNode* successor);
    1401             : 
    1402        1586 :     int cp_offset() { return cp_offset_; }
    1403         249 :     DeferredAction* actions() { return actions_; }
    1404             : 
    1405             :     // A trivial trace is one that has no deferred actions or other state that
    1406             :     // affects the assumptions used when generating code.  There is no recorded
    1407             :     // backtrack location in a trivial trace, so with a trivial trace we will
    1408             :     // generate code that, on a failure to match, gets the backtrack location
    1409             :     // from the backtrack stack rather than using a direct jump instruction.  We
    1410             :     // always start code generation with a trivial trace and non-trivial traces
    1411             :     // are created as we emit code for nodes or add to the list of deferred
    1412             :     // actions in the trace.  The location of the code generated for a node using
    1413             :     // a trivial trace is recorded in a label in the node so that gotos can be
    1414             :     // generated to that code.
    1415        1215 :     bool is_trivial() {
    1416        1862 :         return backtrack_ == nullptr &&
    1417        1155 :             actions_ == nullptr &&
    1418         942 :             cp_offset_ == 0 &&
    1419         859 :             characters_preloaded_ == 0 &&
    1420         835 :             bound_checked_up_to_ == 0 &&
    1421        2035 :             quick_check_performed_.characters() == 0 &&
    1422        1625 :             at_start_ == UNKNOWN;
    1423             :     }
    1424             : 
    1425         270 :     TriBool at_start() { return at_start_; }
    1426         319 :     void set_at_start(bool at_start) {
    1427         319 :         at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE;
    1428         319 :     }
    1429        1474 :     jit::Label* backtrack() { return backtrack_; }
    1430          26 :     jit::Label* loop_label() { return loop_label_; }
    1431        1060 :     RegExpNode* stop_node() { return stop_node_; }
    1432         425 :     int characters_preloaded() { return characters_preloaded_; }
    1433         232 :     int bound_checked_up_to() { return bound_checked_up_to_; }
    1434         186 :     int flush_budget() { return flush_budget_; }
    1435         993 :     QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
    1436             :     bool mentions_reg(int reg);
    1437             : 
    1438             :     // Returns true if a deferred position store exists to the specified
    1439             :     // register and stores the offset in the out-parameter.  Otherwise
    1440             :     // returns false.
    1441             :     bool GetStoredPosition(int reg, int* cp_offset);
    1442             : 
    1443             :     // These set methods and AdvanceCurrentPositionInTrace should be used only on
    1444             :     // new traces - the intention is that traces are immutable after creation.
    1445         194 :     void add_action(DeferredAction* new_action) {
    1446         194 :         MOZ_ASSERT(new_action->next_ == nullptr);
    1447         194 :         new_action->next_ = actions_;
    1448         194 :         actions_ = new_action;
    1449         194 :     }
    1450             : 
    1451         234 :     void set_backtrack(jit::Label* backtrack) { backtrack_ = backtrack; }
    1452          26 :     void set_stop_node(RegExpNode* node) { stop_node_ = node; }
    1453          26 :     void set_loop_label(jit::Label* label) { loop_label_ = label; }
    1454         318 :     void set_characters_preloaded(int count) { characters_preloaded_ = count; }
    1455         186 :     void set_bound_checked_up_to(int to) { bound_checked_up_to_ = to; }
    1456         101 :     void set_flush_budget(int to) { flush_budget_ = to; }
    1457         110 :     void set_quick_check_performed(QuickCheckDetails* d) {
    1458         110 :         quick_check_performed_ = *d;
    1459         110 :     }
    1460             :     void InvalidateCurrentCharacter();
    1461             :     void AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler);
    1462             : 
    1463             :   private:
    1464             :     int FindAffectedRegisters(LifoAlloc* alloc, OutSet* affected_registers);
    1465             :     void PerformDeferredActions(LifoAlloc* alloc,
    1466             :                                 RegExpMacroAssembler* macro,
    1467             :                                 int max_register,
    1468             :                                 OutSet& affected_registers,
    1469             :                                 OutSet* registers_to_pop,
    1470             :                                 OutSet* registers_to_clear);
    1471             :     void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
    1472             :                                   int max_register,
    1473             :                                   OutSet& registers_to_pop,
    1474             :                                   OutSet& registers_to_clear);
    1475             :     int cp_offset_;
    1476             :     DeferredAction* actions_;
    1477             :     jit::Label* backtrack_;
    1478             :     RegExpNode* stop_node_;
    1479             :     jit::Label* loop_label_;
    1480             :     int characters_preloaded_;
    1481             :     int bound_checked_up_to_;
    1482             :     QuickCheckDetails quick_check_performed_;
    1483             :     int flush_budget_;
    1484             :     TriBool at_start_;
    1485             : };
    1486             : 
    1487          40 : class NodeVisitor
    1488             : {
    1489             :   public:
    1490          40 :     virtual ~NodeVisitor() { }
    1491             : #define DECLARE_VISIT(Type)                                          \
    1492             :     virtual void Visit##Type(Type##Node* that) = 0;
    1493             :     FOR_EACH_NODE_TYPE(DECLARE_VISIT)
    1494             : #undef DECLARE_VISIT
    1495           0 :     virtual void VisitLoopChoice(LoopChoiceNode* that) { VisitChoice(that); }
    1496             : };
    1497             : 
    1498             : // Assertion propagation moves information about assertions such as
    1499             : // \b to the affected nodes.  For instance, in /.\b./ information must
    1500             : // be propagated to the first '.' that whatever follows needs to know
    1501             : // if it matched a word or a non-word, and to the second '.' that it
    1502             : // has to check if it succeeds a word or non-word.  In this case the
    1503             : // result will be something like:
    1504             : //
    1505             : //   +-------+        +------------+
    1506             : //   |   .   |        |      .     |
    1507             : //   +-------+  --->  +------------+
    1508             : //   | word? |        | check word |
    1509             : //   +-------+        +------------+
    1510          40 : class Analysis : public NodeVisitor
    1511             : {
    1512             :   public:
    1513          40 :     Analysis(JSContext* cx, bool ignore_case, bool is_latin1, bool unicode)
    1514          40 :       : cx(cx),
    1515             :         ignore_case_(ignore_case),
    1516             :         is_latin1_(is_latin1),
    1517             :         unicode_(unicode),
    1518          40 :         error_message_(nullptr)
    1519          40 :     {}
    1520             : 
    1521             :     void EnsureAnalyzed(RegExpNode* node);
    1522             : 
    1523             : #define DECLARE_VISIT(Type)                     \
    1524             :     virtual void Visit##Type(Type##Node* that);
    1525             :     FOR_EACH_NODE_TYPE(DECLARE_VISIT)
    1526             : #undef DECLARE_VISIT
    1527             :     virtual void VisitLoopChoice(LoopChoiceNode* that);
    1528             : 
    1529         643 :     bool has_failed() { return error_message_ != nullptr; }
    1530           0 :     const char* errorMessage() {
    1531           0 :         MOZ_ASSERT(error_message_ != nullptr);
    1532           0 :         return error_message_;
    1533             :     }
    1534           0 :     void failASCII(const char* error_message) {
    1535           0 :         error_message_ = error_message;
    1536           0 :     }
    1537             : 
    1538             :   private:
    1539             :     JSContext* cx;
    1540             :     bool ignore_case_;
    1541             :     bool is_latin1_;
    1542             :     bool unicode_;
    1543             :     const char* error_message_;
    1544             : 
    1545             :     Analysis(Analysis&) = delete;
    1546             :     void operator=(Analysis&) = delete;
    1547             : };
    1548             : 
    1549             : void
    1550             : AddClassNegated(const int* elmv, int elmc, CharacterRangeVector* ranges);
    1551             : 
    1552             : } }  // namespace js::irregexp
    1553             : 
    1554             : #endif  // V8_JSREGEXP_H_

Generated by: LCOV version 1.13