LCOV - output.info - js/src/irregexp/RegExpParser.h

LCOV - code coverage report

Current view:	top level - js/src/irregexp - RegExpParser.h (source / functions)		Hit	Total	Coverage
Test:	output.info	Lines:	63	72	87.5 %
Date:	2017-07-14 16:53:18	Functions:	37	42	88.1 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
       2             :  * vim: set ts=8 sts=4 et sw=4 tw=99: */
       3             : 
       4             : // Copyright 2012 the V8 project authors. All rights reserved.
       5             : // Redistribution and use in source and binary forms, with or without
       6             : // modification, are permitted provided that the following conditions are
       7             : // met:
       8             : //
       9             : //     * Redistributions of source code must retain the above copyright
      10             : //       notice, this list of conditions and the following disclaimer.
      11             : //     * Redistributions in binary form must reproduce the above
      12             : //       copyright notice, this list of conditions and the following
      13             : //       disclaimer in the documentation and/or other materials provided
      14             : //       with the distribution.
      15             : //     * Neither the name of Google Inc. nor the names of its
      16             : //       contributors may be used to endorse or promote products derived
      17             : //       from this software without specific prior written permission.
      18             : //
      19             : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      20             : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      21             : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      22             : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
      23             : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
      24             : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
      25             : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      26             : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      27             : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      28             : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
      29             : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      30             : 
      31             : #ifndef V8_PARSER_H_
      32             : #define V8_PARSER_H_
      33             : 
      34             : #include "mozilla/Range.h"
      35             : 
      36             : #include <stdarg.h>
      37             : 
      38             : #include "irregexp/RegExpAST.h"
      39             : 
      40             : namespace js {
      41             : 
      42             : namespace frontend {
      43             :     class TokenStream;
      44             : }
      45             : 
      46             : namespace irregexp {
      47             : 
      48             : bool
      49             : ParsePattern(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
      50             :              bool multiline, bool match_only, bool unicode, bool ignore_case,
      51             :              bool global, bool sticky, RegExpCompileData* data);
      52             : 
      53             : bool
      54             : ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc, JSAtom* str,
      55             :                    bool unicode);
      56             : 
      57             : bool
      58             : ParsePatternSyntax(frontend::TokenStream& ts, LifoAlloc& alloc,
      59             :                    const mozilla::Range<const char16_t> chars, bool unicode);
      60             : 
      61             : // A BufferedVector is an automatically growing list, just like (and backed
      62             : // by) a Vector, that is optimized for the case of adding and removing
      63             : // a single element. The last element added is stored outside the backing list,
      64             : // and if no more than one element is ever added, the ZoneList isn't even
      65             : // allocated.
      66             : // Elements must not be nullptr pointers.
      67             : template <typename T, int initial_size>
      68             : class BufferedVector
      69             : {
      70             :   public:
      71             :     typedef InfallibleVector<T*, 1> VectorType;
      72             : 
      73        3363 :     BufferedVector() : list_(nullptr), last_(nullptr) {}
      74             : 
      75             :     // Adds element at end of list. This element is buffered and can
      76             :     // be read using last() or removed using RemoveLast until a new Add or until
      77             :     // RemoveLast or GetList has been called.
      78        6263 :     void Add(LifoAlloc* alloc, T* value) {
      79        6263 :         if (last_ != nullptr) {
      80        1919 :             if (list_ == nullptr) {
      81         950 :                 list_ = alloc->newInfallible<VectorType>(*alloc);
      82         950 :                 list_->reserve(initial_size);
      83             :             }
      84        1919 :             list_->append(last_);
      85             :         }
      86        6263 :         last_ = value;
      87        6263 :     }
      88             : 
      89        3230 :     T* last() {
      90        3230 :         MOZ_ASSERT(last_ != nullptr);
      91        3230 :         return last_;
      92             :     }
      93             : 
      94         591 :     T* RemoveLast() {
      95         591 :         MOZ_ASSERT(last_ != nullptr);
      96         591 :         T* result = last_;
      97         591 :         if ((list_ != nullptr) && (list_->length() > 0))
      98         238 :             last_ = list_->popCopy();
      99             :         else
     100         353 :             last_ = nullptr;
     101         591 :         return result;
     102             :     }
     103             : 
     104          75 :     T* Get(int i) {
     105          75 :         MOZ_ASSERT((0 <= i) && (i < length()));
     106          75 :         if (list_ == nullptr) {
     107           0 :             MOZ_ASSERT(0 == i);
     108           0 :             return last_;
     109             :         } else {
     110          75 :             if (size_t(i) == list_->length()) {
     111          36 :                 MOZ_ASSERT(last_ != nullptr);
     112          36 :                 return last_;
     113             :             } else {
     114          39 :                 return (*list_)[i];
     115             :             }
     116             :         }
     117             :     }
     118             : 
     119        2872 :     void Clear() {
     120        2872 :         list_ = nullptr;
     121        2872 :         last_ = nullptr;
     122        2872 :     }
     123             : 
     124        6392 :     int length() {
     125        6392 :         int length = (list_ == nullptr) ? 0 : list_->length();
     126        6392 :         return length + ((last_ == nullptr) ? 0 : 1);
     127             :     }
     128             : 
     129         725 :     VectorType* GetList(LifoAlloc* alloc) {
     130         725 :         if (list_ == nullptr)
     131           0 :             list_ = alloc->newInfallible<VectorType>(*alloc);
     132         725 :         if (last_ != nullptr) {
     133         725 :             list_->append(last_);
     134         725 :             last_ = nullptr;
     135             :         }
     136         725 :         return list_;
     137             :     }
     138             : 
     139             :   private:
     140             :     VectorType* list_;
     141             :     T* last_;
     142             : };
     143             : 
     144             : 
     145             : // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
     146             : class RegExpBuilder
     147             : {
     148             :   public:
     149             :     explicit RegExpBuilder(LifoAlloc* alloc);
     150             :     void AddCharacter(char16_t character);
     151             :     // "Adds" an empty expression. Does nothing except consume a
     152             :     // following quantifier
     153             :     void AddEmpty();
     154             :     void AddAtom(RegExpTree* tree);
     155             :     void AddAssertion(RegExpTree* tree);
     156             :     void NewAlternative();  // '|'
     157             :     void AddQuantifierToAtom(int min, int max, RegExpQuantifier::QuantifierType type);
     158             :     RegExpTree* ToRegExp();
     159             : 
     160             :   private:
     161             :     void FlushCharacters();
     162             :     void FlushText();
     163             :     void FlushTerms();
     164             : 
     165             :     LifoAlloc* alloc;
     166             :     bool pending_empty_;
     167             :     CharacterVector* characters_;
     168             :     BufferedVector<RegExpTree, 2> terms_;
     169             :     BufferedVector<RegExpTree, 2> text_;
     170             :     BufferedVector<RegExpTree, 2> alternatives_;
     171             : 
     172             :     enum LastAdded {
     173             :         ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM
     174             :     };
     175             : #ifdef DEBUG
     176             :     LastAdded last_added_;
     177             : #endif
     178             : };
     179             : 
     180             : // Characters parsed by RegExpParser can be either char16_t or kEndMarker.
     181             : typedef uint32_t widechar;
     182             : 
     183             : template <typename CharT>
     184             : class RegExpParser
     185             : {
     186             :   public:
     187             :     RegExpParser(frontend::TokenStream& ts, LifoAlloc* alloc,
     188             :                  const CharT* chars, const CharT* end, bool multiline_mode, bool unicode,
     189             :                  bool ignore_case);
     190             : 
     191             :     RegExpTree* ParsePattern();
     192             :     RegExpTree* ParseDisjunction();
     193             :     RegExpTree* ParseCharacterClass();
     194             : 
     195             :     // Parses a {...,...} quantifier and stores the range in the given
     196             :     // out parameters.
     197             :     bool ParseIntervalQuantifier(int* min_out, int* max_out);
     198             : 
     199             :     // Tries to parse the input as a single escaped character.  If successful
     200             :     // it stores the result in the output parameter and returns true.
     201             :     // Otherwise it throws an error and returns false.  The character must not
     202             :     // be 'b' or 'B' since they are usually handled specially.
     203             :     bool ParseClassCharacterEscape(widechar* code);
     204             : 
     205             :     // Checks whether the following is a length-digit hexadecimal number,
     206             :     // and sets the value if it is.
     207             :     bool ParseHexEscape(int length, widechar* value);
     208             : 
     209             :     bool ParseBracedHexEscape(widechar* value);
     210             :     bool ParseTrailSurrogate(widechar* value);
     211             :     bool ParseRawSurrogatePair(char16_t* lead, char16_t* trail);
     212             : 
     213             :     widechar ParseOctalLiteral();
     214             : 
     215             :     // Tries to parse the input as a back reference.  If successful it
     216             :     // stores the result in the output parameter and returns true.  If
     217             :     // it fails it will push back the characters read so the same characters
     218             :     // can be reparsed.
     219             :     bool ParseBackReferenceIndex(int* index_out);
     220             : 
     221             :     bool ParseClassAtom(char16_t* char_class, widechar *value);
     222             : 
     223             :   private:
     224             :     void SyntaxError(unsigned errorNumber, ...);
     225             : 
     226             :   public:
     227             :     RegExpTree* ReportError(unsigned errorNumber, const char* param = nullptr);
     228             : 
     229             :     void Advance();
     230         727 :     void Advance(int dist) {
     231         727 :         next_pos_ += dist - 1;
     232         727 :         Advance();
     233         727 :     }
     234             : 
     235           0 :     void Reset(const CharT* pos) {
     236           0 :         next_pos_ = pos;
     237           0 :         has_more_ = (pos < end_);
     238           0 :         Advance();
     239           0 :     }
     240             : 
     241             :     // Reports whether the pattern might be used as a literal search string.
     242             :     // Only use if the result of the parse is a single atom node.
     243          40 :     bool simple() { return simple_; }
     244          40 :     bool contains_anchor() { return contains_anchor_; }
     245         259 :     void set_contains_anchor() { contains_anchor_ = true; }
     246        1039 :     int captures_started() { return captures_ == nullptr ? 0 : captures_->length(); }
     247         261 :     const CharT* position() { return next_pos_ - 1; }
     248             : 
     249             :     static const int kMaxCaptures = 1 << 16;
     250             :     static const widechar kEndMarker = (1 << 21);
     251             : 
     252             :   private:
     253             :     enum SubexpressionType {
     254             :         INITIAL,
     255             :         CAPTURE,  // All positive values represent captures.
     256             :         POSITIVE_LOOKAHEAD,
     257             :         NEGATIVE_LOOKAHEAD,
     258             :         GROUPING
     259             :     };
     260             : 
     261             :     class RegExpParserState {
     262             :       public:
     263        1121 :         RegExpParserState(LifoAlloc* alloc,
     264             :                           RegExpParserState* previous_state,
     265             :                           SubexpressionType group_type,
     266             :                           int disjunction_capture_index)
     267             :             : previous_state_(previous_state),
     268        1121 :               builder_(alloc->newInfallible<RegExpBuilder>(alloc)),
     269             :               group_type_(group_type),
     270        2242 :               disjunction_capture_index_(disjunction_capture_index)
     271        1121 :         {}
     272             :         // Parser state of containing expression, if any.
     273         364 :         RegExpParserState* previous_state() { return previous_state_; }
     274        1121 :         bool IsSubexpression() { return previous_state_ != nullptr; }
     275             :         // RegExpBuilder building this regexp's AST.
     276        1485 :         RegExpBuilder* builder() { return builder_; }
     277             :         // Type of regexp being parsed (parenthesized group or entire regexp).
     278        1485 :         SubexpressionType group_type() { return group_type_; }
     279             :         // Index in captures array of first capture in this sub-expression, if any.
     280             :         // Also the capture index of this sub-expression itself, if group_type
     281             :         // is CAPTURE.
     282         364 :         int capture_index() { return disjunction_capture_index_; }
     283             : 
     284             :       private:
     285             :         // Linked list implementation of stack of states.
     286             :         RegExpParserState* previous_state_;
     287             :         // Builder for the stored disjunction.
     288             :         RegExpBuilder* builder_;
     289             :         // Stored disjunction type (capture, look-ahead or grouping), if any.
     290             :         SubexpressionType group_type_;
     291             :         // Stored disjunction's capture index (if any).
     292             :         int disjunction_capture_index_;
     293             :     };
     294             : 
     295       27481 :     widechar current() { return current_; }
     296        2452 :     bool has_more() { return has_more_; }
     297        2379 :     bool has_next() { return next_pos_ < end_; }
     298        2017 :     widechar Next() {
     299        2017 :         if (has_next())
     300        2017 :             return *next_pos_;
     301           0 :         return kEndMarker;
     302             :     }
     303             :     void ScanForCaptures();
     304             : 
     305             :     frontend::TokenStream& ts;
     306             :     LifoAlloc* alloc;
     307             :     RegExpCaptureVector* captures_;
     308             :     const CharT* const start_;
     309             :     const CharT* next_pos_;
     310             :     const CharT* end_;
     311             :     widechar current_;
     312             :     // The capture count is only valid after we have scanned for captures.
     313             :     int capture_count_;
     314             :     bool has_more_;
     315             :     bool multiline_;
     316             :     bool unicode_;
     317             :     bool ignore_case_;
     318             :     bool simple_;
     319             :     bool contains_anchor_;
     320             :     bool is_scanned_for_captures_;
     321             : };
     322             : 
     323             : } } // namespace js::irregexp
     324             : 
     325             : #endif // V8_PARSER_H_

Generated by: LCOV version 1.13