Line data Source code
1 : /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
2 : * vim: set ts=8 sts=4 et sw=4 tw=99: */
3 :
4 : // Copyright 2012 the V8 project authors. All rights reserved.
5 : // Redistribution and use in source and binary forms, with or without
6 : // modification, are permitted provided that the following conditions are
7 : // met:
8 : //
9 : // * Redistributions of source code must retain the above copyright
10 : // notice, this list of conditions and the following disclaimer.
11 : // * Redistributions in binary form must reproduce the above
12 : // copyright notice, this list of conditions and the following
13 : // disclaimer in the documentation and/or other materials provided
14 : // with the distribution.
15 : // * Neither the name of Google Inc. nor the names of its
16 : // contributors may be used to endorse or promote products derived
17 : // from this software without specific prior written permission.
18 : //
19 : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 :
31 : #include "irregexp/RegExpAST.h"
32 :
33 : using namespace js;
34 : using namespace js::irregexp;
35 :
36 : #define MAKE_ACCEPT(Name) \
37 : void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \
38 : return visitor->Visit##Name(this, data); \
39 : }
40 0 : FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT)
41 : #undef MAKE_ACCEPT
42 :
43 : #define MAKE_TYPE_CASE(Name) \
44 : RegExp##Name* RegExpTree::As##Name() { \
45 : return nullptr; \
46 : } \
47 : bool RegExpTree::Is##Name() { return false; }
48 1088 : FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
49 : #undef MAKE_TYPE_CASE
50 :
51 : #define MAKE_TYPE_CASE(Name) \
52 : RegExp##Name* RegExp##Name::As##Name() { \
53 : return this; \
54 : } \
55 : bool RegExp##Name::Is##Name() { return true; }
56 0 : FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
57 : #undef MAKE_TYPE_CASE
58 :
59 : static Interval
60 8 : ListCaptureRegisters(const RegExpTreeVector& children)
61 : {
62 8 : Interval result = Interval::Empty();
63 25 : for (size_t i = 0; i < children.length(); i++)
64 17 : result = result.Union(children[i]->CaptureRegisters());
65 8 : return result;
66 : }
67 :
68 : // ----------------------------------------------------------------------------
69 : // RegExpDisjunction
70 :
71 164 : RegExpDisjunction::RegExpDisjunction(RegExpTreeVector* alternatives)
72 164 : : alternatives_(alternatives)
73 : {
74 164 : MOZ_ASSERT(alternatives->length() > 1);
75 164 : RegExpTree* first_alternative = (*alternatives)[0];
76 164 : min_match_ = first_alternative->min_match();
77 164 : max_match_ = first_alternative->max_match();
78 594 : for (size_t i = 1; i < alternatives->length(); i++) {
79 430 : RegExpTree* alternative = (*alternatives)[i];
80 430 : min_match_ = Min(min_match_, alternative->min_match());
81 430 : max_match_ = Max(max_match_, alternative->max_match());
82 : }
83 164 : }
84 :
85 : Interval
86 2 : RegExpDisjunction::CaptureRegisters()
87 : {
88 2 : return ListCaptureRegisters(alternatives());
89 : }
90 :
91 : bool
92 2 : RegExpDisjunction::IsAnchoredAtStart()
93 : {
94 2 : const RegExpTreeVector& alternatives = this->alternatives();
95 2 : for (size_t i = 0; i < alternatives.length(); i++) {
96 2 : if (!alternatives[i]->IsAnchoredAtStart())
97 2 : return false;
98 : }
99 0 : return true;
100 : }
101 :
102 : bool
103 6 : RegExpDisjunction::IsAnchoredAtEnd()
104 : {
105 6 : const RegExpTreeVector& alternatives = this->alternatives();
106 6 : for (size_t i = 0; i < alternatives.length(); i++) {
107 6 : if (!alternatives[i]->IsAnchoredAtEnd())
108 6 : return false;
109 : }
110 0 : return true;
111 : }
112 :
113 : // ----------------------------------------------------------------------------
114 : // RegExpAlternative
115 :
116 3546 : static int IncreaseBy(int previous, int increase)
117 : {
118 3546 : if (RegExpTree::kInfinity - previous < increase)
119 497 : return RegExpTree::kInfinity;
120 3049 : return previous + increase;
121 : }
122 :
123 561 : RegExpAlternative::RegExpAlternative(RegExpTreeVector* nodes)
124 : : nodes_(nodes),
125 : min_match_(0),
126 561 : max_match_(0)
127 : {
128 561 : MOZ_ASSERT(nodes->length() > 1);
129 2334 : for (size_t i = 0; i < nodes->length(); i++) {
130 1773 : RegExpTree* node = (*nodes)[i];
131 1773 : int node_min_match = node->min_match();
132 1773 : min_match_ = IncreaseBy(min_match_, node_min_match);
133 1773 : int node_max_match = node->max_match();
134 1773 : max_match_ = IncreaseBy(max_match_, node_max_match);
135 : }
136 561 : }
137 :
138 : Interval
139 6 : RegExpAlternative::CaptureRegisters()
140 : {
141 6 : return ListCaptureRegisters(nodes());
142 : }
143 :
144 : bool
145 28 : RegExpAlternative::IsAnchoredAtStart()
146 : {
147 28 : const RegExpTreeVector& nodes = this->nodes();
148 29 : for (size_t i = 0; i < nodes.length(); i++) {
149 29 : RegExpTree* node = nodes[i];
150 29 : if (node->IsAnchoredAtStart()) { return true; }
151 8 : if (node->max_match() > 0) { return false; }
152 : }
153 0 : return false;
154 : }
155 :
156 : bool
157 29 : RegExpAlternative::IsAnchoredAtEnd()
158 : {
159 29 : const RegExpTreeVector& nodes = this->nodes();
160 32 : for (int i = nodes.length() - 1; i >= 0; i--) {
161 32 : RegExpTree* node = nodes[i];
162 32 : if (node->IsAnchoredAtEnd()) { return true; }
163 23 : if (node->max_match() > 0) { return false; }
164 : }
165 0 : return false;
166 : }
167 :
168 : // ----------------------------------------------------------------------------
169 : // RegExpAssertion
170 :
171 : bool
172 22 : RegExpAssertion::IsAnchoredAtStart()
173 : {
174 22 : return assertion_type() == RegExpAssertion::START_OF_INPUT;
175 : }
176 :
177 : bool
178 9 : RegExpAssertion::IsAnchoredAtEnd()
179 : {
180 9 : return assertion_type() == RegExpAssertion::END_OF_INPUT;
181 : }
182 :
183 : // ----------------------------------------------------------------------------
184 : // RegExpCharacterClass
185 :
186 : void
187 41 : RegExpCharacterClass::AppendToText(RegExpText* text)
188 : {
189 41 : text->AddElement(TextElement::CharClass(this));
190 41 : }
191 :
192 : CharacterRangeVector&
193 677 : CharacterSet::ranges(LifoAlloc* alloc)
194 : {
195 677 : if (ranges_ == nullptr) {
196 18 : ranges_ = alloc->newInfallible<CharacterRangeVector>(*alloc);
197 18 : CharacterRange::AddClassEscape(alloc, standard_set_type_, ranges_);
198 : }
199 677 : return *ranges_;
200 : }
201 :
202 : // ----------------------------------------------------------------------------
203 : // RegExpAtom
204 :
205 : void
206 34 : RegExpAtom::AppendToText(RegExpText* text)
207 : {
208 34 : text->AddElement(TextElement::Atom(this));
209 34 : }
210 :
211 : // ----------------------------------------------------------------------------
212 : // RegExpText
213 :
214 : void
215 0 : RegExpText::AppendToText(RegExpText* text)
216 : {
217 0 : for (size_t i = 0; i < elements().length(); i++)
218 0 : text->AddElement(elements()[i]);
219 0 : }
220 :
221 : // ----------------------------------------------------------------------------
222 : // RegExpQuantifier
223 :
224 : Interval
225 6 : RegExpQuantifier::CaptureRegisters()
226 : {
227 6 : return body()->CaptureRegisters();
228 : }
229 :
230 : // ----------------------------------------------------------------------------
231 : // RegExpCapture
232 :
233 : bool
234 1 : RegExpCapture::IsAnchoredAtStart()
235 : {
236 1 : return body()->IsAnchoredAtStart();
237 : }
238 :
239 : bool
240 4 : RegExpCapture::IsAnchoredAtEnd()
241 : {
242 4 : return body()->IsAnchoredAtEnd();
243 : }
244 :
245 : Interval
246 7 : RegExpCapture::CaptureRegisters()
247 : {
248 7 : Interval self(StartRegister(index()), EndRegister(index()));
249 7 : return self.Union(body()->CaptureRegisters());
250 : }
251 :
252 : // ----------------------------------------------------------------------------
253 : // RegExpLookahead
254 :
255 : Interval
256 0 : RegExpLookahead::CaptureRegisters()
257 : {
258 0 : return body()->CaptureRegisters();
259 : }
260 :
261 : bool
262 0 : RegExpLookahead::IsAnchoredAtStart()
263 : {
264 0 : return is_positive() && body()->IsAnchoredAtStart();
265 : }
|